因專案需要,需根據指定格式的檔案生成 XML 標注檔案,以便使用 LabelImg 開啟並進行編輯和查看。原始檔案預設以逗號分隔,每一行的格式如下所示:

- 第1個值:原始圖片切出的小圖檔案名稱,以 AIpng_x 命名,其中 x 代表原始圖片的第幾個切圖檔案
- 第2~5個值:分別對應於 ymin, xmin, ymax, xmax
- 第6個值:代表對應的標簽標注
在生成 XML 檔案時,需要對資料進行匯總,即將屬於同一個原始圖片的切圖小檔案的標注匯總到一起,其實作程式碼如下所示:
import os
from Logger import MyLogger
from xml.dom.minidom import Document,parse
from collections import defaultdict
import re
class OpeateXML:
    """Generate LabelImg-style XML annotation files from a comma-separated file.

    Each record of the source file is expected to have six comma-separated
    fields: ``tileName,ymin,xmin,ymax,xmax,label``.  ``tileName`` contains a
    ``.AIpng_<n>`` marker identifying the n-th tile cut from an original image;
    records of all tiles of one image are grouped and written to one XML file.
    """

    def __init__(self, srcPath: str, targetPath: str, srcFileName: str):
        """Store the source directory, the output directory and the source file name."""
        self._srcPath = srcPath
        self._targetPath = targetPath
        self._srcFileName = srcFileName

    def readSrcFileName(self, fileEncoding="utf8") -> dict:
        """Read the source file and group its records by original image name.

        The grouping key is the first field with ``.AIpng_<n>`` collapsed to
        ``.AIpng`` (case-insensitive).  Blank lines are skipped.

        Returns:
            A ``defaultdict(list)`` mapping image name -> list of stripped
            records (empty when the source file does not exist), or ``{}``
            when reading fails.
        """
        data = defaultdict(list)
        tilePattern = re.compile(r"\.AIpng_\d+", re.IGNORECASE)
        srcFileFullPath = os.path.join(self._srcPath, self._srcFileName)
        try:
            if os.path.exists(srcFileFullPath):
                with open(srcFileFullPath, mode="r", encoding=fileEncoding, errors="ignore") as fr:
                    for line in fr:
                        record = line.strip()
                        if not record:
                            # A blank line would otherwise be grouped under the key "".
                            continue
                        tileName = record.split(",")[0]
                        data[tilePattern.sub(".AIpng", tileName)].append(record)
        except Exception as ex:
            MyLogger().error(f"OperateXML:read file error:\n{ex}")
            return {}
        return data

    def getCreateXMLData(self, srcData: dict, mnlData: list) -> dict:
        """Mark the manually confirmed tiles inside ``srcData``.

        Args:
            srcData: grouped records as returned by ``readSrcFileName``.
            mnlData: names of manually confirmed tiles; a record is confirmed
                when any of these names is a substring of its first field.

        Every confirmed record has ``"auto"`` replaced by ``"mnl"``.
        ``srcData`` is updated in place and returned; ``{}`` is returned when
        an error occurs.
        """
        try:
            for records in srcData.values():
                for index, record in enumerate(records):
                    tileName = record.strip().split(",")[0]
                    if any(item in tileName for item in mnlData):
                        records[index] = record.replace("auto", "mnl")
        except Exception as ex:
            MyLogger().error(f"OperateXML: get data from location and mnldata interaction error\n{ex}")
            return {}
        return srcData

    def operateXML(self, data: dict) -> None:
        """Write one XML file per image, merging into the file if it already exists."""
        if not data:
            return
        for k in list(data):
            xmlFileFullPath = os.path.join(self._targetPath, os.path.splitext(k)[0] + ".xml")
            if os.path.exists(xmlFileFullPath):
                self.appendExistXML(data={k: data[k]}, xmlFileFullPath=xmlFileFullPath)
            else:
                self.createNewXML({k: data[k]})

    def appendExistXML(self, data: dict, xmlFileFullPath: str, fileEncoding="utf8"):
        """Merge the objects of an existing XML file with ``data`` and rewrite it.

        Objects already present in ``xmlFileFullPath`` are converted back to
        records (``existData,ymin,xmin,ymax,xmax,name``), combined with the
        records in ``data`` and written through ``createNewXML``.  Records
        describing the same box and label are written only once, so running
        the same input again does not duplicate objects.  ``data`` and the
        lists it holds are left unmodified.
        """
        try:
            rootNode = parse(xmlFileFullPath).documentElement
            key = self._firstChildText(rootNode, "filename")
            existing = []
            for obj in rootNode.getElementsByTagName("object"):
                name = self._firstChildText(obj, "name")
                for bndbox in obj.getElementsByTagName("bndbox"):
                    xmin = self._firstChildText(bndbox, "xmin")
                    ymin = self._firstChildText(bndbox, "ymin")
                    xmax = self._firstChildText(bndbox, "xmax")
                    ymax = self._firstChildText(bndbox, "ymax")
                    if None in (name, xmin, ymin, xmax, ymax):
                        # Incomplete object: it cannot be expressed as a record.
                        continue
                    existing.append(f"existData,{ymin},{xmin},{ymax},{xmax},{name}")

            merged = dict(data)
            if key not in merged and len(merged) == 1:
                # The XML belongs to the single image being updated even when
                # its <filename> element is missing or differs.
                key = next(iter(merged))
            if key is None:
                raise ValueError("no <filename> element found")
            # Existing objects first so their order in the file stays stable.
            merged[key] = self._dedupeRecords(existing + list(merged.get(key, [])))
            # The file is opened in "w" mode by createNewXML, so it is replaced
            # without being deleted first; a failed rewrite no longer leaves the
            # annotation file missing.
            self.createNewXML(data=merged, fileEncoding=fileEncoding)
        except Exception as ex:
            MyLogger().error(f"OperateXML:append content to {xmlFileFullPath} error\n{ex}")
            return

    def createNewXML(self, data: dict, fileEncoding="utf8") -> None:
        """Write one XML annotation file per key of ``data``.

        Args:
            data: mapping of image name -> list of records.
            fileEncoding: encoding used for the file and its XML declaration.

        Records that do not have exactly six fields, or whose label does not
        contain ``"/"``, are skipped.  A failure on one file is logged and the
        remaining files are still written.
        """
        if not data:
            return
        import codecs

        for k, v in data.items():
            try:
                doc = self._buildDocument(k, v)
                if self._targetPath:
                    os.makedirs(self._targetPath, exist_ok=True)
                xmlName = os.path.splitext(k)[0] + ".xml"
                targetPath = os.path.join(self._targetPath, xmlName)
                # Declare the canonical codec name (e.g. "utf-8") rather than a
                # Python-only alias such as "utf8".
                declaredEncoding = codecs.lookup(fileEncoding).name
                with open(targetPath, mode="w", encoding=fileEncoding, errors="ignore") as fw:
                    doc.writexml(fw, indent="\t", newl="\n", addindent="\t", encoding=declaredEncoding)
            except Exception as ex:
                MyLogger().error(f"OperateXML:Save xml error\n{ex}")

    def _buildDocument(self, imageName: str, records: list) -> Document:
        """Build the ``annotation`` DOM for one image from its records."""
        doc = Document()
        rootNode = doc.createElement("annotation")
        doc.appendChild(rootNode)
        self._appendTextElement(doc, rootNode, "folder", self._targetPath)
        self._appendTextElement(doc, rootNode, "filename", imageName)
        self._appendTextElement(doc, rootNode, "path", os.path.join(self._targetPath, imageName))
        for record in records:
            fields = [field.strip() for field in record.strip().split(",")]
            if len(fields) != 6:
                continue
            _, ymin, xmin, ymax, xmax, labelName = fields
            if "/" not in labelName:
                # Only labels containing "/" are written.
                continue
            objectNode = doc.createElement("object")
            rootNode.appendChild(objectNode)
            self._appendTextElement(doc, objectNode, "name", labelName)
            bndboxNode = doc.createElement("bndbox")
            objectNode.appendChild(bndboxNode)
            for tag, value in (("xmin", xmin), ("ymin", ymin), ("xmax", xmax), ("ymax", ymax)):
                self._appendTextElement(doc, bndboxNode, tag, value)
        return doc

    @staticmethod
    def _appendTextElement(doc, parent, tagName: str, text: str) -> None:
        """Append ``<tagName>text</tagName>`` to ``parent``."""
        element = doc.createElement(tagName)
        element.appendChild(doc.createTextNode(text))
        parent.appendChild(element)

    @staticmethod
    def _firstChildText(node, tagName: str):
        """Return the stripped text of the first ``tagName`` descendant, or None."""
        matches = node.getElementsByTagName(tagName)
        if not matches or not matches[0].childNodes:
            return None
        text = getattr(matches[0].childNodes[0], "data", None)
        return text.strip() if text is not None else None

    @staticmethod
    def _dedupeRecords(records: list) -> list:
        """Drop repeated records, keeping the first occurrence and the order.

        Six-field records are compared on box and label only (the first field
        is ignored); any other record is compared as a whole line.
        """
        seen = set()
        unique = []
        for record in records:
            fields = [field.strip() for field in record.strip().split(",")]
            identity = tuple(fields[1:]) if len(fields) == 6 else (record,)
            if identity in seen:
                continue
            seen.add(identity)
            unique.append(record)
        return unique
if __name__ == '__main__':
    # Example run: read locations.txt, mark the manually confirmed tiles and
    # write (or merge into) one XML annotation file per original image.
    srcPath = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs"
    srcName = "locations.txt"
    targetPath = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs\in_number"
    xmlOperator = OpeateXML(srcPath, targetPath, srcName)
    locationData = xmlOperator.readSrcFileName()
    # Tiles that were confirmed manually.
    testData = [
        'slide1_cell420_image0_met.AIpng_36.jpg',
        'slide1_cell420_image0_met.AIpng_33.jpg',
        'slide1_cell420_image0_met.AIpng_10.jpg',
        'slide1_cell420_image0_met.AIpng_30.jpg',
    ]
    confirmedData = xmlOperator.getCreateXMLData(locationData, testData)
    xmlOperator.operateXML(confirmedData)
最終生成的 XML 效果如下所示:

在 LabelImg 中的效果如下所示:

本文地址:https://www.cnblogs.com/surpassme/p/13204899.html
本文同步在微信訂閱號上發布,如各位小伙伴們喜歡我的文章,也可以關注我的微信訂閱號:woaitest,或掃描下面的二維碼添加關注:

轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/107514.html
標籤:Python
上一篇:將一行拆分為多行
