# 导入os和xml.etree.ElementTree模块
import os
import posixpath
import xml.etree.ElementTree as ET

def update_annotation(root, xml_file, new_path_prefix, classes_count):
    """Rewrite one annotation tree in place and update the class statistics.

    The function sets ``<filename>`` to the XML file's stem plus ``.jpg``,
    sets ``<path>`` to ``new_path_prefix`` joined with that file name, renames
    every ``smoke`` object to ``smoking`` and counts how often each class
    name occurs.

    Args:
        root: Root element of the parsed annotation file.
        xml_file: File name of the annotation (e.g. ``"0001.xml"``).
        new_path_prefix: POSIX directory that is written in front of the
            image file name inside ``<path>``.
        classes_count: Dict mapping class name -> number of occurrences;
            updated in place. Keys are the names found in the file *before*
            the ``smoke`` -> ``smoking`` rename.
    """
    # ------------- update the <filename> element -------------
    filename_element = root.find('filename')
    if filename_element is None:
        filename_element = ET.SubElement(root, "filename")

    filename_text = os.path.splitext(xml_file)[0] + '.jpg'
    filename_element.text = filename_text

    # ------------- update the <path> element -------------
    path_element = root.find('path')
    if path_element is None:
        path_element = ET.SubElement(root, "path")

    # The prefix is a POSIX path, so join with posixpath: os.path.join would
    # insert a backslash when this script is run on Windows.
    path_element.text = posixpath.join(new_path_prefix, filename_text)

    # ----------- removing the <path> element (reference) -----------
    # If <path> (or any other element) is not wanted, it can be removed
    # like this:
    # while True:
    #     path_element = root.find('path')
    #     if path_element is not None:
    #         root.remove(path_element)
    #     else:
    #         break

    # ------------- walk over every <object> element -------------
    for obj in root.findall('object'):
        name_element = obj.find('name')
        if name_element is None:
            # A malformed object must not abort the whole batch half-way.
            print(xml_file, '<object> has no <name> element, skipped')
            continue

        name = name_element.text
        # Rename "smoke" to "smoking"; report every other class name.
        if name == 'smoke':
            name_element.text = 'smoking'
        else:
            print(xml_file, name)
        # Count occurrences per class name; the first hit counts as 1.
        classes_count[name] = classes_count.get(name, 0) + 1


if __name__ == '__main__':
    # Folder holding the annotations and the new prefix for <path>.
    folder_path = r'D:\datasets\smoking\Annotations'
    new_path_prefix = '/root/dataset/smoking'
    # Collect every xml file in the folder.
    xml_files = [f for f in os.listdir(folder_path) if f.endswith('.xml')]
    total = len(xml_files)
    print('Start.')
    classes_count = {}
    for i, xml_file in enumerate(xml_files):
        xml_path = os.path.join(folder_path, xml_file)
        # Load the xml file, patch it and write it back to the same place.
        tree = ET.parse(xml_path)
        update_annotation(tree.getroot(), xml_file, new_path_prefix,
                          classes_count)
        tree.write(xml_path)
        # Show the current progress as a 100-character progress bar.
        progress = int(100 * (i + 1) / total)
        finish = "▓" * progress
        need_do = "-" * (100 - progress)
        print(f"\r{progress}% |{finish}{need_do}| {i + 1}/{total}.", end="")
    print()
    print(f'img number: {total}, classes count:',  classes_count)
    print('Done.')

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐