【深度学习】VOC格式转成YOLO格式,COCO格式转YOLO格式

【代码】VOC格式转成YOLO格式,COCO格式转YOLO格式。

千禧皓月

560人浏览 · 2022-12-09 16:42:31

千禧皓月 · 2022-12-09 16:42:31 发布

VOC格式转成YOLO格式

import shutil
import xml.etree.ElementTree as ET
import os



# The 20 PASCAL VOC object categories. A name's position in this tuple is the
# class index written into the YOLO label files, so the order must not change.
VOC_CLASSES = (
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)

""" Parse a PASCAL VOC xml file """

'''
yolo 标签格式(！！！！！！都要归一化！！！！！！！！！！)
class_index   center_x  center_y    w   h    
'''


def parse_rec(filename):
    """Parse one PASCAL VOC XML annotation into YOLO-style normalized boxes.

    For every ``<object>`` that is not flagged as difficult, the pixel box
    ``(xmin, ymin, xmax, ymax)`` is converted to
    ``[x_center, y_center, width, height]``, each divided by the image width
    or height read from the ``<size>`` element.

    Args:
        filename: Path to (or open file object of) the VOC XML annotation.

    Returns:
        A list of dicts, one per kept object, with keys ``'name'`` (the class
        name text) and ``'bbox'`` (``[x, y, w, h]`` as normalized floats).

    Raises:
        ZeroDivisionError: If the annotation declares a zero width or height.
    """
    tree = ET.parse(filename)
    size = tree.find("size")
    width = float(size.find("width").text)
    height = float(size.find("height").text)

    objects = []
    for obj in tree.findall('object'):
        # <difficult> is optional in many VOC-style exports; a missing or
        # empty tag is treated as "not difficult" instead of crashing.
        difficult = obj.find('difficult')
        if (difficult is not None and difficult.text is not None
                and int(difficult.text) == 1):
            continue

        bbox = obj.find('bndbox')
        xmin = float(bbox.find('xmin').text)
        ymin = float(bbox.find('ymin').text)
        xmax = float(bbox.find('xmax').text)
        ymax = float(bbox.find('ymax').text)

        # Box centre and extent, normalized by the image dimensions.
        x = (xmin + xmax) / 2.0 / width
        y = (ymin + ymax) / 2.0 / height
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height

        objects.append({'name': obj.find('name').text, 'bbox': [x, y, w, h]})
    return objects

'''
yolo数据集目录结构
|---images
        |-----train
        |-----val
|---labels
        |-----train
        |-----val
'''




# Convert each VOC split listed in ./ImageSets/Main/<task>.txt: write one YOLO
# label file per image under ../labels/<task> and copy the matching JPEG from
# ./JPEGImages into ../images/<task>.
tasks = ["train", "val"]
for task in tasks:
    labels_root = f"../labels/{task}"
    images_root = f"../images/{task}"
    # Create the output tree up front so the first write/copy cannot fail on
    # a missing directory.
    os.makedirs(labels_root, exist_ok=True)
    os.makedirs(images_root, exist_ok=True)

    Annotations = './Annotations/'
    old_images_root = "./JPEGImages"

    # strip() instead of x[:-1]: the last line may lack a trailing newline,
    # in which case slicing would cut off the final character of the image ID.
    # Blank lines are ignored.
    with open(f'./ImageSets/Main/{task}.txt', 'r') as txt:
        lines = [line.strip() for line in txt if line.strip()]

    for file in lines:
        xml_file = file + ".xml"
        image_name = file + '.jpg'
        image_path = os.path.join(images_root, image_name)
        # The label shares the image's full stem so the two names always pair
        # up, even when the ID itself contains a dot.
        txt_name = file + '.txt'
        txt_path = os.path.join(labels_root, txt_name)

        results = parse_rec(Annotations + xml_file)

        # Images with no usable objects are reported and skipped entirely
        # (no label file is written and the image is not copied).
        if len(results) == 0:
            print(xml_file)
            continue

        content = []
        for result in results:
            class_index = VOC_CLASSES.index(result['name'])  # name -> index
            bbox = result['bbox']  # normalized [x, y, w, h]
            content.append(
                f"{class_index} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n"
            )
        with open(txt_path, "w", encoding="UTF-8") as f:
            f.writelines(content)

        old_image_path = os.path.join(old_images_root, image_name)
        shutil.copy(old_image_path, image_path)
    print("完成")
print("全部完成")

COCO格式转YOLO格式

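# Convert a COCO-format dataset into a YOLO-format dataset.

# --json_path        path of the input COCO JSON annotation file
# --label_save_path  directory where the label .txt files are saved (default: ./labels/val2017)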

import os
import json
from tqdm import tqdm
import argparse



# Command-line options for the COCO -> YOLO conversion script.
parser = argparse.ArgumentParser()

# Location of the COCO JSON annotation file; change to your own path.
parser.add_argument('--json_path', default='./instances_val2017.json',type=str, help="json文件的路径")

# Directory that receives one label .txt file per image.
parser.add_argument('--label_save_path', default='./labels/val2017', type=str, help="转换后的标签文件存放位置")

# Relative directory of the images, used as the prefix in the image list file.
parser.add_argument('--image_save_path', default='./images/val2017', type=str, help="每张图片的相对路径")

# File that receives all category names, one per line.
# The help text previously repeated the --image_save_path description.
parser.add_argument('--classes_save_path', default='classes.txt', type=str, help="所有类别名称的保存文件")

# File that receives the relative path of every image, one per line.
parser.add_argument('--image_list_save_path', default='val2017.txt', type=str, help="所有图片相对路径保存文件")


# Parsed at module level because the script's main section reads `arg`.
arg = parser.parse_args()



def convert(image_width,image_height, box):
    """Convert a COCO bounding box to a normalized YOLO bounding box.

    Args:
        image_width: Width of the image in pixels.
        image_height: Height of the image in pixels.
        box: COCO box ``[x_min, y_min, width, height]`` in pixels.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension and rounded to 6 decimal places.
    """
    # box[2] and box[3] are already the box width and height, so they are used
    # directly; subtracting box[0] / box[1] from them produced wrong sizes.
    x = box[0] + box[2] / 2.0   # centre x
    y = box[1] + box[3] / 2.0   # centre y
    w = abs(box[2])             # box width
    h = abs(box[3])             # box height

    # Normalize to the [0, 1] range expected by YOLO labels.
    x = round(x / image_width, 6)
    w = round(w / image_width, 6)
    y = round(y / image_height, 6)
    h = round(h / image_height, 6)
    return (x, y, w, h)



if __name__ == '__main__':
    # Convert a COCO "object instances" JSON file into YOLO label files,
    # a class-name list, and a list of image paths.
    json_file = arg.json_path                          # COCO annotation JSON
    ana_txt_save_path = arg.label_save_path            # label output directory
    classes_path = arg.classes_save_path               # class-name list file
    image_save_path = arg.image_save_path              # relative image directory
    image_list_save_path = arg.image_list_save_path    # image path list file

    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    os.makedirs(ana_txt_save_path, exist_ok=True)

    # COCO category ids are not contiguous, so map each id to its position in
    # the category list and write the names in that same order.
    id_map = {}
    with open(classes_path, 'w') as f:
        for i, category in enumerate(data['categories']):
            f.write(f"{category['name']}\n")
            id_map[category['id']] = i

    print(id_map)

    # Group annotations by image once, instead of rescanning the full
    # annotation list for every image. Per-image order is preserved.
    anns_by_image = {}
    for ann in data['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    # Write every image's relative path into a single list file.
    with open(image_list_save_path, 'w') as list_file:
        for img in tqdm(data['images']):
            filename = img["file_name"]
            img_width = img["width"]
            img_height = img["height"]
            img_id = img["id"]

            # Label file shares the image's name; splitext drops only the
            # final extension, so names with extra dots are not truncated.
            ana_txt_name = os.path.splitext(filename)[0] + ".txt"
            txt_path = os.path.join(ana_txt_save_path, ana_txt_name)

            # A label file is written even for images with no annotations.
            with open(txt_path, 'w') as f_txt:
                for ann in anns_by_image.get(img_id, ()):
                    box = convert(img_width, img_height, ann["bbox"])
                    f_txt.write("%s %s %s %s %s\n" % (id_map[ann["category_id"]], box[0], box[1], box[2], box[3]))

            # Join with a separator: plain concatenation fused the directory
            # and file name (e.g. "./images/val2017000000.jpg").
            list_file.write(os.path.join(image_save_path, filename) + '\n')

魔乐社区

魔乐社区（Modelers.cn) 是一个中立、公益的人工智能社区，提供人工智能工具、模型、数据的托管、展示与应用协同服务，为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作，由全产业链共同建设、共同运营、共同享有，推动国产AI生态繁荣发展。

更多推荐

替你试过了，消费级显卡可以跑的开源文生图SOTA模型，顶级渲染、高密度文本绘图

魔乐社区

量化挑战赛冠军专访：4小时啃下W4A8量化，我靠的是这些经验

魔乐社区

小参数・大码力・易部署 | Qwen3.6-27B上线魔乐社区，基于昇腾的部署教程来了

继一周前模型开源发布后，千问再度开源Qwen3.6-27B —— 一个拥有270亿参数的稠密多模态模型，也是社区呼声最高的模型规格。Qwen3.6-27B 依然支持多模态思考与非思考模式，在智能体编程方面达到了旗舰级表现，全面超越前代开源旗舰 Qwen3.5-397B-A17B（总参数397B / 激活参数17B的MoE模型）。作为稠密架构，它无需MoE路由即可部署，是开发者在实用、可广泛部署规模