【Pycharm】将labelme标注结果转为VOC语义分割的数据集
labelme标注后,将标注的json结果转为VOC格式的语义分割数据集。
labelme的安装:
在Pycharm终端中输入:
pip install labelme==3.16.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
同理,缺什么包就安装什么包。
在 main 入口处三个命令行参数(--input_dir、--output_dir、--labels)的默认值里填入自己的路径,直接运行即可。
from __future__ import print_function
import argparse
import glob
import json
import os
import os.path as osp
import sys
import numpy as np
import PIL.Image
import base64
import labelme
from labelme import utils
from sklearn.model_selection import train_test_split
def _load_classes(labels_path):
    """Read the labels file and build the class list / id mapping.

    The file is expected to start with ``__ignore__`` (id -1, excluded from
    ``class_names``) followed by ``_background_`` (id 0), matching labelme's
    canonical labelme2voc.py convention.

    Returns:
        (class_names, class_name_to_id): tuple of names (without
        ``__ignore__``) and the name -> integer-id dict (with it).
    """
    class_names = []
    class_name_to_id = {}
    with open(labels_path) as f:
        for i, line in enumerate(f):
            # Bug fix: was `i + 1`, which contradicted the "starts with -1"
            # intent and made the -1/0 checks below unreachable, shifting
            # every class id by 2 and keeping __ignore__ in class_names.
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            class_name_to_id[class_name] = class_id
            if class_id == -1:
                assert class_name == '__ignore__'
                continue
            elif class_id == 0:
                assert class_name == '_background_'
            class_names.append(class_name)
    return tuple(class_names), class_name_to_id


def _convert_one(label_file, output_dir, class_names, class_name_to_id, colormap):
    """Convert a single labelme JSON file into JPEG + PNG label + viz images.

    Skips the file (with a message) when it carries no embedded imageData.
    Raises on malformed JSON / shapes; the caller decides how to react.
    """
    base = osp.splitext(osp.basename(label_file))[0]
    out_img_file = osp.join(output_dir, 'JPEGImages', base + '.jpg')
    out_png_file = osp.join(output_dir, 'SegmentationClass', base + '.png')
    out_viz_file = osp.join(output_dir, 'SegmentationClassVisualization', base + '.jpg')

    with open(label_file) as f:
        data = json.load(f)

    if not data.get('imageData'):
        # Original code silently did nothing for such files; keep the skip
        # but make it visible to the user.
        print('No imageData embedded, skipping:', label_file)
        return

    img = utils.img_b64_to_arr(data['imageData'])
    PIL.Image.fromarray(img).save(out_img_file)

    lbl = labelme.utils.shapes_to_label(
        img_shape=img.shape,
        shapes=data['shapes'],
        label_name_to_value=class_name_to_id,
    )
    labelme.utils.lblsave(out_png_file, lbl)
    # draw_label exists in labelme==3.16.2 (removed in newer releases).
    viz = labelme.utils.draw_label(lbl, img, class_names, colormap=colormap)
    PIL.Image.fromarray(viz).save(out_viz_file)


def _write_split_files(output_dir):
    """Write trainval/train/val/test txt files under ImageSets/Segmentation.

    The split is 85/15 train/val with a fixed random_state for
    reproducibility; test.txt is intentionally left empty.
    """
    txt_dir = osp.join(output_dir, 'ImageSets', 'Segmentation')
    stems = [
        osp.splitext(name)[0]
        for name in os.listdir(osp.join(output_dir, 'SegmentationClass'))
    ]
    train_files, val_files = train_test_split(
        stems, test_size=0.15, random_state=42)

    def _dump(filename, items):
        # One write per file instead of four leaked file handles.
        with open(osp.join(txt_dir, filename), 'w') as f:
            f.writelines(s + '\n' for s in items)

    _dump('trainval.txt', stems)
    _dump('test.txt', [])
    _dump('train.txt', train_files)
    _dump('val.txt', val_files)


def main(args):
    """Convert a directory of labelme JSON annotations into a VOC-style
    semantic-segmentation dataset.

    Args (via argparse namespace):
        input_dir:  directory containing labelme ``*.json`` files.
        output_dir: destination root; JPEGImages, SegmentationClass,
                    SegmentationClassVisualization and ImageSets/Segmentation
                    are created under it.
        labels:     path to labels.txt (``__ignore__``, ``_background_``, ...).
    """
    os.makedirs(args.output_dir, exist_ok=True)
    for sub in ('JPEGImages', 'SegmentationClass', 'SegmentationClassPNG',
                'SegmentationClassVisualization'):
        os.makedirs(osp.join(args.output_dir, sub), exist_ok=True)
    os.makedirs(osp.join(args.output_dir, 'ImageSets', 'Segmentation'),
                exist_ok=True)
    print('Creating dataset:', args.output_dir)

    class_names, class_name_to_id = _load_classes(args.labels)
    print('class_names:', class_names)

    out_class_names_file = osp.join(args.output_dir, 'class_names.txt')
    with open(out_class_names_file, 'w') as f:
        f.write('\n'.join(class_names))
    print('Saved class_names:', out_class_names_file)

    colormap = labelme.utils.label_colormap(255)

    for label_file in glob.glob(osp.join(args.input_dir, '*.json')):
        print('Generating dataset from:', label_file)
        try:
            _convert_one(label_file, args.output_dir,
                         class_names, class_name_to_id, colormap)
        except Exception as exc:
            # Was a bare `except:` that swallowed everything (including
            # KeyboardInterrupt) with no context; report and keep going.
            print('这张图像有错误', label_file, exc)
            continue

    _write_split_files(args.output_dir)
if __name__ == '__main__':
    # CLI entry point. Every option has a default so the script can be run
    # straight from an IDE without arguments; override on the command line
    # as needed.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for flag, default, desc in (
        ('--input_dir', r'E:\labelmeSet', 'input annotated directory'),
        ('--output_dir', r'E:\labelmeSet_VOC', 'output dataset directory'),
        ('--labels', r'E:\labelmeSet\labels.txt', 'labels file'),
    ):
        cli.add_argument(flag, type=str, help=desc,
                         required=False, default=default)
    main(cli.parse_args())

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐
所有评论(0)