划重点:label标签矩阵必须将每个像素值转成对应的类别标签(0~20,共21类:0为背景类,1~20为物体类);

否则会报错:

... thread: [869,0,0] Assertion `t >= 0 && t < n_classes` failed.

RuntimeError: CUDA error: device-side assert triggered

说来惭愧,我也是自己百度了好几天,才弄明白的。路漫漫其修远兮,吾将上下而求索!Ypa!

make_npz.py代码如下:

import glob
import cv2
import numpy as np
import os
from tqdm import tqdm
import torch
from PIL import Image

# RGB colour used for each class in the label images; the position in this
# list is the class id (0 = background, 1..20 = object classes).
VOC_COLORMAP = [
    [0, 0, 0],        # 0  background
    [128, 0, 0],      # 1  aeroplane
    [0, 128, 0],      # 2  bicycle
    [128, 128, 0],    # 3  bird
    [0, 0, 128],      # 4  boat
    [128, 0, 128],    # 5  bottle
    [0, 128, 128],    # 6  bus
    [128, 128, 128],  # 7  car
    [64, 0, 0],       # 8  cat
    [192, 0, 0],      # 9  chair
    [64, 128, 0],     # 10 cow
    [192, 128, 0],    # 11 diningtable
    [64, 0, 128],     # 12 dog
    [192, 0, 128],    # 13 horse
    [64, 128, 128],   # 14 motorbike
    [192, 128, 128],  # 15 person
    [0, 64, 0],       # 16 potted plant
    [128, 64, 0],     # 17 sheep
    [0, 192, 0],      # 18 sofa
    [128, 192, 0],    # 19 train
    [0, 64, 128],     # 20 tv/monitor
]
# Names of the annotated classes, in the same order as VOC_COLORMAP.
VOC_CLASSES = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'potted plant', 'sheep', 'sofa', 'train', 'tv/monitor',
]

# Lookup table with one slot per possible 24-bit RGB value (16777216 entries).
# Slots that are not a palette colour keep the initial value 0.
colormap2label = torch.zeros(256 ** 3, dtype=torch.uint8)
for class_id, (red, green, blue) in enumerate(VOC_COLORMAP):
    # Treat the three channels as base-256 digits so that every RGB triple
    # maps to exactly one slot of the table.
    colormap2label[(red * 256 + green) * 256 + blue] = class_id

# Build the label matrix
def voc_label_indices(colormap, colormap2label):
    """Translate a colour-coded label image into a matrix of class ids.

    Args:
        colormap: Label image exposing ``convert("RGB")`` (``npz`` passes a
            PIL image); the converted result is read as an H x W x 3 array.
        colormap2label: Lookup table indexed by the flattened 24-bit RGB
            value ``(R * 256 + G) * 256 + B``.

    Returns:
        ``colormap2label`` indexed with the per-pixel flattened RGB value,
        i.e. one table entry per pixel in an H x W layout.
    """
    # int32 avoids uint8 overflow; the largest index is 256**3 - 1.
    rgb = np.array(colormap.convert("RGB")).astype('int32')
    red = rgb[:, :, 0]
    green = rgb[:, :, 1]
    blue = rgb[:, :, 2]
    flat_index = (red * 256 + green) * 256 + blue
    # Look each pixel's flattened colour up in the table.
    return colormap2label[flat_index]

def npz(im, la, s):
    """Pack every image/label pair of one dataset split into ``.npz`` files.

    For each file found in ``im`` the label ``<stem>.png`` is read from
    ``la``, converted from its RGB colour coding to class ids with
    ``voc_label_indices``, and written together with the RGB image to
    ``<s>/<stem>.npz`` under the keys ``image`` and ``label``.

    Args:
        im: Directory containing the input images.
        la: Directory containing the colour-coded label PNGs, named after
            the image stems.
        s: Directory the ``.npz`` files are written to; it is created if it
            does not exist yet.

    Raises:
        FileNotFoundError: If ``im`` does not exist or an image has no
            matching label file.
    """
    images_path = im
    labels_path = la
    save_dir = s

    # np.savez does not create missing directories. An empty string means
    # "current directory", which os.makedirs would reject.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    for file_name in tqdm(os.listdir(images_path)):
        # One stem for both the label lookup and the output name, so names
        # with extra dots or extensions such as ".jpeg" stay consistent.
        stem = os.path.splitext(file_name)[0]
        image_path = os.path.join(images_path, file_name)
        label_path = os.path.join(labels_path, stem + '.png')

        # Context managers release the file handles deterministically.
        with Image.open(image_path) as image_file:
            image = np.asarray(image_file.convert("RGB"))
        with Image.open(label_path) as label_file:
            # Three-channel colour label -> single-channel class-id matrix.
            label = np.asarray(
                voc_label_indices(label_file.convert("RGB"), colormap2label)
            )

        # Save the pair as one npz file; os.path.join works with or without
        # a trailing separator on the output directory.
        np.savez(os.path.join(save_dir, stem + ".npz"), image=image, label=label)

# Training split: input images, colour-coded labels and npz output folder.
TRAIN_IMG_PATH = 'H:/2023_Files/Dataset/VOC_2012_SEG_V2/train'
TRAIN_LABEL_PATH = 'H:/2023_Files/Dataset/VOC_2012_SEG_V2/train_GT'
# Same label folder written with Windows separators:
# H:\2023_Files\Dataset\VOC_2012_SEG_V2\train_GT
TRAIN_NPZ_SAVE_PATH = './data/VOC_2012_SEG_V2/train_npz/'

# Validation split: input images, colour-coded labels and npz output folder.
VAL_IMG_PATH = 'H:/2023_Files/Dataset/VOC_2012_SEG_V2/valid'
VAL_LABEL_PATH = 'H:/2023_Files/Dataset/VOC_2012_SEG_V2/valid_GT'
VAL_NPZ_SAVE_PATH = './data/VOC_2012_SEG_V2/val_npz/'


print('start make npz')
# Convert the training split first, then the validation split.
for _img_dir, _label_dir, _save_dir in (
    (TRAIN_IMG_PATH, TRAIN_LABEL_PATH, TRAIN_NPZ_SAVE_PATH),
    (VAL_IMG_PATH, VAL_LABEL_PATH, VAL_NPZ_SAVE_PATH),
):
    npz(_img_dir, _label_dir, _save_dir)

print('done!!!')

最后,在本地 GTX 1050Ti 上利用 VOC 语义分割数据集训练 Swin Unet,速度还可以,约 2.5 min 一个 epoch;暂时训练 50 个 epoch,后面再放到 AutoDL 服务器上训练,具体如下图。

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐