使用CNNs网络,基于caltech 101数据集实现分类
摘要:首先下载 Caltech 101 数据集并解压,路径格式如图所示。本文以 PyCharm 为基础(PyCharm 以当前文件夹的目录为根目录),使用该数据集制作易读的 npy 文件格式(完整的 data_process 代码见下文)。
首先下载101数据集
把文件解压,路径格式如图所示。
此处是以pycharm为基础(pycharm以当前文件夹的目录为根目录)

使用该数据集,制作npy易读文件格式
def data_process(img_size):
    """Resize the dataset images to squares and cache them as ``.npy`` files.

    Walks every folder named in the module-level ``categories`` list (located
    under ``path``), resizes each image to ``img_size`` x ``img_size`` and
    keeps only those whose resized array has shape
    ``(img_size, img_size, 3)``. The label of an image is the index of its
    folder in ``categories``.

    Two files are written into ``root_path``: ``x<img_size>.npy`` (images)
    and ``y<img_size>.npy`` (labels). ``np.save`` appends the ``.npy``
    suffix itself.

    Args:
        img_size: Edge length, in pixels, of the square output images.
    """
    size = (img_size, img_size)
    expected_shape = (img_size, img_size, 3)
    imgs = []
    labels = []
    for label, category in enumerate(tqdm(categories)):
        category_dir = os.path.join(path, category)
        for filename in os.listdir(category_dir):
            # Close each file as soon as it has been resized instead of
            # leaving every image handle open until garbage collection.
            with Image.open(os.path.join(category_dir, filename)) as img:
                # Image.LANCZOS is the same filter as the older alias
                # Image.ANTIALIAS, which newer Pillow releases removed.
                pixels = np.asarray(img.resize(size, Image.LANCZOS))
            # Anything that is not 3-channel after resizing is skipped.
            if pixels.shape == expected_shape:
                imgs.append(pixels)
                labels.append(label)
    np.save(os.path.join(root_path, 'x' + str(img_size)), imgs)
    np.save(os.path.join(root_path, 'y' + str(img_size)), labels)
img_size = 200
# np.save appends ".npy" to the name it is given, so look for the file that
# data_process actually writes.
full_path = root_path + '/' + 'x' + str(img_size) + '.npy'
if os.path.exists(full_path):
    print("{} file already exists.".format(full_path))
else:
    # Build the cache only when it is missing (the original test was inverted
    # and checked a path without the ".npy" suffix, so it never ran).
    data_process(img_size)
相应的模块就import一下,后续会展示详细的代码。
此时就会在dataset目录下生成x200.npy和y200.npy两个文件。后续处理的时候,就会直接读取这两个npy文件作为输入数据集。
下面开始说明网络训练过程和导入。
cal_101_googlenet.py
from keras import backend as K
from keras.utils import np_utils
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from modles.googlenet import GoogLeNetBN
# GPU configuration.
# CUDA_VISIBLE_DEVICES is assigned before TensorFlow is imported below.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
# NOTE(review): allow_growth is expected to make TensorFlow allocate GPU
# memory on demand instead of reserving it all up front - confirm against
# the TF 1.x GPUOptions documentation.
config.gpu_options.allow_growth = True
# config.gpu_options.per_process_gpu_memory_fraction = 0.4
# Register a session built from this config as the one Keras uses.
set_session(tf.Session(config=config))
# End of module imports and GPU setup.
# Hyper-parameters and dataset locations.
image_size = 200
classes = 101
root_path = 'dataset'
path = 'dataset/Caltech 101/101_ObjectCategories'
# Every entry under `path` is treated as one category. Sorting keeps the
# entry-to-label-index mapping independent of directory listing order.
categories = os.listdir(path)
categories.sort()
ncategories = len(categories)
print(ncategories)
## Dataset path and number of categories are now set.
def data_process(img_size):
    """Resize the dataset images to squares and cache them as ``.npy`` files.

    Walks every folder named in the module-level ``categories`` list (located
    under ``path``), resizes each image to ``img_size`` x ``img_size`` and
    keeps only those whose resized array has shape
    ``(img_size, img_size, 3)``. The label of an image is the index of its
    folder in ``categories``.

    Two files are written into ``root_path``: ``x<img_size>.npy`` (images)
    and ``y<img_size>.npy`` (labels). ``np.save`` appends the ``.npy``
    suffix itself.

    Args:
        img_size: Edge length, in pixels, of the square output images.
    """
    size = (img_size, img_size)
    expected_shape = (img_size, img_size, 3)
    imgs = []
    labels = []
    for label, category in enumerate(tqdm(categories)):
        category_dir = os.path.join(path, category)
        for filename in os.listdir(category_dir):
            # Close each file as soon as it has been resized instead of
            # leaving every image handle open until garbage collection.
            with Image.open(os.path.join(category_dir, filename)) as img:
                # Image.LANCZOS is the same filter as the older alias
                # Image.ANTIALIAS, which newer Pillow releases removed.
                pixels = np.asarray(img.resize(size, Image.LANCZOS))
            # Anything that is not 3-channel after resizing is skipped.
            if pixels.shape == expected_shape:
                imgs.append(pixels)
                labels.append(label)
    np.save(os.path.join(root_path, 'x' + str(img_size)), imgs)
    np.save(os.path.join(root_path, 'y' + str(img_size)), labels)
img_size = image_size  # edge length the images are resized to
# np.save appends ".npy" to the name it is given, so look for the file that
# data_process actually writes.
full_path = root_path + '/' + 'x' + str(img_size) + '.npy'
if os.path.exists(full_path):
    print("{} file already exists.".format(full_path))
else:
    # Build the cache only when it is missing (the original test was inverted
    # and checked a path without the ".npy" suffix, so it never ran).
    data_process(img_size)
## Dataset preparation
# Load the cached image (x) and label (y) arrays.
# mmap_mode, fix_imports and encoding are left at their np.load defaults,
# which are the values the original call spelled out.
x = np.load('dataset/x%s.npy' % img_size, allow_pickle=True)
y = np.load('dataset/y%s.npy' % img_size, allow_pickle=True)
print("successfully load x%s.npy" % img_size)
## Data loaded.
# Show one sample as a visual sanity check.
plt.imshow(x[96])
plt.show()
## End of load check.
# Seed NumPy's global RNG before the split.
seed = 7
np.random.seed(seed)
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing (0.8 : 0.2 split).
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
X_train = np.stack(X_train, axis=0)
y_train = np.stack(y_train, axis=0)
X_test = np.stack(X_test, axis=0)
y_test = np.stack(y_test, axis=0)
print("Num train_imgs: %d" % (len(X_train)))
print("Num test_imgs: %d" % (len(X_test)))
# One-hot encode the labels. num_classes is passed explicitly so both splits
# get the same width as the model's output layer; without it the width is
# inferred from the largest label present in each split separately.
y_train = np_utils.to_categorical(y_train, num_classes=classes)
y_test = np_utils.to_categorical(y_test, num_classes=classes)
## Data split into training and test sets.
X_train = X_train.reshape((int(len(X_train)), img_size, img_size, 3))
X_test = X_test.reshape((int(len(X_test)), img_size, img_size, 3))
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
## Arrays reshaped to (samples, img_size, img_size, 3).
# Callbacks, model construction and compilation.
import numpy as np
from keras.callbacks import ReduceLROnPlateau, CSVLogger, EarlyStopping
# Multiply the learning rate by sqrt(0.1) after 5 epochs without improvement,
# never going below 0.5e-6.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
# Stop when 'val_acc' has not improved by at least 0.001 for 10 epochs.
# NOTE(review): 'val_acc' is assumed to be the metric key for the Keras
# version in use; some releases log 'val_accuracy' instead - confirm.
early_stopper = EarlyStopping(monitor='val_acc', min_delta=0.001, patience=10, mode='max')
# Per-epoch metrics are written to a file named 'googlenet_caltech101'.
csv_logger = CSVLogger('googlenet_caltech101')
model = GoogLeNetBN(input_shape=(img_size, img_size, 3), classes=classes)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
## Model built and compiled.
# Train the model.
Y_train = y_train
Y_test = y_test
data_augmentation = False  # whether to use real-time data augmentation
from keras.preprocessing.image import ImageDataGenerator
# Shared by both training paths so they cannot drift apart.
batch_size = 32
epochs = 400
if not data_augmentation:
    print('Not using data augmentation.')
    # `epochs` replaces the deprecated Keras 1 spelling `nb_epoch`, matching
    # the fit_generator call below.
    model.fit(X_train, Y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(X_test, Y_test),
              shuffle=True,
              verbose=2,
              callbacks=[lr_reducer, early_stopper, csv_logger])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # do not flip images vertically
    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(X_train)
    # Fit the model on the batches generated by datagen.flow().
    # `max_queue_size` replaces the deprecated spelling `max_q_size`.
    model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                        steps_per_epoch=X_train.shape[0] // batch_size,
                        validation_data=(X_test, Y_test),
                        epochs=epochs, verbose=2, max_queue_size=257,
                        callbacks=[lr_reducer, early_stopper, csv_logger])
googlenet.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from keras import backend
from keras import layers
from keras import models
from keras import utils as keras_utils
from modles.pym_attention import se_block,cbam_block,pyramid_attention_block_1
def conv2d_bn(x, filters, kernel_size=(3, 3), padding='same', strides=(1, 1),
              name=None):
    """Apply a bias-free Conv2D, then BatchNormalization, then ReLU.

    # Arguments
        x: input tensor.
        filters: number of convolution filters.
        kernel_size: convolution kernel size.
        padding: padding mode passed to `Conv2D`.
        strides: strides passed to `Conv2D`.
        name: optional base name. When given, the convolution is named
            `name + '_conv'`, the batch norm `name + '_bn'` and the
            activation `name`; otherwise the layers are left unnamed.
    # Returns
        Output tensor of the ReLU activation.
    """
    conv_name = None if name is None else name + '_conv'
    bn_name = None if name is None else name + '_bn'
    # Normalize over the channel axis, whose position depends on the backend
    # image data format.
    bn_axis = 1 if backend.image_data_format() == 'channels_first' else 3

    conv = layers.Conv2D(
        filters,
        kernel_size,
        strides=strides,
        padding=padding,
        use_bias=False,
        name=conv_name)
    out = conv(x)
    out = layers.BatchNormalization(axis=bn_axis, name=bn_name)(out)
    return layers.Activation('relu', name=name)(out)
def inception(x, filters):
    """Build one inception module with four parallel branches.

    # Arguments
        x: input tensor.
        filters: a 4-item sequence
            `(n1x1, (reduce3x3, n3x3), (reduce5x5, n5x5), npool)` giving the
            filter counts of the 1x1 branch, the 1x1 -> 3x3 branch, the
            1x1 -> 5x5 branch and the 1x1 convolution after average pooling.
    # Returns
        The four branch outputs concatenated along the channel axis.
    # Raises
        ValueError: if `filters` does not have 4 items, or its second or
            third item does not have 2 items.
    """
    if len(filters) != 4:
        raise ValueError('filters should have 4 components')
    if not (len(filters[1]) == 2 and len(filters[2]) == 2):
        raise ValueError('incorrect spec of filters')

    n_1x1, (reduce_3x3, n_3x3), (reduce_5x5, n_5x5), n_pool = filters

    # Branches are created in the same order as before so that any
    # automatically assigned layer names stay the same.
    one_by_one = conv2d_bn(x, n_1x1, (1, 1))

    three_by_three = conv2d_bn(x, reduce_3x3, (1, 1))
    three_by_three = conv2d_bn(three_by_three, n_3x3, (3, 3))

    five_by_five = conv2d_bn(x, reduce_5x5, (1, 1))
    five_by_five = conv2d_bn(five_by_five, n_5x5, (5, 5))

    pooled = layers.AveragePooling2D(
        pool_size=(3, 3), strides=(1, 1), padding='same')(x)
    pooled = conv2d_bn(pooled, n_pool, (1, 1))

    concat_axis = 1 if backend.image_data_format() == 'channels_first' else 3
    return layers.concatenate(
        [one_by_one, three_by_three, five_by_five, pooled], axis=concat_axis)
def GoogLeNetBN(include_top=True,
                weights=None,
                input_tensor=None,
                input_shape=None,
                pooling='max',
                classes=1000,
                **kwargs):
    """Instantiates the GoogLeNetBN architecture.

    # Arguments
        include_top: whether to append the classification block
            (global pooling, dropout and a softmax `Dense` layer).
        weights: must be None.
        input_tensor: Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: input tensor shape, used to create an input tensor
            when `input_tensor` is not specified.
        pooling: global pooling used inside the classification block,
            `'avg'` or `'max'`. It is only consulted when `include_top`
            is True; with `include_top=False` the model output is the
            output of the last inception module.
        classes: number of units of the final softmax layer; only used
            when `include_top` is True.
        **kwargs: accepted but not used by this function.
    # Returns
        A Keras model instance named `googlenet_bn`.
    # Raises
        ValueError: if `weights` is not None, if neither `input_tensor`
            nor `input_shape` is given, or if `include_top` is True and
            `pooling` is neither `'avg'` nor `'max'`.
    """
    # Optional attention hook; disabled here, so the branches guarded by it
    # never run.
    att_block = None

    if weights is not None:
        raise ValueError('weights is not currently supported')

    if input_tensor is not None:
        if backend.is_keras_tensor(input_tensor):
            img_input = input_tensor
        else:
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
    elif input_shape is not None:
        img_input = layers.Input(shape=input_shape)
    else:
        raise ValueError('neither input_tensor nor input_shape is given')

    def downsample(tensor, tag):
        # 3x3 max pooling with stride 2, followed by the attention hook
        # when one is configured.
        tensor = layers.MaxPooling2D(
            pool_size=(3, 3), strides=(2, 2), padding='same')(tensor)
        if att_block is not None:
            tensor = att_block(tensor, name=tag)
        return tensor

    # Filter specs per inception stage; trailing comments give the number of
    # output channels of each module.
    stage3 = (
        (64, (96, 128), (16, 32), 32),       # 3a: 256
        (128, (128, 192), (32, 96), 64),     # 3b: 480
    )
    stage4 = (
        (192, (96, 208), (16, 48), 64),      # 4a: 512
        (160, (112, 224), (24, 64), 64),     # 4b: 512
        (128, (128, 256), (24, 64), 64),     # 4c: 512
        (112, (144, 288), (32, 64), 64),     # 4d: 528
        (256, (160, 320), (32, 128), 128),   # 4e: 832
    )
    stage5 = (
        (256, (160, 320), (32, 128), 128),   # 5a: 832
        (384, (192, 384), (48, 128), 128),   # 5b: 1024
    )

    # Stem.
    x = conv2d_bn(img_input, 64, (7, 7), strides=(2, 2))
    x = downsample(x, 'a')
    x = conv2d_bn(x, 64, (1, 1))
    x = conv2d_bn(x, 192, (3, 3))
    x = downsample(x, 'b')

    # Inception stages, with a downsampling step between them.
    for spec in stage3:
        x = inception(x, spec)
    x = downsample(x, 'c')
    for spec in stage4:
        x = inception(x, spec)
    x = downsample(x, 'd')
    for spec in stage5:
        x = inception(x, spec)

    if include_top:
        # Classification block
        if pooling == 'avg':
            global_pool = layers.GlobalAveragePooling2D
        elif pooling == 'max':
            global_pool = layers.GlobalMaxPooling2D
        else:
            raise ValueError('bad spec of global pooling')
        x = global_pool(name='global_pool')(x)
        x = layers.Dropout(0.4)(x)
        x = layers.Dense(classes, activation='softmax', name='predictions')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is None:
        inputs = img_input
    else:
        inputs = keras_utils.get_source_inputs(input_tensor)

    # Create model.
    return models.Model(inputs, x, name='googlenet_bn')
运行环境
python=3.6
keras=2.1.2
pandas=1.1.5
pillow=8.0.1
scikit-learn=0.24.0
scipy=1.5.4
tensorflow-gpu=1.3.0
tqdm=4.54.1
开始训练
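把googlenet.py、cal_101_googlenet.py和dataset放在同一路径下,用pycharm打开这个路径,然后直接run cal_101_googlenet.py即可。(注意:脚本中使用的是 `from modles.googlenet import GoogLeNetBN`,因此googlenet.py需要放在该路径下的modles目录中,或者相应地修改import语句。)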
结果说明
在训练过程中,设置了earlystopping,即10个epoch内没有提高0.001的话就会停止迭代。所以最后一次往前数10个epoch一般是最高的准确率。
我跑出来的结果是【0.9931,0.7066】
魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐


所有评论(0)