刘二大人《PyTorch深度学习实践》第9讲“多分类问题”作业
·
- pd.read_csv
详解pandas的read_csv方法 - 知乎 (zhihu.com)
- 代码
# otto-group-product-classification-challenge
import numpy as np
import torch
import torch.optim as optim # 优化器
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import pandas as pd
# Data preprocessing
# Convert class-name labels to integer ids so they can be fed to cross-entropy.
def labelsId(labels):
    """Map Otto class-name labels to zero-based integer ids.

    Args:
        labels: Iterable of strings, each one of 'Class_1' ... 'Class_9'.

    Returns:
        A list of ints of the same length, where 'Class_1' -> 0 and
        'Class_9' -> 8.

    Raises:
        ValueError: If a label is not one of the nine known class names.
    """
    target_labels = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6', 'Class_7', 'Class_8', 'Class_9']
    # Build the lookup table once so every label costs one dict access
    # instead of a linear scan through the class list.
    label_to_id = {name: idx for idx, name in enumerate(target_labels)}
    try:
        return [label_to_id[label] for label in labels]
    except KeyError as err:
        # Keep the exception type that list.index() raised for unknown labels.
        raise ValueError('%r is not a known class label' % (err.args[0],)) from err
# Dataset definition
class OttoGroupDataset(Dataset):
    """Dataset that loads features and integer labels from a CSV file.

    The CSV is read with pandas. Every column except the first and the last
    is used as a feature; the 'target' column supplies the labels
    (presumably the last column is 'target' and the first is an id -
    confirm against the CSV).
    """

    def __init__(self, filepath):
        """Read the CSV at *filepath* and cache features and labels in memory."""
        frame = pd.read_csv(filepath)
        self.len = len(frame)
        # Drop the first and last columns, keep the rest as float features.
        raw = np.array(frame)
        features = raw[:, 1:-1].astype(float)
        self.x_data = torch.Tensor(features)
        # Labels are stored as a plain list of integer class ids.
        self.y_data = labelsId(frame['target'])

    def __getitem__(self, index):
        """Return the (features, label id) pair stored at *index*."""
        sample = self.x_data[index]
        label = self.y_data[index]
        return sample, label

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.len
# Model definition
class OttoGroupModel(torch.nn.Module):
    """Fully connected classifier with layer sizes 93 -> 64 -> 32 -> 16 -> 9.

    ReLU follows each of the first three linear layers. The last layer
    returns raw logits, which is what torch.nn.CrossEntropyLoss expects.
    """

    def __init__(self):
        super(OttoGroupModel, self).__init__()
        self.linear1 = torch.nn.Linear(93, 64)
        self.linear2 = torch.nn.Linear(64, 32)
        self.linear3 = torch.nn.Linear(32, 16)
        self.linear4 = torch.nn.Linear(16, 9)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the raw class logits for a batch of feature rows.

        Args:
            x: Float tensor whose last dimension is 93.

        Returns:
            Tensor of logits whose last dimension is 9.
        """
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.relu(self.linear3(x))
        # No activation on the last layer: the loss works on raw logits.
        return self.linear4(x)

    # Prediction helper
    def predict(self, x):
        """Return one-hot class predictions as a pandas DataFrame.

        Args:
            x: Float tensor of shape (batch, 93).

        Returns:
            DataFrame of ints with one row per sample and one column per
            class (columns 0..8); exactly one entry per row is 1.
        """
        with torch.no_grad():
            # Reuse the forward pass so training and prediction share one
            # code path. Applying ReLU to the logits here would clamp every
            # negative logit to 0 and distort the argmax.
            logits = self(x)
            predicted = torch.argmax(logits, dim=1)
            num_classes = self.linear4.out_features
            # Fixed-width encoding: a class that is never predicted still
            # gets its (all-zero) column, so the table always has 9 columns.
            one_hot = torch.nn.functional.one_hot(predicted, num_classes=num_classes)
        y = pd.DataFrame(one_hot.cpu().numpy(), columns=range(num_classes)).astype(int)
        return y
# Training
def train(epoch):
    """Run one pass over the training loader and print the running loss.

    Uses the module-level ``train_loader``, ``model``, ``criterion`` and
    ``optimizer``. The mean loss of the last 300 mini-batches is printed
    every 300 mini-batches.

    Args:
        epoch: Zero-based epoch index; printed as ``epoch + 1``.
    """
    report_every = 300
    loss_sum = 0.0
    for step, (inputs, target) in enumerate(train_loader, start=1):
        logits = model(inputs.float())
        loss = criterion(logits, target)

        # Standard update: clear old gradients, backpropagate, step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        if step % report_every == 0:
            print('[%d, %5d] loss:%.3f' % (epoch + 1, step, loss_sum / report_every))
            loss_sum = 0.0
# Prepare the training dataset and its shuffled mini-batch loader.
train_dataset = OttoGroupDataset('otto-group-product-classification-challenge/train.csv')
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)

# Create the model, the loss function and the optimizer.
model = OttoGroupModel()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)

# Train for 10 epochs when the file is run as a script.
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
# Write the prediction file
def predict_save():
    """Predict classes for the test CSV and write them to 'predict.csv'.

    Reads the test file, feeds every column except the first to the
    module-level ``model``, renames the prediction columns to the class
    names, prepends the 'id' column and saves the table without the index.
    """
    test_frame = pd.read_csv('otto-group-product-classification-challenge/test.csv')
    # Skip the first column; the remaining columns are the model inputs.
    features = np.array(test_frame)[:, 1:]
    one_hot = model.predict(torch.Tensor(features))
    # Rename the prediction columns to the class names.
    one_hot.columns = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6', 'Class_7', 'Class_8', 'Class_9']
    # Put the id column first.
    one_hot.insert(0, 'id', test_frame['id'])
    # Save as CSV without the DataFrame index.
    one_hot.to_csv('predict.csv', index=False)


predict_save()
- 题目链接
Otto Group Product Classification Challenge | Kaggle
- 结果
提交到 Kaggle 后显示的分数为 0.74880。
魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐


所有评论(0)