Logistic Regression + Diabetes Dataset
from sklearn.datasets import make_blobs
x, y = make_blobs(n_samples=200, n_features=2, centers=2, random_state=8)
# Visualize the two generated clusters
import matplotlib.pyplot as plt
%matplotlib inline
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.spring, edgecolors='k')
Implementing Logistic Regression with Gradient Descent
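The loop below implements batch gradient descent on the logistic (cross-entropy) loss. Writing the training design matrix and labels as X and y and the sigmoid as sigma, each iteration applies the standard update

\theta \leftarrow \theta - \frac{\alpha}{m} X^{\top}\bigl(\sigma(X\theta) - y\bigr)

where m is the number of training samples and alpha is the learning rate; this is exactly what the line theta = theta - alpha * np.dot(x_train.T, (h - y_train)) / m computes.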
# Add a column of ones so theta's last entry acts as the intercept
import numpy as np
x_ones = np.ones((x.shape[0], 1))
x = np.hstack((x, x_ones))
# Split the data into training and test sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    test_size=0.3, random_state=8)
# Reshape the targets into column vectors
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)
# Initialize theta
theta = np.ones([x_train.shape[1], 1])
print(theta)
# Set the learning rate (step size)
alpha = 0.001
# Define the sigmoid function
def sigmoid(z):
    s = 1.0 / (1 + np.exp(-z))
    return s
h = sigmoid(np.dot(x_train, theta))
# m is the number of training samples (200 * 0.7 = 140 here)
m = x_train.shape[0]
num_iters = 10000
for i in range(num_iters):
    h = sigmoid(np.dot(x_train, theta))
    theta = theta - alpha * np.dot(x_train.T, (h - y_train)) / m
print(theta)
[[ 0.65443683]
[-1.1828222 ]
[ 0.97980398]]
# Predict probabilities on the test set
pred_y = sigmoid(np.dot(x_test, theta))
# Binarize the predictions at the 0.5 threshold
pred_y[pred_y > 0.5] = 1
pred_y[pred_y <= 0.5] = 0
print(pred_y.reshape(1, -1))
print(y_test.reshape(1, -1))
# Accuracy
np.sum(pred_y == y_test) / len(y_test)
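As an optional cross-check (not part of the original code), the same split can be handed to scikit-learn's LogisticRegression; the extra all-ones column is harmless because the estimator fits its own intercept. A minimal sketch:

from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()                # default L2-regularized solver
clf.fit(x_train, y_train.ravel())         # scikit-learn expects a 1-D target
print(clf.score(x_test, y_test.ravel()))  # test-set accuracy to compare against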
Hands-On with the Diabetes Dataset
data = np.loadtxt(r'pima-indians-diabetes.data.csv', delimiter=',', skiprows=1, dtype=float)
# Separate the feature columns from the target column
x = data[:, :-1]
y = data[:, -1]
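Optionally (this check is not in the original code), it is worth confirming that the file parsed as expected; the standard Pima Indians diabetes CSV has 768 rows of 8 features plus a 0/1 outcome column, so with skiprows=1 the shapes should be close to that.

print(x.shape, y.shape)              # expect roughly (768, 8) and (768,)
print(np.bincount(y.astype(int)))    # class balance of the 0/1 outcome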
# Standardize the features (z-score)
mu = x.mean(axis=0)
std = x.std(axis=0)
x = (x - mu) / std
# Add a column of ones so theta's last entry acts as the intercept
x_ones = np.ones((x.shape[0], 1))
x = np.hstack((x, x_ones))
# Split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=16)
# Reshape the targets into column vectors
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)
# Initialize theta
theta = np.ones([x_train.shape[1], 1])
# print(theta)
# Set the learning rate (step size)
alpha = 0.001
# Define the sigmoid function
def sigmoid(z):
    s = 1.0 / (1 + np.exp(-z))
    return s
h = sigmoid(np.dot(x_train, theta))
# m is the number of training samples, i.e. x_train.shape[0]
m = x_train.shape[0]
num_iters = 10000
for i in range(num_iters):
    h = sigmoid(np.dot(x_train, theta))
    theta = theta - alpha * np.dot(x_train.T, (h - y_train)) / m
print(theta)
# Predict probabilities on the test set
pred_y = sigmoid(np.dot(x_test, theta))
# Binarize the predictions at the 0.5 threshold
pred_y[pred_y > 0.5] = 1
pred_y[pred_y <= 0.5] = 0
print(pred_y.reshape(1, -1))
print(y_test.reshape(1, -1))
# Accuracy
np.sum(pred_y == y_test) / len(y_test)
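One optional refinement (an assumption on my part, not part of the original post) is to monitor the binary cross-entropy loss during training, which makes it easier to judge whether num_iters=10000 and alpha=0.001 are enough to converge. A minimal sketch that reuses sigmoid, x_train, y_train and theta from above:

def log_loss(h, y, eps=1e-12):
    # binary cross-entropy, clipped to avoid log(0)
    h = np.clip(h, eps, 1 - eps)
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

# e.g. print the loss every 1000 iterations inside the training loop:
# if i % 1000 == 0:
#     print(i, log_loss(sigmoid(np.dot(x_train, theta)), y_train))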
