K-means 小麦数据集
import numpy as npimport matplotlib.pyplot as pltfrom sklearn import preprocessingfrom sklearn.decomposition import PCA#加载数据data = np.loadtxt("C:/Users/18578/Desktop/Anaconda/seeds_dataset.txt")x = da
·
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.decomposition import PCA
# Load the seeds data set from a plain-text numeric table.
seeds_path = "C:/Users/18578/Desktop/Anaconda/seeds_dataset.txt"
data = np.loadtxt(seeds_path)

# Feature matrix: columns 1..6 of the table (six columns; column 0 and the
# last column are not included in this slice).
# NOTE(review): the original comment said "first seven columns"; the slice
# below keeps six. Confirm whether column 0 was skipped on purpose.
x = data[:, 1:7]
print(x)

# Standardize each selected feature column with sklearn's preprocessing.scale.
dataset = preprocessing.scale(x)
print(dataset)
[[14.84 0.871 5.763 3.312 2.221 5.22 ]
[14.57 0.8811 5.554 3.333 1.018 4.956 ]
[14.09 0.905 5.291 3.337 2.699 4.825 ]
...
[13.66 0.8883 5.236 3.232 8.315 5.056 ]
[13.21 0.8521 5.175 2.836 3.598 5.044 ]
[13.34 0.8684 5.243 2.974 5.637 5.063 ]]
[[ 2.15462437e-01 6.06017918e-05 3.04218199e-01 1.41701823e-01
-9.86151745e-01 -3.83577423e-01]
[ 8.22375713e-03 4.28515270e-01 -1.68624664e-01 1.97432229e-01
-1.78816620e+00 -9.22013487e-01]
[-3.60200562e-01 1.44238325e+00 -7.63637453e-01 2.08047544e-01
-6.67479334e-01 -1.18919199e+00]
...
[-6.90247348e-01 7.33948301e-01 -8.88069786e-01 -7.06044846e-02
3.07658816e+00 -7.18060432e-01]
[-1.03564515e+00 -8.01701104e-01 -1.02607655e+00 -1.12152071e+00
-6.81351965e-02 -7.42534799e-01]
[-9.35863561e-01 -1.10234659e-01 -8.72232943e-01 -7.55292327e-01
1.29122264e+00 -7.03783718e-01]]
# Dimensionality reduction: fit a 2-component PCA on the standardized
# features, then project the same data onto those components.
pca = PCA(n_components=2)
pca.fit(dataset)
DRdata = pca.transform(dataset)

# Label handling: the class label is the last column of the raw table;
# cast it to integers.
labels = data[:, -1].astype(int)

# Processed data set: the two PCA coordinates followed by the label column.
# Stacking with the float PCA output promotes the labels back to float,
# which is why they print as 1.0 / 2.0 / 3.0.
Dataset = np.column_stack((DRdata, labels))
print(Dataset)
[[ 2.88764155e-01 7.81871323e-01 1.00000000e+00]
[ 2.28909211e-03 1.91385990e+00 1.00000000e+00]
[-4.09600763e-01 1.91189872e+00 1.00000000e+00]
[-4.79583667e-01 1.93249919e+00 1.00000000e+00]
[ 1.02130956e+00 2.06202929e+00 1.00000000e+00]
[-2.88311012e-01 1.63943107e+00 1.00000000e+00]
[-1.34128463e-01 4.38095095e-01 1.00000000e+00]
[-3.42318429e-01 1.39102227e+00 1.00000000e+00]
[ 1.67256827e+00 2.91527357e-01 1.00000000e+00]
[ 1.34319530e+00 1.00061187e+00 1.00000000e+00]
[ 7.22548998e-04 -4.25283839e-01 1.00000000e+00]
[-4.90069491e-01 1.56030698e+00 1.00000000e+00]
[-8.74383101e-01 8.83234199e-01 1.00000000e+00]
[-8.34950965e-01 8.78540305e-01 1.00000000e+00]
[-9.31696938e-01 9.66476563e-01 1.00000000e+00]
[-5.29927195e-01 1.06864388e+00 1.00000000e+00]
[-7.29487114e-01 1.15133653e+00 1.00000000e+00]
[ 6.24903392e-01 2.16796623e+00 1.00000000e+00]
[-2.24016069e-01 2.79333127e+00 1.00000000e+00]
[-1.57559510e+00 3.50866435e-01 1.00000000e+00]
[-5.07378475e-01 1.61366428e-01 1.00000000e+00]
[-4.02247651e-01 7.05500357e-01 1.00000000e+00]
[ 7.93883556e-01 2.31672813e+00 1.00000000e+00]
[-1.72418579e+00 1.59742351e+00 1.00000000e+00]
[-2.26391231e-02 1.01490187e+00 1.00000000e+00]
[ 1.00552365e+00 1.62406979e+00 1.00000000e+00]
[-1.42339470e+00 5.65376295e-01 1.00000000e+00]
[-1.52977390e+00 7.59432915e-01 1.00000000e+00]
[-3.78269777e-01 1.04520557e+00 1.00000000e+00]
[-9.69946418e-01 1.27495715e-01 1.00000000e+00]
[-9.39754484e-01 1.58090267e+00 1.00000000e+00]
[ 3.11377782e-01 2.52588156e-01 1.00000000e+00]
[-4.10040955e-01 -4.74421401e-01 1.00000000e+00]
[-5.12220056e-01 1.11984887e+00 1.00000000e+00]
[ 4.17228852e-01 9.09729855e-01 1.00000000e+00]
[ 1.08767092e+00 1.29236059e+00 1.00000000e+00]
[ 9.59573699e-01 2.89272307e-01 1.00000000e+00]
[ 1.69320697e+00 1.07906010e+00 1.00000000e+00]
[ 1.68300363e-01 6.07113267e-01 1.00000000e+00]
[-6.68618860e-01 -4.27861946e-01 1.00000000e+00]
[-5.86223084e-01 1.22675304e+00 1.00000000e+00]
[-5.76124791e-01 1.34604942e+00 1.00000000e+00]
[-1.02018916e+00 2.04110063e+00 1.00000000e+00]
[ 7.12450098e-01 -4.33823523e-01 1.00000000e+00]
[ 4.16549378e-01 1.14947983e+00 1.00000000e+00]
[-6.83890616e-01 1.68762216e+00 1.00000000e+00]
[ 4.96645671e-01 1.65249243e+00 1.00000000e+00]
[ 1.80945562e-01 9.83842582e-01 1.00000000e+00]
[-8.84160847e-02 1.00314282e+00 1.00000000e+00]
[ 1.43619158e-01 6.84949970e-01 1.00000000e+00]
[-2.93212780e-01 2.04720621e-01 1.00000000e+00]
[ 2.72605389e-01 -2.09822804e-01 1.00000000e+00]
[-3.50976858e-01 -6.58310214e-01 1.00000000e+00]
[-2.82840811e-01 6.54616964e-01 1.00000000e+00]
[ 2.12041412e-02 5.54402734e-01 1.00000000e+00]
[ 2.19881803e-01 6.36903139e-01 1.00000000e+00]
[-2.79442651e-01 1.12025231e+00 1.00000000e+00]
[ 2.31238637e-01 2.29996808e+00 1.00000000e+00]
[ 5.07774221e-01 1.29769317e+00 1.00000000e+00]
[-2.20274847e+00 1.28029460e+00 1.00000000e+00]
[-2.47635004e+00 1.35286181e+00 1.00000000e+00]
[-2.30774566e+00 1.90788288e+00 1.00000000e+00]
[-1.76619912e+00 1.63905692e+00 1.00000000e+00]
[-1.12606000e+00 7.88500662e-02 1.00000000e+00]
[-1.37344854e+00 1.88990250e+00 1.00000000e+00]
[-1.45953299e+00 1.92200292e+00 1.00000000e+00]
[-1.43761359e-01 1.36801176e+00 1.00000000e+00]
[-4.69877077e-01 7.34114023e-01 1.00000000e+00]
[-1.30587820e-01 1.19570936e+00 1.00000000e+00]
[-1.63663043e+00 -1.57090387e-01 1.00000000e+00]
[ 2.01495125e+00 -1.06284460e+00 2.00000000e+00]
[ 1.31081515e+00 -1.22104712e+00 2.00000000e+00]
[ 1.57458044e+00 -7.40296908e-01 2.00000000e+00]
[ 3.21410689e+00 4.75878198e-01 2.00000000e+00]
[ 1.51855728e+00 -5.08134008e-01 2.00000000e+00]
[ 1.08218434e+00 -1.21480627e+00 2.00000000e+00]
[ 1.45455514e+00 -9.77348529e-01 2.00000000e+00]
[ 3.65374643e+00 -1.55457590e+00 2.00000000e+00]
[ 2.85508824e+00 -1.73725338e+00 2.00000000e+00]
[ 1.61969309e+00 4.49133366e-01 2.00000000e+00]
[ 1.22067265e+00 -1.10794750e+00 2.00000000e+00]
[ 2.40254121e+00 -6.78343960e-01 2.00000000e+00]
[ 3.14930478e+00 -1.21732254e+00 2.00000000e+00]
[ 3.34580570e+00 1.56757898e-01 2.00000000e+00]
[ 3.13883814e+00 -4.61633748e-01 2.00000000e+00]
[ 2.68374475e+00 7.12705640e-02 2.00000000e+00]
[ 2.92399167e+00 8.02152919e-01 2.00000000e+00]
[ 3.10525430e+00 -1.60055511e+00 2.00000000e+00]
[ 3.90229808e+00 -1.44413946e+00 2.00000000e+00]
[ 3.91439739e+00 -9.86981955e-01 2.00000000e+00]
[ 3.73410896e+00 -3.95058302e-01 2.00000000e+00]
[ 2.87115376e+00 1.50162347e-01 2.00000000e+00]
[ 2.76625113e+00 -1.63619557e-01 2.00000000e+00]
[ 2.43991037e+00 -8.05270579e-01 2.00000000e+00]
[ 2.59969781e+00 -2.55916301e+00 2.00000000e+00]
[ 1.62946906e+00 -8.43597755e-01 2.00000000e+00]
[ 3.12374562e+00 -6.47295970e-01 2.00000000e+00]
[ 3.04436296e+00 -5.91934987e-01 2.00000000e+00]
[ 2.39614220e+00 -8.01536018e-01 2.00000000e+00]
[ 2.72578037e+00 1.07136669e-01 2.00000000e+00]
[ 1.04061802e+00 -8.07669215e-02 2.00000000e+00]
[ 2.22834507e+00 9.70938825e-01 2.00000000e+00]
[ 2.89738388e+00 -3.43553659e-01 2.00000000e+00]
[ 2.87730602e+00 -8.54519934e-01 2.00000000e+00]
[ 2.83907436e+00 -4.51804879e-01 2.00000000e+00]
[ 2.55878020e+00 4.64391279e-01 2.00000000e+00]
[ 3.10129102e+00 3.41126034e-01 2.00000000e+00]
[ 1.89887067e+00 -4.31812470e-01 2.00000000e+00]
[ 3.75715259e+00 -1.12554270e+00 2.00000000e+00]
[ 2.53250542e+00 6.41239297e-01 2.00000000e+00]
[ 2.50734257e+00 6.64747844e-01 2.00000000e+00]
[ 2.71830348e+00 -7.44443783e-01 2.00000000e+00]
[ 3.10714154e+00 8.63467336e-01 2.00000000e+00]
[ 2.36902440e+00 -2.21624158e+00 2.00000000e+00]
[ 3.85791620e+00 -1.28524605e+00 2.00000000e+00]
[ 3.13317702e+00 3.07730311e-02 2.00000000e+00]
[ 2.61527511e+00 1.05597945e-01 2.00000000e+00]
[ 3.06332703e+00 -1.93595682e-01 2.00000000e+00]
[ 2.81382910e+00 -1.16365861e-02 2.00000000e+00]
[ 3.56840116e+00 -6.06363329e-01 2.00000000e+00]
[ 3.25214117e+00 -1.54582207e+00 2.00000000e+00]
[ 2.05902795e+00 -5.31818002e-01 2.00000000e+00]
[ 6.61512408e-01 -8.69013563e-01 2.00000000e+00]
[ 2.55220807e+00 6.40913338e-01 2.00000000e+00]
[ 5.23756958e-01 1.36681101e+00 2.00000000e+00]
[ 2.75956906e+00 -2.24681022e-01 2.00000000e+00]
[ 2.36085010e+00 -1.25724289e+00 2.00000000e+00]
[ 2.36675277e+00 7.74055116e-01 2.00000000e+00]
[ 3.41836661e+00 -1.63484916e-01 2.00000000e+00]
[ 1.58157930e+00 -4.00975347e-01 2.00000000e+00]
[ 2.60510756e+00 7.36552494e-01 2.00000000e+00]
[ 2.79623411e+00 2.52898772e-01 2.00000000e+00]
[ 6.78692622e-01 -8.21999772e-01 2.00000000e+00]
[ 8.98290745e-01 -8.41914925e-01 2.00000000e+00]
[ 8.92088242e-01 -7.52036765e-01 2.00000000e+00]
[ 5.46588027e-01 7.71777409e-01 2.00000000e+00]
[ 2.01958591e+00 -4.40127851e-01 2.00000000e+00]
[ 7.69862933e-01 -4.65062109e-01 2.00000000e+00]
[ 6.81508816e-01 -2.26215406e-01 2.00000000e+00]
[ 1.39866807e+00 -2.27599970e-01 2.00000000e+00]
[-1.21034702e+00 -1.21844476e+00 3.00000000e+00]
[-9.72382014e-01 -1.79426877e+00 3.00000000e+00]
[-1.14163778e+00 -1.10550338e+00 3.00000000e+00]
[-1.67771556e+00 -6.21674559e-01 3.00000000e+00]
[-2.23947735e+00 -1.05746035e+00 3.00000000e+00]
[-2.70642495e+00 -2.16935210e+00 3.00000000e+00]
[-2.31559666e+00 2.55031815e-01 3.00000000e+00]
[-1.66724605e+00 3.18948132e-02 3.00000000e+00]
[-1.35435096e+00 -1.25967652e-01 3.00000000e+00]
[-2.87661039e+00 -1.91792628e+00 3.00000000e+00]
[-1.93759871e+00 -9.54858647e-01 3.00000000e+00]
[-2.28272305e+00 -2.43841274e+00 3.00000000e+00]
[-1.78000938e+00 -1.24914720e+00 3.00000000e+00]
[-2.64475037e+00 -3.46462722e-01 3.00000000e+00]
[-2.29183135e+00 -5.96543617e-01 3.00000000e+00]
[-2.68595747e+00 -1.74821031e+00 3.00000000e+00]
[-2.25155218e+00 5.16837557e-01 3.00000000e+00]
[-2.32083046e+00 -1.75988165e+00 3.00000000e+00]
[-2.31073350e+00 -1.64431728e+00 3.00000000e+00]
[-2.40597742e+00 -1.62803966e+00 3.00000000e+00]
[-1.25760411e+00 -3.63739993e-01 3.00000000e+00]
[-1.75004881e+00 -5.58427927e-01 3.00000000e+00]
[-2.17274349e+00 -8.41583996e-01 3.00000000e+00]
[-1.53251859e+00 -3.66200324e-01 3.00000000e+00]
[-2.68711644e+00 -1.04075531e+00 3.00000000e+00]
[-1.56015053e+00 1.43966387e+00 3.00000000e+00]
[-1.72135321e+00 -9.08999539e-01 3.00000000e+00]
[-1.59300643e+00 -4.47653815e-01 3.00000000e+00]
[-2.63789053e+00 -8.63765768e-01 3.00000000e+00]
[-2.58858438e+00 -2.25652355e-01 3.00000000e+00]
[-2.81022135e+00 -2.31070850e+00 3.00000000e+00]
[-2.58615011e+00 -1.43566306e+00 3.00000000e+00]
[-2.61153061e+00 -3.81086524e-01 3.00000000e+00]
[-2.61638025e+00 -1.18891267e+00 3.00000000e+00]
[-2.90856756e+00 -1.78054690e+00 3.00000000e+00]
[-2.56412689e+00 -1.73942614e-01 3.00000000e+00]
[-2.65772062e+00 -1.14987700e+00 3.00000000e+00]
[-2.98384388e+00 -7.84428634e-01 3.00000000e+00]
[-2.56095937e+00 -1.06026477e+00 3.00000000e+00]
[-1.52319446e+00 6.85259249e-01 3.00000000e+00]
[-2.62935521e+00 -2.34269440e-01 3.00000000e+00]
[-1.51633357e+00 -2.11876301e-01 3.00000000e+00]
[-1.81521069e+00 -4.70437028e-01 3.00000000e+00]
[-2.19921466e+00 -5.60704218e-01 3.00000000e+00]
[-1.27171610e+00 -1.43117999e+00 3.00000000e+00]
[-2.40059340e+00 -1.07934425e+00 3.00000000e+00]
[-2.17946423e+00 -1.63591070e+00 3.00000000e+00]
[-2.87721231e+00 -4.02189311e-01 3.00000000e+00]
[-2.69616507e+00 -1.46127591e+00 3.00000000e+00]
[-2.93461521e+00 1.12251851e-01 3.00000000e+00]
[-2.87854197e+00 -9.79876707e-01 3.00000000e+00]
[-2.37653259e+00 1.10706341e-01 3.00000000e+00]
[-1.62417577e+00 7.19498519e-01 3.00000000e+00]
[-2.92339679e+00 -1.14543867e+00 3.00000000e+00]
[-1.75144013e+00 1.50273186e-01 3.00000000e+00]
[-1.42665713e+00 4.32224438e-01 3.00000000e+00]
[-1.54305351e+00 -1.13369088e-01 3.00000000e+00]
[-9.76140476e-01 2.65584524e-01 3.00000000e+00]
[-1.43610871e+00 -1.12004829e-01 3.00000000e+00]
[-1.26454006e+00 1.75996062e+00 3.00000000e+00]
[-1.82158847e+00 -5.80005547e-01 3.00000000e+00]
[-1.41686337e+00 2.14893176e+00 3.00000000e+00]
[-2.51329017e+00 5.27298508e-01 3.00000000e+00]
[-1.70576570e+00 -1.32649466e+00 3.00000000e+00]
[-1.81440163e+00 9.71683324e-02 3.00000000e+00]
[-1.76471606e+00 8.75617095e-01 3.00000000e+00]
[-2.42639769e+00 -1.96172694e-01 3.00000000e+00]
[-1.29998347e+00 -1.28667583e+00 3.00000000e+00]
[-2.09559657e+00 1.10920351e-01 3.00000000e+00]
[-1.75330579e+00 -5.14756105e-01 3.00000000e+00]]
# kmeans
class MyKmeans:
    """Minimal K-means clustering (Lloyd's algorithm) on NumPy arrays.

    Args:
        k: Number of clusters.
        n: Maximum number of assign/update iterations (default 20).
    """

    def __init__(self, k, n=20):
        self.k = k
        self.n = n

    def fit(self, x, centers=None):
        """Cluster the rows of ``x`` into ``self.k`` groups.

        Args:
            x: 2-D array-like, one sample per row.
            centers: Optional initial centers, one per row (``k`` rows).
                When omitted, ``k`` distinct rows of ``x`` are drawn at
                random. The caller's array is never modified.

        Returns:
            A tuple ``(points_set, centers)``. ``points_set`` maps each
            cluster index ``0..k-1`` to the list of samples assigned to it
            in the last assignment step; ``centers`` is a float array with
            the final cluster centers.

        Raises:
            ValueError: If no initial centers are given and ``k`` exceeds
                the number of samples.
        """
        x = np.asarray(x)

        # Step 1: choose K starting centers, or use the ones supplied.
        if centers is None:
            if self.k > len(x):
                raise ValueError(
                    "k={} exceeds the number of samples ({})".format(self.k, len(x))
                )
            # Sample WITHOUT replacement: duplicated starting centers would
            # leave one of the clusters empty from the first iteration on.
            idx = np.random.choice(len(x), size=self.k, replace=False)
            centers = x[idx]
        # Work on a private float copy so the caller's array is untouched and
        # integer inputs do not truncate the computed means.
        centers = np.array(centers, dtype=float)

        points_set = {key: [] for key in range(self.k)}
        for _ in range(self.n):
            # Step 2: assign every point to its nearest center. Squared
            # distances are enough for argmin, so the square root is skipped.
            diff = x[:, np.newaxis, :] - centers[np.newaxis, :, :]
            nearest = np.argmin((diff ** 2).sum(axis=2), axis=1)
            points_set = {key: list(x[nearest == key]) for key in range(self.k)}

            # Step 3: move each center to the mean of its assigned points.
            new_centers = centers.copy()
            for i_k, members in points_set.items():
                if members:
                    new_centers[i_k] = np.mean(members, axis=0)
                # An empty cluster keeps its previous center instead of
                # dividing by zero.

            # Unchanged centers are a fixed point: further iterations would
            # reproduce exactly the same assignment, so stop early.
            if np.array_equal(new_centers, centers):
                break
            centers = new_centers

        return points_set, centers
# Split the processed samples into three groups by their true class label.
label = Dataset[np.where(labels == 1)]
label2 = Dataset[np.where(labels == 2)]
label3 = Dataset[np.where(labels == 3)]

# Run K-means on the 2-D PCA data for k = 1..5, printing the centers found
# for each k.
# NOTE(review): the original indentation was lost; the nesting below (one
# figure per k, centers drawn on top of the labelled samples) is the reading
# that makes the script valid -- confirm against the author's intent.
for k in range(1, 6):
    mykmean = MyKmeans(k, 1000)
    points_set, centers = mykmean.fit(DRdata)
    print(centers)

    # Samples are coloured by their TRUE label, not by the K-means assignment.
    plt.scatter(label[:, 0], label[:, 1], color='green')
    plt.scatter(label2[:, 0], label2[:, 1], color='red')
    plt.scatter(label3[:, 0], label3[:, 1], color='blue')

    # Overlay each cluster center as a large triangle in its own cycle colour.
    for ix, p in enumerate(centers):
        plt.scatter(p[0], p[1], color='C{}'.format(ix), marker='^', edgecolor='black', s=256)
    plt.show()

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐
所有评论(0)