最近开始学习 NRL,其中需要用到 sklearn 包中的聚类算法。结合网上的博客,尝试用 KMeans 写了一个简单的聚类示例。
程序源码:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
def kmeans_building(x1, x2, types_num, types, colors, shapes, random_state=None):
    """Cluster 2-D points with KMeans, plot the clusters, and return them.

    The points are drawn on the current matplotlib figure with one
    colour/marker pair per cluster. The cluster centres are drawn afterwards
    with the same colour/marker and carry the legend labels. The fitted
    label array is printed, then the legend is added and the figure is shown
    with ``plt.show()``.

    Args:
        x1: Sequence of x-coordinates.
        x2: Sequence of y-coordinates; must have the same length as ``x1``.
        types_num: Number of clusters to fit (passed as ``n_clusters``).
        types: Legend label for each cluster, indexed by cluster id.
        colors: Matplotlib colour spec for each cluster, indexed by cluster id.
        shapes: Matplotlib marker for each cluster, indexed by cluster id.
        random_state: Optional seed forwarded to ``KMeans``. The default
            ``None`` keeps the original unseeded behaviour; pass an int to
            get the same cluster numbering on every run.

    Returns:
        A tuple ``(kmeans_model, x1_result, x2_result)``: the fitted model
        and two lists of ``types_num`` lists, where ``x1_result[k]`` and
        ``x2_result[k]`` hold the x- and y-coordinates assigned to cluster
        ``k``, in input order.

    Raises:
        ValueError: If ``x1`` and ``x2`` differ in length.
    """
    # zip() would silently drop the surplus coordinates, so reject
    # mismatched inputs before any data is lost.
    if len(x1) != len(x2):
        raise ValueError(
            f"x1 and x2 must have the same length, got {len(x1)} and {len(x2)}"
        )

    # One row per point, columns are (x, y).
    points = np.column_stack((x1, x2))
    kmeans_model = KMeans(n_clusters=types_num, random_state=random_state).fit(points)

    # Bucket the original coordinates by the cluster each point was assigned to.
    x1_result = [[] for _ in range(types_num)]
    x2_result = [[] for _ in range(types_num)]
    print(kmeans_model.labels_)
    for x, y, cluster in zip(x1, x2, kmeans_model.labels_):
        x1_result[cluster].append(x)
        x2_result[cluster].append(y)

    # Draw each cluster with a single scatter call instead of one call per
    # point. `color=` is used rather than `c=` because `c` may be read as
    # per-point data values when its length matches the number of points.
    for cluster, (xs, ys) in enumerate(zip(x1_result, x2_result)):
        plt.scatter(xs, ys, color=colors[cluster], marker=shapes[cluster])

    # Draw the cluster centres; they carry the legend entries.
    for cluster, (center_x, center_y) in enumerate(kmeans_model.cluster_centers_):
        plt.scatter(
            center_x,
            center_y,
            color=colors[cluster],
            marker=shapes[cluster],
            label=types[cluster],
        )

    plt.legend()
    plt.show()
    return kmeans_model, x1_result, x2_result
# Open the 8x6 figure that kmeans_building() draws onto.
plt.figure(figsize=(8, 6))

# Sample points as parallel coordinate lists:
# x1 holds the x-coordinates, x2 the matching y-coordinates.
x1 = [1, 2, 3, 1, 5, 6, 5, 5, 6, 7, 8, 9, 7, 9]
x2 = [1, 3, 2, 2, 8, 6, 7, 6, 7, 1, 2, 1, 1, 3]

# Per-cluster presentation. The data is split into 3 clusters, so each
# list has 3 entries: legend name, colour and marker shape.
labels = ['A', 'B', 'C']
colors = ['b', 'g', 'r']
shapes = ['o', 's', 'D']

# Fit and plot 3 clusters, then print the fitted model and the
# per-cluster x / y coordinates, one per line.
kmeans_model, x1_result, x2_result = kmeans_building(
    x1, x2, types_num=3, types=labels, colors=colors, shapes=shapes
)
print(kmeans_model, x1_result, x2_result, sep="\n")
运行结果: