1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
| from numpy import * from operator import itemgetter import matplotlib.pyplot as plt
def file2matrix(filename):
    """Load a whitespace-separated sample file into features and labels.

    The first line of the file is treated as a header and skipped. From each
    remaining non-blank line the first two fields become one feature row and
    the last field, converted to ``int`` and incremented by one, becomes the
    class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabel)`` where ``returnMat`` is an
        ``(n, 2)`` float array and ``classLabel`` is a list of ``n`` ints.

    Raises:
        ValueError: If a feature or label field cannot be parsed as a number.
    """
    # The context manager closes the handle even when parsing fails.
    with open(filename) as handle:
        # Skip the header, ignore blank lines (e.g. a trailing newline) and
        # split on any run of whitespace so tabs/double spaces are accepted.
        rows = [line.split() for line in handle.readlines()[1:] if line.strip()]

    returnMat = zeros((len(rows), 2))
    classLabel = []
    for index, fields in enumerate(rows):
        returnMat[index, :] = [float(value) for value in fields[0:2]]
        # Labels are shifted by one relative to the value stored in the file.
        classLabel.append(int(fields[-1]) + 1)
    return returnMat, classLabel


def createDataSet():
    """Return a tiny hard-coded data set for trying out the classifier.

    Returns:
        A tuple ``(group, labels)``: a ``(4, 2)`` array of points and the
        list of their class labels ``[1, 1, 2, 2]``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = [1, 1, 2, 2]
    return group, labels
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest neighbours.

    Distances are Euclidean. When several labels receive the same number of
    votes, the label that was met first while walking the neighbours from
    nearest to farthest wins.

    Args:
        inX: Feature vector to classify (array-like, one value per column
            of ``dataSet``).
        dataSet: Training samples, one row per sample (array-like).
        labels: Sequence of class labels, ``labels[i]`` belonging to row
            ``i`` of ``dataSet``.
        k: Number of neighbours that vote. Values larger than the number of
            training samples are clamped to that number.

    Returns:
        The label with the most votes.

    Raises:
        ValueError: If ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    samples = asarray(dataSet, dtype=float)
    sampleCount = samples.shape[0]
    if sampleCount == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    # Asking for more neighbours than exist would index past the end of the
    # sorted distances, so vote with every available sample instead.
    if k > sampleCount:
        k = sampleCount

    # Broadcasting subtracts inX from every row without building a tiled copy.
    diffMat = samples - asarray(inX, dtype=float)
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    nearest = distances.argsort()[:k]

    classCount = {}
    for sampleIndex in nearest:
        voteLabel = labels[sampleIndex]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1
    # sorted() is stable, so ties keep the nearest-first insertion order.
    ranked = sorted(classCount.items(), key=itemgetter(1), reverse=True)
    return ranked[0][0]
def ClassTest(filename='D:\\MLinAction\\Data\\xiguaalpha.txt', testratio=0.15, k=3):
    """Run a hold-out evaluation of the kNN classifier and print the results.

    The first ``int(m * testratio)`` samples of the file are used as test
    samples; the remaining samples form the training set. For every test
    sample the predicted and the true label are printed, followed by the
    overall error rate.

    Args:
        filename: Path of the data file passed to ``file2matrix``. Defaults
            to the location this script was originally written for.
        testratio: Fraction of the samples (taken from the start of the
            file) to hold out for testing.
        k: Number of neighbours passed to ``classify0``.
    """
    DataMat, Labels = file2matrix(filename)
    m = DataMat.shape[0]
    numTest = int(m * testratio)

    # The training split is the same for every test sample, so slice it once.
    trainMat = DataMat[numTest:m, :]
    trainLabels = Labels[numTest:m]

    errorCount = 0
    for i in range(numTest):
        classifierRes = classify0(DataMat[i, :], trainMat, trainLabels, k)
        print("分类器分类为: %d, 真实分类为 %d" % (classifierRes, Labels[i]))
        if classifierRes != Labels[i]:
            errorCount += 1

    # With very small files the hold-out set can be empty; report 0 rather
    # than dividing by zero.
    errorRate = errorCount / float(numTest) if numTest else 0.0
    print("错误率: %.5f" % errorRate)
# Evaluate the classifier on the hold-out split and print the error rate.
ClassTest()

# Load the full data set again and show it as a scatter plot in which both
# the marker size and the marker colour are proportional to the class label.
group, labels = file2matrix('D:\\MLinAction\\Data\\xiguaalpha.txt')
plt.scatter(
    group[:, 0],
    group[:, 1],
    s=35.0 * array(labels),
    c=35.0 * array(labels),
)
plt.show()
|