分级聚类就是一棵树
假如我们有如下一张图
那么通过聚类之后形成一棵如下的树:
现在就分好了级,而且还能看出距离关系:很明显 a、b 之间的距离比 d、e 之间的距离要短
#coding:utf-8
import os
import sys
import chardet
from math import sqrt
from PIL import Image, ImageDraw
import random
def readFile(fileName):
    """Load a tab-separated numeric matrix from a text file.

    The first line is the header; its first cell is ignored and the
    remaining cells become the column names. Every following non-blank
    line supplies a row name in its first cell and numeric values in the
    rest.

    Args:
        fileName: Path of the tab-separated file to read.

    Returns:
        A tuple ``(rowNames, colNames, data)`` where ``rowNames`` and
        ``colNames`` are lists of strings and ``data`` is a list of
        lists of floats, one inner list per row.

    Raises:
        IndexError: If the file contains no lines at all.
        ValueError: If a data cell cannot be converted to ``float``.
    """
    # ``open`` inside ``with`` works on Python 2 and 3 and guarantees the
    # handle is closed; the ``file`` builtin exists on Python 2 only.
    with open(fileName) as handle:
        lines = handle.readlines()

    colNames = lines[0].strip().split('\t')[1:]
    rowNames = []
    data = []
    for line in lines[1:]:
        stripped = line.strip()
        # Blank lines (e.g. a trailing newline at end of file) would
        # otherwise become rows with an empty name and an empty vector.
        if not stripped:
            continue
        cells = stripped.split('\t')
        rowNames.append(cells[0])
        data.append([float(x) for x in cells[1:]])
    return rowNames, colNames, data
def pearsonBeta(v1, v2):
    """Return the Pearson distance ``1 - r`` between two numeric vectors.

    ``r`` is the Pearson correlation computed from the sums, sums of
    squares and sum of products of the two vectors, so perfectly
    correlated vectors give 0.0 and perfectly anti-correlated vectors
    give 2.0.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers; expected to have the same length as v1.

    Returns:
        ``1.0 - r`` as a float, or ``0`` when the correlation is
        undefined (both vectors empty, or either vector has no variance).

    Raises:
        IndexError: If ``v2`` is shorter than ``v1``.
    """
    if len(v1) == 0 and len(v2) == 0:
        return 0

    # Float sizes force true division even on Python 2 with integer
    # inputs, where ``/`` would otherwise truncate.
    n1 = float(len(v1))
    n2 = float(len(v2))

    sum1 = sum(v1)
    sum2 = sum(v2)
    sum1Sq = sum([pow(v, 2) for v in v1])
    sum2Sq = sum([pow(v, 2) for v in v2])
    # Indexed on purpose: a shorter v2 must raise IndexError rather than
    # be silently truncated the way zip() would.
    pSum = sum([v1[i] * v2[i] for i in range(len(v1))])

    nums = pSum - (sum1 * sum2 / n1)
    var1 = sum1Sq - pow(sum1, 2) / n1
    var2 = sum2Sq - pow(sum2, 2) / n2
    # Rounding can push a (mathematically non-negative) variance term
    # slightly below zero; treat that the same as zero variance instead
    # of letting sqrt() raise or produce a meaningless ratio.
    if var1 <= 0 or var2 <= 0:
        return 0
    den = sqrt(var1 * var2)
    if den == 0:
        return 0
    return 1.0 - nums / den
#距离函数
def pearson(v1, v2):
sum1 = sum(v1)
sum2 = sum(v2)
eSum1 =