2016-09-23 3 views
-1

2次元のk平均法(K-Means)アルゴリズムを作成しました。これを8次元に対応させたいと考えています。つまり、データポイントが8次元の値を取り、最終的に8次元の重心値を返すようにしたいのです。このK-Meansクラスタリングアルゴリズムを8次元まで拡張するには、どのように修正すればよいでしょうか?

コードは以下の通りです:

import random 
import math 

# Input variables.
# k and Threshold are supplied as arguments to Kmeans() (e.g. k = 3, Threshold = 1).
# Each row of DATA is one sample with eight numeric values.
DATA = [
    [2, 1, 1, 2, 1, 1, 1, 5],
    [6, 8, 1, 3, 4, 3, 7, 1],
    [4, 1, 3, 2, 1, 3, 1, 1],
    [3, 1, 1, 2, 1, 2, 1, 1],
    [3, 1, 1, 1, 1, 2, 1, 1],
    [6, 1, 1, 1, 1, 7, 1, 1],
    [6, 10, 2, 8, 10, 7, 3, 3],
]

# Starting value for "best distance so far" when searching for the nearest centroid.
BIG_NUMBER = 10.0 ** 10
# Module-level state: the DataPoint objects and Centroid objects built by the
# functions below.
data = list()
centroids = list()

class DataPoint:
    """A 2-D sample plus the index of the cluster it is currently assigned to."""

    def __init__(self, x, y):
        """Store the coordinates; the point starts without a cluster."""
        self.x = x
        self.y = y
        # Defined up front so get_cluster() returns None instead of raising
        # AttributeError when it is called before set_cluster().
        self.clusterNumber = None

    def set_x(self, x):
        """Replace the x coordinate."""
        self.x = x

    def get_x(self):
        """Return the x coordinate."""
        return self.x

    def set_y(self, y):
        """Replace the y coordinate."""
        self.y = y

    def get_y(self):
        """Return the y coordinate."""
        return self.y

    def set_cluster(self, clusterNumber):
        """Record the index of the cluster this point belongs to."""
        self.clusterNumber = clusterNumber

    def get_cluster(self):
        """Return the assigned cluster index, or None if none was assigned yet."""
        return self.clusterNumber

class Centroid:
    """The 2-D position of one cluster centre."""

    def __init__(self, x, y):
        """Store the centroid's coordinates."""
        self.x = x
        self.y = y

    def set_x(self, x):
        """Replace the x coordinate."""
        self.x = x

    def get_x(self):
        """Return the x coordinate."""
        return self.x

    def set_y(self, y):
        """Replace the y coordinate."""
        self.y = y

    def get_y(self):
        """Return the y coordinate."""
        return self.y

# Initializing The Centroids 



def initialize_centroids(k, DATA):
    """Append k randomly placed centroids to the module-level ``centroids`` list.

    Each centroid is drawn uniformly from the bounding box spanned by the
    first two components of the rows in DATA, which are the same two
    components initialize_datapoints() reads.

    Args:
        k: Number of centroids to create.
        DATA: Non-empty sequence of rows, each holding at least two numbers.

    Returns:
        The module-level ``centroids`` list, including the new entries.

    Raises:
        ValueError: If DATA is empty (min/max of an empty sequence).
    """
    # Index the components explicitly: ``for x, y in DATA`` raises ValueError
    # as soon as a row carries more than two values.
    xs = [row[0] for row in DATA]
    ys = [row[1] for row in DATA]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)

    # Pick a random position inside the data range for every centroid.
    for _ in range(k):
        random_x = random.uniform(min_x, max_x)
        random_y = random.uniform(min_y, max_y)
        centroids.append(Centroid(random_x, random_y))

    return centroids

# Assigning Datapoints to nearest Centroids 

def initialize_datapoints(k, DATA):
    """Wrap every row of DATA in a DataPoint and assign it to its nearest centroid.

    Only the first two components of each row are used. Every new point is
    appended to the module-level ``data`` list, and its cluster is set to the
    index of the closest of the first k entries of ``centroids``.
    """
    for row in DATA:
        point = DataPoint(row[0], row[1])
        data.append(point)

        closest = BIG_NUMBER
        for idx in range(k):
            candidate = centroids[idx]
            gap = get_distance(
                point.get_x(), point.get_y(), candidate.get_x(), candidate.get_y()
            )
            # Strict comparison: on a tie the lower centroid index wins.
            if gap < closest:
                closest = gap
                point.set_cluster(idx)

# Calculating Euclidean distance 

def get_distance(dataPointX, dataPointY, centroidX, centroidY):
    """Return the Euclidean distance between a 2-D data point and a 2-D centroid."""
    delta_y = centroidY - dataPointY
    delta_x = centroidX - dataPointX
    squared_sum = math.pow(delta_y, 2) + math.pow(delta_x, 2)
    return math.sqrt(squared_sum)

# Updating Centroid and Clusters till the threshold is met 

def update_centroids_n_clusters(k, DATA, Threshold):
    """Recompute centroids and reassign points until the centroids stop moving.

    Each of the first k centroids is moved to the mean position of the points
    in ``data`` assigned to it; a centroid without points stays where it is.
    If the average distance the centroids moved is at least Threshold, every
    point is reassigned to its nearest centroid and the function calls itself
    again.
    """
    total_shift = 0.0
    for idx in range(k):
        centroid = centroids[idx]
        old_x = centroid.get_x()
        old_y = centroid.get_y()

        members = [point for point in data if point.get_cluster() == idx]
        sum_x = 0
        sum_y = 0
        for point in members:
            sum_x += point.get_x()
            sum_y += point.get_y()

        count = len(members)
        if count > 0:
            centroid.set_x(sum_x / count)
            centroid.set_y(sum_y / count)

        # Distance between the centroid's new and previous position.
        total_shift += get_distance(centroid.get_x(), centroid.get_y(), old_x, old_y)

    mean_shift = (1/k)*total_shift

    # Average movement fell below the threshold: nothing more to do.
    if not (mean_shift >= Threshold):
        return

    for i in range(len(DATA)):
        point = data[i]
        nearest = 0
        shortest = BIG_NUMBER
        for idx in range(k):
            gap = get_distance(
                point.get_x(), point.get_y(),
                centroids[idx].get_x(), centroids[idx].get_y(),
            )
            if gap < shortest:
                shortest = gap
                nearest = idx
        point.set_cluster(nearest)

    # Run another update pass with the new assignments.
    update_centroids_n_clusters(k, DATA, Threshold)

# Performing K_Means 

def Kmeans(k, DATA, Threshold):
    """Run the full clustering pipeline and print a summary for each cluster.

    Initialises the centroids and data points, iterates until the centroid
    movement drops below Threshold, then prints every centroid's position and
    the number of points assigned to it.
    """
    initialize_centroids(k, DATA)
    initialize_datapoints(k, DATA)
    update_centroids_n_clusters(k, DATA, Threshold)

    for idx in range(k):
        print()
        print("Centroid ", idx, " is at")
        centroid = centroids[idx]
        print("(",centroid.get_x(), ",", centroid.get_y(), ")")

        print("Cluster ", idx, " includes:")
        size = sum(1 for j in range(len(DATA)) if data[j].get_cluster() == idx)
        print(size,"points")

# Cluster the sample data into 3 groups with a convergence threshold of 0.1.
Kmeans(k=3, DATA=DATA, Threshold=0.1)

このコードで、Centroid クラスと DataPoint クラスをどのように変更すればよいでしょうか?ありがとうございます!!

注:コードは Python 3 で書かれています。

答えて

1

x と y の代わりに配列を使用してください。

def distance(array1, array2):
    """Return the Euclidean distance between two points of equal dimension.

    Works for any number of dimensions and for any sized sequence of numbers
    (lists, tuples, 1-D arrays), so the same function serves 2-D and 8-D data.

    Args:
        array1: Coordinates of the first point.
        array2: Coordinates of the second point.

    Returns:
        The distance as a float.

    Raises:
        ValueError: If the two points differ in number of coordinates.
    """
    if len(array1) != len(array2):
        raise ValueError("points must have the same number of dimensions")
    # Sum the squared per-coordinate differences and take the root; returning
    # the squared differences themselves would not be a distance.
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(array1, array2)))

(numpy を使用していると仮定した場合)

上記の関数を距離関数として使用してください。
関連する問題