TIF_E41201747/testing.py

194 lines
10 KiB
Python

import math
import numpy as np
# Sample product records used as clustering input: a list of dictionaries, each
# carrying 'id_product', 'sold', 'rating_score', 'count_review' and a 'cluster'
# field that starts at 0 and is overwritten by the clustering functions below.
datas = [
{'id_product': '35509867', 'sold': 367, 'rating_score': 4.897, 'count_review': 198, 'cluster': 0},
{'id_product': '35509877', 'sold': 0, 'rating_score': 0, 'count_review': 0, 'cluster': 0},
{'id_product': '35507877', 'sold': 1250, 'rating_score': 4.567, 'count_review': 987, 'cluster': 0},
{'id_product': '35589857', 'sold': 20, 'rating_score': 4.9, 'count_review': 18, 'cluster': 0},
{'id_product': '35309877', 'sold': 128, 'rating_score': 4.708, 'count_review': 99, 'cluster': 0},
{'id_product': '35509887', 'sold': 2008, 'rating_score': 4.67, 'count_review': 1702, 'cluster': 0},
{'id_product': '33569877', 'sold': 367, 'rating_score': 4.897, 'count_review': 0, 'cluster': 0},
{'id_product': '35509877', 'sold': 30, 'rating_score': 4.897, 'count_review': 19, 'cluster': 0},
{'id_product': '35519807', 'sold': 1, 'rating_score': 0, 'count_review': 0, 'cluster': 0},
{'id_product': '35539872', 'sold': 188, 'rating_score': 4.755, 'count_review': 184, 'cluster': 0},
{'id_product': '35509571', 'sold': 0, 'rating_score': 0, 'count_review': 0, 'cluster': 0},
{'id_product': '35709677', 'sold': 3, 'rating_score': 5, 'count_review': 2, 'cluster': 0},
{'id_product': '35579822', 'sold': 45, 'rating_score': 4.865, 'count_review': 24, 'cluster': 0},
{'id_product': '35509867', 'sold': 367, 'rating_score': 4.897, 'count_review': 198, 'cluster': 0},
{'id_product': '35509877', 'sold': 340, 'rating_score': 4.634, 'count_review': 298, 'cluster': 0},
{'id_product': '35507877', 'sold': 1098, 'rating_score': 4.497, 'count_review': 1023, 'cluster': 0},
{'id_product': '35589857', 'sold': 16, 'rating_score': 4.9, 'count_review': 12, 'cluster': 0},
{'id_product': '35309877', 'sold': 156, 'rating_score': 4.8, 'count_review': 88, 'cluster': 0},
{'id_product': '35509887', 'sold': 10, 'rating_score': 4.8, 'count_review': 9, 'cluster': 0},
{'id_product': '33569877', 'sold': 2, 'rating_score': 0, 'count_review': 0, 'cluster': 0},
{'id_product': '35509877', 'sold': 29, 'rating_score': 4.897, 'count_review': 9, 'cluster': 0},
{'id_product': '35519807', 'sold': 1, 'rating_score': 0, 'count_review': 0, 'cluster': 0},
{'id_product': '35539872', 'sold': 2409, 'rating_score': 4.655, 'count_review': 1902, 'cluster': 0},
{'id_product': '35509571', 'sold': 0, 'rating_score': 0, 'count_review': 0, 'cluster': 0},
{'id_product': '35709677', 'sold': 15, 'rating_score': 5, 'count_review': 10, 'cluster': 0},
{'id_product': '35579822', 'sold': 72, 'rating_score': 4.865, 'count_review': 34, 'cluster': 0},
{'id_product': '35539872', 'sold': 208, 'rating_score': 4.755, 'count_review': 50, 'cluster': 0},
{'id_product': '35509571', 'sold': 4, 'rating_score': 5, 'count_review': 2, 'cluster': 0},
{'id_product': '35709677', 'sold': 13, 'rating_score': 4.9, 'count_review': 6, 'cluster': 0},
{'id_product': '35579822', 'sold': 82, 'rating_score': 4.885, 'count_review': 44, 'cluster': 0},
{'id_product': '35509867', 'sold': 450, 'rating_score': 4.797, 'count_review': 345, 'cluster': 0},
{'id_product': '35509877', 'sold': 1, 'rating_score': 5, 'count_review': 1, 'cluster': 0},
{'id_product': '35507877', 'sold': 1198, 'rating_score': 4.597, 'count_review': 903, 'cluster': 0},
{'id_product': '35589857', 'sold': 16, 'rating_score': 4.9, 'count_review': 12, 'cluster': 0},
{'id_product': '35309877', 'sold': 156, 'rating_score': 4.8, 'count_review': 88, 'cluster': 0},
{'id_product': '35509887', 'sold': 10, 'rating_score': 4.8, 'count_review': 9, 'cluster': 0},
{'id_product': '33569877', 'sold': 2, 'rating_score': 0, 'count_review': 0, 'cluster': 0},
{'id_product': '35509877', 'sold': 29, 'rating_score': 4.897, 'count_review': 9, 'cluster': 0},
{'id_product': '35519807', 'sold': 1, 'rating_score': 0, 'count_review': 0, 'cluster': 0},
{'id_product': '35539872', 'sold': 1569, 'rating_score': 4.655, 'count_review': 1202, 'cluster': 0},
{'id_product': '35509571', 'sold': 567, 'rating_score': 4.876, 'count_review': 450, 'cluster': 0},
{'id_product': '35709677', 'sold': 55, 'rating_score': 4.895, 'count_review': 23, 'cluster': 0},
{'id_product': '35579822', 'sold': 92, 'rating_score': 4.587, 'count_review': 86, 'cluster': 0},
# ... add further records here if there are any
]
def cluster(data):
    """Assign every record in *data* to one of three clusters using k-means.

    Each record is a dictionary with numeric 'sold', 'rating_score' and
    'count_review' values. The records are modified in place: their
    'cluster' key is set to the zero-based index (0, 1 or 2) of the nearest
    centroid, and the same list object is returned.

    Fixes compared with the earlier implementation:

    * Centroids are kept as a (clusters, features) matrix. Previously the
      initial centers were flattened feature-by-feature but indexed
      cluster-by-cluster, so the starting centroids were transposed.
    * Convergence compares two arrays of the same shape. The old test
      compared a flat array with a 2-D one, which never reported convergence
      (and raises on recent NumPy releases).
    * A cluster that receives no records keeps its previous centroid instead
      of turning into NaN through a division by zero.
    * Empty input is returned unchanged.
    """
    # Features used for clustering, in column order.
    features = ['sold', 'rating_score', 'count_review']
    # Hand-picked starting centroids, one dictionary per cluster.
    initial_centers = [
        {'sold': 15, 'rating_score': 4.8, 'count_review': 10},
        {'sold': 50, 'rating_score': 4.9, 'count_review': 50},
        {'sold': 100, 'rating_score': 5, 'count_review': 100},
    ]
    num_clusters = len(initial_centers)
    max_iters = 100

    if not data:
        return data

    # Row i holds centroid i; column j holds feature j.
    centers = np.array(
        [[center[name] for name in features] for center in initial_centers],
        dtype=float,
    )
    points = np.array(
        [[item[name] for name in features] for item in data],
        dtype=float,
    )

    for _ in range(max_iters):
        # Euclidean distance from every point to every centroid.
        deltas = points[:, np.newaxis, :] - centers[np.newaxis, :, :]
        distances = np.sqrt((deltas ** 2).sum(axis=2))
        # argmin returns the first minimum, so ties go to the lowest index.
        assignments = distances.argmin(axis=1)

        # Recompute each centroid as the mean of its members.
        new_centers = centers.copy()
        for cluster_idx in range(num_clusters):
            members = points[assignments == cluster_idx]
            if len(members):
                new_centers[cluster_idx] = members.mean(axis=0)
            # An empty cluster keeps its previous centroid.

        if np.array_equal(new_centers, centers):
            break
        centers = new_centers

    # Write the final assignment back onto the caller's records.
    for item, cluster_idx in zip(data, assignments):
        item['cluster'] = int(cluster_idx)
    return data
# print(cluster(datas))
def clustering(params):
    """Assign every record in *params* to cluster 1, 2 or 3 using k-means.

    Each record is a dictionary with numeric 'sold', 'rating_score' and
    'count_review' values. Records are modified in place: their 'cluster'
    key is set to 1, 2 or 3 according to the nearest centroid ('c1', 'c2',
    'c3'), and the same list object is returned. After every pass the
    number of records in each cluster is printed.

    Centroids are recomputed as per-feature means rounded to two decimals;
    iteration stops when they no longer change or after 20 passes.

    Fixes compared with the earlier implementation:

    * A cluster with no members keeps its previous centroid instead of
      raising ZeroDivisionError.
    * A record whose two nearest centroids are exactly equidistant is
      assigned to the lower-numbered one rather than being left out.
    * Empty input is returned unchanged.
    """
    data = params
    if not data:
        # Nothing to assign, and there is nothing to average over.
        return data

    features = ('sold', 'rating_score', 'count_review')
    # Label written into each record for the corresponding centroid.
    labels = {'c1': 1, 'c2': 2, 'c3': 3}
    # Initial cluster centres (centroids).
    centroids = {
        'c1': {'sold': 25, 'rating_score': 4.7, 'count_review': 25},
        'c2': {'sold': 100, 'rating_score': 4.8, 'count_review': 100},
        'c3': {'sold': 250, 'rating_score': 4.9, 'count_review': 250},
    }
    max_iteration = 20  # upper bound on the number of passes

    for _ in range(max_iteration):
        totals = {name: dict.fromkeys(features, 0) for name in centroids}
        counts = dict.fromkeys(centroids, 0)

        for item in data:
            distances = {
                name: math.sqrt(
                    sum((item[key] - center[key]) ** 2 for key in features)
                )
                for name, center in centroids.items()
            }
            # min() returns the first smallest key, so ties resolve to the
            # lowest-numbered centroid.
            nearest = min(distances, key=distances.get)
            for key in features:
                totals[nearest][key] += item[key]
            counts[nearest] += 1
            item['cluster'] = labels[nearest]

        # Progress output: current size of each cluster.
        print(counts['c1'], counts['c2'], counts['c3'])

        # Build the new centroids; an empty cluster keeps its old position.
        new_centroid = {
            name: (
                {
                    key: round(totals[name][key] / counts[name], 2)
                    for key in features
                }
                if counts[name]
                else dict(centroids[name])
            )
            for name in centroids
        }
        if centroids == new_centroid:
            break
        centroids = new_centroid

    return data
# Demo run: cluster the sample records in `datas` and print the returned list.
# NOTE: this is a top-level statement, so it also executes when the module is imported.
print(clustering(datas))