68 lines
2.2 KiB
Python
68 lines
2.2 KiB
Python
import pandas as pd
|
|
import joblib
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.preprocessing import StandardScaler
|
|
from sklearn.neighbors import KNeighborsClassifier
|
|
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
|
|
|
# Load the dataset from its CSV file.
data_path = r'C:\Users\user\Documents\dataset_stroberi\ekstraksi_citra_strawberry.csv'
data = pd.read_csv(data_path)

# Keep only the rows whose raw label is 1 or 2. The explicit copy makes the
# filtered frame independent of the loaded one, so the column assignment
# below does not write into a slice (avoids pandas' SettingWithCopyWarning).
data = data[data['Label'].isin([1, 2])].copy()

# Re-encode the raw labels for the classifier: 1 -> 0 and 2 -> 1.
# NOTE(review): which raw label denotes fresh ("Segar") and which denotes
# rotten ("Busuk") is not established by this script - confirm against the
# dataset before interpreting the predicted classes.
data['Label'] = data['Label'].map({1: 0, 2: 1})
|
|
|
|
# Columns used as model inputs; the 'Label' column is the prediction target.
feature_columns = [
    'Mean_R', 'Mean_G', 'Mean_B',
    'Contrast', 'Homogeneity', 'Energy', 'Correlation',
    'Mean_Laplacian', 'Var_Laplacian',
]
X, y = data[feature_columns], data['Label']

# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both splits; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
|
|
|
|
# Snapshot the unscaled training features first, so their statistics can be
# reported below without reading the classifier's private `_fit_X` attribute
# or undoing the scaling with `inverse_transform`.
X_train_original = X_train.to_numpy(dtype=float, copy=True)

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# K-nearest-neighbours classifier. The neighbour count is fixed at 5; no
# search over other values is performed in this script.
knn = KNeighborsClassifier(n_neighbors=5)

# Fit the classifier on the scaled training data.
knn.fit(X_train, y_train)

# Report summary statistics of the training features before scaling.
df = pd.DataFrame(X_train_original, columns=feature_columns)
print("\nStatistik fitur sebelum scaling:\n")
print(df.describe())
|
|
|
|
# Predict labels for the held-out test split and compute the accuracy.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy as a percentage with two decimals.
print(f'\nAkurasi Model KNN: {accuracy * 100:.2f}%')

# Print each further evaluation under its heading, in order: the confusion
# matrix first, then the classification report.
for _heading, _metric in (
    ("\nConfusion Matrix:", confusion_matrix),
    ("\nClassification Report:", classification_report),
):
    print(_heading)
    print(_metric(y_test, y_pred))
|
|
|
|
# Destination files for the fitted classifier and the fitted scaler.
model_path = r'C:\Users\user\Documents\dataset_stroberi\strawberry_freshness_knn.pkl'
scaler_path = r'C:\Users\user\Documents\dataset_stroberi\scaler_knn.pkl'

# Persist both objects with joblib. Saving is best-effort: any failure is
# reported on stdout instead of aborting the script, and the success messages
# are printed only after both dumps have completed.
try:
    for _artifact, _destination in ((knn, model_path), (scaler, scaler_path)):
        joblib.dump(_artifact, _destination)
    print(f'\nModel disimpan dalam: {model_path}')
    print(f'Scaler disimpan dalam: {scaler_path}')
except Exception as e:
    print(f'\nError menyimpan model atau scaler: {e}')
|