"""Train and evaluate a KNN classifier on extracted strawberry image features.

The script reads a CSV of per-image features, keeps the rows whose ``Label``
is 1 or 2, remaps those labels to 0/1, fits a ``StandardScaler`` and a
5-nearest-neighbour classifier on a stratified 80/20 split, prints evaluation
metrics, and stores the fitted model and scaler with joblib.
"""

import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
data_path = r'C:\Users\user\Documents\dataset_stroberi\ekstraksi_citra_strawberry.csv'
data = pd.read_csv(data_path)

# Keep only rows labelled 1 or 2. The explicit copy makes `data` an independent
# frame, so the label assignment below does not write into a slice of the
# frame returned by read_csv (avoids pandas' SettingWithCopyWarning).
data = data[data['Label'].isin([1, 2])].copy()

# Remap raw labels to 0/1 (1 -> 0, 2 -> 1).
# NOTE(review): the original comments disagreed on which raw label means
# fresh ("Segar") and which means rotten ("Busuk") - confirm against the
# dataset before interpreting class 0 / class 1 in the reports below.
data['Label'] = data['Label'].map({1: 0, 2: 1})

# Separate features and target
feature_columns = [
    'Mean_R', 'Mean_G', 'Mean_B',
    'Contrast', 'Homogeneity', 'Energy', 'Correlation',
    'Mean_Laplacian', 'Var_Laplacian',
]
X = data[feature_columns]
y = data['Label']

# Stratified 80/20 split so both subsets keep the same class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Snapshot the unscaled training features now, for the summary printed later.
# This replaces reconstructing them from the fitted model's private `_fit_X`
# attribute through `scaler.inverse_transform`, which relied on scikit-learn
# internals and on a floating-point round trip.
X_train_original = X_train.to_numpy(dtype=float, copy=True)

# Standardise features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# KNN classifier with a fixed k = 5 (no neighbour-count search is performed)
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model
knn.fit(X_train, y_train)

# Summary statistics of the training features before scaling
df = pd.DataFrame(X_train_original, columns=feature_columns)
print("\nStatistik fitur sebelum scaling:\n")
print(df.describe())

# Predict on the held-out split and evaluate
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report accuracy
print(f'\nAkurasi Model KNN: {accuracy * 100:.2f}%')

# Further evaluation
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Persist the fitted model and scaler with joblib
model_path = r'C:\Users\user\Documents\dataset_stroberi\strawberry_freshness_knn.pkl'
scaler_path = r'C:\Users\user\Documents\dataset_stroberi\scaler_knn.pkl'

try:
    joblib.dump(knn, model_path)
    joblib.dump(scaler, scaler_path)
except Exception as e:
    # Script-level boundary: report the failure instead of crashing after
    # the evaluation output has already been printed.
    print(f'\nError menyimpan model atau scaler: {e}')
else:
    # Only reached when both dumps succeeded
    print(f'\nModel disimpan dalam: {model_path}')
    print(f'Scaler disimpan dalam: {scaler_path}')