# TIFNGANJUK_E41212218/classifragise/flask.backend/loaddataknn.py
#
# 68 lines
# 2.2 KiB
# Python

import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Load the feature table from CSV (the file name suggests extracted
# strawberry image features).
# NOTE: this is an absolute, machine-specific path; the script only runs
# where that file exists.
data_path = r'C:\Users\user\Documents\dataset_stroberi\ekstraksi_citra_strawberry.csv'
data = pd.read_csv(data_path)

# Keep only the rows whose raw label is 1 or 2. The explicit .copy() makes
# the filtered frame an independent object, so the label assignment below
# writes to it directly instead of to a slice of the frame returned by
# read_csv (which can raise pandas' SettingWithCopyWarning).
data = data[data['Label'].isin([1, 2])].copy()

# Re-encode the raw labels as 0/1 (raw 1 -> 0, raw 2 -> 1).
# NOTE(review): the original comments disagreed about which raw label means
# fresh ("Segar") and which means rotten ("Busuk") -- confirm against the
# dataset before relying on the 0/1 meaning.
data['Label'] = data['Label'].map({1: 0, 2: 1})

# Separate the feature columns from the target label.
feature_columns = [
    'Mean_R', 'Mean_G', 'Mean_B', 'Contrast', 'Homogeneity',
    'Energy', 'Correlation', 'Mean_Laplacian', 'Var_Laplacian',
]
X = data[feature_columns]
y = data['Label']
# Split into training and test partitions (test_size=0.2). stratify=y asks
# train_test_split to preserve the class proportions in both partitions,
# and random_state=42 makes the shuffle reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardise the features. The scaler is fitted on the training partition
# only and then applied unchanged to the test partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# K-nearest-neighbours classifier. The neighbour count is fixed at 5; no
# search over k is performed in this script.
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model on the scaled training data.
knn.fit(X_train, y_train)

# Report the feature distribution before scaling. The statistics come from
# the unscaled training partition kept above, rather than from
# inverse-transforming the estimator's private `_fit_X` attribute, which is
# not part of scikit-learn's public API and adds a floating-point round trip.
X_train_original = X_train_raw.to_numpy(dtype=float)
df = pd.DataFrame(X_train_original, columns=feature_columns)
print("\nStatistik fitur sebelum scaling:\n")
print(df.describe())
# Predict the held-out test partition and compute the overall accuracy.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Show the accuracy as a percentage with two decimals.
print(f'\nAkurasi Model KNN: {accuracy * 100:.2f}%')

# Further evaluation: print each metric under its own heading, in the same
# order as before (heading first, then the metric computed and printed).
for title, metric in (
    ("\nConfusion Matrix:", confusion_matrix),
    ("\nClassification Report:", classification_report),
):
    print(title)
    print(metric(y_test, y_pred))
# Persist the trained classifier and its fitted scaler with joblib so both
# can be reloaded together later.
# NOTE: these are absolute, machine-specific paths.
model_path = r'C:\Users\user\Documents\dataset_stroberi\strawberry_freshness_knn.pkl'
scaler_path = r'C:\Users\user\Documents\dataset_stroberi\scaler_knn.pkl'
try:
    # Only the calls that can actually fail live inside the try body.
    joblib.dump(knn, model_path)
    joblib.dump(scaler, scaler_path)
except Exception as e:
    # Best-effort save at the top level of the script: report the failure
    # instead of aborting with a traceback.
    print(f'\nError menyimpan model atau scaler: {e}')
else:
    # Reached only when both dumps succeeded.
    print(f'\nModel disimpan dalam: {model_path}')
    print(f'Scaler disimpan dalam: {scaler_path}')