{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Model KNN" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Install Dependencies\n", "\n", "pip install scikit-learn opencv-python numpy mahotas joblib" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import Library " ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", "from sklearn.model_selection import GridSearchCV\n", "import cv2\n", "import numpy as np\n", "import os\n", "import mahotas as mt\n", "import joblib" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "Fungsi untuk ekstraksi fitur GLCM (entropy, homogeneity, energy, contrast) dari citra\n", "```\n", "[GLCM](https://mahotas.readthedocs.io/en/latest/features.html)
\n",
    "[Ski-Image](https://scikit-image.org/docs/stable/api/skimage.feature.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "def glcm(image):\n",
    "    \"\"\"Extract four mean GLCM (Haralick) texture features from an image.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    image : array passed straight to mahotas.features.haralick\n",
    "        (the callers in this notebook pass uint8 grayscale images).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        [entropy, homogeneity, energy, contrast], each averaged over the rows\n",
    "        returned by mahotas.features.haralick.\n",
    "    \"\"\"\n",
    "    textures = mt.features.haralick(image)\n",
    "    # Average every feature over the rows (one row per direction)\n",
    "    ht_mean = textures.mean(axis=0)\n",
    "    # mahotas keeps Haralick's original feature order:\n",
    "    #   0 = angular second moment (energy), 1 = contrast,\n",
    "    #   4 = inverse difference moment (homogeneity), 8 = entropy\n",
    "    entropy = ht_mean[8]\n",
    "    homogeneity = ht_mean[4]\n",
    "    energy = ht_mean[0]\n",
    "    contrast = ht_mean[1]\n",
    "    return [entropy, homogeneity, energy, contrast]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalize_image(image):\n",
    "    \"\"\"Scale pixel values to the range [0, 1] by dividing by 255.\"\"\"\n",
    "    normalized_image = image / 255.0\n",
    "    return normalized_image"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dataset\n",
    "\n",
    "```\n",
    "Mengolah dataset yang sudah tersedia\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset root; set the CACAO_DATASET_DIR environment variable to override the default\n",
    "dataset_path = os.environ.get('CACAO_DATASET_DIR', r'D:\\Kuliah\\SKRIPSI\\cacao_dataset')\n",
    "\n",
    "# Class names and their integer labels\n",
    "classes = ['phytophthora', 'monilia', 'healthy']\n",
    "labels = {'phytophthora': 0, 'monilia': 1, 'healthy': 2}\n",
    "\n",
    "# Features and labels are collected separately for the train and test splits\n",
    "train_features = []\n",
    "train_target = []\n",
    "test_features = []\n",
    "test_target = []\n",
    "\n",
    "# Walk <dataset_path>/<phase>/<class_name>/ for every phase and class\n",
    "for phase in ['train', 'test']:\n",
    "    for class_name in classes:\n",
    "        folder_path = os.path.join(dataset_path, phase, class_name)\n",
    "        # os.listdir order is arbitrary; sort so every run sees the same sample order\n",
    "        for file_name in sorted(os.listdir(folder_path)):\n",
    "            file_path = os.path.join(folder_path, file_name)\n",
    "            image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # load as grayscale\n",
    "            if image is None:\n",
    "                # cv2.imread returns None when the file cannot be read as an image\n",
    "                print(f'Skipping unreadable file: {file_path}')\n",
    "                continue\n",
    "\n",
    "            # Scale the pixels to [0, 1]\n",
    "            normalized_image = normalize_image(image)\n",
    "\n",
    "            # Convert back to 8-bit integers. astype() truncates, so add 0.5 first:\n",
    "            # a float round-trip that lands just below an integer must not lose a level\n",
    "            normalized_image_int = (normalized_image * 255 + 0.5).astype(np.uint8)\n",
    "\n",
    "            # Extract the GLCM features (once per image)\n",
    "            feature = glcm(normalized_image_int)\n",
    "\n",
    "            if phase == 'train':\n",
    "                train_features.append(feature)\n",
    "                train_target.append(labels[class_name])\n",
    "            else:\n",
    "                test_features.append(feature)\n",
    "                test_target.append(labels[class_name])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Preprocessing dan Ekstraksi Fitur untuk data training\n",
    "\n",
    "```\n",
    "Proses preprocessing dan ekstraksi fitur untuk data training\n",
    "```\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Konversi list ke array numpy dan Standarisasi Fitur\n",
    "\n",
    "```\n",
    "Konversi list ke array numpy dan Standarisasi Fitur yang berfungsi untuk mengubah data agar memiliki skala yang sama\n",
    "```\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the collected lists to numpy arrays\n",
    "x_train = np.array(train_features)\n",
    "y_train = np.array(train_target)\n",
    "x_test = np.array(test_features)\n",
    "y_test = np.array(test_target)\n",
    "\n",
    "# Standardise the features; the scaler is fitted on the training split only\n",
    "scaler = StandardScaler()\n",
    "x_train = scaler.fit_transform(x_train)\n",
    "x_test = scaler.transform(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate values of k for the grid search, and the estimator to tune\n",
    "param_grid = {'n_neighbors': np.arange(1, 30)}\n",
    "knn = KNeighborsClassifier()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
GridSearchCV(cv=5, estimator=KNeighborsClassifier(),\n",
       "             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
       "       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ],
      "text/plain": [
       "GridSearchCV(cv=5, estimator=KNeighborsClassifier(),\n",
       " param_grid={'n_neighbors': array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
       " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])})"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tune n_neighbors with 5-fold cross-validation on the training data\n",
    "knn_cv = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)\n",
    "knn_cv.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best k: {'n_neighbors': 12}\n",
      "Best accuracy (CV): 55.00%\n"
     ]
    }
   ],
   "source": [
    "# Report the selected k and its cross-validation accuracy\n",
    "print(f'Best k: {knn_cv.best_params_}')\n",
    "print(f'Best accuracy (CV): {knn_cv.best_score_ * 100:.2f}%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
KNeighborsClassifier(n_neighbors=12)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ],
      "text/plain": [
       "KNeighborsClassifier(n_neighbors=12)"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# GridSearchCV (refit=True by default) has already refit the best estimator on the\n",
    "# full training set, so it is used as-is instead of being trained a second time\n",
    "best_knn = knn_cv.best_estimator_\n",
    "best_knn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict the test data\n",
    "y_pred = best_knn.predict(x_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Initialize KNN Model\n",
    "\n",
    "```\n",
    "Inisialisasi model KNN\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialise and train the K-NN model\n",
    "# knn = KNeighborsClassifier(n_neighbors=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Train Model\n",
    "\n",
    "```\n",
    "Proses training model KNN\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "# knn.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Predict Test Data\n",
    "\n",
    "```\n",
    "Proses prediksi data test\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "# y_pred = knn.predict(x_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Hitung Akurasi\n",
    "\n",
    "``` \n",
    "Proses menghitung akurasi dari model KNN\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 51.67%\n"
     ]
    }
   ],
   "source": [
    "# Evaluate accuracy on the test split\n",
    "accuracy = accuracy_score(y_test, y_pred)\n",
    "print(f'Accuracy: {accuracy * 100:.2f}%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Save Model\n",
    "\n",
    "```\n",
    "Proses penyimpanan model KNN\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count":
93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model saved to knn_model.pkl\n"
     ]
    }
   ],
   "source": [
    "# Persist the tuned, fitted estimator. `knn` is only the unfitted template handed to\n",
    "# GridSearchCV (which fits clones of it), so saving `knn` would lose the selected k.\n",
    "# NOTE: `scaler` is not saved here; it must still be in memory when predicting.\n",
    "joblib.dump(best_knn, 'knn_model.pkl')\n",
    "print('Model saved to knn_model.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: show the type of the object stored in the file\n",
    "print(type(joblib.load('knn_model.pkl')))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load Model\n",
    "\n",
    "```\n",
    "Proses load model KNN\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the saved model\n",
    "knn = joblib.load('knn_model.pkl')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Predict new image\n",
    "\n",
    "```\n",
    "Proses prediksi citra baru\n",
    "```\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Image to classify, located relative to the dataset root\n",
    "new_image_path = os.path.join(dataset_path, 'test', 'monilia', 'monilia77 .jpg')\n",
    "\n",
    "# Read as grayscale with the same call used for the training images; decoding in\n",
    "# colour and converting with cvtColor afterwards can give slightly different pixels\n",
    "new_image = cv2.imread(new_image_path, cv2.IMREAD_GRAYSCALE)\n",
    "if new_image is None:\n",
    "    # cv2.imread returns None instead of raising when the file cannot be read\n",
    "    raise FileNotFoundError(f'Cannot read image: {new_image_path}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The image is already single-channel, so no colour conversion is needed\n",
    "gray_image = new_image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract the GLCM features of the new image\n",
    "glcm_features = glcm(gray_image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardise the new features with the scaler fitted on the training data\n",
    "scaled_features = scaler.transform([glcm_features])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fallback: fit on the training data if the loaded model is not fitted yet\n",
    "if not hasattr(knn, 'classes_'):\n",
    "    knn.fit(x_train, y_train)\n",
    "\n",
    "# Predict the class of the new image\n",
    "prediction = knn.predict(scaled_features)"
   ]
},
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Prediction: monilia\n"
     ]
    }
   ],
   "source": [
    "# Map the predicted integer label back to its class name and show it\n",
    "index_to_class = {index: name for name, index in labels.items()}\n",
    "predicted_class = index_to_class[int(prediction[0])]\n",
    "print(f'Prediction: {predicted_class}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "",
      "text/plain": [
       "
" ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "import seaborn as sns\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Build the confusion matrix with an explicit label order so its rows/columns always\n",
    "# line up with `classes`, even when a class is missing from y_test or y_pred\n",
    "cm = confusion_matrix(y_test, y_pred, labels=[labels[name] for name in classes])\n",
    "\n",
    "# Plot the confusion matrix\n",
    "plt.figure(figsize=(10, 7))\n",
    "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)\n",
    "plt.xlabel('Predicted')\n",
    "plt.ylabel('Actual')\n",
    "plt.title('Confusion Matrix')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}