TIF_E41211245/Code Python/training_lvq.ipynb

415 lines
39 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**LIBRARY**"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pickle\n",
"\n",
"import joblib\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import sklearn.preprocessing as pre\n",
"from scipy.spatial.distance import euclidean\n",
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**LOAD DATASET**"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Load the feature array (X) and the class labels (y) produced by the MFCC extraction step.\n",
"# Raw strings (r\"...\") keep every backslash of the Windows path literal; in a normal string,\n",
"# pairs such as backslash + '!' are invalid escape sequences that newer Python versions warn about.\n",
"X = np.load(r\"E:\\! KULIAHHH\\Ivano Kuliah\\!SEMESTER 8\\!SKRIPSI\\Data Suara\\!REVISI\\hasil_ekstrak_mfcc_v2/X_train.npy\")\n",
"y = np.load(r\"E:\\! KULIAHHH\\Ivano Kuliah\\!SEMESTER 8\\!SKRIPSI\\Data Suara\\!REVISI\\hasil_ekstrak_mfcc_v2/y_train.npy\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Folder holding the extracted MFCC arrays (forward slashes avoid backslash escapes in the path).\n",
"folder = \"E:/! KULIAHHH/Ivano Kuliah/!SEMESTER 8/!SKRIPSI/Data Suara/!REVISI/hasil_ekstrak_mfcc_v2\"\n",
"# File names are split together with X and y further down, so that misclassified\n",
"# samples can be reported by file. Requires `os` from the import cell.\n",
"files_names = np.load(os.path.join(folder, \"file_names.npy\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**ENCODE LABEL**"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Convert the class labels in y to integer codes; the fitted encoder is kept for decoding later.\n",
"label_encoder = pre.LabelEncoder().fit(y)\n",
"y_encoded = label_encoder.transform(y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**SPLIT DATASET**"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Split the data into training (70%) and testing (30%) sets, matching test_size=0.3 below.\n",
"# The file names go through the same call so every test sample stays aligned with its source file.\n",
"X_train, X_test, y_train, y_test, fname_train, fname_test = train_test_split(\n",
"    X, y_encoded, files_names, test_size=0.3, random_state=42\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**MODEL LVQ MANUAL**"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"class LVQ:\n",
"    \"\"\"Learning Vector Quantization (LVQ1) classifier.\n",
"\n",
"    Every class is represented by `n_prototypes` prototype vectors. During\n",
"    training, the prototype nearest to a sample is moved toward the sample\n",
"    when their labels match and away from it otherwise.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    n_classes : int\n",
"        Number of classes. Only stored; the classes actually used are the\n",
"        unique values of `y` passed to `fit`.\n",
"    n_prototypes : int\n",
"        Prototypes per class, drawn without replacement from that class's\n",
"        training samples.\n",
"    learning_rate : float\n",
"        Initial step size. `fit` never modifies this attribute.\n",
"    epochs : int\n",
"        Number of full passes over the training data.\n",
"    lr_decay : float\n",
"        Factor applied to the working step size after every epoch.\n",
"    random_state : int\n",
"        Seed used to pick the initial prototypes.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, n_classes, n_prototypes=2, learning_rate=0.01, epochs=200,\n",
"                 lr_decay=0.95, random_state=42):\n",
"        self.n_classes = n_classes\n",
"        self.n_prototypes = n_prototypes\n",
"        self.learning_rate = learning_rate\n",
"        self.epochs = epochs\n",
"        self.lr_decay = lr_decay\n",
"        self.random_state = random_state\n",
"        self.prototypes = None\n",
"        self.prototype_labels = None\n",
"\n",
"    def fit(self, X, y):\n",
"        \"\"\"Initialise the prototypes from the training samples and train them.\n",
"\n",
"        X is indexed as one feature vector per row and y holds one label per row.\n",
"        Raises ValueError when a class has fewer samples than `n_prototypes`.\n",
"        \"\"\"\n",
"        X = np.asarray(X)\n",
"        y = np.asarray(y)\n",
"        # The in-place updates below need a floating dtype; integer features would fail to cast.\n",
"        if not np.issubdtype(X.dtype, np.floating):\n",
"            X = X.astype(float)\n",
"\n",
"        # A private generator yields the same draws as np.random.seed(seed) followed by\n",
"        # np.random.choice, without resetting the global random state of the notebook.\n",
"        rng = np.random.RandomState(self.random_state)\n",
"\n",
"        # Prototype initialisation: sample n_prototypes training vectors per class.\n",
"        prototypes = []\n",
"        prototype_labels = []\n",
"        for label in np.unique(y):\n",
"            idx = np.where(y == label)[0]\n",
"            if len(idx) < self.n_prototypes:\n",
"                raise ValueError(\n",
"                    f\"class {label!r} has {len(idx)} sample(s), fewer than n_prototypes={self.n_prototypes}\"\n",
"                )\n",
"            chosen = rng.choice(idx, self.n_prototypes, replace=False)\n",
"            prototypes.extend(X[chosen])\n",
"            prototype_labels.extend([label] * self.n_prototypes)\n",
"\n",
"        self.prototypes = np.array(prototypes)\n",
"        self.prototype_labels = np.array(prototype_labels)\n",
"\n",
"        # Work on a local copy of the step size so that the configured learning_rate\n",
"        # survives training and a second fit() starts from the same value.\n",
"        lr = self.learning_rate\n",
"        for _ in range(self.epochs):\n",
"            for sample, label in zip(X, y):\n",
"                # Find the nearest prototype (Euclidean distance).\n",
"                distances = np.linalg.norm(self.prototypes - sample, axis=1)\n",
"                winner_idx = np.argmin(distances)\n",
"\n",
"                # Attract the winner when its label is correct, repel it otherwise.\n",
"                step = lr * (sample - self.prototypes[winner_idx])\n",
"                if self.prototype_labels[winner_idx] == label:\n",
"                    self.prototypes[winner_idx] += step\n",
"                else:\n",
"                    self.prototypes[winner_idx] -= step\n",
"\n",
"            # NOTE(review): the step size decays once per epoch - confirm this is the\n",
"            # intended schedule (as opposed to once per sample).\n",
"            lr *= self.lr_decay\n",
"\n",
"    def predict(self, X):\n",
"        \"\"\"Return, for every row of X, the label of its nearest prototype.\n",
"\n",
"        Raises RuntimeError when called before `fit`.\n",
"        \"\"\"\n",
"        if self.prototypes is None:\n",
"            raise RuntimeError(\"LVQ.predict() called before fit()\")\n",
"\n",
"        y_pred = []\n",
"        for sample in np.asarray(X):\n",
"            distances = np.linalg.norm(self.prototypes - sample, axis=1)\n",
"            y_pred.append(self.prototype_labels[np.argmin(distances)])\n",
"        return np.array(y_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**INISIASI MODEL LVQ**"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Build the LVQ classifier: one class per distinct encoded label, two prototypes per class.\n",
"lvq_model = LVQ(\n",
"    n_classes=np.unique(y_encoded).size,\n",
"    n_prototypes=2,\n",
"    learning_rate=0.01,\n",
"    epochs=200,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**TRAINING MODEL**"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Train the LVQ prototypes on the training split.\n",
"lvq_model.fit(X_train, y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**PREDIKSI DATA UJI**"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Predict the encoded class label of every sample in the test split.\n",
"y_pred = lvq_model.predict(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**EVALUASI MODEL**"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"❌ Data yang salah diklasifikasi:\n",
"File: Negatif/mindrabodoh3 - Copy.wav, Label Sebenarnya: Negatif, Prediksi: Positif\n",
"File: Negatif/mindrabodoh7.wav, Label Sebenarnya: Negatif, Prediksi: Positif\n",
"File: Negatif/mindrabodoh9.wav, Label Sebenarnya: Negatif, Prediksi: Positif\n",
"File: Negatif/mindralicik5 - Copy.wav, Label Sebenarnya: Negatif, Prediksi: Positif\n",
"File: Negatif/mindrabodoh9 - Copy.wav, Label Sebenarnya: Negatif, Prediksi: Positif\n",
"File: Negatif/mindrabodoh8 - Copy.wav, Label Sebenarnya: Negatif, Prediksi: Positif\n",
"\n",
"🔍 Analisis Kemiripan Fitur untuk Data yang Salah Klasifikasi:\n",
"\n",
"\n",
"❌ File: Negatif/mindrabodoh3 - Copy.wav\n",
" Label Sebenarnya: Negatif\n",
" Diprediksi sebagai: Positif\n",
" 🔗 5 Data yang Paling Mirip dalam Kelas Prediksi:\n",
" - Positif/mindrarajin1 - Copy.wav (jarak: 209.37)\n",
" - Positif/mindrarajin10 - Copy.wav (jarak: 211.16)\n",
" - Positif/mindrabaik9.wav (jarak: 215.43)\n",
" - Positif/mindrarajin2.wav (jarak: 216.10)\n",
" - Positif/mindrarajin4 - Copy.wav (jarak: 219.32)\n",
"\n",
"❌ File: Negatif/mindrabodoh7.wav\n",
" Label Sebenarnya: Negatif\n",
" Diprediksi sebagai: Positif\n",
" 🔗 5 Data yang Paling Mirip dalam Kelas Prediksi:\n",
" - Positif/mindrarajin7.wav (jarak: 185.26)\n",
" - Positif/mindrarajin2.wav (jarak: 196.76)\n",
" - Positif/mindrarajin4 - Copy.wav (jarak: 198.24)\n",
" - Positif/mindrarajin1 - Copy.wav (jarak: 210.42)\n",
" - Positif/mindrarajin10 - Copy.wav (jarak: 217.06)\n",
"\n",
"❌ File: Negatif/mindrabodoh9.wav\n",
" Label Sebenarnya: Negatif\n",
" Diprediksi sebagai: Positif\n",
" 🔗 5 Data yang Paling Mirip dalam Kelas Prediksi:\n",
" - Positif/mindrabaik9.wav (jarak: 207.28)\n",
" - Positif/mindrarajin7.wav (jarak: 207.65)\n",
" - Positif/mindrabaik6.wav (jarak: 211.79)\n",
" - Positif/mindrabaik6 - Copy.wav (jarak: 211.79)\n",
" - Positif/mindrarajin10 - Copy.wav (jarak: 213.56)\n",
"\n",
"❌ File: Negatif/mindralicik5 - Copy.wav\n",
" Label Sebenarnya: Negatif\n",
" Diprediksi sebagai: Positif\n",
" 🔗 5 Data yang Paling Mirip dalam Kelas Prediksi:\n",
" - Positif/mindrarajin7.wav (jarak: 187.50)\n",
" - Positif/mindrarajin1 - Copy.wav (jarak: 212.67)\n",
" - Positif/mindrarajin10 - Copy.wav (jarak: 217.59)\n",
" - Positif/mindrarajin2.wav (jarak: 220.47)\n",
" - Positif/mindrarajin4 - Copy.wav (jarak: 243.20)\n",
"\n",
"❌ File: Negatif/mindrabodoh9 - Copy.wav\n",
" Label Sebenarnya: Negatif\n",
" Diprediksi sebagai: Positif\n",
" 🔗 5 Data yang Paling Mirip dalam Kelas Prediksi:\n",
" - Positif/mindrabaik9.wav (jarak: 207.28)\n",
" - Positif/mindrarajin7.wav (jarak: 207.65)\n",
" - Positif/mindrabaik6.wav (jarak: 211.79)\n",
" - Positif/mindrabaik6 - Copy.wav (jarak: 211.79)\n",
" - Positif/mindrarajin10 - Copy.wav (jarak: 213.56)\n",
"\n",
"❌ File: Negatif/mindrabodoh8 - Copy.wav\n",
" Label Sebenarnya: Negatif\n",
" Diprediksi sebagai: Positif\n",
" 🔗 5 Data yang Paling Mirip dalam Kelas Prediksi:\n",
" - Positif/mindrarajin1 - Copy.wav (jarak: 225.91)\n",
" - Positif/mindrarajin10 - Copy.wav (jarak: 229.82)\n",
" - Positif/mindrarajin7.wav (jarak: 242.93)\n",
" - Positif/mindrarajin2.wav (jarak: 245.35)\n",
" - Positif/mindrarajin4 - Copy.wav (jarak: 247.69)\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 800x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ Training selesai! Akurasi: 91.78%\n",
"\n",
"📊 Classification Report:\n",
"\n",
" precision recall f1-score support\n",
"\n",
" Negatif 1.00 0.85 0.92 41\n",
" Positif 0.84 1.00 0.91 32\n",
"\n",
" accuracy 0.92 73\n",
" macro avg 0.92 0.93 0.92 73\n",
"weighted avg 0.93 0.92 0.92 73\n",
"\n"
]
}
],
"source": [
"# Evaluate the trained model on the test split.\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"conf_matrix = confusion_matrix(y_test, y_pred)\n",
"class_report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)\n",
"\n",
"# Positions (within the test split) where the prediction differs from the true label.\n",
"misclassified_indices = np.where(y_pred != y_test)[0]\n",
"\n",
"# Decode all labels once instead of calling inverse_transform for every sample.\n",
"true_names = label_encoder.inverse_transform(y_test)\n",
"pred_names = label_encoder.inverse_transform(y_pred)\n",
"\n",
"# --- List the misclassified samples ---\n",
"print(\"❌ Data yang salah diklasifikasi:\")\n",
"for i in misclassified_indices:\n",
"    # fname_test comes from the same split call as X_test and y_test, so index i lines up.\n",
"    print(f\"File: {fname_test[i]}, Label Sebenarnya: {true_names[i]}, Prediksi: {pred_names[i]}\")\n",
"\n",
"print(\"\\n🔍 Analisis Kemiripan Fitur untuk Data yang Salah Klasifikasi:\\n\")\n",
"\n",
"# For every misclassified sample, rank the test samples whose TRUE label equals the\n",
"# (wrong) predicted label by Euclidean distance and show the five closest ones.\n",
"for idx in misclassified_indices:\n",
"    fitur_miss = X_test[idx]\n",
"    pred_indices = np.where(y_test == y_pred[idx])[0]\n",
"\n",
"    # sorted() is stable, so samples at equal distance keep their test-split order.\n",
"    kemiripan = sorted(\n",
"        ((i, euclidean(fitur_miss, X_test[i])) for i in pred_indices),\n",
"        key=lambda pair: pair[1],\n",
"    )\n",
"\n",
"    print(f\"\\n❌ File: {fname_test[idx]}\")\n",
"    print(f\" Label Sebenarnya: {true_names[idx]}\")\n",
"    print(f\" Diprediksi sebagai: {pred_names[idx]}\")\n",
"    print(\" 🔗 5 Data yang Paling Mirip dalam Kelas Prediksi:\")\n",
"\n",
"    for i, dist in kemiripan[:5]:\n",
"        print(f\" - {fname_test[i]} (jarak: {dist:.2f})\")\n",
"\n",
"\n",
"# Save the trained model and the label encoder with pickle.\n",
"# Raw strings keep the Windows backslashes literal; in a normal string, pairs such as\n",
"# backslash + '!' or backslash + 'l' are invalid escape sequences that newer Python warns about.\n",
"# The LVQ class is defined in this notebook, so code that unpickles the model must define it too.\n",
"with open(r\"E:\\! KULIAHHH\\Ivano Kuliah\\!SEMESTER 8\\!SKRIPSI\\Data Suara\\!REVISI\\lvq_model.pkl\", \"wb\") as model_file:\n",
"    pickle.dump(lvq_model, model_file)\n",
"\n",
"with open(r\"E:\\! KULIAHHH\\Ivano Kuliah\\!SEMESTER 8\\!SKRIPSI\\Data Suara\\!REVISI\\label_encoder.pkl\", \"wb\") as encoder_file:\n",
"    pickle.dump(label_encoder, encoder_file)\n",
"\n",
"# Confusion matrix heat map.\n",
"plt.figure(figsize=(8, 6))\n",
"sns.heatmap(\n",
"    conf_matrix,\n",
"    annot=True,\n",
"    fmt=\"d\",\n",
"    cmap=\"Blues\",\n",
"    xticklabels=label_encoder.classes_,\n",
"    yticklabels=label_encoder.classes_,\n",
")\n",
"plt.xlabel(\"Predicted Label\")\n",
"plt.ylabel(\"True Label\")\n",
"plt.title(f\"Confusion Matrix - Akurasi: {accuracy * 100:.2f}%\")\n",
"plt.show()\n",
"\n",
"# Print the evaluation summary.\n",
"print(f\"✅ Training selesai! Akurasi: {accuracy * 100:.2f}%\\n\")\n",
"print(\"📊 Classification Report:\\n\")\n",
"print(class_report)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}