{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "Bzr_4rf-KvdO" }, "source": [ "# Data Collection & Data Understanding" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IA3vunaUkl7h" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, confusion_matrix, f1_score" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 424 }, "id": "KKAbPSUGkl7p", "outputId": "edf69254-6d05-40be-8035-de4bb5c1080f" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"df\",\n \"rows\": 150,\n \"fields\": [\n {\n \"column\": \"mfcc_1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 36.276014922615474,\n \"min\": -28.16607541028273,\n \"max\": 134.00756340540602,\n \"num_unique_values\": 148,\n \"samples\": [\n 36.08258057714812,\n 58.86680953129214,\n -3.523882247385767\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25.55141187112381,\n \"min\": -68.12144268080341,\n \"max\": 34.202640399438096,\n \"num_unique_values\": 148,\n \"samples\": [\n 23.939076705384803,\n 21.946738597467206,\n 13.0356526884146\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_3\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 16.607602505135112,\n \"min\": -35.34655184816533,\n \"max\": 38.62011424617032,\n \"num_unique_values\": 148,\n \"samples\": [\n 13.91699151797878,\n -8.389756054170379,\n -1.8076506613349623\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_4\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12.248647405224922,\n \"min\": -37.65939195051028,\n \"max\": 16.511813215011305,\n \"num_unique_values\": 148,\n \"samples\": [\n 11.817977912406883,\n -25.196198956208004,\n -1.877552087075581\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_5\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.67376981879269,\n \"min\": -25.168957020117585,\n \"max\": 33.86979700528147,\n \"num_unique_values\": 148,\n \"samples\": [\n -9.159842263653752,\n 12.502871968905891,\n -5.269233643564018\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_6\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10.967088795363342,\n \"min\": -37.11021563514957,\n \"max\": 22.823063964431544,\n \"num_unique_values\": 148,\n \"samples\": [\n -11.37754250437596,\n -21.623600445733462,\n -7.599002274584097\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_7\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.156816371373767,\n \"min\": -18.00557350489408,\n \"max\": 26.58538472976201,\n \"num_unique_values\": 148,\n \"samples\": [\n -6.540024131817128,\n -2.978011309692604,\n -3.908204318319117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_8\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.3954280048501255,\n \"min\": -19.42019336425255,\n \"max\": 11.666184384702827,\n \"num_unique_values\": 148,\n \"samples\": [\n -8.77716113407432,\n -9.47772235720944,\n -6.146912684052977\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_9\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.925604847630165,\n \"min\": -16.537194540172763,\n \"max\": 16.626586628222096,\n \"num_unique_values\": 148,\n \"samples\": [\n -5.5096426829032765,\n -4.192069878917911,\n 0.0969209496106391\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_10\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.736041581214664,\n \"min\": -22.006870967705165,\n \"max\": 5.341944901286143,\n \"num_unique_values\": 148,\n \"samples\": [\n -14.888090524599049,\n -2.679905652845484,\n -4.124536203962681\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_11\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.964057207796632,\n \"min\": -15.97644564504034,\n \"max\": 14.539281254023074,\n \"num_unique_values\": 148,\n \"samples\": [\n -8.107532163193822,\n -10.48949586980253,\n -2.566518390687336\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_12\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6.866482357947159,\n \"min\": -21.68206080071003,\n \"max\": 8.456266425271922,\n \"num_unique_values\": 148,\n \"samples\": [\n -5.797938428082079,\n -2.674145185062222,\n -5.316564560856999\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Tanti\",\n \"Random\",\n \"Vasyilla\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "df" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mfcc_1mfcc_2mfcc_3mfcc_4mfcc_5mfcc_6mfcc_7mfcc_8mfcc_9mfcc_10mfcc_11mfcc_12target
0134.007563-10.5976407.266465-3.754714-13.8083332.155736-12.878377-4.9756016.321278-2.777375-6.506576-1.876924Hilmi
150.750191-5.59891632.949004-22.50126014.779131-19.5383017.961897-11.9574397.594936-14.9296855.006317-1.295943Hilmi
2122.293842-12.81445812.493625-10.989077-15.9638401.330859-17.650124-3.0140833.925209-6.664020-1.170681-0.027291Hilmi
3126.803849-0.0539418.270297-6.800313-13.392560-0.899367-14.8745800.2275567.045879-7.464138-5.668817-0.148281Hilmi
410.4686029.117828-8.683499-1.46616824.753077-21.5310512.124193-6.58112111.937312-18.9429545.7637980.569468Hilmi
..........................................
14536.42794325.7072848.21266810.450583-5.248485-11.510404-6.744978-9.246621-3.982937-10.705474-6.795224-6.532252Random
14643.9542716.4560482.131374-9.353882-17.369689-7.557169-15.326618-16.5219121.466365-6.7237671.9310862.092220Random
14755.78210826.974924-5.329280-1.423440-9.777956-8.758294-1.948076-6.980490-0.893085-4.223947-4.055362-5.809257Random
14826.584968-8.043035-4.178721-0.592769-15.964644-9.010896-4.953900-11.460907-0.344327-4.228080-1.677582-2.752977Random
14919.5893339.885163-20.071891-13.235444-15.691194-10.1116712.390462-5.974159-0.621773-2.3115744.188963-0.232814Random
\n", "

150 rows × 13 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "text/plain": [ " mfcc_1 mfcc_2 mfcc_3 mfcc_4 mfcc_5 mfcc_6 \\\n", "0 134.007563 -10.597640 7.266465 -3.754714 -13.808333 2.155736 \n", "1 50.750191 -5.598916 32.949004 -22.501260 14.779131 -19.538301 \n", "2 122.293842 -12.814458 12.493625 -10.989077 -15.963840 1.330859 \n", "3 126.803849 -0.053941 8.270297 -6.800313 -13.392560 -0.899367 \n", "4 10.468602 9.117828 -8.683499 -1.466168 24.753077 -21.531051 \n", ".. ... ... ... ... ... ... \n", "145 36.427943 25.707284 8.212668 10.450583 -5.248485 -11.510404 \n", "146 43.954271 6.456048 2.131374 -9.353882 -17.369689 -7.557169 \n", "147 55.782108 26.974924 -5.329280 -1.423440 -9.777956 -8.758294 \n", "148 26.584968 -8.043035 -4.178721 -0.592769 -15.964644 -9.010896 \n", "149 19.589333 9.885163 -20.071891 -13.235444 -15.691194 -10.111671 \n", "\n", " mfcc_7 mfcc_8 mfcc_9 mfcc_10 mfcc_11 mfcc_12 target \n", "0 -12.878377 -4.975601 6.321278 -2.777375 -6.506576 -1.876924 Hilmi \n", "1 7.961897 -11.957439 7.594936 -14.929685 5.006317 -1.295943 Hilmi \n", "2 -17.650124 -3.014083 3.925209 -6.664020 -1.170681 -0.027291 Hilmi \n", "3 -14.874580 0.227556 7.045879 -7.464138 -5.668817 -0.148281 Hilmi \n", "4 2.124193 -6.581121 11.937312 -18.942954 5.763798 0.569468 Hilmi \n", ".. ... ... ... ... ... ... ... \n", "145 -6.744978 -9.246621 -3.982937 -10.705474 -6.795224 -6.532252 Random \n", "146 -15.326618 -16.521912 1.466365 -6.723767 1.931086 2.092220 Random \n", "147 -1.948076 -6.980490 -0.893085 -4.223947 -4.055362 -5.809257 Random \n", "148 -4.953900 -11.460907 -0.344327 -4.228080 -1.677582 -2.752977 Random \n", "149 2.390462 -5.974159 -0.621773 -2.311574 4.188963 -0.232814 Random \n", "\n", "[150 rows x 13 columns]" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('/content/drive/MyDrive/REVISI SKRIPSI/dataset_pertama.csv')\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "pEq1S0fvMhNC", "outputId": "ca0ddc40-512c-4f16-aa7f-90105c4523a9" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 150 entries, 0 to 149\n", "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 mfcc_1 150 non-null float64\n", " 1 mfcc_2 150 non-null float64\n", " 2 mfcc_3 150 non-null float64\n", " 3 mfcc_4 150 non-null float64\n", " 4 mfcc_5 150 non-null float64\n", " 5 mfcc_6 150 non-null float64\n", " 6 mfcc_7 150 non-null float64\n", " 7 mfcc_8 150 non-null float64\n", " 8 mfcc_9 150 non-null float64\n", " 9 mfcc_10 150 non-null float64\n", " 10 mfcc_11 150 non-null float64\n", " 11 mfcc_12 150 non-null float64\n", " 12 target 150 non-null object \n", "dtypes: float64(12), object(1)\n", "memory usage: 15.4+ KB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 300 }, "id": "sfJWMIBtMpnC", "outputId": "9c7cd929-0dde-47a7-aaac-a056830ce72e" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"df\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"mfcc_1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 57.54294921506547,\n \"min\": -28.16607541028273,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 52.92849559071616,\n 52.67909415139298,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 63.40642771663973,\n \"min\": -68.12144268080341,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -6.307195613479998,\n 0.6106925847553657,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_3\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 58.28185810277617,\n \"min\": -35.34655184816533,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -7.113904243435991,\n -8.825214761246645,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_4\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 57.62183217405129,\n \"min\": -37.65939195051028,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -8.56453359187676,\n -6.867867350377988,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_5\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 55.07233923831515,\n \"min\": -25.168957020117585,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -0.9577231590327248,\n -0.1339447985360342,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_6\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 58.48142309958314,\n \"min\": -37.11021563514957,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -10.869185206092027,\n -11.802906445496959,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_7\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 54.47579295130784,\n \"min\": -18.00557350489408,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -3.140169897880235,\n -3.564955884869387,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_8\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 54.869632735816396,\n \"min\": -19.42019336425255,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -4.324533343488815,\n -6.129346251905641,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_9\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 53.67205279983795,\n \"min\": -16.537194540172763,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -0.8533399931483685,\n -0.91361396963881,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_10\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 56.25721142309399,\n \"min\": -22.006870967705165,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -9.213072109360212,\n -10.193841663943422,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_11\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 53.80363736760405,\n \"min\": -15.97644564504034,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -0.8568085259266088,\n -1.679220197784177,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_12\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 54.705272151561935,\n \"min\": -21.68206080071003,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -4.006372983455339,\n -2.0080499610978477,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mfcc_1mfcc_2mfcc_3mfcc_4mfcc_5mfcc_6mfcc_7mfcc_8mfcc_9mfcc_10mfcc_11mfcc_12
count150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000
mean52.928496-6.307196-7.113904-8.564534-0.957723-10.869185-3.140170-4.324533-0.853340-9.213072-0.856809-4.006373
std36.27601525.55141216.60760312.24864714.67377010.9670897.1568167.3954288.9256055.7360427.9640576.866482
min-28.166075-68.121443-35.346552-37.659392-25.168957-37.110216-18.005574-19.420193-16.537195-22.006871-15.976446-21.682061
25%27.083477-28.192809-20.513877-16.970205-14.360503-19.147536-8.061160-9.374177-7.514854-13.849136-8.240238-5.374539
50%52.6790940.610693-8.825215-6.867867-0.133945-11.802906-3.564956-6.129346-0.913614-10.193842-1.679220-2.008050
75%70.32834513.6077701.9997361.16844912.395716-5.5538771.2965880.9781487.045267-4.2249805.8148570.083519
max134.00756334.20264038.62011416.51181333.86979722.82306426.58538511.66618416.6265875.34194514.5392818.456266
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "text/plain": [ " mfcc_1 mfcc_2 mfcc_3 mfcc_4 mfcc_5 mfcc_6 \\\n", "count 150.000000 150.000000 150.000000 150.000000 150.000000 150.000000 \n", "mean 52.928496 -6.307196 -7.113904 -8.564534 -0.957723 -10.869185 \n", "std 36.276015 25.551412 16.607603 12.248647 14.673770 10.967089 \n", "min -28.166075 -68.121443 -35.346552 -37.659392 -25.168957 -37.110216 \n", "25% 27.083477 -28.192809 -20.513877 -16.970205 -14.360503 -19.147536 \n", "50% 52.679094 0.610693 -8.825215 -6.867867 -0.133945 -11.802906 \n", "75% 70.328345 13.607770 1.999736 1.168449 12.395716 -5.553877 \n", "max 134.007563 34.202640 38.620114 16.511813 33.869797 22.823064 \n", "\n", " mfcc_7 mfcc_8 mfcc_9 mfcc_10 mfcc_11 mfcc_12 \n", "count 150.000000 150.000000 150.000000 150.000000 150.000000 150.000000 \n", "mean -3.140170 -4.324533 -0.853340 -9.213072 -0.856809 -4.006373 \n", "std 7.156816 7.395428 8.925605 5.736042 7.964057 6.866482 \n", "min -18.005574 -19.420193 -16.537195 -22.006871 -15.976446 -21.682061 \n", "25% -8.061160 -9.374177 -7.514854 -13.849136 -8.240238 -5.374539 \n", "50% -3.564956 -6.129346 -0.913614 -10.193842 -1.679220 -2.008050 \n", "75% 1.296588 0.978148 7.045267 -4.224980 5.814857 0.083519 \n", "max 26.585385 11.666184 16.626587 5.341945 14.539281 8.456266 " ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "jJdKYUOUNDx9", "outputId": "4cce76eb-d6b4-421d-e260-f08f436e770c" }, "outputs": [ { "data": { "text/plain": [ "mfcc_1 0\n", "mfcc_2 0\n", "mfcc_3 0\n", "mfcc_4 0\n", "mfcc_5 0\n", "mfcc_6 0\n", "mfcc_7 0\n", "mfcc_8 0\n", "mfcc_9 0\n", "mfcc_10 0\n", "mfcc_11 0\n", "mfcc_12 0\n", "target 0\n", "dtype: int64" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 465 }, "id": "_djz4EoQNVXF", "outputId": "546a0924-c80b-4b20-8aed-ddbf7691b0e4" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(1, 1, figsize=(4, 5))\n", "sns.histplot(data=df, x='target', ax=axes, color='skyblue')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "id": "Oz4HrFkvK5HX" }, "source": [ "# Data Preparation" ] }, { "cell_type": "markdown", "metadata": { "id": "Vbm0Iyx1LZDL" }, "source": [ "Pengambilan 5 kelas dari dataframe" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 424 }, "id": "W-NoSdhxsUsH", "outputId": "c1670698-b0f3-4175-867d-f5c39806d759" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"df\",\n \"rows\": 150,\n \"fields\": [\n {\n \"column\": \"mfcc_1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 36.276014922615474,\n \"min\": -28.16607541028273,\n \"max\": 134.00756340540602,\n \"num_unique_values\": 148,\n \"samples\": [\n 36.08258057714812,\n 58.86680953129214,\n -3.523882247385767\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25.55141187112381,\n \"min\": -68.12144268080341,\n \"max\": 34.202640399438096,\n \"num_unique_values\": 148,\n \"samples\": [\n 23.939076705384803,\n 21.946738597467206,\n 13.0356526884146\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_3\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 16.607602505135112,\n \"min\": -35.34655184816533,\n \"max\": 38.62011424617032,\n \"num_unique_values\": 148,\n \"samples\": [\n 13.91699151797878,\n -8.389756054170379,\n -1.8076506613349623\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_4\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12.248647405224922,\n \"min\": -37.65939195051028,\n \"max\": 16.511813215011305,\n \"num_unique_values\": 148,\n \"samples\": [\n 11.817977912406883,\n -25.196198956208004,\n -1.877552087075581\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_5\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.67376981879269,\n \"min\": -25.168957020117585,\n \"max\": 33.86979700528147,\n \"num_unique_values\": 148,\n \"samples\": [\n -9.159842263653752,\n 12.502871968905891,\n -5.269233643564018\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_6\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10.967088795363342,\n \"min\": -37.11021563514957,\n \"max\": 22.823063964431544,\n \"num_unique_values\": 148,\n \"samples\": [\n -11.37754250437596,\n -21.623600445733462,\n -7.599002274584097\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_7\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.156816371373767,\n \"min\": -18.00557350489408,\n \"max\": 26.58538472976201,\n \"num_unique_values\": 148,\n \"samples\": [\n -6.540024131817128,\n -2.978011309692604,\n -3.908204318319117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_8\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.3954280048501255,\n \"min\": -19.42019336425255,\n \"max\": 11.666184384702827,\n \"num_unique_values\": 148,\n \"samples\": [\n -8.77716113407432,\n -9.47772235720944,\n -6.146912684052977\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_9\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.925604847630165,\n \"min\": -16.537194540172763,\n \"max\": 16.626586628222096,\n \"num_unique_values\": 148,\n \"samples\": [\n -5.5096426829032765,\n -4.192069878917911,\n 0.0969209496106391\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_10\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.736041581214664,\n \"min\": -22.006870967705165,\n \"max\": 5.341944901286143,\n \"num_unique_values\": 148,\n \"samples\": [\n -14.888090524599049,\n -2.679905652845484,\n -4.124536203962681\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_11\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.964057207796632,\n \"min\": -15.97644564504034,\n \"max\": 14.539281254023074,\n \"num_unique_values\": 148,\n \"samples\": [\n -8.107532163193822,\n -10.48949586980253,\n -2.566518390687336\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_12\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6.866482357947159,\n \"min\": -21.68206080071003,\n \"max\": 8.456266425271922,\n \"num_unique_values\": 148,\n \"samples\": [\n -5.797938428082079,\n -2.674145185062222,\n -5.316564560856999\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Tanti\",\n \"Random\",\n \"Vasyilla\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "df" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mfcc_1mfcc_2mfcc_3mfcc_4mfcc_5mfcc_6mfcc_7mfcc_8mfcc_9mfcc_10mfcc_11mfcc_12target
0134.007563-10.5976407.266465-3.754714-13.8083332.155736-12.878377-4.9756016.321278-2.777375-6.506576-1.876924Hilmi
150.750191-5.59891632.949004-22.50126014.779131-19.5383017.961897-11.9574397.594936-14.9296855.006317-1.295943Hilmi
2122.293842-12.81445812.493625-10.989077-15.9638401.330859-17.650124-3.0140833.925209-6.664020-1.170681-0.027291Hilmi
3126.803849-0.0539418.270297-6.800313-13.392560-0.899367-14.8745800.2275567.045879-7.464138-5.668817-0.148281Hilmi
410.4686029.117828-8.683499-1.46616824.753077-21.5310512.124193-6.58112111.937312-18.9429545.7637980.569468Hilmi
..........................................
14536.42794325.7072848.21266810.450583-5.248485-11.510404-6.744978-9.246621-3.982937-10.705474-6.795224-6.532252Random
14643.9542716.4560482.131374-9.353882-17.369689-7.557169-15.326618-16.5219121.466365-6.7237671.9310862.092220Random
14755.78210826.974924-5.329280-1.423440-9.777956-8.758294-1.948076-6.980490-0.893085-4.223947-4.055362-5.809257Random
14826.584968-8.043035-4.178721-0.592769-15.964644-9.010896-4.953900-11.460907-0.344327-4.228080-1.677582-2.752977Random
14919.5893339.885163-20.071891-13.235444-15.691194-10.1116712.390462-5.974159-0.621773-2.3115744.188963-0.232814Random
\n", "

150 rows × 13 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "text/plain": [ " mfcc_1 mfcc_2 mfcc_3 mfcc_4 mfcc_5 mfcc_6 \\\n", "0 134.007563 -10.597640 7.266465 -3.754714 -13.808333 2.155736 \n", "1 50.750191 -5.598916 32.949004 -22.501260 14.779131 -19.538301 \n", "2 122.293842 -12.814458 12.493625 -10.989077 -15.963840 1.330859 \n", "3 126.803849 -0.053941 8.270297 -6.800313 -13.392560 -0.899367 \n", "4 10.468602 9.117828 -8.683499 -1.466168 24.753077 -21.531051 \n", ".. ... ... ... ... ... ... \n", "145 36.427943 25.707284 8.212668 10.450583 -5.248485 -11.510404 \n", "146 43.954271 6.456048 2.131374 -9.353882 -17.369689 -7.557169 \n", "147 55.782108 26.974924 -5.329280 -1.423440 -9.777956 -8.758294 \n", "148 26.584968 -8.043035 -4.178721 -0.592769 -15.964644 -9.010896 \n", "149 19.589333 9.885163 -20.071891 -13.235444 -15.691194 -10.111671 \n", "\n", " mfcc_7 mfcc_8 mfcc_9 mfcc_10 mfcc_11 mfcc_12 target \n", "0 -12.878377 -4.975601 6.321278 -2.777375 -6.506576 -1.876924 Hilmi \n", "1 7.961897 -11.957439 7.594936 -14.929685 5.006317 -1.295943 Hilmi \n", "2 -17.650124 -3.014083 3.925209 -6.664020 -1.170681 -0.027291 Hilmi \n", "3 -14.874580 0.227556 7.045879 -7.464138 -5.668817 -0.148281 Hilmi \n", "4 2.124193 -6.581121 11.937312 -18.942954 5.763798 0.569468 Hilmi \n", ".. ... ... ... ... ... ... ... \n", "145 -6.744978 -9.246621 -3.982937 -10.705474 -6.795224 -6.532252 Random \n", "146 -15.326618 -16.521912 1.466365 -6.723767 1.931086 2.092220 Random \n", "147 -1.948076 -6.980490 -0.893085 -4.223947 -4.055362 -5.809257 Random \n", "148 -4.953900 -11.460907 -0.344327 -4.228080 -1.677582 -2.752977 Random \n", "149 2.390462 -5.974159 -0.621773 -2.311574 4.188963 -0.232814 Random \n", "\n", "[150 rows x 13 columns]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df = df = df = df[df['target'].isin(['Hilmi', 'Tanti', 'Vasyilla', 'Yudha', 'Random'])]\n", "df" ] }, { "cell_type": "markdown", "metadata": { "id": "wpBWFvGaLiny" }, "source": [ "Normalisasi MinMax" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "xzmFgA9xtKEP" }, "outputs": [], "source": [ "def minmax(df_input):\n", " list_fitur = df_input.columns[:-1]\n", " for fitur in list_fitur:\n", " max = df_input[fitur].max()\n", " min = df_input[fitur].min()\n", " df_input[fitur] = (df_input[fitur]-min)/(max-min)\n", " return df_input" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 424 }, "id": "IWlYLLujtKpR", "outputId": "1d3f38e3-2e69-4bd2-a30c-a0039491a5a8" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"df\",\n \"rows\": 150,\n \"fields\": [\n {\n \"column\": \"mfcc_1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.22368626114286905,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.39617200709450573,\n 0.5366648092572446,\n 0.151949437299751\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.24971063606879967,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.8996955224509101,\n 0.8802246603826399,\n 0.7931377729089978\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_3\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.22452820144623117,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.6660235747723758,\n 0.364445191562572,\n 0.4534326468635954\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_4\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.22610992994892484,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.9133518390764562,\n 0.2300704397515369,\n 0.6605324683861532\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_5\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2485447069645116,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.2711628153530569,\n 0.638086450347795,\n 0.33706204856546446\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_6\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.18298829746403522,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.4293553315068956,\n 0.25839759300481113,\n 0.4924011093291103\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_7\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.16049927282817375,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.25712722549581657,\n 0.33700917832086547,\n 0.3161486037682521\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_8\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23789931604683792,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.342369648729366,\n 0.3198336933088711,\n 0.42698061470496046\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_9\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.26913712891509134,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.3325179297642562,\n 0.3722471993941323,\n 0.5015747572727244\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_10\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.20973637793650562,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.2602957465217917,\n 0.7066838069860648,\n 0.6538613901751362\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_11\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2609820580102604,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.2578641992659867,\n 0.17980727751912784,\n 0.4394431533192342\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_12\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.22783223191058968,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 148,\n \"samples\": [\n 0.5270406102344792,\n 0.630689137891544,\n 0.5430127597043442\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Tanti\",\n \"Random\",\n \"Vasyilla\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "df" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mfcc_1mfcc_2mfcc_3mfcc_4mfcc_5mfcc_6mfcc_7mfcc_8mfcc_9mfcc_10mfcc_11mfcc_12target
01.0000000.5621730.5761110.6258800.1924270.6551610.1149830.4646600.6892600.7031200.3103280.657141Hilmi
10.4866160.6110250.9233290.2798190.6766420.2931910.5823480.2400650.7276650.2587750.6876050.676418Hilmi
20.9277700.5405080.6467800.4923340.1559170.6413980.0079710.5277590.6170110.5610060.4851850.718513Hilmi
30.9555800.6652150.5896830.5696580.1994690.6041860.0702160.6320370.7111090.5317500.3377810.714498Hilmi
40.2382300.7548490.3604740.6681270.8455810.2599420.4514320.4130130.8586030.1120310.7124280.738313Hilmi
..........................................
1450.3983020.9169760.5889030.8881100.3374130.4271390.2525310.3272680.3785530.4132320.3008690.502676Random
1460.4447110.7288360.5066870.5225190.1321040.4930990.0600780.0932330.5428680.5588210.5868300.788839Random
1470.5176440.9293640.4058220.6689150.2606930.4730580.3601070.4001660.4717230.6502260.3906540.526665Random
1480.3376080.5871380.4213770.6842500.1559030.4688430.2926980.2560380.4882700.6500750.4685740.628073Random
1490.2944710.7623480.2065070.4508660.1605350.4504770.4574030.4325380.4799040.7201520.6608200.711693Random
\n", "

150 rows × 13 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "text/plain": [ " mfcc_1 mfcc_2 mfcc_3 mfcc_4 mfcc_5 mfcc_6 mfcc_7 \\\n", "0 1.000000 0.562173 0.576111 0.625880 0.192427 0.655161 0.114983 \n", "1 0.486616 0.611025 0.923329 0.279819 0.676642 0.293191 0.582348 \n", "2 0.927770 0.540508 0.646780 0.492334 0.155917 0.641398 0.007971 \n", "3 0.955580 0.665215 0.589683 0.569658 0.199469 0.604186 0.070216 \n", "4 0.238230 0.754849 0.360474 0.668127 0.845581 0.259942 0.451432 \n", ".. ... ... ... ... ... ... ... \n", "145 0.398302 0.916976 0.588903 0.888110 0.337413 0.427139 0.252531 \n", "146 0.444711 0.728836 0.506687 0.522519 0.132104 0.493099 0.060078 \n", "147 0.517644 0.929364 0.405822 0.668915 0.260693 0.473058 0.360107 \n", "148 0.337608 0.587138 0.421377 0.684250 0.155903 0.468843 0.292698 \n", "149 0.294471 0.762348 0.206507 0.450866 0.160535 0.450477 0.457403 \n", "\n", " mfcc_8 mfcc_9 mfcc_10 mfcc_11 mfcc_12 target \n", "0 0.464660 0.689260 0.703120 0.310328 0.657141 Hilmi \n", "1 0.240065 0.727665 0.258775 0.687605 0.676418 Hilmi \n", "2 0.527759 0.617011 0.561006 0.485185 0.718513 Hilmi \n", "3 0.632037 0.711109 0.531750 0.337781 0.714498 Hilmi \n", "4 0.413013 0.858603 0.112031 0.712428 0.738313 Hilmi \n", ".. ... ... ... ... ... ... \n", "145 0.327268 0.378553 0.413232 0.300869 0.502676 Random \n", "146 0.093233 0.542868 0.558821 0.586830 0.788839 Random \n", "147 0.400166 0.471723 0.650226 0.390654 0.526665 Random \n", "148 0.256038 0.488270 0.650075 0.468574 0.628073 Random \n", "149 0.432538 0.479904 0.720152 0.660820 0.711693 Random \n", "\n", "[150 rows x 13 columns]" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_scaled = minmax(df)\n", "df_scaled" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 300 }, "id": "EF7XAYFSMvBs", "outputId": "f5fdd9fb-d824-4fb9-89b7-3f6580b8805a" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"df_scaled\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"mfcc_1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.87369352743623,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.5000477981083185,\n 0.4985099314047994,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.846342550332686,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.6041026238060625,\n 0.6717102484237238,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_3\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.89895398708089,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.38169420220619343,\n 0.35855796248939886,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_4\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.86061906054653,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.5370908450298161,\n 0.5684112898365092,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_5\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.88730537785032,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.410090528852776,\n 0.4240437088291402,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_6\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.8888888393393,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.43783738524532445,\n 0.4222580402529731,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_7\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.90888920487374,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.33337259829192134,\n 0.3238463175433949,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_8\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.87572416739311,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.485603698915066,\n 0.42754569926673214,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_9\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.87243291238593,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.47292118071178,\n 0.4711037167686793,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_10\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.879403376782854,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.46780083348511053,\n 0.43193933369362497,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_11\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.87263831838093,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.4954703248303675,\n 0.46851990432824897,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mfcc_12\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52.845471999528556,\n \"min\": 0.0,\n \"max\": 150.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.5864853641252078,\n 0.6527904051241242,\n 150.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mfcc_1mfcc_2mfcc_3mfcc_4mfcc_5mfcc_6mfcc_7mfcc_8mfcc_9mfcc_10mfcc_11mfcc_12
count150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000150.000000
mean0.5000480.6041030.3816940.5370910.4100910.4378370.3333730.4856040.4729210.4678010.4954700.586485
std0.2236860.2497110.2245280.2261100.2485450.1829880.1604990.2378990.2691370.2097360.2609820.227832
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%0.3406810.3902170.2005320.3819220.1830740.2997110.2230140.3231650.2720540.2982850.2535150.541089
50%0.4985100.6717100.3585580.5684110.4240440.4222580.3238460.4275460.4711040.4319390.4685200.652790
75%0.6073390.7987290.5049070.7167620.6362710.5265240.4328720.6561830.7110910.6501890.7141010.722189
max1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "text/plain": [ " mfcc_1 mfcc_2 mfcc_3 mfcc_4 mfcc_5 mfcc_6 \\\n", "count 150.000000 150.000000 150.000000 150.000000 150.000000 150.000000 \n", "mean 0.500048 0.604103 0.381694 0.537091 0.410091 0.437837 \n", "std 0.223686 0.249711 0.224528 0.226110 0.248545 0.182988 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.340681 0.390217 0.200532 0.381922 0.183074 0.299711 \n", "50% 0.498510 0.671710 0.358558 0.568411 0.424044 0.422258 \n", "75% 0.607339 0.798729 0.504907 0.716762 0.636271 0.526524 \n", "max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n", "\n", " mfcc_7 mfcc_8 mfcc_9 mfcc_10 mfcc_11 mfcc_12 \n", "count 150.000000 150.000000 150.000000 150.000000 150.000000 150.000000 \n", "mean 0.333373 0.485604 0.472921 0.467801 0.495470 0.586485 \n", "std 0.160499 0.237899 0.269137 0.209736 0.260982 0.227832 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.223014 0.323165 0.272054 0.298285 0.253515 0.541089 \n", "50% 0.323846 0.427546 0.471104 0.431939 0.468520 0.652790 \n", "75% 0.432872 0.656183 0.711091 0.650189 0.714101 0.722189 \n", "max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 " ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_scaled.describe()" ] }, { "cell_type": "markdown", "metadata": { "id": "6hB7WF5bLr5B" }, "source": [ "Mengacak baris dalam dataframe" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "ZYzVYIwplBDw" }, "outputs": [], "source": [ "df_scaled = df_scaled.sample(frac=1).reset_index(drop=True)" ] }, { "cell_type": "markdown", "metadata": { "id": "LljaqH4ykl7r" }, "source": [ "Mengambil kolom yang bukan target, dan mengubahnya dalam bentuk array agar bisa diproses ke langkah selanjutnya" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "Yn-bfLBbkl7t", "outputId": "d53364a9-ed29-48ab-c047-1611c9e8357f" }, "outputs": [ { "data": { "text/plain": [ "array([[0.29121659, 0.84588868, 0.12388285, 0.71149628, 0.13523701,\n", " 0.51554656, 0.29915299, 0.63665473, 0.74734425, 0.23148366,\n", " 0.44838036, 0.72694892],\n", " [0.83619592, 0.31438551, 0.58094409, 0.66021311, 0.09954104,\n", " 0.84357682, 0.22297908, 0.14641018, 0.58019394, 0.27560393,\n", " 0.09594089, 0.82759198],\n", " [0.49518097, 0.66935134, 0.59251915, 0.74893544, 0.14104965,\n", " 0.49621374, 0.21525878, 0.22361814, 0.62380065, 0.57862401,\n", " 0.42216781, 0.8169304 ],\n", " [0.52529014, 0.74516424, 0.39434066, 0.67422405, 0.16254567,\n", " 0.4754561 , 0.41733271, 0.29990747, 0.43985614, 0.63028369,\n", " 0.44933817, 0.57891455],\n", " [0.47336725, 0.72736701, 0.29602544, 0.43782187, 0.10415111,\n", " 0.51156742, 0.3924323 , 0.33880198, 0.45233126, 0.52496245,\n", " 0.46761847, 0.63632228],\n", " [0.46549479, 0.68830612, 0.46321952, 0.63378994, 0.13768277,\n", " 0.5177148 , 0.34761035, 0.33241868, 0.47781942, 0.43611844,\n", " 0.53457644, 0.85542425],\n", " [0.58151584, 0.78039673, 0.26726235, 0.15763886, 0.64530875,\n", " 0.2129583 , 0.32646394, 0.34507955, 0.31369449, 0.8072768 ,\n", " 0.12466667, 0.75164049],\n", " [0.90634916, 0.68215495, 0.57196028, 0.63612229, 0.14953645,\n", " 0.59652429, 0.16449957, 0.62814211, 0.77226034, 0.66965948,\n", " 0.32163293, 0.64889549],\n", " [0.16593316, 0.67184425, 0.02639212, 0.74555434, 0.56651819,\n", " 0.73011609, 0.21427763, 0.57631273, 0.69006371, 0.70191246,\n", " 0.23066824, 0.65091788],\n", " [0.45141925, 0.88116315, 0.32234545, 0.24041898, 0.66228758,\n", " 0.24554089, 0.39900941, 0.45281556, 0.37851654, 0.65094633,\n", " 0.1740261 , 0.72254218],\n", " [0.83943984, 0.32146997, 0.59196573, 0.6836961 , 0.03665406,\n", " 0.75820591, 0.29457384, 0.27667737, 0.61236251, 0.29902099,\n", " 0.17876748, 0.88709761],\n", " [0.48377523, 0.38527968, 0.12618517, 0.42833944, 0.50914349,\n", " 0.41111474, 0.44888022, 0.74018461, 0.04999605, 0.37316789,\n", " 0.8514738 , 0.26695911],\n", " [0.85219505, 0.28867415, 0.6164554 , 0.69177126, 0. ,\n", " 0.78519746, 0.36838812, 0.38211587, 0.62055586, 0.3013742 ,\n", " 0.22989701, 0.88030156]])" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = df_scaled[['mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6', 'mfcc_7', 'mfcc_8', 'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12']]\n", "X = np.array(X)\n", "X[:13]" ] }, { "cell_type": "markdown", "metadata": { "id": "Yy7-vajIkl70" }, "source": [ "# One-Hot Encoding\n", "\n", "One-hot encoding adalah suatu metode yang digunakan untuk menggambarkan data kategorikal atau nilai kelas dalam bentuk vektor biner. Tujuannya adalah untuk memberikan representasi numerik yang sesuai dengan nilai kelas atau kategori tanpa memberikan arti atau urutan numerik yang salah." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "GGGs7-w4kl72", "outputId": "df1267a4-831d-4d6d-f6ca-22147dd9d309" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/sklearn/preprocessing/_encoders.py:868: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "array([[0., 0., 0., 0., 1.],\n", " [0., 0., 0., 0., 1.],\n", " [0., 1., 0., 0., 0.],\n", " [0., 1., 0., 0., 0.],\n", " [0., 1., 0., 0., 0.]])" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import OneHotEncoder\n", "one_hot_encoder = OneHotEncoder(sparse=False)\n", "\n", "Y = df_scaled.target\n", "Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))\n", "Y[:5]" ] }, { "cell_type": "markdown", "metadata": { "id": "-KmMV_oSkl75" }, "source": [ "# Menentukan Data Latih, Data Uji dan Data Validasi\n", "\n", "Dataframe yang awalnya memiliki 150 data, dipecah menjadi data latih dan data sisa dengan perbandingan 60:40, 70:30, 80:20. Data yang tersisa dipecah kembali menjadi data uji dan data validasi dengan perbandingan 60:40, 70:30, 80:20" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "ZYA1UShMkl77" }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.2, random_state=42)\n", "X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.8, random_state=42)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "Zv6QbhfO721L", "outputId": "8c738641-4327-454a-eb39-3c9f14d538b6" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "120\n", "24\n", "6\n", "120\n", "24\n", "6\n" ] } ], "source": [ "print(len(X_train))\n", "print(len(X_test))\n", "print(len(X_val))\n", "print(len(Y_train))\n", "print(len(Y_test))\n", "print(len(Y_val))" ] }, { "cell_type": "markdown", "metadata": { "id": "xGflcOX8kl7-" }, "source": [ "# Implementasi Backpropagation\n", "\n", "Fungsi utama `NeuralNetwork` akan melatih jaringan untuk jumlah epoch yang ditentukan. Pada awalnya, bobot jaringan akan diinisialisasi secara acak dengan `InitializeWeights`. Kemudian, di setiap epoch, bobot akan diperbarui dengan `Train` dan setiap 20 epoch nilai loss baik untuk set pelatihan maupun validasi akan dicetak oleh fungsi `Loss`. Di akhir pelatihan, akan dicetak grafik kurva pembelajaran model oleh fungsi ` PlotLearningCurve` Sebagai input, fungsi menerima yang berikut:\n", "\n", "* `X_train`, `Y_train`: Data pelatihan dan nilai target.\n", "* `X_val`, `Y_val`: Data validasi dan nilai target\n", "* `epochs`: Jumlah epoch. Defaultnya di 10.\n", "* `node`: Daftar neuron pada setiap layers. Setiap bilangan bulat menunjukkan jumlah neuron di setiap lapisan. Panjang daftar ini menunjukkan jumlah lapisan. Artinya, setiap bilangan bulat dalam daftar ini sesuai dengan jumlah node di setiap lapisan.\n", "* `lr`: Kecepatan pembelajaran algoritma pelatihan backpropagation. Defaultnya adalah 0,15." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "cMErf613kl7_" }, "outputs": [], "source": [ "def NeuralNetwork(X_train, Y_train, X_val=None, Y_val=None, epochs=10, nodes=[], lr=0.15):\n", " hidden_layers = len(nodes) - 1\n", " weights = InitializeWeights(nodes)\n", "\n", " train_losses, val_losses = [], []\n", "\n", " for epoch in range(1, epochs+1):\n", " weights = Train(X_train, Y_train, lr, weights)\n", "\n", " if epoch % 20 == 0:\n", " print(\"Epoch {}\".format(epoch))\n", " train_loss = Loss(X_train, Y_train, weights)\n", " train_losses.append(train_loss)\n", " print(\"Training Loss: {}\".format(train_loss))\n", "\n", " if X_val is not None:\n", " val_loss = Loss(X_val, Y_val, weights)\n", " val_losses.append(val_loss)\n", " print(\"Validation Loss: {}\".format(val_loss))\n", "\n", " PlotLearningCurve(train_losses, val_losses)\n", " return weights\n", "\n", "def PlotLearningCurve(train_losses, val_losses):\n", " epochs = range(20, len(train_losses)*20 + 1, 20)\n", "\n", " plt.plot(epochs, train_losses, label='Training Loss')\n", " plt.plot(epochs, val_losses, label='Validation Loss')\n", " plt.title('Learning Curve')\n", " plt.xlabel('Epochs')\n", " plt.ylabel('Loss')\n", " plt.legend()\n", " plt.show()\n", "\n", "def Loss(X, Y, weights):\n", " loss = 0\n", " for i in range(len(X)):\n", " x, y = X[i], Y[i]\n", " x = np.append(1, x) # Augment feature vector\n", " activations = ForwardPropagation(x, weights, len(weights))\n", " output = activations[-1].A1\n", " loss += np.sum((y - output) ** 2) / 2\n", "\n", " return loss / len(X)" ] }, { "cell_type": "markdown", "metadata": { "id": "x_hdYTW7lMWl" }, "source": [ "# Inisialisasi Bobot\n", "\n", "Inisialisasi bobot adalah langkah awal di mana nilai-nilai bobot antara neuron-neuron dalam jaringan saraf diatur dengan nilai awal tertentu sebelum proses pelatihan dimulai. Inisialisasi bobot yang baik dapat mempengaruhi kinerja jaringan secara keseluruhan." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "hAd1Vn5skl8D" }, "outputs": [], "source": [ "def InitializeWeights(nodes):\n", " \"\"\"Initialize weights with random values in [-1, 1] (including bias)\"\"\"\n", " layers, weights = len(nodes), []\n", "\n", " for i in range(1, layers):\n", " w = [[np.random.uniform(-1, 1) for k in range(nodes[i-1] + 1)]\n", " for j in range(nodes[i])]\n", " weights.append(np.matrix(w))\n", "\n", " return weights" ] }, { "cell_type": "markdown", "metadata": { "id": "wGG76tswlTU-" }, "source": [ "# Fungsi Aktivasi (Sigmoid dan turunannya)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "Wt5SYx11iVda" }, "outputs": [], "source": [ "def Sigmoid(x):\n", " return 1 / (1 + np.exp(-x))\n", "\n", "def SigmoidDerivative(x):\n", " return np.multiply(x, 1-x)" ] }, { "cell_type": "markdown", "metadata": { "id": "R07julrUkl8E" }, "source": [ "# Feed Forward\n", "\n", "* Setiap lapisan menerima input dan menghitung output. Keluarannya dihitung dengan perkalian dot antara data input dan bobot pada hidden layer, lalu meneruskan perkalian dot ini melalui fungsi aktivasi (dalam hal ini, fungsi sigmoid).\n", "* Output dari setiap lapisan adalah input pada lapisan berikutnya.\n", "* Masukan input pertama adalah vektor fitur. Pada dataset Crop ada 7 fitur yang digunakan" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "Kwlht6NSkl8F" }, "outputs": [], "source": [ "def ForwardPropagation(x, weights, layers):\n", " activations, layer_input = [x], x\n", " for j in range(layers):\n", " activation = Sigmoid(np.dot(layer_input, weights[j].T))\n", " activations.append(activation)\n", " layer_input = np.append(1, activation)\n", "\n", " return activations" ] }, { "cell_type": "markdown", "metadata": { "id": "xa-Usfp7kl8G" }, "source": [ "# Backward Propagation:\n", "\n", "* Hitung error yang dihasilkan dari Feed Forward\n", "* Hitung error per layer dengan cara:\n", " * Menghitung Delta menggunakan turunan fungsi Sigmoid yang dikalikan dengan error yang telah dihitung\n", " * Update bobot pada layer sekarang dengan mengkalikan delta dengan input dari layer sebelumnya (hasil dari fungsi aktivasi) dan learning rate\n", " * Update error pada layer yang sekarang dengan delta dan bobot yang baru." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "ZuhN8mJAkl8H" }, "outputs": [], "source": [ "def BackPropagation(y, activations, weights, layers):\n", " outputFinal = activations[-1]\n", " error = np.matrix(y - outputFinal) # Error at output\n", "\n", " for j in range(layers, 0, -1):\n", " currActivation = activations[j]\n", "\n", " if(j > 1):\n", " # Augment previous activation\n", " prevActivation = np.append(1, activations[j-1])\n", " else:\n", " # First hidden layer, prevActivation is input (without bias)\n", " prevActivation = activations[0]\n", "\n", " delta = np.multiply(error, SigmoidDerivative(currActivation))\n", " weights[j-1] += lr * np.multiply(delta.T, prevActivation)\n", "\n", " w = np.delete(weights[j-1], [0], axis=1)\n", " error = np.dot(delta, w) # Calculate error for current layer\n", "\n", " return weights" ] }, { "cell_type": "markdown", "metadata": { "id": "VJiiblGbn1MH" }, "source": [ "# Fungsi Pelatihan, Prediksi dan Perhitungan Akurasi" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "1WqMoGM9kl8J" }, "outputs": [], "source": [ "def Train(X, Y, lr, weights):\n", " layers = len(weights)\n", " for i in range(len(X)):\n", " x, y = X[i], Y[i]\n", " x = np.matrix(np.append(1, x)) # Augment feature vector\n", "\n", " activations = ForwardPropagation(x, weights, layers)\n", " weights = BackPropagation(y, activations, weights, layers)\n", "\n", " return weights" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "Zgg5m9xMkl8O" }, "outputs": [], "source": [ "def Predict(item, weights):\n", " layers = len(weights)\n", " item = np.append(1, item) # Augment feature vector\n", "\n", " ##_Forward Propagation_##\n", " activations = ForwardPropagation(item, weights, layers)\n", "\n", " outputFinal = activations[-1].A1\n", " index = np.argmax(outputFinal)\n", "\n", " # Initialize prediction vector to zeros\n", " y = [0 for i in range(len(outputFinal))]\n", " y[index] = 1 # Set guessed class to 1\n", "\n", " return y # Return prediction vector" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "2r-cFvFmkl8R" }, "outputs": [], "source": [ "def Accuracy(X, Y, weights):\n", " \"\"\"Run set through network, find overall accuracy\"\"\"\n", " correct = 0\n", "\n", " for i in range(len(X)):\n", " x, y = X[i], np.array(Y[i]) # Convert y to numpy array\n", " guess = Predict(x, weights)\n", "\n", " # Find the index with the maximum probability\n", " predicted_class = np.argmax(guess)\n", " true_class = np.argmax(y)\n", "\n", " if predicted_class == true_class:\n", " # Guessed correctly\n", " correct += 1\n", "\n", " return correct / len(X)" ] }, { "cell_type": "markdown", "metadata": { "id": "EJzI3NMkU3z7" }, "source": [ "# Proses Pelatihan" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 732 }, "id": "EcjPREEjkl8X", "outputId": "89111fbe-857d-43d9-f9d1-dee66ec6beae" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 20\n", "Training Loss: 0.2743344314418524\n", "Validation Loss: 0.2107634927801524\n", "Epoch 40\n", "Training Loss: 0.10002890201240483\n", "Validation Loss: 0.057043416301975756\n", "Epoch 60\n", "Training Loss: 0.05027575749186288\n", "Validation Loss: 0.024504097397193978\n", "Epoch 80\n", "Training Loss: 0.032250441547394454\n", "Validation Loss: 0.01798263111575391\n", "Epoch 100\n", "Training Loss: 0.022498480908666536\n", "Validation Loss: 0.015101545170579982\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "f = len(X[0]) # Number of features\n", "o = len(Y[0]) # Number of outputs / classes\n", "\n", "layers = [f, 8, o] # Number of nodes in layers\n", "lr, epochs = 0.15, 100\n", "\n", "weights = NeuralNetwork(X_train, Y_train, X_val, Y_val, epochs=epochs, nodes=layers, lr=lr);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "EuR5PMOvU_0r", "outputId": "970d5ea5-f7ee-4887-e37d-40e5562f7dbd" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Testing Accuracy in Train Data: 0.9916666666666667\n" ] } ], "source": [ "print(\"Testing Accuracy in Train Data: {}\".format(Accuracy(X_train, Y_train, weights)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 263 }, "id": "VNgszmVDkl8a", "outputId": "5fef1a6e-6357-4832-9489-65a5e29b3ae6" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Testing Accuracy in Testing Data: 0.9583333333333334\n" ] } ], "source": [ "print(\"Testing Accuracy in Testing Data: {}\".format(Accuracy(X_test, Y_test, weights)))" ] }, { "cell_type": "markdown", "metadata": { "id": "oNoUt45XVEH4" }, "source": [ "# Laporan Klasifikasi" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "Og9UUMobR7H3" }, "outputs": [], "source": [ "from sklearn.metrics import classification_report, confusion_matrix\n", "target_names = 'Hilmi', 'Tanti', 'Vasyilla', 'Yudha', 'Random'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "jGmuimic_FqL" }, "outputs": [], "source": [ "predictions = [Predict(x, weights) for x in X_test]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "F-FLPqEzUuxH", "outputId": "464c4df5-9e1c-429a-8653-06e8fadc325b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " Hilmi 1.00 0.75 0.86 4\n", " Tanti 0.67 1.00 0.80 2\n", " Vasyilla 1.00 1.00 1.00 4\n", " Yudha 1.00 1.00 1.00 6\n", " Random 1.00 1.00 1.00 8\n", "\n", " micro avg 0.96 0.96 0.96 24\n", " macro avg 0.93 0.95 0.93 24\n", "weighted avg 0.97 0.96 0.96 24\n", " samples avg 0.96 0.96 0.96 24\n", "\n" ] } ], "source": [ "print(classification_report(Y_test, predictions, target_names=target_names))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "cUbJcTvhso7g", "outputId": "837a981b-5e65-4980-d840-08f7d61601e3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total True Positives (TP): 23\n", "Total True Negatives (TN): 95\n", "Total False Positives (FP): 1\n", "Total False Negatives (FN): 1\n" ] } ], "source": [ "from sklearn.metrics import confusion_matrix\n", "\n", "# Menghitung confusion matrix\n", "cm = confusion_matrix(Y_test.argmax(axis=1), np.array(predictions).argmax(axis=1))\n", "\n", "# Hitung True Positives (TP) untuk setiap kelas\n", "TP = np.diag(cm)\n", "\n", "TN = []\n", "FP = []\n", "FN = []\n", "for i in range(len(cm)):\n", " temp = np.delete(cm, i, 0) # hapus baris ke-i\n", " temp = np.delete(temp, i, 1) # hapus kolom ke-i\n", " TN.append(np.sum(np.delete(np.delete(cm, i, 0), i, 1)))\n", " FP.append(np.sum(cm[:, i]) - cm[i, i])\n", " FN.append(np.sum(cm[i, :]) - cm[i, i])\n", "\n", "# Hitung total TP, TN, FP, FN\n", "total_TP = np.sum(TP)\n", "total_TN = np.sum(TN)\n", "total_FP = np.sum(FP)\n", "total_FN = np.sum(FN)\n", "\n", "# Cetak total\n", "print(f\"Total True Positives (TP): {total_TP}\")\n", "print(f\"Total True Negatives (TN): {total_TN}\")\n", "print(f\"Total False Positives (FP): {total_FP}\")\n", "print(f\"Total False Negatives (FN): {total_FN}\")\n" ] }, { "cell_type": "markdown", "metadata": { "id": "FsiT6R4wVby-" }, "source": [ "# Kesimpulan\n", "\n", "Pembuatan *artificial neural network* menggunakan metode Backpropagation menggunakan dataset Crop Recommendation memerlukan beberapa langkah, seperti Data Understanding, Data Preparation, Implementasi Neural Network menggunakan Feedforward dan Backpropagation, serta Training dengan menggunakan 150 data.\n", "\n", "Hasil dari training menunjukkan bahwa tidak adanya underfitting maupun overfitting dengan kurva pembelajaran yang normal. Akurasi prediksi data mencapai 1.0 dengan menggunakan data test sebanyak 48 data" ] }, { "cell_type": "markdown", "metadata": { "id": "MICH6eE4rzbM" }, "source": [ "# adada" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "k1lcyhnJ5-ZJ" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import h5py" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "qApq_Oqh5Uf_" }, "outputs": [], "source": [ "# Load the model\n", "def load_model(filepath):\n", " weights = []\n", " with h5py.File(filepath, 'r') as f:\n", " for i in range(len(f.keys())):\n", " weights.append(f['weight_{}'.format(i)][:])\n", " return weights" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "AV50Ldtv5cit" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler, LabelEncoder\n", "from keras.models import Sequential\n", "from keras.layers import Dense\n", "from keras.optimizers import Adam\n", "from keras.utils import to_categorical\n", "import librosa\n", "import librosa.display\n", "import os\n", "from scipy.fftpack import dct\n", "from keras.models import load_model\n", "import matplotlib.pyplot as plt\n", "from keras.callbacks import History" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "eQNJ2Djc5cx4" }, "outputs": [], "source": [ "# Fungsi pre-emphasis\n", "def pre_emphasis(signal, coefficient=0.97):\n", " return np.append(signal[0], signal[1:] - coefficient * signal[:-1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "k_ExiPrd5geI" }, "outputs": [], "source": [ "# Fungsi framing\n", "def framing(signal, sample_rate, frame_length=0.025, frame_step=0.010):\n", " nsamples_signal = len(signal)\n", " nsamples_frame = int(round(frame_length * sample_rate))\n", " nsamples_stride = int(round(frame_step * sample_rate))\n", " n_frames = int(np.ceil((nsamples_signal - nsamples_frame) / nsamples_stride) + 1)\n", " nsamples_padding = ((n_frames - 1) * nsamples_stride + nsamples_frame) - nsamples_signal\n", " z = np.zeros(nsamples_padding)\n", " signal = np.append(signal, z)\n", " frames = np.empty((n_frames, nsamples_frame))\n", " for i in range(n_frames):\n", " left = i * nsamples_stride\n", " right = left + nsamples_frame\n", " frame = signal[left:right]\n", " frames[i] = frame\n", " return frames" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "vH9dRugm5h1v" }, "outputs": [], "source": [ "# Fungsi untuk mengekstrak fitur MFCC dari file audio\n", "def extract_mfcc(audio_file, sr=44100, frame_length=0.025, frame_step=0.010, nfilt=40, num_ceps=12):\n", " y, _ = librosa.load(audio_file, sr=sr)\n", "\n", " # Normalize\n", " y_norm = librosa.util.normalize(y)\n", "\n", " # Pre-emphasis\n", " y_pre_emphasis = pre_emphasis(y_norm)\n", "\n", " # Framing\n", " frames = framing(y_pre_emphasis, sr, frame_length, frame_step)\n", "\n", " # Hamming window\n", " frames *= np.hanning(len(frames[0]))\n", "\n", " # FFT\n", " NFFT = 512\n", " mag_frames = np.absolute(np.fft.rfft(frames, NFFT))\n", " pow_frames = ((1.0 / NFFT) * ((mag_frames) ** 2))\n", "\n", " # Mel filter bank\n", " low_freq_mel = 0\n", " high_freq_mel = (2595 * np.log10(1 + (sr / 2) / 700))\n", " mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)\n", " hz_points = (700 * (10**(mel_points / 2595) - 1))\n", " bin = np.floor((NFFT + 1) * hz_points / sr)\n", "\n", " fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))\n", " for m in range(1, nfilt + 1):\n", " f_m_minus = int(bin[m - 1])\n", " f_m = int(bin[m])\n", " f_m_plus = int(bin[m + 1])\n", " for k in range(f_m_minus, f_m):\n", " fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])\n", " for k in range(f_m, f_m_plus):\n", " fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])\n", "\n", " filter_banks = np.dot(pow_frames, fbank.T)\n", " filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)\n", " filter_banks = 20 * np.log10(filter_banks)\n", "\n", " # Discrete Cosine Transform (DCT) for MFCC\n", " mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1:(num_ceps + 1)]\n", "\n", " return np.mean(mfcc, axis=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "HAqWfz1D5k3i" }, "outputs": [], "source": [ "def Predict(item, weights, target_names):\n", " layers = len(weights)\n", " item = np.append(1, item) # Augment feature vector\n", "\n", " activations = ForwardPropagation(item, weights, layers)\n", "\n", " outputFinal = activations[-1].flatten()\n", " index = np.argmax(outputFinal)\n", "\n", " predicted_class = target_names[index]\n", " return predicted_class\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "AOWrW0tp5jWd" }, "outputs": [], "source": [ "# Read the WAV file and extract MFCC features\n", "wav_file = \"/content/drive/MyDrive/Skripsi/Test/label-2-depan-mati_siVeWW7T.wav\"\n", "mfcc_features = extract_mfcc(wav_file)\n", "testing = mfcc_features.reshape(1, -1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 193 }, "id": "Glg25YVv5uHZ", "outputId": "5713a048-c2df-436b-80a0-f9486fff52cc" }, "outputs": [ { "ename": "NameError", "evalue": "name 'model_user' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# Predict the class of the sound sample\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtesting\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_user\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_names\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Predicted class: {prediction}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'model_user' is not defined" ] } ], "source": [ "# Define the target names\n", "target_names = ['Hilmi', 'Tanti', 'Vasyilla', 'Yudha', 'Random']\n", "\n", "# Predict the class of the sound sample\n", "prediction = Predict(testing, model_user, target_names)\n", "print(f\"Predicted class: {prediction}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "7JaOTyLH5uUk" }, "outputs": [], "source": [] } ], "metadata": { "colab": { "provenance": [] }, "kaggle": { "accelerator": "none", "dataSources": [ { "datasetId": 1046158, "sourceId": 1760012, "sourceType": "datasetVersion" } ], "dockerImageVersionId": 30635, "isGpuEnabled": false, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 0 }