{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**LIBRARY**" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "import librosa\n", "import librosa.display\n", "import numpy as np\n", "import noisereduce as nr\n", "import os\n", "import soundfile as sf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**PARAMETER**" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# Parameter\n", "TARGET_DURATION = 1.0 # Durasi target dalam detik\n", "SAMPLE_RATE = 48000 # Frekuensi sampling sesuai dataset (48 kHz)\n", "DATASET_PATH = \"EDITED3\" # Ganti dengan path dataset\n", "OUTPUT_DIR = \"E:\\! KULIAHHH\\Ivano Kuliah\\!SEMESTER 8\\!SKRIPSI\\Data Suara\\!REVISI\\preprocessing_data_v2\" # Folder untuk menyimpan hasil preprocessing\n", "\n", "# Pastikan folder output ada\n", "os.makedirs(OUTPUT_DIR, exist_ok=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**LOAD DATASET**" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "def load_audio(file_path, sr=SAMPLE_RATE):\n", " \"\"\"Membaca file audio dan mengembalikan waveform serta sample rate.\"\"\"\n", " y, sr = librosa.load(file_path, sr=sr)\n", " return y, sr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**NORMALISASI AUDIO**" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "def normalize_audio(y):\n", " \"\"\"Normalisasi amplitudo antara -1 dan 1.\"\"\"\n", " return y / np.max(np.abs(y))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**PADDING AUDIO ATAU CUT**" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "def pad_audio(y, sr=SAMPLE_RATE, target_duration=TARGET_DURATION):\n", " \"\"\"Menyesuaikan durasi audio dengan padding atau pemotongan.\"\"\"\n", " target_length = int(sr * target_duration) # Hitung jumlah sampel target\n", " if len(y) > target_length:\n", " return y[:target_length] # Potong jika lebih panjang\n", " else:\n", " return np.pad(y, (0, target_length - len(y)), mode='constant') # Tambahkan padding" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**NOISE REDUCTION**" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "def reduce_noise(y, sr=SAMPLE_RATE):\n", " \"\"\"Mengurangi noise dengan noisereduce.\"\"\"\n", " return nr.reduce_noise(y=y, sr=sr)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**PREPROCESS AUDIO**" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Processed: Negatif/mindrabodoh1 - Copy.wav\n", "Processed: Negatif/mindrabodoh1.wav\n", "Processed: Negatif/mindrabodoh10 - Copy.wav\n", "Processed: Negatif/mindrabodoh10.wav\n", "Processed: Negatif/mindrabodoh2 - Copy.wav\n", "Processed: Negatif/mindrabodoh2.wav\n", "Processed: Negatif/mindrabodoh3 - Copy.wav\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Processed: Negatif/mindrabodoh3.wav\n", "Processed: Negatif/mindrabodoh4 - Copy.wav\n", "Processed: Negatif/mindrabodoh4.wav\n", "Processed: Negatif/mindrabodoh5 - Copy.wav\n", "Processed: Negatif/mindrabodoh5.wav\n", "Processed: Negatif/mindrabodoh6 - Copy.wav\n", "Processed: Negatif/mindrabodoh6.wav\n", "Processed: Negatif/mindrabodoh7 - Copy.wav\n", "Processed: Negatif/mindrabodoh7.wav\n", "Processed: Negatif/mindrabodoh8 - Copy.wav\n", "Processed: Negatif/mindrabodoh8.wav\n", "Processed: Negatif/mindrabodoh9 - Copy.wav\n", "Processed: Negatif/mindrabodoh9.wav\n", "Processed: Negatif/mindralicik1 - Copy.wav\n", "Processed: Negatif/mindralicik1.wav\n", "Processed: Negatif/mindralicik10 - Copy.wav\n", "Processed: Negatif/mindralicik10.wav\n", "Processed: Negatif/mindralicik2 - Copy.wav\n", "Processed: Negatif/mindralicik2.wav\n", "Processed: Negatif/mindralicik3 - Copy.wav\n", "Processed: Negatif/mindralicik3.wav\n", "Processed: Negatif/mindralicik4 - Copy.wav\n", "Processed: Negatif/mindralicik4.wav\n", "Processed: Negatif/mindralicik5 - Copy.wav\n", "Processed: Negatif/mindralicik5.wav\n", "Processed: Negatif/mindralicik6 - Copy.wav\n", "Processed: Negatif/mindralicik6.wav\n", "Processed: Negatif/mindralicik7 - Copy.wav\n", "Processed: Negatif/mindralicik7.wav\n", "Processed: Negatif/mindralicik8 - Copy.wav\n", "Processed: Negatif/mindralicik8.wav\n", "Processed: Negatif/mindralicik9 - Copy.wav\n", "Processed: Negatif/mindralicik9.wav\n", "Processed: Negatif/mindrasombong1 - Copy.wav\n", "Processed: Negatif/mindrasombong1.wav\n", "Processed: Negatif/mindrasombong10 - Copy.wav\n", "Processed: Negatif/mindrasombong10.wav\n", "Processed: Negatif/mindrasombong2 - Copy.wav\n", "Processed: Negatif/mindrasombong2.wav\n", "Processed: Negatif/mindrasombong3 - Copy.wav\n", "Processed: Negatif/mindrasombong3.wav\n", "Processed: Negatif/mindrasombong4 - Copy.wav\n", "Processed: Negatif/mindrasombong4.wav\n", "Processed: Negatif/mindrasombong5 - Copy.wav\n", "Processed: Negatif/mindrasombong5.wav\n", "Processed: Negatif/mindrasombong6 - Copy.wav\n", "Processed: Negatif/mindrasombong6.wav\n", "Processed: Negatif/mindrasombong7 - Copy.wav\n", "Processed: Negatif/mindrasombong7.wav\n", "Processed: Negatif/mindrasombong8 - Copy.wav\n", "Processed: Negatif/mindrasombong8.wav\n", "Processed: Negatif/mindrasombong9 - Copy.wav\n", "Processed: Negatif/mindrasombong9.wav\n", "Processed: Negatif/naniabodoh1 - Copy.wav\n", "Processed: Negatif/naniabodoh1.wav\n", "Processed: Negatif/naniabodoh10 - Copy.wav\n", "Processed: Negatif/naniabodoh10.wav\n", "Processed: Negatif/naniabodoh11 - Copy.wav\n", "Processed: Negatif/naniabodoh11.wav\n", "Processed: Negatif/naniabodoh12 - Copy.wav\n", "Processed: Negatif/naniabodoh12.wav\n", "Processed: Negatif/naniabodoh13 - Copy.wav\n", "Processed: Negatif/naniabodoh13.wav\n", "Processed: Negatif/naniabodoh2 - Copy.wav\n", "Processed: Negatif/naniabodoh2.wav\n", "Processed: Negatif/naniabodoh3 - Copy.wav\n", "Processed: Negatif/naniabodoh3.wav\n", "Processed: Negatif/naniabodoh4 - Copy.wav\n", "Processed: Negatif/naniabodoh4.wav\n", "Processed: Negatif/naniabodoh5 - Copy.wav\n", "Processed: Negatif/naniabodoh5.wav\n", "Processed: Negatif/naniabodoh6 - Copy.wav\n", "Processed: Negatif/naniabodoh6.wav\n", "Processed: Negatif/naniabodoh7 - Copy.wav\n", "Processed: Negatif/naniabodoh7.wav\n", "Processed: Negatif/naniabodoh8 - Copy.wav\n", "Processed: Negatif/naniabodoh8.wav\n", "Processed: Negatif/naniabodoh9 - Copy.wav\n", "Processed: Negatif/naniabodoh9.wav\n", "Processed: Negatif/nanialicik1 - Copy.wav\n", "Processed: Negatif/nanialicik1.wav\n", "Processed: Negatif/nanialicik10 - Copy.wav\n", "Processed: Negatif/nanialicik10.wav\n", "Processed: Negatif/nanialicik11 - Copy.wav\n", "Processed: Negatif/nanialicik11.wav\n", "Processed: Negatif/nanialicik12 - Copy.wav\n", "Processed: Negatif/nanialicik12.wav\n", "Processed: Negatif/nanialicik13 - Copy.wav\n", "Processed: Negatif/nanialicik13.wav\n", "Processed: Negatif/nanialicik14 - Copy.wav\n", "Processed: Negatif/nanialicik14.wav\n", "Processed: Negatif/nanialicik2 - Copy.wav\n", "Processed: Negatif/nanialicik2.wav\n", "Processed: Negatif/nanialicik3 - Copy.wav\n", "Processed: Negatif/nanialicik3.wav\n", "Processed: Negatif/nanialicik4 - Copy.wav\n", "Processed: Negatif/nanialicik4.wav\n", "Processed: Negatif/nanialicik5 - Copy.wav\n", "Processed: Negatif/nanialicik5.wav\n", "Processed: Negatif/nanialicik6 - Copy.wav\n", "Processed: Negatif/nanialicik6.wav\n", "Processed: Negatif/nanialicik7 - Copy.wav\n", "Processed: Negatif/nanialicik7.wav\n", "Processed: Negatif/nanialicik8 - Copy.wav\n", "Processed: Negatif/nanialicik8.wav\n", "Processed: Negatif/nanialicik9 - Copy.wav\n", "Processed: Negatif/nanialicik9.wav\n", "Processed: Negatif/naniasombong1 - Copy.wav\n", "Processed: Negatif/naniasombong1.wav\n", "Processed: Negatif/naniasombong10 - Copy.wav\n", "Processed: Negatif/naniasombong10.wav\n", "Processed: Negatif/naniasombong11 - Copy.wav\n", "Processed: Negatif/naniasombong11.wav\n", "Processed: Negatif/naniasombong12 - Copy.wav\n", "Processed: Negatif/naniasombong12.wav\n", "Processed: Negatif/naniasombong13 - Copy.wav\n", "Processed: Negatif/naniasombong13.wav\n", "Processed: Negatif/naniasombong2 - Copy.wav\n", "Processed: Negatif/naniasombong2.wav\n", "Processed: Negatif/naniasombong3 - Copy.wav\n", "Processed: Negatif/naniasombong3.wav\n", "Processed: Negatif/naniasombong4 - Copy.wav\n", "Processed: Negatif/naniasombong4.wav\n", "Processed: Negatif/naniasombong5 - Copy.wav\n", "Processed: Negatif/naniasombong5.wav\n", "Processed: Negatif/naniasombong6 - Copy.wav\n", "Processed: Negatif/naniasombong6.wav\n", "Processed: Negatif/naniasombong7 - Copy.wav\n", "Processed: Negatif/naniasombong7.wav\n", "Processed: Negatif/naniasombong8 - Copy.wav\n", "Processed: Negatif/naniasombong8.wav\n", "Processed: Negatif/naniasombong9 - Copy.wav\n", "Processed: Negatif/naniasombong9.wav\n", "Processed: Positif/mindrabai8 - Copy.wav\n", "Processed: Positif/mindrabai8.wav\n", "Processed: Positif/mindrabaik1 - Copy.wav\n", "Processed: Positif/mindrabaik1.wav\n", "Processed: Positif/mindrabaik10 - Copy.wav\n", "Processed: Positif/mindrabaik10.wav\n", "Processed: Positif/mindrabaik2 - Copy.wav\n", "Processed: Positif/mindrabaik2.wav\n", "Processed: Positif/mindrabaik3 - Copy.wav\n", "Processed: Positif/mindrabaik3.wav\n", "Processed: Positif/mindrabaik4 - Copy.wav\n", "Processed: Positif/mindrabaik4.wav\n", "Processed: Positif/mindrabaik5 - Copy.wav\n", "Processed: Positif/mindrabaik5.wav\n", "Processed: Positif/mindrabaik6 - Copy.wav\n", "Processed: Positif/mindrabaik6.wav\n", "Processed: Positif/mindrabaik7 - Copy.wav\n", "Processed: Positif/mindrabaik7.wav\n", "Processed: Positif/mindrabaik8 - Copy.wav\n", "Processed: Positif/mindrabaik8.wav\n", "Processed: Positif/mindrabaik9 - Copy.wav\n", "Processed: Positif/mindrabaik9.wav\n", "Processed: Positif/mindrarajin1 - Copy.wav\n", "Processed: Positif/mindrarajin1.wav\n", "Processed: Positif/mindrarajin10 - Copy.wav\n", "Processed: Positif/mindrarajin10.wav\n", "Processed: Positif/mindrarajin2 - Copy.wav\n", "Processed: Positif/mindrarajin2.wav\n", "Processed: Positif/mindrarajin3 - Copy.wav\n", "Processed: Positif/mindrarajin3.wav\n", "Processed: Positif/mindrarajin4 - Copy.wav\n", "Processed: Positif/mindrarajin4.wav\n", "Processed: Positif/mindrarajin5 - Copy.wav\n", "Processed: Positif/mindrarajin5.wav\n", "Processed: Positif/mindrarajin6 - Copy.wav\n", "Processed: Positif/mindrarajin6.wav\n", "Processed: Positif/mindrarajin7 - Copy.wav\n", "Processed: Positif/mindrarajin7.wav\n", "Processed: Positif/mindrarajin8 - Copy.wav\n", "Processed: Positif/mindrarajin8.wav\n", "Processed: Positif/mindrarajin9 - Copy.wav\n", "Processed: Positif/mindrarajin9.wav\n", "Processed: Positif/naniabaik1 - Copy.wav\n", "Processed: Positif/naniabaik1.wav\n", "Processed: Positif/naniabaik10 - Copy.wav\n", "Processed: Positif/naniabaik10.wav\n", "Processed: Positif/naniabaik11 - Copy.wav\n", "Processed: Positif/naniabaik11.wav\n", "Processed: Positif/naniabaik12 - Copy.wav\n", "Processed: Positif/naniabaik12.wav\n", "Processed: Positif/naniabaik13 - Copy.wav\n", "Processed: Positif/naniabaik13.wav\n", "Processed: Positif/naniabaik14 - Copy.wav\n", "Processed: Positif/naniabaik14.wav\n", "Processed: Positif/naniabaik15 - Copy.wav\n", "Processed: Positif/naniabaik15.wav\n", "Processed: Positif/naniabaik16 - Copy.wav\n", "Processed: Positif/naniabaik16.wav\n", "Processed: Positif/naniabaik2 - Copy.wav\n", "Processed: Positif/naniabaik2.wav\n", "Processed: Positif/naniabaik3 - Copy.wav\n", "Processed: Positif/naniabaik3.wav\n", "Processed: Positif/naniabaik4 - Copy.wav\n", "Processed: Positif/naniabaik4.wav\n", "Processed: Positif/naniabaik5 - Copy.wav\n", "Processed: Positif/naniabaik5.wav\n", "Processed: Positif/naniabaik6 - Copy.wav\n", "Processed: Positif/naniabaik6.wav\n", "Processed: Positif/naniabaik7 - Copy.wav\n", "Processed: Positif/naniabaik7.wav\n", "Processed: Positif/naniabaik8 - Copy.wav\n", "Processed: Positif/naniabaik8.wav\n", "Processed: Positif/naniabaik9 - Copy.wav\n", "Processed: Positif/naniabaik9.wav\n", "Processed: Positif/naniarajin1 - Copy.wav\n", "Processed: Positif/naniarajin1.wav\n", "Processed: Positif/naniarajin10 - Copy.wav\n", "Processed: Positif/naniarajin10.wav\n", "Processed: Positif/naniarajin11 - Copy.wav\n", "Processed: Positif/naniarajin11.wav\n", "Processed: Positif/naniarajin12 - Copy.wav\n", "Processed: Positif/naniarajin12.wav\n", "Processed: Positif/naniarajin13 - Copy.wav\n", "Processed: Positif/naniarajin13.wav\n", "Processed: Positif/naniarajin14 - Copy.wav\n", "Processed: Positif/naniarajin14.wav\n", "Processed: Positif/naniarajin2 - Copy.wav\n", "Processed: Positif/naniarajin2.wav\n", "Processed: Positif/naniarajin3 - Copy.wav\n", "Processed: Positif/naniarajin3.wav\n", "Processed: Positif/naniarajin4 - Copy.wav\n", "Processed: Positif/naniarajin4.wav\n", "Processed: Positif/naniarajin5 - Copy.wav\n", "Processed: Positif/naniarajin5.wav\n", "Processed: Positif/naniarajin6 - Copy.wav\n", "Processed: Positif/naniarajin6.wav\n", "Processed: Positif/naniarajin7 - Copy.wav\n", "Processed: Positif/naniarajin7.wav\n", "Processed: Positif/naniarajin8 - Copy.wav\n", "Processed: Positif/naniarajin8.wav\n", "Processed: Positif/naniarajin9 - Copy.wav\n", "Processed: Positif/naniarajin9.wav\n", "Preprocessing selesai. Hasil disimpan di folder 'preprocessing_data'.\n" ] } ], "source": [ "def preprocess_audio(file_path, output_path):\n", " \"\"\"Melakukan preprocessing (normalisasi, padding, noise reduction) pada file audio.\"\"\"\n", " y, sr = load_audio(file_path)\n", " y = reduce_noise(y, sr)\n", " y = normalize_audio(y)\n", " y = pad_audio(y, sr)\n", " \n", " # Simpan hasil preprocessing\n", " sf.write(output_path, y, sr)\n", "\n", "# Looping folder Positif dan Negatif\n", "for label_folder in os.listdir(DATASET_PATH):\n", " label_path = os.path.join(DATASET_PATH, label_folder)\n", " \n", " if os.path.isdir(label_path): # Pastikan itu folder Positif / Negatif\n", " output_label_path = os.path.join(OUTPUT_DIR, label_folder)\n", " os.makedirs(output_label_path, exist_ok=True)\n", "\n", " # Looping file WAV di dalam folder\n", " for file_name in os.listdir(label_path):\n", " if file_name.endswith(\".wav\"):\n", " input_path = os.path.join(label_path, file_name)\n", " output_path = os.path.join(output_label_path, file_name)\n", " preprocess_audio(input_path, output_path)\n", " print(f\"Processed: {label_folder}/{file_name}\")\n", "\n", "print(\"Preprocessing selesai. Hasil disimpan di folder 'preprocessing_data'.\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 2 }