TIF_E41211245/Code Python/preprocessing_data.ipynb

445 lines
17 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**LIBRARY**"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"import librosa\n",
"import librosa.display\n",
"import numpy as np\n",
"import noisereduce as nr\n",
"import os\n",
"import soundfile as sf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**PARAMETER**"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Preprocessing parameters\n",
"TARGET_DURATION = 1.0  # Target clip duration in seconds\n",
"SAMPLE_RATE = 48000  # Sampling frequency of the dataset (48 kHz)\n",
"DATASET_PATH = \"EDITED3\"  # Replace with the dataset path\n",
"# Folder that receives the preprocessed files. Raw string, so the backslashes of the\n",
"# Windows path are kept literally instead of being read as (invalid) escape sequences.\n",
"OUTPUT_DIR = r\"E:\\! KULIAHHH\\Ivano Kuliah\\!SEMESTER 8\\!SKRIPSI\\Data Suara\\!REVISI\\preprocessing_data_v2\"\n",
"\n",
"# Make sure the output folder exists\n",
"os.makedirs(OUTPUT_DIR, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**LOAD DATASET**"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"def load_audio(file_path, sr=SAMPLE_RATE):\n",
"    \"\"\"Load an audio file through librosa at the requested sampling rate.\n",
"\n",
"    Args:\n",
"        file_path: Path of the audio file to read.\n",
"        sr: Sampling rate handed to ``librosa.load``; defaults to SAMPLE_RATE.\n",
"\n",
"    Returns:\n",
"        The ``(waveform, sample_rate)`` pair produced by ``librosa.load``.\n",
"    \"\"\"\n",
"    waveform, rate = librosa.load(file_path, sr=sr)\n",
"    return waveform, rate"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**NORMALISASI AUDIO**"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"def normalize_audio(y):\n",
"    \"\"\"Scale the waveform so that its peak absolute amplitude becomes 1.\n",
"\n",
"    Args:\n",
"        y: Array-like of audio samples.\n",
"\n",
"    Returns:\n",
"        ``y`` divided by its peak absolute value. Empty or all-zero (silent)\n",
"        input is returned unchanged, because there is no peak to scale by.\n",
"    \"\"\"\n",
"    y = np.asarray(y)\n",
"    # np.max raises ValueError on an empty array, so bail out first.\n",
"    if y.size == 0:\n",
"        return y\n",
"    peak = np.max(np.abs(y))\n",
"    # A silent clip has peak 0; dividing by it would fill the signal with NaN.\n",
"    if peak == 0:\n",
"        return y\n",
"    return y / peak"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**PADDING AUDIO ATAU CUT**"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"def pad_audio(y, sr=SAMPLE_RATE, target_duration=TARGET_DURATION):\n",
"    \"\"\"Force the waveform to exactly ``int(sr * target_duration)`` samples.\n",
"\n",
"    Args:\n",
"        y: Sequence of audio samples.\n",
"        sr: Sampling rate used to convert the duration into a sample count.\n",
"        target_duration: Desired clip length in seconds.\n",
"\n",
"    Returns:\n",
"        The leading ``target`` samples when ``y`` is too long; otherwise ``y``\n",
"        with zeros appended at the end until it reaches the target length.\n",
"    \"\"\"\n",
"    n_target = int(sr * target_duration)\n",
"    n_missing = n_target - len(y)\n",
"    if n_missing < 0:\n",
"        # Longer than the target: keep only the leading samples\n",
"        return y[:n_target]\n",
"    # Target length or shorter: append zeros at the end\n",
"    return np.pad(y, (0, n_missing), mode='constant')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**NOISE REDUCTION**"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"def reduce_noise(y, sr=SAMPLE_RATE):\n",
"    \"\"\"Denoise a waveform with the noisereduce package.\n",
"\n",
"    Args:\n",
"        y: Audio samples to clean.\n",
"        sr: Sampling rate of ``y``; defaults to SAMPLE_RATE.\n",
"\n",
"    Returns:\n",
"        Whatever ``nr.reduce_noise`` returns for the given signal and rate.\n",
"    \"\"\"\n",
"    denoised = nr.reduce_noise(y=y, sr=sr)\n",
"    return denoised"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**PREPROCESS AUDIO**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def preprocess_audio(file_path, output_path):\n",
"    \"\"\"Run the full preprocessing chain on one audio file and save the result.\n",
"\n",
"    Steps, in order: load, noise reduction, amplitude normalization, then\n",
"    padding/trimming to the fixed target duration.\n",
"\n",
"    Args:\n",
"        file_path: Path of the input audio file.\n",
"        output_path: Path where the processed audio is written.\n",
"    \"\"\"\n",
"    y, sr = load_audio(file_path)\n",
"    y = reduce_noise(y, sr)\n",
"    y = normalize_audio(y)\n",
"    y = pad_audio(y, sr)\n",
"\n",
"    # Save the preprocessed signal\n",
"    sf.write(output_path, y, sr)\n",
"\n",
"\n",
"# Walk the label folders (Positif / Negatif) inside the dataset\n",
"for label_folder in os.listdir(DATASET_PATH):\n",
"    label_path = os.path.join(DATASET_PATH, label_folder)\n",
"\n",
"    # Only directories are label folders; skip stray files\n",
"    if not os.path.isdir(label_path):\n",
"        continue\n",
"\n",
"    output_label_path = os.path.join(OUTPUT_DIR, label_folder)\n",
"    os.makedirs(output_label_path, exist_ok=True)\n",
"\n",
"    # Process every WAV file in the label folder\n",
"    for file_name in os.listdir(label_path):\n",
"        # Case-insensitive match so files named *.WAV are not silently skipped\n",
"        if not file_name.lower().endswith(\".wav\"):\n",
"            continue\n",
"        input_path = os.path.join(label_path, file_name)\n",
"        output_path = os.path.join(output_label_path, file_name)\n",
"        preprocess_audio(input_path, output_path)\n",
"        print(f\"Processed: {label_folder}/{file_name}\")\n",
"\n",
"# Report the folder that was actually written to\n",
"print(f\"Preprocessing selesai. Hasil disimpan di folder '{OUTPUT_DIR}'.\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}