179 lines
5.9 KiB
Python
179 lines
5.9 KiB
Python
import os
import pickle
import tempfile

import librosa
import noisereduce as nr
import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify
|
|
|
|
# WSGI application object; the /predict route defined further down is registered on it.
app = Flask(__name__)
|
|
|
|
# 🛠 Definisikan ulang kelas LVQ agar bisa dipanggil ulang dari pickle
|
|
class LVQ:
    """Learning Vector Quantization classifier based on nearest prototypes.

    Each class is represented by ``n_prototypes`` prototype vectors.  During
    training the prototype closest to a sample is pulled toward the sample
    when their labels agree and pushed away otherwise.

    The class lives in this module so that ``pickle`` can resolve it when a
    stored model is loaded; the instance attribute names must therefore stay
    exactly as they are.
    """

    # Multiplicative learning-rate decay applied after every epoch.
    LR_DECAY = 0.95
    # Fixed seed so the initial prototype selection is reproducible.
    RANDOM_SEED = 42

    def __init__(self, n_classes, n_prototypes=2, learning_rate=0.01, epochs=200):
        """Store the hyper-parameters; prototypes are created by :meth:`fit`."""
        self.n_classes = n_classes
        self.n_prototypes = n_prototypes
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.prototypes = None
        self.prototype_labels = None

    def fit(self, X, y):
        """Train the prototypes on samples ``X`` with labels ``y``.

        Args:
            X: 2-D array-like of feature vectors, one row per sample.
            y: 1-D array-like of class labels aligned with ``X``.

        Raises:
            ValueError: if a class has fewer samples than ``n_prototypes``.
        """
        # Cast to float so the in-place prototype updates below cannot fail
        # (or silently truncate) when the caller passes integer features.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        # A private RandomState yields the same draws as seeding the global
        # generator with the same seed, without clobbering global RNG state.
        rng = np.random.RandomState(self.RANDOM_SEED)

        prototypes = []
        prototype_labels = []
        for label in np.unique(y):
            idx = np.flatnonzero(y == label)
            if len(idx) < self.n_prototypes:
                raise ValueError(
                    f"Class {label!r} has {len(idx)} sample(s) but "
                    f"{self.n_prototypes} prototype(s) were requested."
                )
            chosen = rng.choice(idx, self.n_prototypes, replace=False)
            prototypes.extend(X[chosen])
            prototype_labels.extend([label] * self.n_prototypes)

        self.prototypes = np.array(prototypes, dtype=float)
        self.prototype_labels = np.array(prototype_labels)

        # Decay a local copy so the configured learning rate survives the call
        # and a second fit() starts from the same rate as the first.
        lr = self.learning_rate
        for _ in range(self.epochs):
            for sample, label in zip(X, y):
                distances = np.linalg.norm(self.prototypes - sample, axis=1)
                winner_idx = np.argmin(distances)
                step = lr * (sample - self.prototypes[winner_idx])
                if self.prototype_labels[winner_idx] == label:
                    self.prototypes[winner_idx] += step
                else:
                    self.prototypes[winner_idx] -= step
            lr *= self.LR_DECAY

    def predict(self, X):
        """Classify every row of ``X`` by its nearest prototype.

        Args:
            X: 2-D array-like of feature vectors (a single 1-D vector is
                treated as one sample).

        Returns:
            Tuple ``(labels, confidence)`` of two arrays with one entry per
            sample.  Confidence is ``1 / (1 + distance)`` to the winning
            prototype, so it lies in ``(0, 1]`` and shrinks with distance.

        Raises:
            RuntimeError: if the model has no prototypes yet.
        """
        if self.prototypes is None or self.prototype_labels is None:
            raise RuntimeError("LVQ model has not been fitted yet.")

        prototypes = np.asarray(self.prototypes, dtype=float)
        labels = np.asarray(self.prototype_labels)

        samples = np.asarray(X, dtype=float)
        if samples.size == 0:
            return np.array([], dtype=labels.dtype), np.array([], dtype=float)
        samples = np.atleast_2d(samples)

        # Pairwise sample-to-prototype distances, shape (n_samples, n_prototypes).
        distances = np.linalg.norm(
            samples[:, np.newaxis, :] - prototypes[np.newaxis, :, :], axis=2
        )
        winners = np.argmin(distances, axis=1)
        nearest = distances[np.arange(len(samples)), winners]
        return labels[winners], 1.0 / (1.0 + nearest)
|
|
|
|
# 📂 Load model LVQ dan Label Encoder
|
|
# Pickle files holding the trained LVQ model and its label encoder.  The paths
# are relative, so they are resolved against the process working directory.
MODEL_PATH = "lvq_model.pkl"
ENCODER_PATH = "label_encoder.pkl"


def _load_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    # NOTE: pickle executes code on load; only point this at trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Both objects are loaded once, at import time.
lvq_model = _load_pickle(MODEL_PATH)
label_encoder = _load_pickle(ENCODER_PATH)
|
|
|
|
# 🎧 Parameter MFCC
|
|
# Default sampling rate (Hz) for the ``sr`` parameter of the audio helpers.
SAMPLE_RATE: int = 48_000
# Clip length in seconds that pad_audio() pads or truncates to.
TARGET_DURATION: float = 1.0
# Values forwarded to librosa.feature.mfcc as n_mfcc, n_fft, hop_length and lifter.
N_MFCC: int = 40
N_FFT: int = 4_096
HOP_LENGTH: int = 512
LIFTER: int = 22
|
|
|
|
# 🎯 Fungsi preprocessing audio
|
|
def normalize_audio(y):
    """Scale *y* so that its largest absolute sample equals 1.

    Args:
        y: array-like of audio samples.

    Returns:
        The peak-normalized samples.  Empty or all-zero input is returned
        unscaled, because dividing by a zero peak would fill the signal
        with NaN values.
    """
    samples = np.asarray(y)
    if samples.size == 0:
        return samples

    peak = np.max(np.abs(samples))
    if peak == 0:
        return samples
    return samples / peak
|
|
|
|
def pad_audio(y, sr=SAMPLE_RATE, target_duration=TARGET_DURATION):
    """Force *y* to exactly ``int(sr * target_duration)`` samples.

    Longer signals are cut at the end; shorter ones are zero-padded at the end.
    """
    wanted = int(sr * target_duration)
    missing = wanted - len(y)
    if missing < 0:
        return y[:wanted]
    return np.pad(y, (0, missing), mode='constant')
|
|
|
|
def reduce_noise(y, sr=SAMPLE_RATE):
    """Return *y* after passing it through ``noisereduce.reduce_noise``."""
    denoised = nr.reduce_noise(y=y, sr=sr)
    return denoised
|
|
|
|
def trim_silence(audio, sr, threshold=0.02):
    """Remove the leading quiet part of *audio*.

    Everything before the first sample whose absolute value exceeds
    *threshold* is dropped; the tail is left untouched.  If no sample exceeds
    the threshold the input is returned unchanged.  *sr* is accepted but not
    used.
    """
    loud_positions = np.nonzero(np.abs(audio) > threshold)[0]
    if loud_positions.size == 0:
        # Nothing above the threshold: hand back the original audio.
        return audio
    return audio[loud_positions[0]:]
|
|
|
|
# 🎵 Fungsi untuk preprocessing & ekstraksi MFCC
|
|
def preprocess_and_extract_mfcc(file_path):
    """Read an audio file and return its time-averaged MFCC feature vector.

    Pipeline: read with soundfile, downmix to mono, peak-normalize, pad or
    truncate, denoise, trim leading silence, compute MFCCs and average them
    over the time axis.

    Args:
        file_path: path of the audio file to read.

    Returns:
        1-D array with ``N_MFCC`` values (mean of each coefficient over time).

    Raises:
        ValueError: if any step fails; the original exception is chained.
    """
    try:
        # NOTE(review): the file's native sample rate is used as-is; nothing
        # here resamples to SAMPLE_RATE - confirm that matches training.
        y, sr = sf.read(file_path)

        # Reject empty audio.
        if len(y) == 0:
            raise ValueError("File audio kosong atau rusak.")

        # soundfile returns a (frames, channels) array for multi-channel
        # files; the helpers below assume a 1-D signal, so downmix to mono.
        if y.ndim > 1:
            y = y.mean(axis=1)

        # A completely silent signal cannot be peak-normalized meaningfully.
        if not np.any(y):
            raise ValueError("File audio kosong atau rusak.")

        # Preprocessing
        y = normalize_audio(y)
        y = pad_audio(y, sr)
        y = reduce_noise(y, sr)

        # Cut the silent part at the start of the recording.
        y = trim_silence(y, sr, threshold=0.02)

        # MFCC feature extraction, averaged over frames.
        mfcc = librosa.feature.mfcc(
            y=y,
            sr=sr,
            n_mfcc=N_MFCC,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
            lifter=LIFTER,
        )
        return np.mean(mfcc, axis=1)

    except Exception as e:
        # Keep the original traceback attached for easier debugging.
        raise ValueError(f"Gagal memproses audio: {e}") from e
|
|
# 📌 Route API untuk prediksi suara
|
|
@app.route("/predict", methods=["POST"])
def predict():
    """Classify the audio file uploaded in the ``file`` form field.

    Returns:
        JSON ``{"prediction": <label>}`` on success, or ``{"error": ...}``
        with status 400 (missing/empty upload) or 500 (processing failure).
    """
    if "file" not in request.files:
        return jsonify({"error": "Tidak ada file yang dikirim"}), 400

    uploaded = request.files["file"]
    if uploaded.filename == "":
        return jsonify({"error": "Nama file kosong"}), 400

    # Lowest confidence (in percent, 0-100 scale) still accepted as a match.
    # NOTE(review): the previous comment here said 50%, but the code has
    # always compared against 0.2; the value is kept - confirm the intent.
    min_confidence_percent = 0.2

    # A unique temporary file per request, so concurrent uploads cannot
    # overwrite each other the way a fixed file name would.
    fd, file_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        uploaded.save(file_path)

        # Reject zero-byte uploads.
        if os.path.getsize(file_path) == 0:
            return jsonify({"error": "File audio kosong atau rusak"}), 400

        # Preprocessing and MFCC extraction; the model expects a 2-D batch.
        mfcc_features = preprocess_and_extract_mfcc(file_path)
        new_mfcc_features = np.expand_dims(mfcc_features, axis=0)

        # LVQ prediction for the single sample.
        predicted_labels, confidence_scores = lvq_model.predict(new_mfcc_features)
        predicted_label = predicted_labels[0]
        confidence_score = confidence_scores[0]

        # Confidence as a percentage rounded to two decimals.
        confidence_score_percent = round(confidence_score * 100, 2)

        if confidence_score_percent < min_confidence_percent:
            predicted_label = "suara tidak dikenali"
        else:
            predicted_label = label_encoder.inverse_transform([predicted_label])[0]
            # NumPy scalars are not always JSON-serializable; use native types.
            if isinstance(predicted_label, np.generic):
                predicted_label = predicted_label.item()

        return jsonify({"prediction": predicted_label})

    except Exception as e:
        # Log with traceback on the server and report the failure as JSON.
        app.logger.exception("❗ ERROR di server Flask: %s", e)
        return jsonify({"error": f"Gagal memproses audio: {e}"}), 500

    finally:
        # Always remove the temporary file, including on the early 400 return.
        if os.path.exists(file_path):
            os.remove(file_path)
|
|
|
|
# 🚀 Menjalankan API
|
|
if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port execute
    # code.  Set FLASK_DEBUG=1 explicitly for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)
|