import os
import pickle

import librosa
import noisereduce as nr
import numpy as np
import soundfile as sf
from flask import Flask, jsonify, request

app = Flask(__name__)


# The LVQ class is redefined here so that pickle can find it again when the
# saved model is loaded further below.
class LVQ:
    """Nearest-prototype classifier trained with a winner-takes-all update.

    Each class is represented by ``n_prototypes`` vectors. During training the
    prototype closest to a sample is pulled towards it when their labels match
    and pushed away otherwise.
    """

    def __init__(self, n_classes, n_prototypes=2, learning_rate=0.01, epochs=200):
        """Store the hyperparameters; prototypes are created by ``fit``.

        Args:
            n_classes: Number of classes (stored, not otherwise used here).
            n_prototypes: Prototypes drawn per class.
            learning_rate: Initial step size of the prototype update.
            epochs: Number of passes over the training data.
        """
        self.n_classes = n_classes
        self.n_prototypes = n_prototypes
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.prototypes = None
        self.prototype_labels = None

    def fit(self, X, y):
        """Initialise prototypes from the training samples and train them.

        Args:
            X: Array of training samples, indexable by row.
            y: Array of labels aligned with ``X``.
        """
        # Fixed seed so prototype initialisation is reproducible.
        np.random.seed(42)
        prototypes = []
        prototype_labels = []
        for label in np.unique(y):
            idx = np.where(y == label)[0]
            chosen = np.random.choice(idx, self.n_prototypes, replace=False)
            prototypes.extend(X[chosen])
            prototype_labels.extend([label] * self.n_prototypes)
        self.prototypes = np.array(prototypes)
        self.prototype_labels = np.array(prototype_labels)

        # Decay a local copy of the step size. Decaying self.learning_rate
        # in place would permanently shrink the hyperparameter, so a second
        # call to fit() would start from an almost-zero learning rate.
        lr = self.learning_rate
        for _ in range(self.epochs):
            for sample, label in zip(X, y):
                distances = np.linalg.norm(self.prototypes - sample, axis=1)
                winner_idx = np.argmin(distances)
                step = lr * (sample - self.prototypes[winner_idx])
                if self.prototype_labels[winner_idx] == label:
                    self.prototypes[winner_idx] += step
                else:
                    self.prototypes[winner_idx] -= step
            lr *= 0.95

    def predict(self, X):
        """Classify each sample by its nearest prototype.

        Args:
            X: Iterable of feature vectors.

        Returns:
            Tuple ``(labels, confidences)`` of NumPy arrays. The confidence
            of a sample is ``1 / (1 + d)`` where ``d`` is the Euclidean
            distance to its winning prototype.
        """
        y_pred = []
        confidence_scores = []
        for sample in X:
            distances = np.linalg.norm(self.prototypes - sample, axis=1)
            winner_idx = np.argmin(distances)
            y_pred.append(self.prototype_labels[winner_idx])
            # Distance-based confidence score.
            confidence_scores.append(1 / (1 + distances[winner_idx]))
        return np.array(y_pred), np.array(confidence_scores)


# Load the LVQ model and the label encoder.
MODEL_PATH = "lvq_model.pkl"
ENCODER_PATH = "label_encoder.pkl"

# SECURITY NOTE: pickle.load can execute arbitrary code contained in the file.
# Only load model files that come from a trusted source.
with open(MODEL_PATH, "rb") as model_file:
    lvq_model = pickle.load(model_file)

with open(ENCODER_PATH, "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

# MFCC parameters
SAMPLE_RATE = 48000      # default sample rate (Hz) for the helper functions
TARGET_DURATION = 1.0    # clip length in seconds after padding/cropping
N_MFCC = 40              # number of MFCC coefficients
N_FFT = 4096             # FFT window size passed to librosa
HOP_LENGTH = 512         # hop length passed to librosa
LIFTER = 22              # liftering coefficient passed to librosa

# Predictions whose confidence, expressed as a percentage, falls below this
# value are reported as unrecognised.
# NOTE(review): the original comment spoke of "less than 50%", but the code
# has always compared the percentage against 0.2. The existing behaviour is
# kept here -- confirm which threshold is actually intended.
MIN_CONFIDENCE_PERCENT = 0.2
UNKNOWN_LABEL = "suara tidak dikenali"


# Audio preprocessing helpers
def normalize_audio(y):
    """Scale ``y`` so that its largest absolute sample equals 1.

    A signal whose peak is 0 (pure silence) is returned unchanged, because
    dividing by the peak would fill it with NaN.
    """
    peak = np.max(np.abs(y))
    if peak == 0:
        return y
    return y / peak


def pad_audio(y, sr=SAMPLE_RATE, target_duration=TARGET_DURATION):
    """Crop or zero-pad ``y`` at the end to ``sr * target_duration`` samples."""
    target_length = int(sr * target_duration)
    if len(y) > target_length:
        return y[:target_length]
    return np.pad(y, (0, target_length - len(y)), mode='constant')


def reduce_noise(y, sr=SAMPLE_RATE):
    """Run ``noisereduce.reduce_noise`` on ``y`` and return the result."""
    return nr.reduce_noise(y=y, sr=sr)


def trim_silence(audio, sr, threshold=0.02):
    """Drop leading samples whose absolute amplitude is not above ``threshold``.

    Only the start of the signal is trimmed. ``sr`` is accepted but not used.
    If no sample exceeds the threshold the audio is returned unchanged.
    """
    loud = np.where(np.abs(audio) > threshold)[0]
    if loud.size == 0:
        return audio
    return audio[loud[0]:]


def preprocess_and_extract_mfcc(file_path):
    """Read an audio file and return its time-averaged MFCC vector.

    Steps: read with soundfile, downmix to mono, normalise, pad/crop, reduce
    noise, trim leading silence, compute MFCCs and average them over frames.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        1-D array with ``N_MFCC`` values.

    Raises:
        ValueError: If the file is empty or any processing step fails. The
            original exception is attached as the cause.
    """
    try:
        y, sr = sf.read(file_path)

        if len(y) == 0:
            raise ValueError("File audio kosong atau rusak.")

        # soundfile returns (frames, channels) for multi-channel files. The
        # helpers below expect a 1-D signal, so average the channels first.
        if y.ndim > 1:
            y = np.mean(y, axis=1)

        y = normalize_audio(y)
        y = pad_audio(y, sr)
        y = reduce_noise(y, sr)

        # Cut the silent part at the start of the sound.
        y = trim_silence(y, sr, threshold=0.02)

        mfcc = librosa.feature.mfcc(
            y=y,
            sr=sr,
            n_mfcc=N_MFCC,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
            lifter=LIFTER,
        )
        return np.mean(mfcc, axis=1)
    except Exception as e:
        raise ValueError(f"Gagal memproses audio: {e}") from e


def _remove_quietly(path):
    """Delete ``path``, ignoring the case where it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: a missing temp file must not mask the response.
        pass


# API route for sound prediction
@app.route("/predict", methods=["POST"])
def predict():
    """Classify the uploaded audio file sent in the ``file`` form field.

    Returns:
        JSON ``{"prediction": <label>}`` on success, or ``{"error": <msg>}``
        with status 400 (missing/empty upload) or 500 (processing failure).
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    if "file" not in request.files:
        return jsonify({"error": "Tidak ada file yang dikirim"}), 400

    upload = request.files["file"]
    if upload.filename == "":
        return jsonify({"error": "Nama file kosong"}), 400

    # Use a unique temporary path per request. A fixed file name would be
    # overwritten or deleted by concurrent requests.
    fd, file_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        upload.save(file_path)

        if os.path.getsize(file_path) == 0:
            return jsonify({"error": "File audio kosong atau rusak"}), 400

        try:
            mfcc_features = preprocess_and_extract_mfcc(file_path)
            new_mfcc_features = np.expand_dims(mfcc_features, axis=0)

            predicted_label, confidence_score = lvq_model.predict(new_mfcc_features)
            predicted_label = predicted_label[0]
            confidence_score = confidence_score[0]

            # Confidence as a percentage rounded to two decimals.
            confidence_score_percent = round(confidence_score * 100, 2)

            if confidence_score_percent < MIN_CONFIDENCE_PERCENT:
                predicted_label = UNKNOWN_LABEL
            else:
                predicted_label = label_encoder.inverse_transform([predicted_label])[0]

            # The confidence percentage is computed above but is not part of
            # the response payload.
            return jsonify({"prediction": predicted_label})
        except Exception as e:
            print(f"❗ ERROR di server Flask: {e}")  # show the error in the Flask terminal
            return jsonify({"error": f"Gagal memproses audio: {e}"}), 500
    finally:
        # Always remove the temporary file, including on early returns.
        _remove_quietly(file_path)


# Run the API
if __name__ == "__main__":
    # The Werkzeug debugger allows arbitrary code execution, so it must not be
    # enabled by default on a server bound to all interfaces. Opt in with
    # FLASK_DEBUG=1 during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)