TIF_E41211245/Model/API_PREDICT.py

179 lines
5.9 KiB
Python

import os
import pickle
import tempfile

import librosa
import noisereduce as nr
import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify
# Flask application object; the /predict route defined further down is registered on it.
app = Flask(__name__)
# 🛠 Re-declare the LVQ class here so that the pickled model can be loaded again
class LVQ:
    """Nearest-prototype classifier trained with a Learning Vector Quantization rule.

    Each class is represented by ``n_prototypes`` vectors. During training the
    prototype closest to a sample is pulled towards the sample when their
    labels match and pushed away otherwise. Prediction returns the label of
    the closest prototype.

    Attributes are kept exactly as named here because instances of this class
    are restored with ``pickle`` elsewhere in this module.
    """

    def __init__(self, n_classes, n_prototypes=2, learning_rate=0.01, epochs=200):
        """Store the hyper-parameters; no training happens here.

        Args:
            n_classes: Number of classes (stored only; not read by ``fit`` or
                ``predict``).
            n_prototypes: Prototypes drawn per class when fitting.
            learning_rate: Initial step size of the prototype updates.
            epochs: Number of full passes over the training data.
        """
        self.n_classes = n_classes
        self.n_prototypes = n_prototypes
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.prototypes = None
        self.prototype_labels = None

    def fit(self, X, y):
        """Initialise the prototypes from the data and train them.

        Args:
            X: Training samples, one row per sample.
            y: Labels, one per row of ``X``.

        Raises:
            ValueError: If a class has fewer than ``n_prototypes`` samples
                (raised by the sampling without replacement).
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # A private generator seeded with 42 draws the same indices as seeding
        # the global generator would, without disturbing NumPy's global RNG.
        rng = np.random.RandomState(42)

        initial_prototypes = []
        initial_labels = []
        for label in np.unique(y):
            members = np.where(y == label)[0]
            chosen = rng.choice(members, self.n_prototypes, replace=False)
            initial_prototypes.extend(X[chosen])
            initial_labels.extend([label] * self.n_prototypes)

        # Force a float dtype: with integer training data the in-place
        # fractional updates below would otherwise fail.
        self.prototypes = np.array(initial_prototypes, dtype=float)
        self.prototype_labels = np.array(initial_labels)

        # Decay a local copy so the configured hyper-parameter is preserved
        # and calling fit() again starts from the same learning rate.
        rate = self.learning_rate
        for _ in range(self.epochs):
            for sample, label in zip(X, y):
                distances = np.linalg.norm(self.prototypes - sample, axis=1)
                winner_idx = np.argmin(distances)
                step = rate * (sample - self.prototypes[winner_idx])
                if self.prototype_labels[winner_idx] == label:
                    self.prototypes[winner_idx] += step  # attract
                else:
                    self.prototypes[winner_idx] -= step  # repel
            rate *= 0.95  # decay once per epoch

    def predict(self, X):
        """Classify each row of ``X`` by its nearest prototype.

        Args:
            X: Iterable of samples (2-D array, one row per sample).

        Returns:
            A tuple ``(labels, confidences)`` of arrays with one entry per
            sample. The confidence is ``1 / (1 + d)`` where ``d`` is the
            Euclidean distance to the winning prototype, so it lies in (0, 1].
        """
        y_pred = []
        confidence_scores = []
        for sample in X:
            distances = np.linalg.norm(self.prototypes - sample, axis=1)
            winner_idx = np.argmin(distances)
            y_pred.append(self.prototype_labels[winner_idx])
            confidence_scores.append(1 / (1 + distances[winner_idx]))
        return np.array(y_pred), np.array(confidence_scores)
# 📂 Load the trained LVQ model and the label encoder from disk.
# NOTE: unpickling can execute arbitrary code, so these files must come from a trusted source.
MODEL_PATH = "lvq_model.pkl"
ENCODER_PATH = "label_encoder.pkl"


def _load_pickle(path):
    """Return the object unpickled from the binary file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


lvq_model = _load_pickle(MODEL_PATH)
label_encoder = _load_pickle(ENCODER_PATH)
# 🎧 Audio / MFCC parameters
# Default `sr` of pad_audio() and reduce_noise(); presumably in Hz — TODO confirm.
SAMPLE_RATE = 48000
# Multiplied by the sample rate in pad_audio() to obtain the fixed clip length in samples.
TARGET_DURATION = 1.0
# The four values below are forwarded to librosa.feature.mfcc as
# n_mfcc, n_fft, hop_length and lifter respectively.
N_MFCC = 40
N_FFT = 4096
HOP_LENGTH = 512
LIFTER = 22
# 🎯 Audio preprocessing functions
def normalize_audio(y):
    """Scale a signal so that its largest absolute sample becomes 1.

    Args:
        y: Audio samples (array-like).

    Returns:
        A floating-point array ``y / max(|y|)``. An empty or completely
        silent signal is returned unscaled (as floats) instead of raising
        or producing NaN from a division by zero.
    """
    y = np.asarray(y)
    if y.size == 0:
        return y / 1.0
    peak = np.max(np.abs(y))
    if peak == 0:
        # Nothing to scale; dividing by the peak would yield NaN everywhere.
        return y / 1.0
    return y / peak
def pad_audio(y, sr=SAMPLE_RATE, target_duration=TARGET_DURATION):
    """Force a signal to exactly ``int(sr * target_duration)`` samples.

    Longer input is truncated; shorter input is zero-padded at the end.

    Args:
        y: Audio samples; the first axis is treated as the sample axis.
        sr: Sample rate used to convert the duration into a sample count.
        target_duration: Desired length, multiplied by ``sr``.

    Returns:
        The truncated slice of ``y`` or a zero-padded copy.
    """
    target_length = int(sr * target_duration)
    n_samples = len(y)
    if n_samples > target_length:
        return y[:target_length]

    y = np.asarray(y)
    # Pad only along the sample axis. A bare (0, n) pad width would be applied
    # to every axis and blow up the channel axis of multi-channel input.
    pad_width = [(0, target_length - n_samples)] + [(0, 0)] * (y.ndim - 1)
    return np.pad(y, pad_width, mode="constant")
def reduce_noise(y, sr=SAMPLE_RATE):
    """Return ``y`` after noise reduction by ``noisereduce.reduce_noise``.

    Args:
        y: Audio samples.
        sr: Sample rate forwarded to the noise reducer.
    """
    denoised = nr.reduce_noise(y=y, sr=sr)
    return denoised
def trim_silence(audio, sr, threshold=0.02):
    """Drop the leading part of ``audio`` that stays at or below ``threshold``.

    Only the beginning is trimmed; everything from the first sample whose
    absolute value exceeds ``threshold`` is kept, including later quiet parts.

    Args:
        audio: Audio samples.
        sr: Sample rate (accepted for signature compatibility; not used).
        threshold: Absolute amplitude a sample must exceed to count as sound.

    Returns:
        The trimmed signal, or ``audio`` itself when no sample exceeds the
        threshold.
    """
    loud_positions = np.nonzero(np.abs(audio) > threshold)[0]
    if loud_positions.size:
        return audio[loud_positions[0]:]
    # No sound detected at all: hand back the input untouched.
    return audio
# 🎵 Preprocessing and MFCC feature extraction
def preprocess_and_extract_mfcc(file_path):
    """Read an audio file, clean it up and return its time-averaged MFCCs.

    Pipeline: read with soundfile -> downmix to mono -> peak-normalise ->
    pad/truncate to the target duration -> noise reduction -> trim leading
    silence -> MFCC -> mean over axis 1 of the MFCC matrix.

    NOTE(review): the sample rate reported by the file is used throughout;
    the audio is not resampled to SAMPLE_RATE — confirm this matches how the
    model was trained.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        1-D array holding the mean of each MFCC coefficient.

    Raises:
        ValueError: If the file is empty or any processing step fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        # 🎧 Read the file with soundfile
        y, sr = sf.read(file_path)

        # Reject empty audio early
        if len(y) == 0:
            raise ValueError("File audio kosong atau rusak.")

        # soundfile returns (frames, channels) for multi-channel files, while
        # the steps below expect a 1-D signal, so average the channels.
        if y.ndim > 1:
            y = np.mean(y, axis=1)

        # 🔧 Preprocessing
        y = normalize_audio(y)
        y = pad_audio(y, sr)
        y = reduce_noise(y, sr)

        # ✂️ Cut the silent part at the start of the sound
        y = trim_silence(y, sr, threshold=0.02)

        # 🎯 MFCC feature extraction
        mfcc = librosa.feature.mfcc(
            y=y,
            sr=sr,
            n_mfcc=N_MFCC,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
            lifter=LIFTER,
        )
        return np.mean(mfcc, axis=1)
    except Exception as e:
        # Keep the public error type/message, but chain the cause for debugging.
        raise ValueError(f"Gagal memproses audio: {e}") from e
# 📌 API route for sound prediction
@app.route("/predict", methods=["POST"])
def predict():
    """Classify an uploaded audio file and return the predicted label as JSON.

    Expects a multipart upload under the form field ``file``.

    Returns:
        ``{"prediction": <label>}`` on success;
        ``{"error": ...}`` with status 400 when the upload is missing, has an
        empty filename or is an empty file;
        ``{"error": ...}`` with status 500 when processing fails.
        The confidence value is used only for the rejection threshold and is
        not part of the response.
    """
    if "file" not in request.files:
        return jsonify({"error": "Tidak ada file yang dikirim"}), 400

    upload = request.files["file"]
    if upload.filename == "":
        return jsonify({"error": "Nama file kosong"}), 400

    # Use a unique temporary file per request; a fixed file name would be
    # overwritten or deleted by concurrent requests.
    fd, file_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        upload.save(file_path)

        # Check the file size
        if os.path.getsize(file_path) == 0:
            return jsonify({"error": "File audio kosong atau rusak"}), 400

        # 🔄 Preprocessing & MFCC extraction
        mfcc_features = preprocess_and_extract_mfcc(file_path)
        new_mfcc_features = np.expand_dims(mfcc_features, axis=0)

        # 🔍 Predict with LVQ
        predicted_labels, confidence_scores = lvq_model.predict(new_mfcc_features)
        predicted_label = predicted_labels[0]

        # Confidence as a percentage, rounded to two decimals
        confidence_score_percent = round(confidence_scores[0] * 100, 2)

        # Below 0.2 (percent) the sound is reported as not recognised.
        # NOTE(review): an earlier comment said 50%, but the comparison has
        # always been against 0.2 — confirm the intended threshold.
        if confidence_score_percent < 0.2:
            predicted_label = "suara tidak dikenali"
        else:
            predicted_label = label_encoder.inverse_transform([predicted_label])[0]
            # NumPy scalars are not always JSON serialisable; convert to a
            # native Python value.
            if hasattr(predicted_label, "item"):
                predicted_label = predicted_label.item()

        # 🏅 Prediction result
        return jsonify({"prediction": predicted_label})
    except Exception as e:
        print(f"❗ ERROR di server Flask: {e}")  # Show the error in the Flask terminal
        return jsonify({"error": f"Gagal memproses audio: {e}"}), 500
    finally:
        # Always delete the temporary file, including on the early 400 return.
        # Best effort: a failed cleanup must not replace the real response.
        try:
            os.remove(file_path)
        except OSError:
            pass
# 🚀 Run the API with Flask's built-in server
if __name__ == "__main__":
    # The server listens on all interfaces, and the interactive debugger lets
    # anyone who can reach it execute arbitrary code. Debug mode is therefore
    # opt-in: set FLASK_DEBUG=1 (or true/yes/on) for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)