59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
import sys
|
|
import joblib
|
|
import numpy as np
|
|
from typing import List
|
|
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
|
|
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
|
|
import config
|
|
|
|
# Shared ML/NLP assets. They all start out as None and are assigned by
# load_ml_assets().
model_optimized = vectorizer = label_encoder = None
stemmer = stopword = None
|
|
|
|
def load_ml_assets():
    """Load the NLP helpers and pickled ML artifacts into module globals.

    Intended to be called once when the server starts. Assigns the
    module-level ``stemmer``, ``stopword``, ``vectorizer``,
    ``label_encoder`` and ``model_optimized``.

    If the model file does not exist, or loading any artifact raises, an
    error message is printed and the process exits with status 1.
    """
    global model_optimized, vectorizer, label_encoder, stemmer, stopword

    print("🧠 Memuat modul NLP Sastrawi...")
    stemmer_factory = StemmerFactory()
    stemmer = stemmer_factory.create_stemmer()
    stopword_factory = StopWordRemoverFactory()
    stopword = stopword_factory.create_stop_word_remover()

    print("🧠 Memuat model Machine Learning...")
    try:
        tokenize_dir = config.TOKENIZE_DIR
        vectorizer = joblib.load(tokenize_dir / "vectorizer_tfidf.pkl")
        label_encoder = joblib.load(tokenize_dir / "label_encoder.pkl")

        model_path = config.MODEL_DIR / "final_pipeline_scenario3.pkl"
        if not model_path.exists():
            print(f"❌ CRITICAL: Model tidak ditemukan di {model_path}")
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept this exit.
            sys.exit(1)

        model_optimized = joblib.load(model_path)
        print("✅ Model ML Loaded Successfully")
    except Exception as e:
        print(f"❌ Error memuat aset ML: {e}")
        sys.exit(1)
|
|
|
|
def preprocess_text(text: str) -> str:
    """Lowercase ``text``, remove stopwords, then stem the result.

    Uses the module-level ``stopword`` and ``stemmer`` objects, which are
    ``None`` until ``load_ml_assets()`` has assigned them.
    """
    lowered = text.lower()
    without_stopwords = stopword.remove(lowered)
    return stemmer.stem(without_stopwords)
|
|
|
|
def extract_keywords_batch(texts: List[str], top_n=5) -> List[str]:
    """Return the highest-scoring vocabulary terms for the combined texts.

    All ``texts`` are joined into one document, scored with the
    module-level ``vectorizer`` (assigned by ``load_ml_assets()``), and the
    terms are ranked by score in descending order.

    Args:
        texts: Strings to analyse together as a single document.
        top_n: Maximum number of terms to return. ``None`` means no limit;
            zero or a negative value yields an empty list.

    Returns:
        At most ``top_n`` terms, best first. Only terms with a score above
        zero are returned, so the list is shorter than ``top_n`` when the
        text matches fewer vocabulary entries. Returns ``[]`` for empty or
        whitespace-only input, and also (best-effort) when anything fails,
        e.g. the vectorizer has not been loaded yet.
    """
    try:
        if top_n is not None and top_n <= 0:
            return []

        combined_text = " ".join(texts)
        # Joining empty strings still yields separators, so test the
        # stripped text rather than the raw join result.
        if not combined_text.strip():
            return []

        tfidf_matrix = vectorizer.transform([combined_text])
        feature_names = vectorizer.get_feature_names_out()

        # A single document was transformed, so the matrix has one row.
        scores = tfidf_matrix.toarray().ravel()
        ranked = np.argsort(scores)[::-1][:top_n]

        # Zero-score terms do not occur in the text; without this filter
        # arbitrary vocabulary entries would pad the result up to top_n.
        return [feature_names[i] for i in ranked if scores[i] > 0]
    except Exception:
        # Deliberate best-effort: keyword extraction must never break the
        # caller, so any failure degrades to "no keywords".
        return []