# TIF_E41211115_lstm-quiz-gen.../NER_SRL/test_model.py

# 53 lines
# 1.7 KiB
# Python

import json
import numpy as np
import pickle
from tensorflow.keras.models import load_model # type: ignore
from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
# -----------------------------
# 1. Load artifacts
# -----------------------------
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


MODEL_PATH = "lstm_ner_srl_model.keras"  # ← new file name
model = load_model(MODEL_PATH)

# Vocabulary and tag lookup tables, read from pickle files in the current
# working directory.
word2idx = _load_pickle("word2idx.pkl")
tag2idx_ner = _load_pickle("tag2idx_ner.pkl")
tag2idx_srl = _load_pickle("tag2idx_srl.pkl")

# Inverse tables (index -> tag) used to decode the model's argmax output.
idx2tag_ner = {index: tag for tag, index in tag2idx_ner.items()}
idx2tag_srl = {index: tag for tag, index in tag2idx_srl.items()}

PAD_WORD_ID = word2idx["PAD"]  # original note says this is 0
MAXLEN = model.input_shape[1]  # taken directly from the model
# -----------------------------
# 2. Prediction function
# -----------------------------
def predict_sentence(sentence: str) -> dict:
    """Tag each token of *sentence* with an NER label and an SRL label.

    The sentence is lower-cased and split on whitespace. Tokens missing from
    ``word2idx`` are mapped to the ``"UNK"`` id, and the id sequence is
    post-padded with ``PAD_WORD_ID`` up to ``MAXLEN`` before it is passed to
    the model.

    If the sentence has more than ``MAXLEN`` tokens, only the first ``MAXLEN``
    are tagged and the returned ``"tokens"`` list is shortened to match, so
    tokens and labels always line up one-to-one.

    Args:
        sentence: Raw input text.

    Returns:
        A dict with the keys ``"tokens"``, ``"labels_ner"`` and
        ``"labels_srl"``; the three lists have equal length. An empty or
        whitespace-only sentence yields three empty lists.

    Raises:
        KeyError: If ``"UNK"`` is missing from ``word2idx`` or a predicted
            index has no entry in the index-to-tag tables.
    """
    # split() with no separator already ignores leading/trailing whitespace.
    tokens = sentence.lower().split()
    if not tokens:
        # Nothing to tag: skip the model call entirely.
        return {"tokens": [], "labels_ner": [], "labels_srl": []}

    # Keep the head of an over-long sentence. Without this, pad_sequences
    # would drop tokens from the front (its default is truncating="pre")
    # while the labels were still paired with the untruncated token list.
    # NOTE(review): if MAXLEN is None (presumably a variable-length model
    # input), this slice keeps every token -- confirm against the model.
    tokens = tokens[:MAXLEN]

    unk_id = word2idx["UNK"]
    seq = [word2idx.get(tok, unk_id) for tok in tokens]
    seq = pad_sequences([seq], maxlen=MAXLEN, padding="post", value=PAD_WORD_ID)

    pred_ner_prob, pred_srl_prob = model.predict(seq, verbose=0)

    # argmax over the last axis picks one tag index per position; keep only
    # the positions that correspond to real tokens, not padding.
    pred_ner = pred_ner_prob.argmax(-1)[0][: len(tokens)]
    pred_srl = pred_srl_prob.argmax(-1)[0][: len(tokens)]

    return {
        "tokens": tokens,
        "labels_ner": [idx2tag_ner[int(i)] for i in pred_ner],
        "labels_srl": [idx2tag_srl[int(i)] for i in pred_srl],
    }
# -----------------------------
# 3. Demo
# -----------------------------
if __name__ == "__main__":
    # Tag one hard-coded sample sentence and pretty-print the result as JSON.
    demo_sentence = "Suku Karo merayakan upacara pada juni"
    prediction = predict_sentence(demo_sentence)
    rendered = json.dumps(prediction, ensure_ascii=False, indent=2)
    print(rendered)