# File listing metadata: 53 lines, 1.7 KiB, Python
import json
|
|
import numpy as np
|
|
import pickle
|
|
from tensorflow.keras.models import load_model # type: ignore
|
|
from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
|
|
|
|
# -----------------------------
|
|
# 1. Load artifacts
|
|
# -----------------------------
|
|
def _read_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Trained Keras model; the path points at the new model file name.
MODEL_PATH = "lstm_ner_srl_model.keras"
model = load_model(MODEL_PATH)

# Lookup tables: word -> id, and tag -> id for each of the two tag sets.
word2idx = _read_pickle("word2idx.pkl")
tag2idx_ner = _read_pickle("tag2idx_ner.pkl")
tag2idx_srl = _read_pickle("tag2idx_srl.pkl")

# Inverse tables (id -> tag) used to decode predicted class indices.
idx2tag_ner = {index: tag for tag, index in tag2idx_ner.items()}
idx2tag_srl = {index: tag for tag, index in tag2idx_srl.items()}

# Id used for padding positions (expected to be 0 -- confirm against the
# saved vocabulary).
PAD_WORD_ID = word2idx["PAD"]
# Sequence length is taken directly from the model's input shape.
MAXLEN = model.input_shape[1]
|
|
|
|
|
|
# -----------------------------
|
|
# 2. Prediction function
|
|
# -----------------------------
|
|
def predict_sentence(sentence: str) -> dict:
    """Predict NER and SRL tags for each token of *sentence*.

    The sentence is stripped, lower-cased and split on whitespace. Each token
    is mapped to its id in ``word2idx`` (unknown tokens use the ``"UNK"`` id),
    the id sequence is post-padded with ``PAD_WORD_ID`` to ``MAXLEN`` and fed
    to ``model``.

    Args:
        sentence: Raw input text.

    Returns:
        A dict with the keys ``"tokens"``, ``"labels_ner"`` and
        ``"labels_srl"``. The three lists always have the same length. If the
        sentence has more than ``MAXLEN`` tokens, only the first ``MAXLEN``
        tokens are tagged and returned. Empty or whitespace-only input yields
        three empty lists without calling the model.

    Raises:
        KeyError: If ``"UNK"`` is missing from ``word2idx`` or a predicted
            class index is missing from the id -> tag tables.
    """
    tokens = sentence.strip().lower().split()
    if not tokens:
        # Nothing to tag; skip the model call entirely.
        return {"tokens": [], "labels_ner": [], "labels_srl": []}

    # pad_sequences truncates from the FRONT by default, which would shift the
    # predictions relative to the tokens. Keep the first MAXLEN tokens instead
    # so token i always lines up with label i. (A MAXLEN of None keeps all.)
    tokens = tokens[:MAXLEN]

    unk_id = word2idx["UNK"]
    seq = [word2idx.get(tok, unk_id) for tok in tokens]
    seq = pad_sequences(
        [seq],
        maxlen=MAXLEN,
        padding="post",
        truncating="post",
        value=PAD_WORD_ID,
    )

    # The model is expected to return one probability array per task
    # (presumably shaped (batch, seq_len, n_tags) -- confirm against the model).
    pred_ner_prob, pred_srl_prob = model.predict(seq, verbose=0)

    # Most likely class per position for the single batch row, with the
    # padding positions dropped.
    pred_ner = pred_ner_prob.argmax(-1)[0][: len(tokens)]
    pred_srl = pred_srl_prob.argmax(-1)[0][: len(tokens)]

    return {
        "tokens": tokens,
        "labels_ner": [idx2tag_ner[int(i)] for i in pred_ner],
        "labels_srl": [idx2tag_srl[int(i)] for i in pred_srl],
    }
|
|
|
|
|
|
# -----------------------------
|
|
# 3. Demo
|
|
# -----------------------------
|
|
if __name__ == "__main__":
    # Tag one example sentence and pretty-print the result as JSON,
    # keeping non-ASCII characters unescaped.
    demo_output = predict_sentence("Suku Karo merayakan upacara pada juni")
    print(json.dumps(demo_output, indent=2, ensure_ascii=False))
|