import json
import pickle
import re

import numpy as np
from tensorflow.keras.models import load_model  # type: ignore
from tensorflow.keras.preprocessing.sequence import pad_sequences  # type: ignore

# -----------------------------
# 1. Load artifacts
# -----------------------------
MODEL_PATH = "lstm_ner_srl_model.keras"  # ← new file name
model = load_model(MODEL_PATH)

with open("word2idx.pkl", "rb") as f:
    word2idx = pickle.load(f)
with open("tag2idx_ner.pkl", "rb") as f:
    tag2idx_ner = pickle.load(f)
with open("tag2idx_srl.pkl", "rb") as f:
    tag2idx_srl = pickle.load(f)

# Invert the tag mappings so predicted indices can be turned back into labels
idx2tag_ner = {i: t for t, i in tag2idx_ner.items()}
idx2tag_srl = {i: t for t, i in tag2idx_srl.items()}

PAD_WORD_ID = word2idx["PAD"]   # 0
MAXLEN = model.input_shape[1]   # taken directly from the model

# -----------------------------
# 2. Prediction function
# -----------------------------
def predict_sentence(sentence: str) -> dict:
    # tokens = sentence.strip().lower().split()
    # Split into word tokens and standalone punctuation
    tokens = re.findall(r"\w+|[^\w\s]", sentence.lower())
    print(tokens)
    # Keep tokens and labels aligned if the sentence is longer than MAXLEN
    tokens = tokens[:MAXLEN]

    seq = [word2idx.get(tok, word2idx["UNK"]) for tok in tokens]
    seq = pad_sequences([seq], maxlen=MAXLEN, padding="post",
                        truncating="post", value=PAD_WORD_ID)

    # The model has two output heads: NER and SRL
    pred_ner_prob, pred_srl_prob = model.predict(seq, verbose=0)
    pred_ner = pred_ner_prob.argmax(-1)[0][: len(tokens)]
    pred_srl = pred_srl_prob.argmax(-1)[0][: len(tokens)]

    return {
        "tokens": tokens,
        "labels_ner": [idx2tag_ner[int(i)] for i in pred_ner],
        "labels_srl": [idx2tag_srl[int(i)] for i in pred_srl],
    }

# -----------------------------
# 3. Demo
# -----------------------------
if __name__ == "__main__":
    sample = "ngaben adalan acara pembakaran jenazah masyarakat suku bali"
    result = predict_sentence(sample)
    print(json.dumps(result, ensure_ascii=False, indent=2))