feat: adding dataset

2025-05-10 13:56:38 +07:00 · 2025-05-10 13:56:38 +07:00 · 5b32cb3925
parent 20ef6aeaed
commit 5b32cb3925
7 changed files with 330 additions and 295 deletions
--- a/NER_SRL/accuracy_plot.png
+++ b/NER_SRL/accuracy_plot.png
--- a/NER_SRL/adjst_model_lstm.ipynb
+++ b/NER_SRL/adjst_model_lstm.ipynb
--- a/NER_SRL/loss_plot.png
+++ b/NER_SRL/loss_plot.png
--- a/NER_SRL/tag2idx_ner.pkl
+++ b/NER_SRL/tag2idx_ner.pkl
--- a/NER_SRL/tag2idx_srl.pkl
+++ b/NER_SRL/tag2idx_srl.pkl
--- a/NER_SRL/test_model.py
+++ b/NER_SRL/test_model.py
@@ -3,6 +3,7 @@ import numpy as np
 import pickle
 from tensorflow.keras.models import load_model # type: ignore
 from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
+import re

 # -----------------------------
 # 1.  Load artefak
@@ -28,7 +29,9 @@ MAXLEN = model.input_shape[1]  # ambil langsung dari model
 # 2.  Fungsi prediksi
 # -----------------------------
 def predict_sentence(sentence: str) -> dict:
-    tokens = sentence.strip().lower().split()
+    # tokens = sentence.strip().lower().split()
+    tokens = re.findall(r"\w+|[^\w\s]", sentence.lower())
+    print(tokens)
    seq = [word2idx.get(tok, word2idx["UNK"]) for tok in tokens]
    seq = pad_sequences([seq], maxlen=MAXLEN, padding="post", value=PAD_WORD_ID)

@@ -47,6 +50,6 @@ def predict_sentence(sentence: str) -> dict:
 # 3.  Demo
 # -----------------------------
 if __name__ == "__main__":
-    sample = "Suku Karo merayakan upacara pada juni"
+    sample = "batu bata terbuat dari material tanah liat"
    result = predict_sentence(sample)
    print(json.dumps(result, ensure_ascii=False, indent=2))
--- a/NER_SRL/word2idx.pkl
+++ b/NER_SRL/word2idx.pkl