feat: adding dataset

This commit is contained in:
akhdanre 2025-05-10 13:56:38 +07:00
parent 20ef6aeaed
commit 5b32cb3925
7 changed files with 330 additions and 295 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

After

Width:  |  Height:  |  Size: 66 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Binary file not shown.

View File

@ -3,6 +3,7 @@ import numpy as np
import pickle
from tensorflow.keras.models import load_model # type: ignore
from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
import re
# -----------------------------
# 1. Load artifacts
@ -28,7 +29,9 @@ MAXLEN = model.input_shape[1] # ambil langsung dari model
# 2. Prediction function
# -----------------------------
def predict_sentence(sentence: str) -> dict:
tokens = sentence.strip().lower().split()
# tokens = sentence.strip().lower().split()
tokens = re.findall(r"\w+|[^\w\s]", sentence.lower())
print(tokens)
seq = [word2idx.get(tok, word2idx["UNK"]) for tok in tokens]
seq = pad_sequences([seq], maxlen=MAXLEN, padding="post", value=PAD_WORD_ID)
@ -47,6 +50,6 @@ def predict_sentence(sentence: str) -> dict:
# 3. Demo
# -----------------------------
if __name__ == "__main__":
    # Demo entry point: run predict_sentence on a single sample sentence and
    # pretty-print the returned dict as JSON. ensure_ascii=False keeps any
    # non-ASCII characters in the output readable instead of \u-escaping them.
    sample = "batu bata terbuat dari material tanah liat"
    result = predict_sentence(sample)
    print(json.dumps(result, ensure_ascii=False, indent=2))

Binary file not shown.