feat: adding dataset
This commit is contained in:
parent
20ef6aeaed
commit
5b32cb3925
Binary file not shown.
Before Width: | Height: | Size: 65 KiB After Width: | Height: | Size: 66 KiB |
File diff suppressed because one or more lines are too long
Binary file not shown.
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 66 KiB |
Binary file not shown.
Binary file not shown.
|
@@ -3,6 +3,7 @@ import numpy as np
|
|||
import pickle
|
||||
from tensorflow.keras.models import load_model # type: ignore
|
||||
from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
|
||||
import re
|
||||
|
||||
# -----------------------------
|
||||
# 1. Load artefak
|
||||
|
@@ -28,7 +29,9 @@ MAXLEN = model.input_shape[1] # ambil langsung dari model
|
|||
# 2. Fungsi prediksi
|
||||
# -----------------------------
|
||||
def predict_sentence(sentence: str) -> dict:
|
||||
tokens = sentence.strip().lower().split()
|
||||
# tokens = sentence.strip().lower().split()
|
||||
tokens = re.findall(r"\w+|[^\w\s]", sentence.lower())
|
||||
print(tokens)
|
||||
seq = [word2idx.get(tok, word2idx["UNK"]) for tok in tokens]
|
||||
seq = pad_sequences([seq], maxlen=MAXLEN, padding="post", value=PAD_WORD_ID)
|
||||
|
||||
|
@@ -47,6 +50,6 @@ def predict_sentence(sentence: str) -> dict:
|
|||
# 3. Demo
|
||||
# -----------------------------
|
||||
if __name__ == "__main__":
|
||||
sample = "Suku Karo merayakan upacara pada juni"
|
||||
sample = "batu bata terbuat dari material tanah liat"
|
||||
result = predict_sentence(sample)
|
||||
print(json.dumps(result, ensure_ascii=False, indent=2))
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue