feat: adding dataset
This commit is contained in:
parent
20ef6aeaed
commit
5b32cb3925
Binary file not shown.
Before Width: | Height: | Size: 65 KiB After Width: | Height: | Size: 66 KiB |
File diff suppressed because one or more lines are too long
Binary file not shown.
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 66 KiB |
Binary file not shown.
Binary file not shown.
|
@ -3,6 +3,7 @@ import numpy as np
|
||||||
import pickle
|
import pickle
|
||||||
from tensorflow.keras.models import load_model # type: ignore
|
from tensorflow.keras.models import load_model # type: ignore
|
||||||
from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
|
from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
|
||||||
|
import re
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# 1. Load artefak
|
# 1. Load artefak
|
||||||
|
@ -28,7 +29,9 @@ MAXLEN = model.input_shape[1] # ambil langsung dari model
|
||||||
# 2. Fungsi prediksi
|
# 2. Fungsi prediksi
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
def predict_sentence(sentence: str) -> dict:
|
def predict_sentence(sentence: str) -> dict:
|
||||||
tokens = sentence.strip().lower().split()
|
# tokens = sentence.strip().lower().split()
|
||||||
|
tokens = re.findall(r"\w+|[^\w\s]", sentence.lower())
|
||||||
|
print(tokens)
|
||||||
seq = [word2idx.get(tok, word2idx["UNK"]) for tok in tokens]
|
seq = [word2idx.get(tok, word2idx["UNK"]) for tok in tokens]
|
||||||
seq = pad_sequences([seq], maxlen=MAXLEN, padding="post", value=PAD_WORD_ID)
|
seq = pad_sequences([seq], maxlen=MAXLEN, padding="post", value=PAD_WORD_ID)
|
||||||
|
|
||||||
|
@ -47,6 +50,6 @@ def predict_sentence(sentence: str) -> dict:
|
||||||
# 3. Demo
|
# 3. Demo
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sample = "Suku Karo merayakan upacara pada juni"
|
sample = "batu bata terbuat dari material tanah liat"
|
||||||
result = predict_sentence(sample)
|
result = predict_sentence(sample)
|
||||||
print(json.dumps(result, ensure_ascii=False, indent=2))
|
print(json.dumps(result, ensure_ascii=False, indent=2))
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue