# LSTM question-generator inference script.
import numpy as np
|
|
import re
|
|
from tensorflow.keras.models import load_model
|
|
|
|
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
|
import pickle
|
|
|
|
# Assume we have a tokenizer, a model, and max_length available:
# tokenizer, model, max_length = ...
# Make sure you load the model & tokenizer appropriately for your environment.
|
|
|
|
|
def preprocess_text(text):
    """Apply minimal normalization to *text*.

    Must match (or closely mirror) the preprocessing used at training time.
    Currently only lowercases; add further adjustments here if needed.
    """
    return text.lower()
|
|
|
|
|
|
def generate_question(paragraph, tokenizer, model, max_length):
    """Generate a question from *paragraph* with a trained Keras model.

    Parameters
    ----------
    paragraph : str
        Input text to generate a question from.
    tokenizer : keras Tokenizer
        The tokenizer fitted during training (must be the same one).
    model : keras Model
        Trained model; its output is expected to have shape
        (1, max_length, vocab_size) — TODO confirm against training code.
    max_length : int
        Padding length used during training.

    Returns
    -------
    str
        The decoded question, guaranteed to end with a question mark.
    """
    # 1) Apply the same preprocessing used at training time.
    paragraph = preprocess_text(paragraph)

    # 2) Tokenize -> list of lists of word indices.
    seq = tokenizer.texts_to_sequences([paragraph])

    # 3) Pad to the fixed length the model was trained on.
    padded = pad_sequences(seq, maxlen=max_length, padding="post")

    # 4) Model prediction => shape (1, max_length, vocab_size).
    prediction = model.predict(padded)

    # 5) Greedy decode: argmax over the vocabulary at every time step.
    predicted_indices = np.argmax(prediction, axis=-1)[0]

    # 6) Map indices back to words.
    predicted_words = []
    for idx in predicted_indices:
        if idx == 0:
            # Index 0 is reserved (pad or OOV depending on tokenizer settings);
            # the remainder of the sequence is assumed to be padding.
            break
        word = tokenizer.index_word.get(idx)
        # BUGFIX: previously a missing index appended "" to the word list,
        # producing doubled/stray spaces in the joined sentence. Skip instead.
        if word:
            predicted_words.append(word)

    # 7) Join into one sentence and ensure it ends with a question mark.
    predicted_question = " ".join(predicted_words)
    if not predicted_question.endswith("?"):
        predicted_question += "?"

    return predicted_question
|
|
|
|
|
|
# Load the trained model from disk.
model = load_model("lstm_question_generator.keras")

# Load the tokenizer that was fitted during training.
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Must be the same padding length used during training.
max_length = 50

paragraph_input = "Albert Einstein mengembangkan teori relativitas dan membantu mengembangkan fisika kuantum."

# Run inference and show the result.
generated_q = generate_question(paragraph_input, tokenizer, model, max_length)
print("Generated question:", generated_q)
|