# TIF_E41211115_lstm-quiz-gen.../test.py
# Inference script for the LSTM question generator.
# (File info from export: 69 lines, 2.1 KiB, Python.)
import numpy as np
import re
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
# Misal kita punya tokenizer, model, dan max_length:
# tokenizer, model, max_length = ...
# Pastikan Anda load model & tokenizer sesuai environment Anda.
def preprocess_text(text):
    """Lowercase the input text.

    Must stay in sync with (or closely mirror) the preprocessing
    applied during training.
    """
    # Additional normalization steps can be added here if needed.
    return text.lower()
def generate_question(paragraph, tokenizer, model, max_length):
    """Generate a question string from a paragraph using the trained LSTM model.

    Parameters
    ----------
    paragraph : str
        Input text; preprocessed with the same pipeline used at training time.
    tokenizer : keras.preprocessing.text.Tokenizer
        Fitted tokenizer providing ``texts_to_sequences`` and ``index_word``.
    model : keras.Model
        Trained model whose ``predict`` returns shape (1, max_length, vocab_size).
    max_length : int
        Padding length; must match the value used during training.

    Returns
    -------
    str
        The greedily decoded question, guaranteed to end with "?".
    """
    # 1) Preprocess exactly as during training.
    paragraph = preprocess_text(paragraph)
    # 2) Tokenize -> list of one integer sequence.
    seq = tokenizer.texts_to_sequences([paragraph])
    # 3) Pad to the fixed length the model expects.
    padded = pad_sequences(seq, maxlen=max_length, padding="post")
    # 4) Per-time-step probability distribution over the vocabulary.
    prediction = model.predict(padded)  # shape (1, max_length, vocab_size)
    # 5) Greedy decode: argmax at every time step -> (max_length,) index vector.
    predicted_indices = np.argmax(prediction, axis=-1)[0]
    # 6) Map indices back to words.
    predicted_words = []
    for idx in predicted_indices:
        # Index 0 is reserved (padding/OOV depending on tokenizer settings);
        # everything after the first 0 is padding, so stop decoding.
        if idx == 0:
            break
        # Cast numpy int -> int so the lookup also works when index_word was
        # (de)serialized with plain-int keys. Skip unknown indices instead of
        # appending "" — empty strings would create double spaces in the join.
        word = tokenizer.index_word.get(int(idx), "")
        if word:
            predicted_words.append(word)
    # 7) Join into one sentence and make sure it reads as a question.
    predicted_question = " ".join(predicted_words)
    if not predicted_question.endswith("?"):
        predicted_question += "?"
    return predicted_question
def main():
    """Load the trained model and tokenizer, then generate a sample question."""
    # Model saved in the native Keras format during training.
    model = load_model("lstm_question_generator.keras")
    # NOTE(review): pickle.load executes arbitrary code if the file is
    # untrusted — only load tokenizer files you produced yourself.
    with open("tokenizer.pkl", "rb") as f:
        tokenizer = pickle.load(f)
    # Must be identical to the max_length used at training time.
    max_length = 50
    paragraph_input = (
        "Albert Einstein mengembangkan teori relativitas dan membantu "
        "mengembangkan fisika kuantum."
    )
    generated_q = generate_question(paragraph_input, tokenizer, model, max_length)
    print("Generated question:", generated_q)


if __name__ == "__main__":
    main()