# TIF_E41211115_lstm-quiz-gen/old/test.py
# Inference script for the LSTM question generator (see training script for
# the matching preprocessing, tokenizer, and max_length settings).
import numpy as np
import json
import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
def preprocess_text(text):
    """Normalize raw input text before tokenization.

    Must mirror the preprocessing used at training time; currently this is
    just case-folding to lowercase.
    """
    return text.lower()
def generate_question(paragraph, tokenizer, model, max_length):
    """Generate a question string from an input paragraph.

    Pipeline: preprocess -> tokenize -> pad -> predict -> decode indices back
    to words -> join into a question ending with "?".

    Args:
        paragraph: Raw input text.
        tokenizer: Fitted Keras Tokenizer (provides texts_to_sequences and
            index_word).
        model: Trained Keras model whose output is a per-timestep
            distribution over the vocabulary.
        max_length: Padding length; must match the value used in training.

    Returns:
        The decoded question string, always terminated with "?".
    """
    # 1) Preprocess paragraph (must match training-time preprocessing)
    paragraph = preprocess_text(paragraph)
    # 2) Tokenize and pad to the fixed sequence length
    seq = tokenizer.texts_to_sequences([paragraph])
    padded = pad_sequences(seq, maxlen=max_length, padding="post")
    # 3) Get model predictions for the single-item batch
    prediction = model.predict(padded)
    # 4) Most likely vocabulary index at each timestep
    predicted_indices = np.argmax(prediction, axis=-1)[0]
    # 5) Convert indices to words
    predicted_words = []
    for idx in predicted_indices:
        if idx == 0:  # assumes 0 is the padding index — TODO confirm vs training setup
            break
        word = tokenizer.index_word.get(idx, "")
        # Fix: skip indices missing from index_word instead of appending "",
        # which previously produced doubled spaces in the joined question.
        if word:
            predicted_words.append(word)
    # 6) Form the final question
    predicted_question = " ".join(predicted_words)
    if not predicted_question.endswith("?"):
        predicted_question += "?"
    return predicted_question
def determine_question_type(paragraph):
    """Classify the paragraph into a question type via keyword rules.

    Returns one of "true_false", "multiple_choice", or "fill_in_the_blank".
    """
    # Lowercase once; all keyword checks are case-insensitive.
    text = paragraph.lower()
    if "benar atau salah" in text or "adalah" in text:
        return "true_false"
    if "berapa" in text or "mana" in text:
        return "multiple_choice"
    return "fill_in_the_blank"
def extract_possible_answer(paragraph):
    """Extract a candidate answer from the paragraph (basic heuristic).

    Returns the first two whitespace-separated words when at least two exist,
    the single word otherwise, and "" for empty/whitespace-only input.

    Fixes vs original:
    - empty input no longer raises IndexError on words[0];
    - the two-word case used `> 2`, so exactly two words returned only the
      first word, contradicting the stated "first two words" intent.
    """
    words = paragraph.split()
    if not words:
        return ""  # guard: nothing to extract
    if len(words) >= 2:
        return words[0] + " " + words[1]
    return words[0]
def generate_options(question_type, answer):
    """Build a dummy option list matching the given question type.

    multiple_choice -> the answer plus three placeholder distractors;
    true_false      -> the two boolean labels;
    anything else   -> a single "-" placeholder (fill-in-the-blank).
    """
    if question_type == "multiple_choice":
        return [answer, "Option B", "Option C", "Option D"]
    if question_type == "true_false":
        return ["True", "False"]
    return ["-"]
# --- Inference driver: load artifacts, generate one question, print JSON ---

# Load the trained model and tokenizer produced by the training script.
model = load_model("lstm_question_generator.keras")
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Padding length for input sequences.
max_length = 50  # Ensure this is the same as in training

# Sample paragraph input (Indonesian: "Albert Einstein developed the theory
# of relativity and helped develop quantum physics").
paragraph_input = "Albert Einstein mengembangkan teori relativitas dan membantu mengembangkan fisika kuantum"

# Generate question text from the model.
generated_question = generate_question(paragraph_input, tokenizer, model, max_length)

# Question type and answer come from keyword heuristics on the raw paragraph,
# not from the model output.
question_type = determine_question_type(paragraph_input)
answer = extract_possible_answer(paragraph_input)
options = generate_options(question_type, answer)

# Construct JSON output.
output = {
    "paragraph": paragraph_input,
    "question_type": question_type,
    "question": generated_question,
    "answer": answer,
    "options": "|".join(options)  # Match dataset format: pipe-separated options
}

# Print JSON result; ensure_ascii=False keeps non-ASCII text readable.
print(json.dumps(output, indent=4, ensure_ascii=False))