import numpy as np
import pickle

from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assume we have a tokenizer, a model, and max_length:
# tokenizer, model, max_length = ...
# Make sure you load the model & tokenizer to match your environment.


def preprocess_text(text):
    # Simple preprocessing function (must match, or closely mirror, the training pipeline)
    text = text.lower()
    # Add further adjustments here if needed
    return text


def generate_question(paragraph, tokenizer, model, max_length):
    # 1) Preprocess the paragraph
    paragraph = preprocess_text(paragraph)

    # 2) Tokenize (returns a list of lists)
    seq = tokenizer.texts_to_sequences([paragraph])

    # 3) Pad the sequence
    padded = pad_sequences(seq, maxlen=max_length, padding="post")

    # 4) Get the model's prediction => shape: (1, max_length, vocab_size)
    prediction = model.predict(padded)

    # 5) Take the argmax at every time step => (1, max_length)
    predicted_indices = np.argmax(prediction, axis=-1)[0]

    # 6) Convert the indices back to words
    predicted_words = []
    for idx in predicted_indices:
        # Index 0 is reserved by the Keras Tokenizer and used by pad_sequences
        # for padding, so we can stop here: the remaining steps are padding.
        if idx == 0:
            break
        word = tokenizer.index_word.get(int(idx), "")
        if word:
            predicted_words.append(word)

    # 7) Join the words into a single question
    predicted_question = " ".join(predicted_words)
    # Optionally append a question mark
    if not predicted_question.endswith("?"):
        predicted_question += "?"
    return predicted_question


model = load_model("lstm_question_generator.keras")
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Make sure max_length is the same value used during training
max_length = 50  # or whatever value you set

# "Albert Einstein developed the theory of relativity and helped develop quantum physics."
paragraph_input = "Albert Einstein mengembangkan teori relativitas dan membantu mengembangkan fisika kuantum."
generated_q = generate_question(paragraph_input, tokenizer, model, max_length)
print("Generated question:", generated_q)
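
# --- Optional sanity check (a minimal sketch; this helper is an addition, not
# part of the original script). It assumes a single-input Keras model whose
# input shape is (None, timesteps). In practice you would call it right after
# load_model(): it catches a max_length that drifted from the training-time
# value, instead of letting the model silently produce degraded predictions.
def check_max_length(model, max_length):
    expected = model.input_shape[1]  # None if the model accepts variable-length input
    if expected is not None and expected != max_length:
        raise ValueError(
            f"max_length={max_length} does not match the model's "
            f"expected input length of {expected}"
        )


check_max_length(model, max_length)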