TIF_E41211115_lstm-quiz-gen.../question_generation/question_generation_model.i...

1293 lines
168 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 75,
"id": "02cbdb19",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import json\n",
"import random\n",
"import tensorflow as tf\n",
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from tensorflow.keras.models import Model, load_model\n",
"from tensorflow.keras.layers import (\n",
" Input,\n",
" LSTM,\n",
" Dense,\n",
" Embedding,\n",
" Bidirectional,\n",
" Concatenate,\n",
" Dropout,\n",
")\n",
"from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping\n",
"from sklearn.model_selection import train_test_split\n",
"import matplotlib.pyplot as plt\n",
"import re\n",
"from rouge_score import rouge_scorer\n",
"from nltk.translate.bleu_score import sentence_bleu"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "f9c0af74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total question 931\n"
]
}
],
"source": [
"# Load data\n",
"with open(\"data_converted.json\", \"r\") as f:\n",
" data = json.load(f)\n",
"\n",
"\n",
"# Preprocessing function\n",
"def preprocess_text(text):\n",
" \"\"\"Melakukan preprocessing teks dasar\"\"\"\n",
" text = text.lower()\n",
" text = re.sub(r\"\\s+\", \" \", text).strip()\n",
" return text\n",
"\n",
"\n",
"# Persiapkan data untuk model prediksi pertanyaan\n",
"def prepare_question_prediction_data(data):\n",
" \"\"\"Siapkan data untuk model prediksi pertanyaan\"\"\"\n",
" contexts = []\n",
" tokens_list = []\n",
" ner_list = []\n",
" srl_list = []\n",
" questions = []\n",
" q_types = []\n",
"\n",
" for item in data:\n",
" \n",
" for qa in item[\"qas\"]:\n",
" if qa[\"question\"] == \"\":\n",
" continue\n",
" context = preprocess_text(item[\"context\"])\n",
" contexts.append(context)\n",
" token = [preprocess_text(token) for token in item[\"tokens\"]]\n",
" tokens_list.append(token)\n",
" ner_list.append(item[\"ner\"])\n",
" srl_list.append(item[\"srl\"])\n",
" questions.append(preprocess_text(qa[\"question\"]))\n",
" q_types.append(qa[\"type\"])\n",
" # Tidak mengambil jawaban (answer) sebagai input\n",
" print(\"total question \", len(questions))\n",
" return contexts, tokens_list, ner_list, srl_list, questions, q_types\n",
"\n",
"\n",
"contexts, tokens_list, ner_list, srl_list, questions, q_types = (\n",
" prepare_question_prediction_data(data)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "952f71da",
"metadata": {},
"outputs": [],
"source": [
"# Tokenizer untuk teks (context, question)\n",
"max_vocab_size = 10000\n",
"tokenizer = Tokenizer(num_words=max_vocab_size, oov_token=\"<OOV>\")\n",
"all_texts = contexts + questions + [\" \".join(item) for item in tokens_list]\n",
"tokenizer.fit_on_texts(all_texts)\n",
"vocab_size = len(tokenizer.word_index) + 1\n",
"\n",
"# Encoding untuk NER\n",
"ner_tokenizer = Tokenizer(oov_token=\"<OOV>\")\n",
"ner_tokenizer.fit_on_texts([\" \".join(ner) for ner in ner_list])\n",
"ner_vocab_size = len(ner_tokenizer.word_index) + 1\n",
"\n",
"# Encoding untuk SRL\n",
"srl_tokenizer = Tokenizer(oov_token=\"<OOV>\")\n",
"srl_tokenizer.fit_on_texts([\" \".join(srl) for srl in srl_list])\n",
"srl_vocab_size = len(srl_tokenizer.word_index) + 1\n",
"\n",
"# Encoding untuk tipe pertanyaan\n",
"q_type_tokenizer = Tokenizer()\n",
"q_type_tokenizer.fit_on_texts(q_types)\n",
"q_type_vocab_size = len(q_type_tokenizer.word_index) + 1\n",
"\n",
"\n",
"# Konversi token, ner, srl ke sequences\n",
"def tokens_to_sequences(tokens, ner, srl):\n",
" \"\"\"Konversi token, ner, dan srl ke sequences\"\"\"\n",
" token_seqs = [tokenizer.texts_to_sequences([\" \".join(t)])[0] for t in tokens]\n",
" ner_seqs = [ner_tokenizer.texts_to_sequences([\" \".join(n)])[0] for n in ner]\n",
" srl_seqs = [srl_tokenizer.texts_to_sequences([\" \".join(s)])[0] for s in srl]\n",
" return token_seqs, ner_seqs, srl_seqs\n",
"\n",
"\n",
"# Sequences\n",
"context_seqs = tokenizer.texts_to_sequences(contexts)\n",
"question_seqs = tokenizer.texts_to_sequences(questions)\n",
"token_seqs, ner_seqs, srl_seqs = tokens_to_sequences(tokens_list, ner_list, srl_list)\n",
"\n",
"# Menentukan panjang maksimum untuk padding\n",
"max_context_len = max([len(seq) for seq in context_seqs])\n",
"max_question_len = max([len(seq) for seq in question_seqs])\n",
"max_token_len = max([len(seq) for seq in token_seqs])\n",
"\n",
"\n",
"# Pad sequences untuk memastikan semua input sama panjang\n",
"def pad_all_sequences(context_seqs, token_seqs, ner_seqs, srl_seqs, question_seqs):\n",
" \"\"\"Padding semua sequences\"\"\"\n",
" context_padded = pad_sequences(context_seqs, maxlen=max_context_len, padding=\"post\")\n",
" token_padded = pad_sequences(token_seqs, maxlen=max_token_len, padding=\"post\")\n",
" ner_padded = pad_sequences(ner_seqs, maxlen=max_token_len, padding=\"post\")\n",
" srl_padded = pad_sequences(srl_seqs, maxlen=max_token_len, padding=\"post\")\n",
" question_padded = pad_sequences(\n",
" question_seqs, maxlen=max_question_len, padding=\"post\"\n",
" )\n",
" return (\n",
" context_padded,\n",
" token_padded,\n",
" ner_padded,\n",
" srl_padded,\n",
" question_padded,\n",
" )\n",
"\n",
"\n",
"# Encode tipe pertanyaan\n",
"q_type_indices = []\n",
"for q_type in q_types:\n",
" q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)\n",
" q_type_indices.append(q_type_idx)\n",
"\n",
"# Konversi ke numpy array\n",
"q_type_indices = np.array(q_type_indices)\n",
"\n",
"# One-hot encode tipe pertanyaan\n",
"q_type_categorical = tf.keras.utils.to_categorical(\n",
" q_type_indices, num_classes=q_type_vocab_size\n",
")\n",
"\n",
"# Pad sequences\n",
"context_padded, token_padded, ner_padded, srl_padded, question_padded = (\n",
" pad_all_sequences(context_seqs, token_seqs, ner_seqs, srl_seqs, question_seqs)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "37ffc0e5",
"metadata": {},
"outputs": [],
"source": [
"# Split data menjadi train dan test sets\n",
"indices = list(range(len(context_padded)))\n",
"train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)\n",
"\n",
"\n",
"def get_subset(data, indices):\n",
" return np.array([data[i] for i in indices])\n",
"\n",
"\n",
"# Train data\n",
"train_context = get_subset(context_padded, train_indices)\n",
"train_token = get_subset(token_padded, train_indices)\n",
"train_ner = get_subset(ner_padded, train_indices)\n",
"train_srl = get_subset(srl_padded, train_indices)\n",
"train_q_type = get_subset(q_type_categorical, train_indices)\n",
"train_question = get_subset(question_padded, train_indices)\n",
"\n",
"# Test data\n",
"test_context = get_subset(context_padded, test_indices)\n",
"test_token = get_subset(token_padded, test_indices)\n",
"test_ner = get_subset(ner_padded, test_indices)\n",
"test_srl = get_subset(srl_padded, test_indices)\n",
"test_q_type = get_subset(q_type_categorical, test_indices)\n",
"test_question = get_subset(question_padded, test_indices)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "df580682",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_8\"</span>\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1mModel: \"functional_8\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃<span style=\"font-weight: bold\"> Connected to </span>┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
"│ context_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ text_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">100</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">151,800</span> │ context_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ token_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,700</span> │ ner_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,350</span> │ srl_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_16 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">234,496</span> │ text_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_features │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">200</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ text_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) │ │ │ ner_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"│ │ │ │ srl_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ context_attention │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ bidirectional_16… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Attention</span>) │ │ │ bidirectional_16… │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_17 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">49</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">336,896</span> │ token_features[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ context_att_pool │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ context_attentio… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalMaxPooling1…</span> │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_pool │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ bidirectional_17… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalMaxPooling1…</span> │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ q_type_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">5</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ all_features │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">517</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ context_att_pool… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) │ │ │ token_pool[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>], │\n",
"│ │ │ │ q_type_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dense_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">512</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">265,216</span> │ all_features[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dropout_16 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">512</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ dense_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dense_2 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">131,328</span> │ dropout_16[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dropout_17 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ dense_2[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ decoder_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">65,792</span> │ dropout_17[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ repeat_vector_8 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">14</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ decoder_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">RepeatVector</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ decoder_lstm (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">14</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">525,312</span> │ repeat_vector_8[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ time_distributed_8 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">14</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1518</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">390,126</span> │ decoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">TimeDistributed</span>) │ │ │ │\n",
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
"</pre>\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
"│ context_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ text_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m, \u001b[38;5;34m100\u001b[0m) │ \u001b[38;5;34m151,800\u001b[0m │ context_input[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ token_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m1,700\u001b[0m │ ner_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m1,350\u001b[0m │ srl_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_16 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m234,496\u001b[0m │ text_embedding[\u001b[38;5;34m0\u001b[0m… │\n",
"│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_features │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m, \u001b[38;5;34m200\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ text_embedding[\u001b[38;5;34m1\u001b[0m… │\n",
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ ner_embedding[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ │ │ │ srl_embedding[\u001b[38;5;34m0\u001b[0m]… │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ context_attention │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ bidirectional_16… │\n",
"│ (\u001b[38;5;33mAttention\u001b[0m) │ │ │ bidirectional_16… │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_17 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m49\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m336,896\u001b[0m │ token_features[\u001b[38;5;34m0\u001b[0m… │\n",
"│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ context_att_pool │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ context_attentio… │\n",
"│ (\u001b[38;5;33mGlobalMaxPooling1…\u001b[0m │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_pool │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ bidirectional_17… │\n",
"│ (\u001b[38;5;33mGlobalMaxPooling1…\u001b[0m │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ q_type_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m5\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ all_features │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m517\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ context_att_pool… │\n",
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ token_pool[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
"│ │ │ │ q_type_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m512\u001b[0m) │ \u001b[38;5;34m265,216\u001b[0m │ all_features[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dropout_16 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m512\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dense_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mDropout\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m131,328\u001b[0m │ dropout_16[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dropout_17 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dense_2[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mDropout\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ decoder_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m65,792\u001b[0m │ dropout_17[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mDense\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ repeat_vector_8 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m14\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ decoder_input[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ (\u001b[38;5;33mRepeatVector\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ decoder_lstm (\u001b[38;5;33mLSTM\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m14\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m525,312\u001b[0m │ repeat_vector_8[\u001b[38;5;34m…\u001b[0m │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ time_distributed_8 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m14\u001b[0m, \u001b[38;5;34m1518\u001b[0m) │ \u001b[38;5;34m390,126\u001b[0m │ decoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ │\n",
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">2,104,016</span> (8.03 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m2,104,016\u001b[0m (8.03 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">2,104,016</span> (8.03 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m2,104,016\u001b[0m (8.03 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"embedding_dim = 100\n",
"lstm_units = 128\n",
"ner_embedding_dim = 50\n",
"srl_embedding_dim = 50\n",
"dropout_rate = 0.3\n",
"\n",
"\n",
"# Builder for the question-prediction model\n",
"def create_question_prediction_model():\n",
"    \"\"\"Build and compile the question-generation network.\n",
"\n",
"    The model takes five inputs: context ids, token ids, NER tag ids, SRL tag\n",
"    ids and a question-type vector of width ``q_type_vocab_size``. Context and\n",
"    tokens share one word embedding. The context stream goes through a\n",
"    bidirectional LSTM, self-attention and max pooling; the token stream\n",
"    (word + NER + SRL embeddings) goes through a bidirectional LSTM and max\n",
"    pooling. Both pooled vectors are concatenated with the question-type\n",
"    vector, passed through two dense layers, repeated ``max_question_len``\n",
"    times and decoded by an LSTM whose every timestep is projected to a\n",
"    softmax over ``vocab_size``.\n",
"\n",
"    Sequence lengths, vocabulary sizes and hyperparameters are read from\n",
"    module-level globals.\n",
"\n",
"    NOTE(review): the decoder is fed the same vector at every timestep and\n",
"    never receives previous question tokens, so there is no teacher forcing.\n",
"    NOTE(review): no embedding sets ``mask_zero``; padded positions presumably\n",
"    count toward loss and accuracy -- confirm against how targets are padded.\n",
"\n",
"    Returns:\n",
"        A compiled ``Model`` (Adam optimizer, sparse categorical cross-entropy\n",
"        loss, accuracy metric) whose output has shape\n",
"        (batch, max_question_len, vocab_size).\n",
"    \"\"\"\n",
"    # Input layers\n",
"    context_ids = Input(shape=(max_context_len,), name=\"context_input\")\n",
"    token_ids = Input(shape=(max_token_len,), name=\"token_input\")\n",
"    ner_ids = Input(shape=(max_token_len,), name=\"ner_input\")\n",
"    srl_ids = Input(shape=(max_token_len,), name=\"srl_input\")\n",
"    q_type_vec = Input(shape=(q_type_vocab_size,), name=\"q_type_input\")\n",
"\n",
"    # A single word-embedding table serves both the context and the token stream\n",
"    word_embedder = Embedding(vocab_size, embedding_dim, name=\"text_embedding\")\n",
"\n",
"    # Dedicated embeddings for the NER and SRL tag ids\n",
"    ner_embedder = Embedding(ner_vocab_size, ner_embedding_dim, name=\"ner_embedding\")\n",
"    ner_vectors = ner_embedder(ner_ids)\n",
"    srl_embedder = Embedding(srl_vocab_size, srl_embedding_dim, name=\"srl_embedding\")\n",
"    srl_vectors = srl_embedder(srl_ids)\n",
"\n",
"    # Look up word vectors for both text inputs\n",
"    context_vectors = word_embedder(context_ids)\n",
"    token_vectors = word_embedder(token_ids)\n",
"\n",
"    # Context encoder: bidirectional LSTM that keeps every timestep\n",
"    context_encoder = Bidirectional(\n",
"        LSTM(lstm_units, return_sequences=True, name=\"context_lstm\")\n",
"    )\n",
"    context_states = context_encoder(context_vectors)\n",
"\n",
"    # Token encoder: word, NER and SRL vectors joined per token, then a BiLSTM\n",
"    joined_token_vectors = Concatenate(name=\"token_features\")(\n",
"        [token_vectors, ner_vectors, srl_vectors]\n",
"    )\n",
"    token_encoder = Bidirectional(\n",
"        LSTM(lstm_units, return_sequences=True, name=\"token_lstm\")\n",
"    )\n",
"    token_states = token_encoder(joined_token_vectors)\n",
"\n",
"    # Self-attention: the context states are passed as both query and value\n",
"    attend = tf.keras.layers.Attention(name=\"context_attention\")\n",
"    attended_context = attend([context_states, context_states])\n",
"\n",
"    # Collapse the time axis of each stream with max pooling\n",
"    pooled_context = tf.keras.layers.GlobalMaxPooling1D(name=\"context_att_pool\")(\n",
"        attended_context\n",
"    )\n",
"    pooled_tokens = tf.keras.layers.GlobalMaxPooling1D(name=\"token_pool\")(token_states)\n",
"\n",
"    # Merge the pooled streams with the question-type vector (no answer feature)\n",
"    hidden = Concatenate(name=\"all_features\")(\n",
"        [pooled_context, pooled_tokens, q_type_vec]\n",
"    )\n",
"\n",
"    # Two ReLU dense layers, each followed by dropout\n",
"    for width, layer_name in ((512, \"dense_1\"), (256, \"dense_2\")):\n",
"        hidden = Dense(width, activation=\"relu\", name=layer_name)(hidden)\n",
"        hidden = Dropout(dropout_rate)(hidden)\n",
"\n",
"    # Decoder layers: per-timestep vocabulary softmax and the sequence LSTM\n",
"    vocab_softmax = Dense(vocab_size, activation=\"softmax\", name=\"decoder_dense\")\n",
"    sequence_decoder = LSTM(\n",
"        lstm_units * 2, return_sequences=True, name=\"decoder_lstm\"\n",
"    )\n",
"\n",
"    # Project the fused features to the decoder width\n",
"    decoder_seed = Dense(lstm_units * 2, activation=\"relu\", name=\"decoder_input\")(\n",
"        hidden\n",
"    )\n",
"\n",
"    # Feed the same projected vector at each of the max_question_len steps\n",
"    decoder_steps = tf.keras.layers.RepeatVector(max_question_len)(decoder_seed)\n",
"    decoder_states = sequence_decoder(decoder_steps)\n",
"\n",
"    # Softmax over the vocabulary at every timestep\n",
"    token_probs = tf.keras.layers.TimeDistributed(vocab_softmax)(decoder_states)\n",
"\n",
"    question_model = Model(\n",
"        inputs=[context_ids, token_ids, ner_ids, srl_ids, q_type_vec],\n",
"        outputs=token_probs,\n",
"    )\n",
"\n",
"    # Sparse categorical cross-entropy: the loss takes integer class ids as targets\n",
"    question_model.compile(\n",
"        optimizer=\"adam\", loss=\"sparse_categorical_crossentropy\", metrics=[\"accuracy\"]\n",
"    )\n",
"\n",
"    return question_model\n",
"\n",
"\n",
"# Build the model and print its layer summary\n",
"model = create_question_prediction_model()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "6ba404db",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 36ms/step - accuracy: 0.5309 - loss: 4.2503 - val_accuracy: 0.5642 - val_loss: 3.1908\n",
"Epoch 2/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 29ms/step - accuracy: 0.5667 - loss: 2.9923 - val_accuracy: 0.5699 - val_loss: 2.9613\n",
"Epoch 3/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 30ms/step - accuracy: 0.5789 - loss: 2.7875 - val_accuracy: 0.5733 - val_loss: 2.9850\n",
"Epoch 4/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 30ms/step - accuracy: 0.5751 - loss: 2.7333 - val_accuracy: 0.5810 - val_loss: 2.8962\n",
"Epoch 5/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 30ms/step - accuracy: 0.5919 - loss: 2.5382 - val_accuracy: 0.5791 - val_loss: 2.8806\n",
"Epoch 6/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 29ms/step - accuracy: 0.5976 - loss: 2.4694 - val_accuracy: 0.5837 - val_loss: 2.8012\n",
"Epoch 7/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 30ms/step - accuracy: 0.5988 - loss: 2.3733 - val_accuracy: 0.5924 - val_loss: 2.7668\n",
"Epoch 8/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 33ms/step - accuracy: 0.5963 - loss: 2.3688 - val_accuracy: 0.5913 - val_loss: 2.7328\n",
"Epoch 9/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 32ms/step - accuracy: 0.5942 - loss: 2.2982 - val_accuracy: 0.5921 - val_loss: 2.6984\n",
"Epoch 10/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 33ms/step - accuracy: 0.6094 - loss: 2.1666 - val_accuracy: 0.6005 - val_loss: 2.6723\n",
"Epoch 11/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 30ms/step - accuracy: 0.6103 - loss: 2.1201 - val_accuracy: 0.6028 - val_loss: 2.6278\n",
"Epoch 12/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 30ms/step - accuracy: 0.6120 - loss: 2.1040 - val_accuracy: 0.6085 - val_loss: 2.6569\n",
"Epoch 13/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 30ms/step - accuracy: 0.6244 - loss: 1.9767 - val_accuracy: 0.6089 - val_loss: 2.6050\n",
"Epoch 14/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 30ms/step - accuracy: 0.6330 - loss: 1.8633 - val_accuracy: 0.6154 - val_loss: 2.5609\n",
"Epoch 15/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 31ms/step - accuracy: 0.6275 - loss: 1.8729 - val_accuracy: 0.6161 - val_loss: 2.5589\n",
"Epoch 16/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 31ms/step - accuracy: 0.6261 - loss: 1.8763 - val_accuracy: 0.6131 - val_loss: 2.5505\n",
"Epoch 17/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 41ms/step - accuracy: 0.6298 - loss: 1.8188 - val_accuracy: 0.6092 - val_loss: 2.5473\n",
"Epoch 18/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 43ms/step - accuracy: 0.6330 - loss: 1.7953 - val_accuracy: 0.6161 - val_loss: 2.5135\n",
"Epoch 19/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 43ms/step - accuracy: 0.6389 - loss: 1.7240 - val_accuracy: 0.6165 - val_loss: 2.5359\n",
"Epoch 20/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6245 - loss: 1.7859 - val_accuracy: 0.6154 - val_loss: 2.5241\n",
"Epoch 21/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 43ms/step - accuracy: 0.6350 - loss: 1.7127 - val_accuracy: 0.6134 - val_loss: 2.5240\n",
"Epoch 22/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6457 - loss: 1.6129 - val_accuracy: 0.6257 - val_loss: 2.5314\n",
"Epoch 23/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6354 - loss: 1.6795 - val_accuracy: 0.6222 - val_loss: 2.4927\n",
"Epoch 24/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6323 - loss: 1.6659 - val_accuracy: 0.6196 - val_loss: 2.5081\n",
"Epoch 25/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6436 - loss: 1.5886 - val_accuracy: 0.6230 - val_loss: 2.5029\n",
"Epoch 26/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 32ms/step - accuracy: 0.6432 - loss: 1.5517 - val_accuracy: 0.6241 - val_loss: 2.4935\n",
"Epoch 27/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 32ms/step - accuracy: 0.6501 - loss: 1.5298 - val_accuracy: 0.6192 - val_loss: 2.5179\n",
"Epoch 28/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 33ms/step - accuracy: 0.6434 - loss: 1.5379 - val_accuracy: 0.6215 - val_loss: 2.5084\n",
"Epoch 29/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 33ms/step - accuracy: 0.6515 - loss: 1.4863 - val_accuracy: 0.6241 - val_loss: 2.5016\n",
"Epoch 30/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 35ms/step - accuracy: 0.6707 - loss: 1.3811 - val_accuracy: 0.6283 - val_loss: 2.4840\n",
"Epoch 31/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 39ms/step - accuracy: 0.6602 - loss: 1.4188 - val_accuracy: 0.6234 - val_loss: 2.4991\n",
"Epoch 32/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6582 - loss: 1.3886 - val_accuracy: 0.6280 - val_loss: 2.4856\n",
"Epoch 33/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6656 - loss: 1.3863 - val_accuracy: 0.6245 - val_loss: 2.4951\n",
"Epoch 34/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 41ms/step - accuracy: 0.6665 - loss: 1.3715 - val_accuracy: 0.6287 - val_loss: 2.4736\n",
"Epoch 35/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 43ms/step - accuracy: 0.6527 - loss: 1.4147 - val_accuracy: 0.6218 - val_loss: 2.4797\n",
"Epoch 36/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6775 - loss: 1.2971 - val_accuracy: 0.6230 - val_loss: 2.4879\n",
"Epoch 37/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 33ms/step - accuracy: 0.6693 - loss: 1.2954 - val_accuracy: 0.6341 - val_loss: 2.4773\n",
"Epoch 38/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 32ms/step - accuracy: 0.6560 - loss: 1.3712 - val_accuracy: 0.6268 - val_loss: 2.5058\n",
"Epoch 39/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 33ms/step - accuracy: 0.6826 - loss: 1.2517 - val_accuracy: 0.6287 - val_loss: 2.4729\n",
"Epoch 40/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 33ms/step - accuracy: 0.6666 - loss: 1.2982 - val_accuracy: 0.6249 - val_loss: 2.4828\n",
"Epoch 41/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 36ms/step - accuracy: 0.6816 - loss: 1.2239 - val_accuracy: 0.6306 - val_loss: 2.4720\n",
"Epoch 42/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 40ms/step - accuracy: 0.6769 - loss: 1.2318 - val_accuracy: 0.6261 - val_loss: 2.5146\n",
"Epoch 43/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6771 - loss: 1.2312 - val_accuracy: 0.6253 - val_loss: 2.4932\n",
"Epoch 44/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 42ms/step - accuracy: 0.6798 - loss: 1.2397 - val_accuracy: 0.6257 - val_loss: 2.4925\n",
"Epoch 45/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 43ms/step - accuracy: 0.6823 - loss: 1.2102 - val_accuracy: 0.6238 - val_loss: 2.5187\n",
"Epoch 46/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 43ms/step - accuracy: 0.6820 - loss: 1.2039 - val_accuracy: 0.6295 - val_loss: 2.4984\n",
"Epoch 47/50\n",
"\u001b[1m93/93\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 43ms/step - accuracy: 0.6874 - loss: 1.1845 - val_accuracy: 0.6318 - val_loss: 2.4856\n",
"Epoch 47: early stopping\n"
]
}
],
"source": [
"# Optional checkpoint that would write the best weights (by validation accuracy)\n",
"# to disk. It is defined here but intentionally left out of `callbacks` below.\n",
"checkpoint = ModelCheckpoint(\n",
"    \"question_prediction_model.h5\",\n",
"    monitor=\"val_accuracy\",\n",
"    save_best_only=True,\n",
"    verbose=1,\n",
")\n",
"\n",
"# Stop once validation accuracy has not improved for 10 epochs and roll the\n",
"# model back to its best epoch. Without `restore_best_weights=True` the model\n",
"# keeps the weights of the last epoch, which is what would then be saved and\n",
"# evaluated even though an earlier epoch scored higher on validation.\n",
"early_stop = EarlyStopping(\n",
"    monitor=\"val_accuracy\",\n",
"    patience=10,\n",
"    restore_best_weights=True,\n",
"    verbose=1,\n",
")\n",
"\n",
"# Append a trailing axis so the targets have shape (samples, max_question_len, 1)\n",
"# for the per-timestep softmax trained with sparse categorical cross-entropy.\n",
"train_question_target = np.expand_dims(train_question, -1)\n",
"test_question_target = np.expand_dims(test_question, -1)\n",
"\n",
"# Training parameters\n",
"batch_size = 8\n",
"epochs = 50\n",
"\n",
"# Train the model; the test split doubles as the validation set\n",
"history = model.fit(\n",
"    [train_context, train_token, train_ner, train_srl, train_q_type],\n",
"    train_question_target,\n",
"    batch_size=batch_size,\n",
"    epochs=epochs,\n",
"    validation_data=(\n",
"        [test_context, test_token, test_ner, test_srl, test_q_type],\n",
"        test_question_target,\n",
"    ),\n",
"    callbacks=[\n",
"        # checkpoint,\n",
"        early_stop,\n",
"    ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "184209bc",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1200x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
}
],
"source": [
"# Plot the training curves: accuracy in the left panel, loss in the right one.\n",
"# Each entry is (subplot index, train key, validation key, title, y-axis label).\n",
"plt.figure(figsize=(12, 4))\n",
"for panel, train_key, val_key, panel_title, y_label in (\n",
"    (1, \"accuracy\", \"val_accuracy\", \"Model Accuracy\", \"Accuracy\"),\n",
"    (2, \"loss\", \"val_loss\", \"Model Loss\", \"Loss\"),\n",
"):\n",
"    plt.subplot(1, 2, panel)\n",
"    plt.plot(history.history[train_key])\n",
"    plt.plot(history.history[val_key])\n",
"    plt.title(panel_title)\n",
"    plt.ylabel(y_label)\n",
"    plt.xlabel(\"Epoch\")\n",
"    plt.legend([\"Train\", \"Validation\"], loc=\"upper left\")\n",
"\n",
"# Tighten the layout, write the figure to disk, then display it\n",
"plt.tight_layout()\n",
"plt.savefig(\"question_prediction_training_history.png\")\n",
"plt.show()\n",
"\n",
"# Save the trained model (HDF5 file, as the .h5 extension indicates)\n",
"model.save(\"question_prediction_model_final.h5\")\n",
"\n",
"# Collect every tokenizer, serialized with to_json(), followed by the three\n",
"# maximum sequence lengths, in a single dictionary.\n",
"tokenizer_data = {\n",
"    key: tok.to_json()\n",
"    for key, tok in (\n",
"        (\"word_tokenizer\", tokenizer),\n",
"        (\"ner_tokenizer\", ner_tokenizer),\n",
"        (\"srl_tokenizer\", srl_tokenizer),\n",
"        (\"q_type_tokenizer\", q_type_tokenizer),\n",
"    )\n",
"}\n",
"tokenizer_data[\"max_context_len\"] = max_context_len\n",
"tokenizer_data[\"max_question_len\"] = max_question_len\n",
"tokenizer_data[\"max_token_len\"] = max_token_len\n",
"\n",
"# Write the bundle next to the model file\n",
"with open(\"question_prediction_tokenizers.json\", \"w\") as f:\n",
"    json.dump(tokenizer_data, f)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "71ec455a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"kaliamt aktual ['para', 'brahmana', 'mendirikan', 'sebuah', 'candi', 'sebagai', 'peringatan', 'upacara', 'kurban.']\n",
"kaliamt prediksi ['kali', 'ini', 'sebagai', 'untuk', 'di', 'desa', 'bolak', 'hilir']\n",
"kaliamt aktual ['di', 'negara', 'mana', 'terletak', 'patung', 'yesus', 'penebus?']\n",
"kaliamt prediksi ['keajaiban', 'dunia', 'apa', 'yang', 'berada', 'di']\n",
"kaliamt aktual ['kerajaan', 'banten', 'berdiri', 'pada', 'tahun', '___?']\n",
"kaliamt prediksi ['kerajaan', 'berdiri', 'berdiri', 'pada', 'tahun']\n",
"kaliamt aktual ['siapa', 'pendiri', 'putri', 'mardika', '___']\n",
"kaliamt prediksi ['kapan', 'pendiri', 'utomo', 'didirikan']\n",
"kaliamt aktual ['pada', 'pelayarannya,', 'para', 'pedagang', 'singgah', 'terlebih', 'dahulu', 'di', 'kutai.']\n",
"kaliamt prediksi ['perubahan', 'tarumanegara', 'sebagai', 'yang', 'di', 'di', 'di', 'di', 'di']\n",
"kaliamt aktual ['air', 'mendidih', 'pada', 'suhu', '90', 'derajat', 'celsius', '___']\n",
"kaliamt prediksi ['vincent', 'einstein', 'secara', 'memotong', 'pada', 'ke', '5']\n",
"kaliamt aktual ['abdul', 'muis', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['mohammad', 'muis', 'lahir', 'tanggal']\n",
"kaliamt aktual ['gunung', 'everest', 'memiliki', 'ketinggian', '9000', 'meter', '___']\n",
"kaliamt prediksi ['raja', 'yang', 'memiliki', 'ketinggian', 'isaac']\n",
"kaliamt aktual ['kapan', 'kerajaan', 'kutai', 'mengalami', 'masa', 'keemasan', '___']\n",
"kaliamt prediksi ['siapa', 'apa', 'komunis']\n",
"kaliamt aktual ['benda', 'apa', 'saja', 'yang', 'dapat', 'ditarik', 'oleh', 'magnet', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'yang', 'ditemukan', 'dan']\n",
"kaliamt aktual ['kerajaan', 'tarumanagara', 'berdiri', 'pada', 'tahun', '___?']\n",
"kaliamt prediksi ['kerajaan', 'berdiri', 'berdiri', 'pada', 'tahun']\n",
"kaliamt aktual ['sultan', 'mahmud', 'badaruddin', 'ii', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['i', 'mahmud', 'badaruddin', 'ii', 'lahir', 'tanggal']\n",
"kaliamt aktual ['___', 'menghadiri', 'pameran', 'teknologi', 'di', 'makassar']\n",
"kaliamt prediksi ['menghadiri', 'konser', 'amal', 'di', 'yogyakarta']\n",
"kaliamt aktual ['pangeran', 'diponegoro', 'lahir', 'pada', '___']\n",
"kaliamt prediksi ['mohammad', 'hatta', 'lahir', 'di']\n",
"kaliamt aktual ['lina', 'menghadiri', 'turnamen', 'catur', 'pada', 'tanggal', '15', 'juli', '2023', '___']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'konser', 'teknologi', 'pada', 'tanggal', '5', 'mei', '2023']\n",
"kaliamt aktual ['___', 'menghadiri', 'bazar', 'amal', 'di', 'padang']\n",
"kaliamt prediksi ['menghadiri', 'kompetisi', 'fotografi', 'di', 'yogyakarta']\n",
"kaliamt aktual ['di', 'manakah', 'benua', 'australia', 'terletak', '___']\n",
"kaliamt prediksi ['berapa', 'dikenal', 'terletak', 'di']\n",
"kaliamt aktual ['ernest', 'douwes', 'dekker', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['ernest', 'douwes', 'dekker', 'lahir', 'di']\n",
"kaliamt aktual ['i', 'gusti', 'ngurah', 'rai', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['i', 'gusti', 'ngurah', 'rai', 'lahir', 'di']\n",
"kaliamt aktual ['apa', 'kepanjangan', 'bu?']\n",
"kaliamt prediksi ['kapan', 'budi', 'utomo']\n",
"kaliamt aktual ['raja', 'balitung', 'memerintah', 'mataram', 'kuno', 'dari', 'tahun', '___', 'hingga', 'tahun', '___?']\n",
"kaliamt prediksi ['siapa', 'tahun', 'berapa', 'yang', 'mataram', 'pada', 'ke']\n",
"kaliamt aktual ['___', 'adalah', 'kota', 'kembang', 'yang', 'terkenal', 'dengan', 'fashion']\n",
"kaliamt prediksi ['adalah', 'kota', 'yang', 'yang', 'kaya', 'akan']\n",
"kaliamt aktual ['sebutkan', 'proses', 'dalam', 'pernapasan', '___']\n",
"kaliamt prediksi ['apa', 'saja', 'organ', 'dari']\n",
"kaliamt aktual ['sebutkan', 'tanggal', 'tiara', 'melakukan', 'melanjutkan', 'ke', 'bali', '___']\n",
"kaliamt prediksi ['pada', 'tanggal', 'berapa', 'melakukan', 'pergi', 'ke', 'bandung']\n",
"kaliamt aktual ['kh', 'ahmad', 'dahlan', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['ernest', 'douwes', 'dekker', 'lahir', 'di']\n",
"kaliamt aktual ['___', 'adalah', 'kota', 'kembang', 'yang', 'terkenal', 'dengan', 'fashion']\n",
"kaliamt prediksi ['adalah', 'kota', 'yang', 'yang', 'kaya', 'akan']\n",
"kaliamt aktual ['inskripsi', 'ini', 'ditemukan', 'di', 'kampung', 'batu', 'tumbuh,', 'desa', 'tugu,', 'dekat', 'tanjung', 'priok,', 'jakarta.']\n",
"kaliamt prediksi ['raja', 'mulawarman', 'memiliki', 'memeluk', 'tinggi']\n",
"kaliamt aktual ['siapa', 'raja', 'pertama', 'kerajaan', 'demak?']\n",
"kaliamt prediksi ['siapa', 'yang', 'memerintah', 'kerajaan', 'palapa']\n",
"kaliamt aktual ['penduduk', 'tarumanegara', 'menjadi', 'makmur', 'tanpa', 'bantuan', 'sungai.']\n",
"kaliamt prediksi ['prasasti', 'mulawarman', 'memiliki', 'di', 'di']\n",
"kaliamt aktual ['rian', 'menghadiri', 'seminar', 'pendidikan', 'pada', 'tanggal', '5', 'mei', '2023', '___']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'kompetisi', 'teknologi', 'pada', 'tanggal', '5', 'mei', '2023']\n",
"kaliamt aktual ['apa', 'fungsi', 'pembuluh', 'xilem', 'pada', 'tumbuhan', '___']\n",
"kaliamt prediksi ['apa', 'terdiri', 'atas', 'kurang']\n",
"kaliamt aktual ['apa', 'nama', 'keajaiban', 'dunia', 'di', 'meksiko?']\n",
"kaliamt prediksi ['apa', 'apa', 'organ', 'dari']\n",
"kaliamt aktual ['pada', 'tanggal', 'berapa', 'roni', 'liburan', 'ke', 'surabaya', '___']\n",
"kaliamt prediksi ['pada', 'tanggal', 'berapa', 'melakukan', 'pergi', 'ke', 'bandung']\n",
"kaliamt aktual ['kudungga', 'berubah', 'menjadi', '___?']\n",
"kaliamt prediksi ['siapa', 'terdiri', 'atas', 'kurang']\n",
"kaliamt aktual ['siapa', 'pendiri', 'kerajaan', 'singhasari?']\n",
"kaliamt prediksi ['siapa', 'yang', 'berapa', 'sumpah']\n",
"kaliamt aktual ['prasasti', 'kedukan', 'bukit', 'ditemukan', 'di', 'tepi', 'sungai', '___?']\n",
"kaliamt prediksi ['adalah', 'kota', 'pelajar', 'yang', 'kaya', 'akan']\n",
"kaliamt aktual ['kapan', 'masa', 'reformasi', 'di', 'indonesia', 'dimulai', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'pertama', 'kerajaan', 'palapa']\n",
"kaliamt aktual ['pattimura', 'lahir', 'pada', '___']\n",
"kaliamt prediksi ['dimana', 'hatta', 'lahir']\n",
"kaliamt aktual ['sang', 'mulawarman', 'memberi', 'sedekah', '20.000', 'ekor', 'sapi', 'kepada', 'para', 'brahmana.']\n",
"kaliamt prediksi ['kepada', 'yang', 'sedekah', 'di', 'diberikan']\n",
"kaliamt aktual ['bahan', 'apa', 'saja', 'yang', 'dibutuhkan', 'dalam', 'fotosintesis', '___']\n",
"kaliamt prediksi ['berapa', 'luas', 'terletak', 'di']\n",
"kaliamt aktual ['sebutkan', 'keajaiban', 'dunia', 'yang', 'ada', 'di', 'yordania?']\n",
"kaliamt prediksi ['keajaiban', 'dunia', 'apa', 'yang', 'berada', 'di']\n",
"kaliamt aktual ['bandung', 'dikenal', 'sebagai', '___']\n",
"kaliamt prediksi ['benua', 'dikenal', 'sebagai']\n",
"kaliamt aktual ['___', 'adalah', 'kota', 'sejuk', 'dengan', 'banyak', 'destinasi', 'wisata', 'alam']\n",
"kaliamt prediksi ['adalah', 'kota', 'pahlawan', 'yang', 'di', 'di', 'jawa']\n",
"kaliamt aktual ['planet', 'selain', 'saturnus', 'yang', 'memiliki', 'cincin', 'adalah', '___']\n",
"kaliamt prediksi ['keajaiban', 'dunia', 'apa', 'yang', 'berada']\n",
"kaliamt aktual ['di', 'mana', 'kerajaan', 'kutai', 'berdiri', '___']\n",
"kaliamt prediksi ['siapa', 'perdagangan', 'yang', 'yang', 'dari', 'dari']\n",
"kaliamt aktual ['pekerjaan', 'penggalian', 'selesai', 'dalam', '21', 'hari.']\n",
"kaliamt prediksi ['berapa', 'yang', 'sedekah', 'sedekah', 'diberikan']\n",
"kaliamt aktual ['galian', 'itu', 'panjangnya', 'kurang', 'dari', '10', 'km.']\n",
"kaliamt prediksi ['berapa', 'yang', 'sedekah', 'sedekah', 'diberikan']\n",
"kaliamt aktual ['organisasi', 'perempuan', 'pertama', 'pada', '1912', 'adalah', '___?']\n",
"kaliamt prediksi ['kerajaan', 'berdiri', 'berdiri', 'pada', 'tahun']\n",
"kaliamt aktual ['respirasi', 'adalah', '___', 'pertukaran', 'gas?']\n",
"kaliamt prediksi ['respirasi', 'adalah', 'pertukaran', 'pertukaran', 'gas', 'hindia']\n",
"kaliamt aktual ['___', 'adalah', 'ibukota', 'indonesia', 'dan', 'pusat', 'pemerintahan']\n",
"kaliamt prediksi ['siapa', 'yang', 'yang', 'yang', 'dan']\n",
"kaliamt aktual ['organ', 'apa', 'yang', 'berperan', 'penting', 'dalam', 'sistem', 'peredaran', 'darah', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'lembu', 'dipersembahkan']\n",
"kaliamt aktual ['yogyakarta', 'dikenal', 'sebagai', 'kota', 'pelajar', 'yang', 'kaya', 'akan', 'budaya', '___']\n",
"kaliamt prediksi ['prasasti', 'dikenal', 'sebagai', 'terletak', 'di', 'di', 'sungai']\n",
"kaliamt aktual ['apa', 'fungsi', 'insulin', 'dalam', 'tubuh', '___']\n",
"kaliamt prediksi ['balaputradewa', 'mempunyai', 'putra', 'dari']\n",
"kaliamt aktual ['kerajaan', '___', 'berdiri', 'pada', 'tahun', '1482?']\n",
"kaliamt prediksi ['kerajaan', 'berdiri', 'berdiri', 'pada', 'tahun']\n",
"kaliamt aktual ['pada', 'tanggal', 'berapa', 'budi', 'pindah', 'ke', 'bali', '___']\n",
"kaliamt prediksi ['kapan', 'aktivitas', 'pindah', 'ke', 'melanjutkan', 'ke', 'oleh']\n",
"kaliamt aktual ['ali', 'menghadiri', '___', 'robotik']\n",
"kaliamt prediksi ['lina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['putri', 'menghadiri', 'festival', 'kuliner', 'pada', 'tanggal', '25', 'desember', '2023', '___']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'kompetisi', 'amal', 'pada', 'tanggal', '5', 'juli', '2023']\n",
"kaliamt aktual ['sutan', 'sjahrir', 'lahir', 'tanggal', 'berapa', '___']\n",
"kaliamt prediksi ['mohammad', 'muis', 'lahir', 'tanggal']\n",
"kaliamt aktual ['siapa', 'yang', 'mengeluarkan', 'inskripsi', 'ini?']\n",
"kaliamt prediksi ['apa', 'mana', 'mana', 'kurang']\n",
"kaliamt aktual ['kerajaan', 'galuh', 'berdiri', 'pada', 'tahun', '___?']\n",
"kaliamt prediksi ['kerajaan', 'berdiri', 'berdiri', 'pada', 'tahun']\n",
"kaliamt aktual ['pluto', 'sekarang', 'dikategorikan', 'sebagai', '___']\n",
"kaliamt prediksi ['apa', 'saja', 'memberi', 'di', 'diberikan']\n",
"kaliamt aktual ['___', 'adalah', 'kota', 'kembang', 'yang', 'terkenal', 'dengan', 'fashion']\n",
"kaliamt prediksi ['adalah', 'kota', 'yang', 'yang', 'kaya', 'akan']\n",
"kaliamt aktual ['kerajaan', '___', 'berdiri', 'pada', 'tahun', '1514?']\n",
"kaliamt prediksi ['kerajaan', 'berdiri', 'berdiri', 'pada', 'tahun']\n",
"kaliamt aktual ['partai', 'nasional', 'indonesia', 'didirikan', 'pada', 'tanggal', '___', 'juli', '1927?']\n",
"kaliamt prediksi ['pada', 'wuruk', 'memerintah', 'majapahit', 'dari', 'tahun']\n",
"kaliamt aktual ['siapa', 'yang', 'mengemukakan', 'teori', 'evolusi', '___']\n",
"kaliamt prediksi ['siapa', 'penemu', 'pertama', 'kemerdekaan']\n",
"kaliamt aktual ['fajar', 'menghadiri', '___', 'fotografi']\n",
"kaliamt prediksi ['lina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['siapa', 'nama', 'kakek', 'dari', 'raja', 'mulawarman', '___']\n",
"kaliamt prediksi ['di', 'negara', 'mana', 'dunia', 'dunia']\n",
"kaliamt aktual ['para', 'ahli', 'berpendapat', 'bahwa', 'yupa', 'dibuat', 'sekitar', 'abad', 'ke-5', 'm', 'karena', 'melihat', 'bentuk', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'berapa', 'kerajaan', 'palapa']\n",
"kaliamt aktual ['pusat', 'kerajaan', 'tarumanegara', 'diperkirakan', 'berada', 'di', 'antara', 'sungai', '____', 'dan', 'cisadane']\n",
"kaliamt prediksi ['adalah', 'kota', 'pahlawan', 'yang', 'di', 'di', 'destinasi']\n",
"kaliamt aktual ['siapa', 'pendiri', 'partai', 'nasional', 'indonesia', '___']\n",
"kaliamt prediksi ['kapan', 'budi', 'utomo']\n",
"kaliamt aktual ['siapa', 'yang', 'melakukan', 'selamatan', 'bagi', 'purnawarman?']\n",
"kaliamt prediksi ['siapa', 'yang', 'lembu', 'dipersembahkan', 'dipersembahkan']\n",
"kaliamt aktual ['___', 'menghadiri', 'turnamen', 'catur', 'di', 'malang']\n",
"kaliamt prediksi ['menghadiri', 'kompetisi', 'fotografi', 'di', 'yogyakarta']\n",
"kaliamt aktual ['cut', 'nyak', 'dien', 'wafat', 'tanggal', 'berapa', '___']\n",
"kaliamt prediksi ['i', 'nyak', 'dien', 'wafat', 'di']\n",
"kaliamt aktual ['ernest', 'douwes', 'dekker', 'lahir', 'pada', '___']\n",
"kaliamt prediksi ['ernest', 'douwes', 'dekker', 'lahir', 'di']\n",
"kaliamt aktual ['huruf', 'dan', 'bahasa', 'apa', 'yang', 'digunakan', 'pada', 'prasasti', 'yupa', '___']\n",
"kaliamt prediksi ['respirasi', 'adalah', 'yang', 'yang', 'gas', 'yang']\n",
"kaliamt aktual ['apa', 'nama', 'keajaiban', 'dunia', 'di', 'brasil?']\n",
"kaliamt prediksi ['planet', 'dunia', 'yang', 'yang', 'berada', 'di']\n",
"kaliamt aktual ['di', 'mana', 'proses', 'penyerapan', 'zat', 'makanan', 'terjadi', '___']\n",
"kaliamt prediksi ['apa', 'saja', 'organ', 'dari']\n",
"kaliamt aktual ['pada', 'tanggal', 'berapa', 'lina', 'liburan', 'ke', 'bali', '___']\n",
"kaliamt prediksi ['pada', 'tanggal', 'berapa', 'melakukan', 'melanjutkan', 'ke', 'bandung']\n",
"kaliamt aktual ['pegunungan', 'terbesar', 'di', 'benua', 'eropa', 'adalah', '___']\n",
"kaliamt prediksi ['apa', 'yang', 'sedekah', 'sedekah', 'diberikan']\n",
"kaliamt aktual ['tanggal', 'berapa', 'farhan', 'lahir', '___']\n",
"kaliamt prediksi ['dimana', 'berapa', 'lahir']\n",
"kaliamt aktual ['bukit', 'adalah', 'bagian', 'permukaan', 'bumi', 'yang', 'lebih', '___', 'dibandingkan', 'daerah', 'di', 'sekitarnya?']\n",
"kaliamt prediksi ['siapa', 'yang', 'menggali', 'dipersembahkan']\n",
"kaliamt aktual ['sutan', 'sjahrir', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['mohammad', 'muis', 'lahir', 'tanggal']\n",
"kaliamt aktual ['nina', 'menghadiri', '___', 'teknologi']\n",
"kaliamt prediksi ['lina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['telah', 'ditemukan', '___', 'buah', 'prasasti', 'terkait', 'perkembangan', 'kerajaan', 'tarumanegara']\n",
"kaliamt prediksi ['adalah', 'merupakan', 'yang', 'yang', 'bagi', 'karena', 'di']\n",
"kaliamt aktual ['dewi', 'menghadiri', '___', 'robotik']\n",
"kaliamt prediksi ['lina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['apa', 'kepanjangan', 'pgri?']\n",
"kaliamt prediksi ['siapa', 'apa', 'yang', 'dibentuk', 'selanjutnya']\n",
"kaliamt aktual ['gunung', 'adalah', 'bagian', 'permukaan', 'bumi', 'yang', 'berbentuk', '___', 'atau', '___?']\n",
"kaliamt prediksi ['siapa', 'penemu', 'atas']\n",
"kaliamt aktual ['kompleks', 'trowulan', 'diperkirakan', 'menjadi', 'pusat', 'pemerintahan', 'kerajaan', '___?']\n",
"kaliamt prediksi ['siapa', 'yang', 'yang', 'yang', 'mataram', 'pada', 'pemerintahan']\n",
"kaliamt aktual ['prasasti', 'talang', 'tuo', 'ditemukan', 'di', 'daerah', '___?']\n",
"kaliamt prediksi ['adalah', 'kota', 'tuo', 'yang', 'di', 'akan', 'budaya']\n",
"kaliamt aktual ['jakarta', 'dikenal', 'sebagai', '___']\n",
"kaliamt prediksi ['berapa', 'luas', 'terletak']\n",
"kaliamt aktual ['singkatan', 'apa', 'untuk', 'himpunan', 'pengusaha', 'muda', 'indonesia?']\n",
"kaliamt prediksi ['siapa', 'apa', 'yang', 'dibentuk', 'selanjutnya']\n",
"kaliamt aktual ['fajar', 'menghadiri', 'workshop', 'fotografi', 'pada', 'tanggal', '10', 'agustus', '2023', '___']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'konser', 'teknologi', 'pada', 'tanggal', '5', 'mei', '2023']\n",
"kaliamt aktual ['sumber', 'sejarah', 'tarumanegara', 'yang', 'utama', 'adalah', 'prasasti-prasasti', 'yang', 'telah', 'ditemukan.']\n",
"kaliamt prediksi ['respirasi', 'adalah', 'yang', 'yang', 'yang', 'yang']\n",
"kaliamt aktual ['___', 'menghadiri', 'rapat', 'organisasi', 'di', 'surabaya']\n",
"kaliamt prediksi ['menghadiri', 'kompetisi', 'fotografi', 'di', 'yogyakarta']\n",
"kaliamt aktual ['kerajaan', 'kahuripan', 'dibagi', 'menjadi', 'kerajaan', 'janggala', 'dan', 'kerajaan', '___?']\n",
"kaliamt prediksi ['siapa', 'yang', 'kerajaan', 'kerajaan', 'dari']\n",
"kaliamt aktual ['apa', 'nama', 'keajaiban', 'dunia', 'di', 'india?']\n",
"kaliamt prediksi ['keajaiban', 'dunia', 'apa', 'yang', 'berada', 'di']\n",
"kaliamt aktual ['sultan', 'hasanuddin', 'lahir', 'pada', '___']\n",
"kaliamt prediksi ['mohammad', 'hatta', 'lahir', 'di']\n",
"kaliamt aktual ['gurun', 'terbesar', 'ketiga', 'di', 'dunia', 'adalah', 'gurun', '___']\n",
"kaliamt prediksi ['planet', 'sahara', 'proses', 'yang', 'amerika', 'amerika']\n",
"kaliamt aktual ['apa', 'singkatan', 'dari', 'panitia', 'persiapan', 'kemerdekaan', 'indonesia?']\n",
"kaliamt prediksi ['apa', 'apa', 'yang', 'dibentuk', 'selanjutnya']\n",
"kaliamt aktual ['siapa', 'yang', 'memerintah', 'majapahit', 'dari', 'tahun', '1350', 'hingga', '1389', 'm?']\n",
"kaliamt prediksi ['hayam', 'wuruk', 'memerintah', 'majapahit', 'tahun', 'tahun']\n",
"kaliamt aktual ['nina', 'menghadiri', 'workshop', 'fotografi', 'pada', 'tanggal', '5', 'mei', '2023', '___']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'kompetisi', 'teknologi', 'pada', 'tanggal', '5', 'mei', '2023']\n",
"kaliamt aktual ['mohammad', 'hatta', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['mohammad', 'hatta', 'lahir', 'tanggal']\n",
"kaliamt aktual ['apa', 'nama', 'keajaiban', 'dunia', 'di', 'china?']\n",
"kaliamt prediksi ['planet', 'yang', 'yang', 'memperkuat', 'kedudukan', 'matahari', 'matahari']\n",
"kaliamt aktual ['kapan', 'indonesia', 'memproklamasikan', 'kemerdekaannya', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'pertama', 'sumpah', 'palapa']\n",
"kaliamt aktual ['apa', 'fungsi', 'pembuluh', 'floem', 'pada', 'tumbuhan', '___']\n",
"kaliamt prediksi ['apa', 'terdiri', 'atas', 'kurang']\n",
"kaliamt aktual ['kapan', 'nasa', 'didirikan', '___']\n",
"kaliamt prediksi ['siapa', 'pendiri', 'yang', 'didirikan']\n",
"kaliamt aktual ['mars', 'merupakan', 'planet', 'ke', 'berapa', 'dalam', 'tata', 'surya', '___']\n",
"kaliamt prediksi ['adalah', 'apa', 'yang', 'yang', 'dengan']\n",
"kaliamt aktual ['organisasi', 'keagamaan', 'yang', 'bersifat', 'modern', 'dan', 'didirikan', 'pada', '18', 'november', '1912', 'adalah', '___?']\n",
"kaliamt prediksi ['kapan', 'ketua', 'ppki']\n",
"kaliamt aktual ['di', 'mana', 'prasasti', 'ini', 'ditemukan?']\n",
"kaliamt prediksi ['siapa', 'yang', 'yang', 'yang', 'dari', 'dari', 'bolak']\n",
"kaliamt aktual ['dr.', 'cipto', 'mangunkusumo', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['ernest', 'douwes', 'dekker', 'lahir', 'di']\n",
"kaliamt aktual ['___', 'menghadiri', 'festival', 'kuliner', 'di', 'medan']\n",
"kaliamt prediksi ['menghadiri', 'kompetisi', 'robotik', 'di', 'yogyakarta']\n",
"kaliamt aktual ['gunung', 'tertinggi', 'di', 'benua', 'afrika', 'adalah', '___']\n",
"kaliamt prediksi ['siapa', 'terdiri', 'atas', 'kurang']\n",
"kaliamt aktual ['berdasarkan', 'prasasti', 'tugu,', 'purbacaraka', 'memperkirakan', 'pusat', 'tarumanegara', 'ada', 'di', 'daerah', '____']\n",
"kaliamt prediksi ['siapa', 'yang', 'menggali', 'kerajaan']\n",
"kaliamt aktual ['tempat', 'suci', 'raja', 'mulawarman', 'dinamakan', '___']\n",
"kaliamt prediksi ['apa', 'terdiri', 'atas', 'kurang']\n",
"kaliamt aktual ['hana', 'menghadiri', 'turnamen', 'catur', 'pada', 'tanggal', '15', 'juli', '2023', '___']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'kompetisi', 'amal', 'pada', 'tanggal', '5', 'mei', '2023']\n",
"kaliamt aktual ['retina', 'adalah', 'bagian', 'terluar', 'dari', 'mata', 'manusia', '___']\n",
"kaliamt prediksi ['prasasti', 'itu', 'disertai', 'persembahan', '1', '000', 'ekor']\n",
"kaliamt aktual ['kerajaan', 'samudra', 'pasai', 'didirikan', 'oleh', '___?']\n",
"kaliamt prediksi ['siapa', 'yang', 'yang', 'di', 'dari', 'dan']\n",
"kaliamt aktual ['rian', 'menghadiri', '___', 'pendidikan']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['sentot', 'alibasya', 'prawirodirjo', 'lahir', 'tanggal', 'berapa', '___']\n",
"kaliamt prediksi ['ernest', 'douwes', 'dekker', 'lahir', 'di']\n",
"kaliamt aktual ['teuku', 'umar', 'lahir', 'pada', '___']\n",
"kaliamt prediksi ['mohammad', 'hatta', 'lahir', 'di']\n",
"kaliamt aktual ['prasasti', 'jambu', 'terletak', 'di', 'kota', 'jakarta.']\n",
"kaliamt prediksi ['prasasti', 'dikenal', 'memiliki', 'di', 'di', 'sungai']\n",
"kaliamt aktual ['apa', 'nama', 'keajaiban', 'dunia', 'di', 'inggris?']\n",
"kaliamt prediksi ['berapa', 'yang', 'mana', 'dipersembahkan']\n",
"kaliamt aktual ['apa', 'peristiwa', 'politik', 'penting', 'pada', 'masa', 'reformasi', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'pertama', 'kerajaan', 'palapa']\n",
"kaliamt aktual ['di', 'mana', 'sedekah', 'itu', 'dilakukan', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'sedekah', 'sedekah', 'diberikan']\n",
"kaliamt aktual ['di', 'mana', 'dan', 'bagaimana', 'proses', 'pembentukan', 'urine', 'terjadi', '___']\n",
"kaliamt prediksi ['singkatan', 'partai', 'komunis', 'indonesia']\n",
"kaliamt aktual ['kata', 'tarum', 'dipakai', 'sebagai', 'nama', 'sungai', 'musi.']\n",
"kaliamt prediksi ['adalah', 'kota', 'pelajar', 'yang', 'kaya', 'akan']\n",
"kaliamt aktual ['kapan', 'sumpah', 'pemuda', 'diikrarkan', '___']\n",
"kaliamt prediksi ['siapa', 'partai', 'pertama', 'indonesia']\n",
"kaliamt aktual ['h', 'kern', 'membaca', 'inskripsi', 'b', 'sebagai', 'cri', 'tji', 'aroe', 'eun', 'waca.']\n",
"kaliamt prediksi ['adalah', 'adalah', 'komponen', 'yang', 'penting', 'keberlangsungan', 'kehidupan', 'di']\n",
"kaliamt aktual ['raja', 'berganti', 'nama', 'menjadi', '___?']\n",
"kaliamt prediksi ['apa', 'adalah', 'putra', 'dari']\n",
"kaliamt aktual ['tanggal', 'berapa', 'zain', 'lahir', '___']\n",
"kaliamt prediksi ['dimana', 'berapa', 'lahir']\n",
"kaliamt aktual ['galian', 'itu', 'panjangnya', 'kurang', 'dari', '10', 'km.']\n",
"kaliamt prediksi ['berapa', 'panjang', 'sedekah', 'sedekah', 'diberikan']\n",
"kaliamt aktual ['yogyakarta', 'dikenal', 'sebagai', '___']\n",
"kaliamt prediksi ['yogyakarta', 'dikenal', 'sebagai']\n",
"kaliamt aktual ['prasasti', 'kedukan', 'bukit', 'ditemukan', 'dekat', '___?']\n",
"kaliamt prediksi ['adalah', 'kota', 'pelajar', 'yang', 'kaya', 'akan']\n",
"kaliamt aktual ['apa', 'kepanjangan', 'bpupki?']\n",
"kaliamt prediksi ['kapan', 'ketua', 'ppki']\n",
"kaliamt aktual ['apa', 'peranan', 'sungai', 'mahakam', 'bagi', 'perekonomian', '___']\n",
"kaliamt prediksi ['adalah', 'kota', 'yang', 'yang', 'kaya', 'akan']\n",
"kaliamt aktual ['bandung', 'dikenal', 'sebagai', 'kota', 'kembang', 'yang', 'terkenal', 'dengan', 'fashion', '___']\n",
"kaliamt prediksi ['prasasti', 'kota', 'sebagai', 'terletak', 'di', 'di', 'sungai']\n",
"kaliamt aktual ['siapa', 'yang', 'membangun', 'taj', 'mahal', '___']\n",
"kaliamt prediksi ['apa', 'adalah', 'keturunan', 'dari']\n",
"kaliamt aktual ['pegunungan', 'adalah', 'dataran', '___?']\n",
"kaliamt prediksi ['apa', 'terdiri', 'atas', 'kurang']\n",
"kaliamt aktual ['raka', 'menghadiri', '___', 'organisasi']\n",
"kaliamt prediksi ['lina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['dimana', 'letaknya', 'stonehenge?']\n",
"kaliamt prediksi ['planet', 'dunia', 'apa', 'yang', 'berada', 'di']\n",
"kaliamt aktual ['___', 'menghadiri', 'konser', 'musik', 'di', 'padang']\n",
"kaliamt prediksi ['menghadiri', 'konser', 'amal', 'di', 'yogyakarta']\n",
"kaliamt aktual ['rian', 'menghadiri', '___', 'renang']\n",
"kaliamt prediksi ['lina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['___', 'menghadiri', 'workshop', 'fotografi', 'di', 'surabaya']\n",
"kaliamt prediksi ['menghadiri', 'konser', 'fotografi', 'di', 'yogyakarta']\n",
"kaliamt aktual ['apa', 'kepanjangan', 'sdi?']\n",
"kaliamt prediksi ['siapa', 'partai', 'komunis', 'indonesia']\n",
"kaliamt aktual ['di', 'mana', 'letak', 'kerajaan', 'kutai', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'menggali', 'kerajaan', 'dari']\n",
"kaliamt aktual ['raja', 'aswawarman', 'dikatakan', 'seperti', '___', 'ansuman']\n",
"kaliamt prediksi ['di', 'negara', 'mana', 'di', 'dunia']\n",
"kaliamt aktual ['sejarah', 'tertua', 'berkaitan', 'pengendalian', 'banjir', 'ada', 'pada', 'masa', 'kerajaan', 'tarumanegara.']\n",
"kaliamt prediksi ['perubahan', 'adalah', 'manusia', 'tersusun', 'atas', 'faring', 'laring', 'laring']\n",
"kaliamt aktual ['sebutkan', 'keajaiban', 'dunia', 'yang', 'ada', 'di', 'china?']\n",
"kaliamt prediksi ['planet', 'apa', 'yang', 'memperkuat', 'kedudukan', 'matahari', 'matahari']\n",
"kaliamt aktual ['benua', 'terbesar', 'di', 'dunia', 'adalah', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'lembu', 'yang', 'dan']\n",
"kaliamt aktual ['apa', 'yang', 'ditonjolkan', 'dalam', 'prasasti', 'cidanghiang', '___']\n",
"kaliamt prediksi ['adalah', 'kota', 'yang', 'ditemukan', 'di']\n",
"kaliamt aktual ['nina', 'menghadiri', '___', 'kuliner']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['kerajaan', 'majapahit', 'berdiri', 'pada', 'tahun', '___?']\n",
"kaliamt prediksi ['kerajaan', 'berdiri', 'berdiri', 'pada', 'tahun']\n",
"kaliamt aktual ['kerajaan', 'demak', 'berdiri', 'tahun', '___?']\n",
"kaliamt prediksi ['siapa', 'yang', 'memerintah', 'kerajaan', 'palapa']\n",
"kaliamt aktual ['dimana', 'proses', 'fotosintesis', 'berlangsung', '___']\n",
"kaliamt prediksi ['siapa', 'saja', 'sedekah', 'sedekah', 'diberikan']\n",
"kaliamt aktual ['balaputradewa', 'adalah', 'putra', 'dari', 'raja', '___?']\n",
"kaliamt prediksi ['apa', 'adalah', 'keturunan', 'dari']\n",
"kaliamt aktual ['budi', 'menghadiri', 'konser', 'musik', 'pada', 'tanggal', '5', 'mei', '2023', '___']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'konser', 'teknologi', 'pada', 'tanggal', '5', 'mei', '2023']\n",
"kaliamt aktual ['pertempuran', 'sengit', 'terjadi', 'antara', 'para', 'pejuang', 'dari', 'buleleng', 'yang', 'dibantu', 'oleh', '___.']\n",
"kaliamt prediksi ['kapan', 'ketua', 'ppki']\n",
"kaliamt aktual ['kapan', 'roni', 'dan', 'firmansyah', 'melakukan', 'liburan', 'ke', 'jakarta', '___']\n",
"kaliamt prediksi ['pada', 'tanggal', 'berapa', 'melakukan', 'melanjutkan', 'ke', 'bandung']\n",
"kaliamt aktual ['di', 'wilayah', 'mana', 'prasasti', 'kerajaan', 'tarumanegara', 'ditemukan?']\n",
"kaliamt prediksi ['siapa', 'perdagangan', 'yang', 'kerajaan', 'dari', 'dari']\n",
"kaliamt aktual ['dimana', 'mohammad', 'hatta', 'lahir?']\n",
"kaliamt prediksi ['mohammad', 'hatta', 'lahir', 'tanggal']\n",
"kaliamt aktual ['kapan', 'prasasti', 'yupa', 'diperkirakan', 'dibuat', '___']\n",
"kaliamt prediksi ['siapa', 'kota', 'yang', 'yang', 'dan', 'dan']\n",
"kaliamt aktual ['hubungan', 'dagang', 'sudah', 'terjadi', 'dengan', 'luar.']\n",
"kaliamt prediksi ['raja', 'yang', 'memiliki', 'ketinggian', 'tinggi']\n",
"kaliamt aktual ['apa', 'nama', 'kali', 'yang', 'kedua', 'digali', 'raja', 'purnawarman?']\n",
"kaliamt prediksi ['siapa', 'gomati', 'digali', 'digali', 'mengalir', 'memerintahkan', 'tengah', 'tengah']\n",
"kaliamt aktual ['kerajaan', 'sriwijaya', 'berkuasa', 'dari', 'abad', 'ke-___', 'hingga', 'abad', 'ke-___?']\n",
"kaliamt prediksi ['siapa', 'yang', 'pertama', 'kerajaan']\n",
"kaliamt aktual ['di', 'mana', 'prasasti', 'jambu', '(pasir', 'koleangkak)', 'terletak?']\n",
"kaliamt prediksi ['siapa', 'yang', 'yang', 'yang', 'di']\n",
"kaliamt aktual ['organisasi', 'trikoro', 'darmo', 'berdiri', 'di', '___?']\n",
"kaliamt prediksi ['siapa', 'partai', 'komunis', 'indonesia']\n",
"kaliamt aktual ['rian', 'menghadiri', 'festival', 'kuliner', 'pada', 'tanggal', '10', 'agustus', '2023', '___']\n",
"kaliamt prediksi ['nina', 'menghadiri', 'kompetisi', 'amal', 'pada', 'tanggal', '5', 'juli', '2023']\n",
"kaliamt aktual ['kapan', 'budi', 'dan', 'santoso', 'melakukan', 'pindah', 'ke', 'yogyakarta', '___']\n",
"kaliamt prediksi ['kapan', 'aktivitas', 'pindah', 'ke', 'pindah', 'dilakukan', 'oleh']\n",
"kaliamt aktual ['kapan', 'andi', 'dan', 'setiawan', 'melakukan', 'pergi', 'ke', 'medan', '___']\n",
"kaliamt prediksi ['pada', 'tanggal', 'berapa', 'melakukan', 'pergi', 'ke', 'bandung']\n",
"kaliamt aktual ['prasasti', 'kedukan', 'bukit', 'ditemukan', 'di', 'tepi', 'sungai', '___?']\n",
"kaliamt prediksi ['adalah', 'kota', 'pelajar', 'yang', 'kaya', 'akan']\n",
"kaliamt aktual ['dalam', 'bahasa', 'apa', 'inskripsi', 'tersebut', 'ditulis?']\n",
"kaliamt prediksi ['planet', 'yang', 'yang', 'terbesar', 'planet', 'matahari', 'matahari']\n",
"kaliamt aktual ['apa', 'sumber', 'utama', 'sejarah', 'kutai', '___']\n",
"kaliamt prediksi ['respirasi', 'adalah', 'yang', 'yang', 'gas', 'yang']\n",
"kaliamt aktual ['budi', 'menghadiri', '___', 'musik']\n",
"kaliamt prediksi ['lina', 'menghadiri', 'fotografi']\n",
"kaliamt aktual ['apa', 'nama', 'bagian', 'terluar', 'dari', 'mata', 'manusia', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'lembu', 'yang', 'dan']\n",
"kaliamt aktual ['sebutkan', 'keajaiban', 'dunia', 'yang', 'ada', 'di', 'inggris?']\n",
"kaliamt prediksi ['apa', 'mana', 'mana', 'di']\n",
"kaliamt aktual ['mereka', 'banyak', 'yang', 'melakukan', 'perdagangan.']\n",
"kaliamt prediksi ['raja', 'memiliki', 'memiliki']\n",
"kaliamt aktual ['jakarta', 'dikenal', 'sebagai', 'ibukota', 'indonesia', 'dan', 'pusat', 'pemerintahan', '___']\n",
"kaliamt prediksi ['prasasti', 'dikenal', 'disertai', 'persembahan', '1', '000', 'ekor']\n",
"kaliamt aktual ['di', 'negara', 'mana', 'terletak', 'angkot', 'wat?']\n",
"kaliamt prediksi ['siapa', 'yang', 'menggali', 'kerajaan']\n",
"kaliamt aktual ['siapa', 'yang', 'melakukan', 'selamatan', 'bagi', 'purnawarman?']\n",
"kaliamt prediksi ['siapa', 'yang', 'lembu', 'dipersembahkan', 'dipersembahkan']\n",
"kaliamt aktual ['dunia', 'terdiri', 'atas', '___', 'lautan?']\n",
"kaliamt prediksi ['apa', 'terdiri', 'atas', 'kurang']\n",
"kaliamt aktual ['kapan', 'bpupki', 'didirikan', '___']\n",
"kaliamt prediksi ['kapan', 'pendiri', 'utomo']\n",
"kaliamt aktual ['pattimura', 'lahir', 'di', '___']\n",
"kaliamt prediksi ['dimana', 'hatta', 'lahir']\n",
"kaliamt aktual ['dimana', 'kerajaan', 'majapahit', 'berdiri', '___']\n",
"kaliamt prediksi ['kapan', 'partai', 'komunis', 'indonesia']\n",
"kaliamt aktual ['masyarakat', 'kutai', 'melakukan', 'pertanian.']\n",
"kaliamt prediksi ['benua', 'mulawarman', 'memiliki', 'di']\n",
"kaliamt aktual ['dataran', '___', 'adalah', 'daerah', 'datar', 'yang', 'memiliki', 'ketinggian', 'lebih', 'dari', '400', 'mdpal?']\n",
"kaliamt prediksi ['siapa', 'yang', 'internasional', 'kerajaan', 'dari', 'dari']\n",
"kaliamt aktual ['siapa', 'raja', 'yang', 'terkenal', 'pada', 'masa', 'tarumanegara', '___']\n",
"kaliamt prediksi ['siapa', 'yang', 'yang', 'kerajaan', 'dari']\n",
"kaliamt aktual ['siapa', 'yang', 'memproklamasikan', 'kemerdekaan', 'indonesia', '___']\n",
"kaliamt prediksi ['tahun', 'berapa', 'sebelum', 'waterloo']\n",
"\n",
"Model Performance Metrics:\n",
"Average BLEU Score: 2.13%\n",
"Average ROUGE-1 Score: 25.43%\n",
"Average ROUGE-2 Score: 10.45%\n",
"Average ROUGE-L Score: 24.96%\n"
]
}
],
"source": [
"def predict_question(context, tokens, ner, srl, q_type):\n",
"    \"\"\"Predict a question string for a single sample.\n",
"\n",
"    Reads these names from the notebook's global scope: `tokenizer`,\n",
"    `ner_tokenizer`, `srl_tokenizer`, `q_type_tokenizer`, `max_context_len`,\n",
"    `max_token_len`, `q_type_vocab_size`, `model` and `preprocess_text`.\n",
"\n",
"    Args:\n",
"        context: Raw context text; it is passed through `preprocess_text`.\n",
"        tokens: Iterable of strings, space-joined and encoded with `tokenizer`.\n",
"        ner: Iterable of strings, space-joined and encoded with `ner_tokenizer`.\n",
"        srl: Iterable of strings, space-joined and encoded with `srl_tokenizer`.\n",
"        q_type: Label looked up in `q_type_tokenizer.word_index`; a label that\n",
"            is not found falls back to index 0.\n",
"\n",
"    Returns:\n",
"        The predicted words joined by single spaces. Index 0 is skipped and any\n",
"        other index missing from `tokenizer.word_index` yields an empty string.\n",
"    \"\"\"\n",
"\n",
"    def _encode(text_tokenizer, text, width):\n",
"        # Turn one text into an id sequence and post-pad it to `width`.\n",
"        ids = text_tokenizer.texts_to_sequences([text])[0]\n",
"        return pad_sequences([ids], maxlen=width, padding=\"post\")\n",
"\n",
"    # Input order matches the list handed to `model.predict`:\n",
"    # context, tokens, NER, SRL, one-hot question type.\n",
"    model_inputs = [\n",
"        _encode(tokenizer, preprocess_text(context), max_context_len),\n",
"        _encode(tokenizer, \" \".join(tokens), max_token_len),\n",
"        _encode(ner_tokenizer, \" \".join(ner), max_token_len),\n",
"        _encode(srl_tokenizer, \" \".join(srl), max_token_len),\n",
"        tf.keras.utils.to_categorical(\n",
"            [q_type_tokenizer.word_index.get(q_type, 0)],\n",
"            num_classes=q_type_vocab_size,\n",
"        ),\n",
"    ]\n",
"\n",
"    probabilities = model.predict(model_inputs, verbose=0)\n",
"\n",
"    # Greedy decoding: highest-scoring id per row of the first prediction.\n",
"    # NOTE(review): assumes the output is (batch, steps, vocab) - confirm.\n",
"    best_ids = np.argmax(probabilities[0], axis=1)\n",
"\n",
"    index_to_word = {idx: word for word, idx in tokenizer.word_index.items()}\n",
"    return \" \".join(index_to_word.get(i, \"\") for i in best_ids if i != 0)\n",
"\n",
"\n",
"def evaluate_model_performance(test_data, smoothing_function=None, verbose=True):\n",
"    \"\"\"Score predicted questions against the reference questions.\n",
"\n",
"    For every entry of `test_data` the sample is looked up in the global lists\n",
"    `contexts`, `tokens_list`, `ner_list`, `srl_list`, `q_types` and\n",
"    `questions`, a question is generated with `predict_question`, and\n",
"    sentence-level BLEU plus ROUGE-1/2/L F-measures are collected.\n",
"\n",
"    Args:\n",
"        test_data: Iterable of positions into the global sample lists.\n",
"        smoothing_function: Optional callable forwarded to NLTK's\n",
"            `sentence_bleu`. The default (None) keeps the unsmoothed score.\n",
"            NOTE(review): with the default 4-gram weights and hypotheses of\n",
"            only a few words, unsmoothed BLEU collapses to (near) zero; consider\n",
"            passing `nltk.translate.bleu_score.SmoothingFunction().method1`.\n",
"        verbose: When True (default) print the reference and predicted tokens\n",
"            of every sample.\n",
"\n",
"    Returns:\n",
"        dict with the keys `avg_bleu_score`, `avg_rouge1`, `avg_rouge2` and\n",
"        `avg_rougel`. Each value is the mean over the scored samples, or NaN\n",
"        when no sample was scored for that metric.\n",
"    \"\"\"\n",
"    # NOTE(review): `use_stemmer=True` enables rouge_score's stemmer, which\n",
"    # presumably targets English - confirm it suits this dataset's language.\n",
"    scorer = rouge_scorer.RougeScorer([\"rouge1\", \"rouge2\", \"rougeL\"], use_stemmer=True)\n",
"\n",
"    bleu_scores = []\n",
"    rouge1_scores = []\n",
"    rouge2_scores = []\n",
"    rougel_scores = []\n",
"\n",
"    for sample_index in test_data:\n",
"        actual_question = questions[sample_index]\n",
"        pred_question = predict_question(\n",
"            contexts[sample_index],\n",
"            tokens_list[sample_index],\n",
"            ner_list[sample_index],\n",
"            srl_list[sample_index],\n",
"            q_types[sample_index],\n",
"        )\n",
"\n",
"        # BLEU works on whitespace tokens, so punctuation attached to a\n",
"        # reference word stays part of that token.\n",
"        actual_tokens = actual_question.split()\n",
"        pred_tokens = pred_question.split()\n",
"\n",
"        if verbose:\n",
"            print(\"kaliamt aktual\", actual_tokens)\n",
"            print(\"kaliamt prediksi\", pred_tokens)\n",
"\n",
"        # sentence_bleu keeps its default n-gram weights; only the optional\n",
"        # smoothing function is forwarded.\n",
"        bleu_scores.append(\n",
"            sentence_bleu(\n",
"                [actual_tokens], pred_tokens, smoothing_function=smoothing_function\n",
"            )\n",
"        )\n",
"\n",
"        try:\n",
"            rouge_scores = scorer.score(actual_question, pred_question)\n",
"            f_measures = (\n",
"                rouge_scores[\"rouge1\"].fmeasure,\n",
"                rouge_scores[\"rouge2\"].fmeasure,\n",
"                rouge_scores[\"rougeL\"].fmeasure,\n",
"            )\n",
"        except Exception as e:\n",
"            # Best effort: report the failure and leave this sample out of ROUGE.\n",
"            print(f\"Error calculating ROUGE score: {e}\")\n",
"        else:\n",
"            # Append all three together so the lists never drift out of sync.\n",
"            rouge1_scores.append(f_measures[0])\n",
"            rouge2_scores.append(f_measures[1])\n",
"            rougel_scores.append(f_measures[2])\n",
"\n",
"    def _mean_or_nan(values):\n",
"        # np.mean([]) warns before returning NaN; return NaN directly instead.\n",
"        return np.mean(values) if values else float(\"nan\")\n",
"\n",
"    return {\n",
"        \"avg_bleu_score\": _mean_or_nan(bleu_scores),\n",
"        \"avg_rouge1\": _mean_or_nan(rouge1_scores),\n",
"        \"avg_rouge2\": _mean_or_nan(rouge2_scores),\n",
"        \"avg_rougel\": _mean_or_nan(rougel_scores),\n",
"    }\n",
"\n",
"\n",
"# Reload the saved model and the tokenizer dump from disk.\n",
"# NOTE(review): neither `loaded_model` nor `tokenizer_data` is referenced again\n",
"# in this cell; `predict_question` calls the global `model` instead.\n",
"loaded_model = load_model(\"question_prediction_model_final.h5\")\n",
"\n",
"with open(\"question_prediction_tokenizers.json\", \"r\") as f:\n",
"    tokenizer_data = json.load(f)\n",
"\n",
"# Draw one random position in the test split and pull that sample's fields.\n",
"sample_idx = random.randint(0, len(test_indices) - 1)\n",
"(\n",
"    sample_context,\n",
"    sample_tokens,\n",
"    sample_ner,\n",
"    sample_srl,\n",
"    sample_q_type,\n",
") = (\n",
"    column[test_indices[sample_idx]]\n",
"    for column in (contexts, tokens_list, ner_list, srl_list, q_types)\n",
")\n",
"\n",
"# Score the whole test split, then report every average as a percentage.\n",
"performance_metrics = evaluate_model_performance(test_indices)\n",
"\n",
"print(\n",
"    \"\\n\".join(\n",
"        (\n",
"            \"\\nModel Performance Metrics:\",\n",
"            f\"Average BLEU Score: {performance_metrics['avg_bleu_score'] * 100:.2f}%\",\n",
"            f\"Average ROUGE-1 Score: {performance_metrics['avg_rouge1'] * 100:.2f}%\",\n",
"            f\"Average ROUGE-2 Score: {performance_metrics['avg_rouge2'] * 100:.2f}%\",\n",
"            f\"Average ROUGE-L Score: {performance_metrics['avg_rougel'] * 100:.2f}%\",\n",
"        )\n",
"    )\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}