TIF_E41211115_lstm-quiz-gen.../question_generation/question_generation_model.i...

957 lines
138 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "02cbdb19",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import json\n",
"import random\n",
"import tensorflow as tf\n",
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from tensorflow.keras.models import Model, load_model\n",
"from tensorflow.keras.layers import (\n",
" Input,\n",
" LSTM,\n",
" Dense,\n",
" Embedding,\n",
" Bidirectional,\n",
" Concatenate,\n",
" Dropout,\n",
")\n",
"from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping\n",
"from sklearn.model_selection import train_test_split\n",
"import matplotlib.pyplot as plt\n",
"import re\n",
"from rouge_score import rouge_scorer\n",
"from nltk.translate.bleu_score import sentence_bleu\n"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "f9c0af74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total context 661\n",
"total question 1212\n"
]
}
],
"source": [
"# Load the training data. JSON text is UTF-8, so the encoding is pinned\n",
"# explicitly instead of depending on the platform's default codec.\n",
"with open(\"../dataset/stable_qg_qa_train_dataset.json\", \"r\", encoding=\"utf-8\") as f:\n",
"    data = json.load(f)\n",
"\n",
"\n",
"# Basic text normalisation\n",
"def preprocess_text(text):\n",
"    \"\"\"Lower-case ``text`` and collapse each whitespace run into one space.\n",
"\n",
"    Leading and trailing whitespace is removed from the result.\n",
"    \"\"\"\n",
"    collapsed = re.sub(r\"\\s+\", \" \", text.lower())\n",
"    return collapsed.strip()\n",
"\n",
"\n",
"# Build the parallel sample lists for the question-prediction model\n",
"def prepare_question_prediction_data(data):\n",
"    \"\"\"Flatten the dataset into one sample per non-blank question.\n",
"\n",
"    Args:\n",
"        data: sized iterable of dicts with the keys ``context``, ``tokens``,\n",
"            ``ner``, ``srl`` and ``qas``; every entry of ``qas`` must provide\n",
"            ``question`` and ``type``.\n",
"\n",
"    Returns:\n",
"        Tuple ``(contexts, tokens_list, ner_list, srl_list, questions,\n",
"        q_types)`` of equally long lists. Context, token and question text is\n",
"        passed through ``preprocess_text``; NER/SRL tags and question types\n",
"        are stored unchanged. Answers are deliberately not collected.\n",
"    \"\"\"\n",
"    contexts = []\n",
"    tokens_list = []\n",
"    ner_list = []\n",
"    srl_list = []\n",
"    questions = []\n",
"    q_types = []\n",
"\n",
"    for item in data:\n",
"        # A blank question (empty or whitespace-only) gives the model no\n",
"        # target to learn, so it is dropped before any other work is done.\n",
"        usable_qas = [qa for qa in item[\"qas\"] if qa[\"question\"].strip()]\n",
"        if not usable_qas:\n",
"            continue\n",
"\n",
"        # Item-level text is the same for every question of this item:\n",
"        # normalise it once instead of once per question.\n",
"        context = preprocess_text(item[\"context\"])\n",
"        tokens = [preprocess_text(token) for token in item[\"tokens\"]]\n",
"\n",
"        for qa in usable_qas:\n",
"            contexts.append(context)\n",
"            # Shallow copy keeps each sample's token list independent.\n",
"            tokens_list.append(list(tokens))\n",
"            ner_list.append(item[\"ner\"])\n",
"            srl_list.append(item[\"srl\"])\n",
"            questions.append(preprocess_text(qa[\"question\"]))\n",
"            q_types.append(qa[\"type\"])\n",
"\n",
"    print(\"total context \", len(data))\n",
"    print(\"total question \", len(questions))\n",
"    return contexts, tokens_list, ner_list, srl_list, questions, q_types\n",
"\n",
"\n",
"# Build the six parallel sample lists from the raw dataset\n",
"(\n",
"    contexts,\n",
"    tokens_list,\n",
"    ner_list,\n",
"    srl_list,\n",
"    questions,\n",
"    q_types,\n",
") = prepare_question_prediction_data(data)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "952f71da",
"metadata": {},
"outputs": [],
"source": [
"# Word-level tokenizer shared by contexts, questions and token lists\n",
"max_vocab_size = 10000\n",
"tokenizer = Tokenizer(num_words=max_vocab_size, oov_token=\"<OOV>\")\n",
"all_texts = contexts + questions + [\" \".join(item) for item in tokens_list]\n",
"tokenizer.fit_on_texts(all_texts)\n",
"# With num_words set, texts_to_sequences only emits ids below num_words, so\n",
"# the embedding / softmax size never needs to exceed that cap.\n",
"vocab_size = min(max_vocab_size, len(tokenizer.word_index) + 1)\n",
"\n",
"# NER tag vocabulary. filters=\"\" switches off the Tokenizer's default\n",
"# punctuation filter, which would otherwise break a tag containing '-' or '_'\n",
"# into several ids and shift the following tags away from their tokens.\n",
"ner_tokenizer = Tokenizer(filters=\"\", oov_token=\"<OOV>\")\n",
"ner_tokenizer.fit_on_texts([\" \".join(ner) for ner in ner_list])\n",
"ner_vocab_size = len(ner_tokenizer.word_index) + 1\n",
"\n",
"# SRL tag vocabulary (same reasoning as for the NER tags)\n",
"srl_tokenizer = Tokenizer(filters=\"\", oov_token=\"<OOV>\")\n",
"srl_tokenizer.fit_on_texts([\" \".join(srl) for srl in srl_list])\n",
"srl_vocab_size = len(srl_tokenizer.word_index) + 1\n",
"\n",
"# Question-type vocabulary. The raw labels are looked up verbatim in\n",
"# word_index further down, so each label has to stay a single entry spelled\n",
"# exactly as in the data: no filtering, no lower-casing, and each label is fed\n",
"# as a one-element token list so it is never split.\n",
"q_type_tokenizer = Tokenizer(filters=\"\", lower=False)\n",
"q_type_tokenizer.fit_on_texts([[q_type] for q_type in q_types])\n",
"q_type_vocab_size = len(q_type_tokenizer.word_index) + 1\n",
"\n",
"\n",
"# Convert the parallel token / NER / SRL lists into integer id sequences\n",
"def tokens_to_sequences(tokens, ner, srl):\n",
"    \"\"\"Encode token, NER-tag and SRL-tag lists with their fitted tokenizers.\n",
"\n",
"    Every inner list is joined with single spaces and encoded by the matching\n",
"    module-level tokenizer. Returns ``(token_seqs, ner_seqs, srl_seqs)``, each\n",
"    holding one id list per input list.\n",
"\n",
"    NOTE(review): the joined string is tokenized again, so anything the\n",
"    tokenizer filters out or splits changes the sequence length -- confirm\n",
"    that the three outputs stay position-aligned for the dataset in use.\n",
"    \"\"\"\n",
"\n",
"    def encode(fitted_tokenizer, rows):\n",
"        return fitted_tokenizer.texts_to_sequences([\" \".join(row) for row in rows])\n",
"\n",
"    return (\n",
"        encode(tokenizer, tokens),\n",
"        encode(ner_tokenizer, ner),\n",
"        encode(srl_tokenizer, srl),\n",
"    )\n",
"\n",
"\n",
"# Integer id sequences\n",
"context_seqs = tokenizer.texts_to_sequences(contexts)\n",
"question_seqs = tokenizer.texts_to_sequences(questions)\n",
"token_seqs, ner_seqs, srl_seqs = tokens_to_sequences(tokens_list, ner_list, srl_list)\n",
"\n",
"# Maximum lengths used for padding. Tokens, NER tags and SRL tags are all\n",
"# padded to max_token_len, so it is taken from the longest sequence of the\n",
"# three; sizing it from the tokens alone would let pad_sequences silently cut\n",
"# any longer tag sequence.\n",
"max_context_len = max(len(seq) for seq in context_seqs)\n",
"max_question_len = max(len(seq) for seq in question_seqs)\n",
"max_token_len = max(len(seq) for seq in token_seqs + ner_seqs + srl_seqs)\n",
"\n",
"\n",
"# Pad every sequence family to its fixed model input length\n",
"def pad_all_sequences(context_seqs, token_seqs, ner_seqs, srl_seqs, question_seqs):\n",
"    \"\"\"Zero-pad all sequences on the right to their module-level max lengths.\n",
"\n",
"    Contexts use ``max_context_len``, questions ``max_question_len`` and the\n",
"    token / NER / SRL sequences share ``max_token_len``.\n",
"\n",
"    Over-long sequences are cut at the end as well: pad_sequences truncates\n",
"    from the front by default, which combined with right-padding would drop\n",
"    the start of the text and shift every remaining position. Sequences that\n",
"    already fit are unaffected by this setting.\n",
"\n",
"    Returns:\n",
"        Tuple ``(context_padded, token_padded, ner_padded, srl_padded,\n",
"        question_padded)`` of 2-D integer arrays.\n",
"    \"\"\"\n",
"\n",
"    def pad(seqs, length):\n",
"        return pad_sequences(seqs, maxlen=length, padding=\"post\", truncating=\"post\")\n",
"\n",
"    return (\n",
"        pad(context_seqs, max_context_len),\n",
"        pad(token_seqs, max_token_len),\n",
"        pad(ner_seqs, max_token_len),\n",
"        pad(srl_seqs, max_token_len),\n",
"        pad(question_seqs, max_question_len),\n",
"    )\n",
"\n",
"\n",
"# Encode each question type as its vocabulary index; a label that is missing\n",
"# from word_index falls back to index 0.\n",
"q_type_indices = np.array(\n",
"    [q_type_tokenizer.word_index.get(q_type, 0) for q_type in q_types]\n",
")\n",
"\n",
"# One-hot encode the question types\n",
"q_type_categorical = tf.keras.utils.to_categorical(\n",
"    q_type_indices,\n",
"    num_classes=q_type_vocab_size,\n",
")\n",
"\n",
"# Apply the padding to all five sequence families\n",
"(\n",
"    context_padded,\n",
"    token_padded,\n",
"    ner_padded,\n",
"    srl_padded,\n",
"    question_padded,\n",
") = pad_all_sequences(context_seqs, token_seqs, ner_seqs, srl_seqs, question_seqs)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "37ffc0e5",
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the samples as the test set (fixed seed for repeatability).\n",
"# NOTE(review): there is one sample per question, so questions that share a\n",
"# context can end up on both sides of the split -- confirm this is acceptable.\n",
"n_samples = len(context_padded)\n",
"indices = list(range(n_samples))\n",
"train_indices, test_indices = train_test_split(\n",
"    indices,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")\n",
"\n",
"\n",
"def get_subset(data, indices):\n",
"    \"\"\"Return the rows of ``data`` selected by ``indices`` as a new array.\n",
"\n",
"    Args:\n",
"        data: array-like whose first axis enumerates the samples.\n",
"        indices: sequence of integer row positions (may be empty).\n",
"\n",
"    Returns:\n",
"        NumPy array with one row per index, in the order given. The selection\n",
"        is a single vectorised indexing step, so an empty ``indices`` still\n",
"        yields an array with the row shape and dtype of ``data``.\n",
"    \"\"\"\n",
"    # Explicit integer dtype: an empty list would otherwise become a float\n",
"    # array, which NumPy rejects as an index.\n",
"    return np.asarray(data)[np.asarray(indices, dtype=np.intp)]\n",
"\n",
"\n",
"# Every padded array is sliced with the same index lists so that the inputs\n",
"# and targets of a sample stay paired.\n",
"feature_arrays = (\n",
"    context_padded,\n",
"    token_padded,\n",
"    ner_padded,\n",
"    srl_padded,\n",
"    q_type_categorical,\n",
"    question_padded,\n",
")\n",
"\n",
"# Train data\n",
"train_context, train_token, train_ner, train_srl, train_q_type, train_question = [\n",
"    get_subset(array, train_indices) for array in feature_arrays\n",
"]\n",
"\n",
"# Test data\n",
"test_context, test_token, test_ner, test_srl, test_q_type, test_question = [\n",
"    get_subset(array, test_indices) for array in feature_arrays\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "df580682",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_2\"</span>\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1mModel: \"functional_2\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃<span style=\"font-weight: bold\"> Connected to </span>┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
"│ context_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ text_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">100</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">180,100</span> │ context_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ token_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,700</span> │ ner_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,400</span> │ srl_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_4 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">234,496</span> │ text_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_features │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">200</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ text_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) │ │ │ ner_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"│ │ │ │ srl_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ context_attention │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ bidirectional_4[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Attention</span>) │ │ │ bidirectional_4[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_5 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">336,896</span> │ token_features[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ context_att_pool │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ context_attentio… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalMaxPooling1…</span> │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_pool │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ bidirectional_5[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalMaxPooling1…</span> │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ q_type_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">5</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ all_features │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">517</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ context_att_pool… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) │ │ │ token_pool[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>], │\n",
"│ │ │ │ q_type_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dense_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">512</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">265,216</span> │ all_features[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dropout_4 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">512</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ dense_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dense_2 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">131,328</span> │ dropout_4[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dropout_5 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ dense_2[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ decoder_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">65,792</span> │ dropout_5[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ repeat_vector_2 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">14</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ decoder_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">RepeatVector</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ decoder_lstm (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">14</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">525,312</span> │ repeat_vector_2[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ time_distributed_2 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">14</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1801</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">462,857</span> │ decoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">TimeDistributed</span>) │ │ │ │\n",
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
"</pre>\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
"│ context_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ text_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m100\u001b[0m) │ \u001b[38;5;34m180,100\u001b[0m │ context_input[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ token_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m1,700\u001b[0m │ ner_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m1,400\u001b[0m │ srl_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_4 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m234,496\u001b[0m │ text_embedding[\u001b[38;5;34m0\u001b[0m… │\n",
"│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_features │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m200\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ text_embedding[\u001b[38;5;34m1\u001b[0m… │\n",
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ ner_embedding[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ │ │ │ srl_embedding[\u001b[38;5;34m0\u001b[0m]… │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ context_attention │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ bidirectional_4[\u001b[38;5;34m…\u001b[0m │\n",
"│ (\u001b[38;5;33mAttention\u001b[0m) │ │ │ bidirectional_4[\u001b[38;5;34m…\u001b[0m │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_5 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m336,896\u001b[0m │ token_features[\u001b[38;5;34m0\u001b[0m… │\n",
"│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ context_att_pool │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ context_attentio… │\n",
"│ (\u001b[38;5;33mGlobalMaxPooling1…\u001b[0m │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ token_pool │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ bidirectional_5[\u001b[38;5;34m…\u001b[0m │\n",
"│ (\u001b[38;5;33mGlobalMaxPooling1…\u001b[0m │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ q_type_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m5\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ all_features │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m517\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ context_att_pool… │\n",
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ token_pool[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
"│ │ │ │ q_type_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m512\u001b[0m) │ \u001b[38;5;34m265,216\u001b[0m │ all_features[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dropout_4 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m512\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dense_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m131,328\u001b[0m │ dropout_4[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dropout_5 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dense_2[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ decoder_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m65,792\u001b[0m │ dropout_5[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mDense\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ repeat_vector_2 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m14\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ decoder_input[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ (\u001b[38;5;33mRepeatVector\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ decoder_lstm (\u001b[38;5;33mLSTM\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m14\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m525,312\u001b[0m │ repeat_vector_2[\u001b[38;5;34m…\u001b[0m │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ time_distributed_2 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m14\u001b[0m, \u001b[38;5;34m1801\u001b[0m) │ \u001b[38;5;34m462,857\u001b[0m │ decoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ │\n",
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">2,205,097</span> (8.41 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m2,205,097\u001b[0m (8.41 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">2,205,097</span> (8.41 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m2,205,097\u001b[0m (8.41 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Embedding widths: shared word embedding, then the NER and SRL tag embeddings\n",
"embedding_dim = 100\n",
"ner_embedding_dim = srl_embedding_dim = 50\n",
"# Units per LSTM direction in the encoders (the decoder LSTM uses twice as many)\n",
"lstm_units = 128\n",
"# Dropout rate applied after each dense layer\n",
"dropout_rate = 0.3\n",
"\n",
"\n",
"# Function untuk membuat model prediksi pertanyaan\n",
"def create_question_prediction_model():\n",
" # Input layers\n",
" context_input = Input(shape=(max_context_len,), name=\"context_input\")\n",
" token_input = Input(shape=(max_token_len,), name=\"token_input\")\n",
" ner_input = Input(shape=(max_token_len,), name=\"ner_input\")\n",
" srl_input = Input(shape=(max_token_len,), name=\"srl_input\")\n",
" q_type_input = Input(shape=(q_type_vocab_size,), name=\"q_type_input\")\n",
"\n",
" # Shared embedding layer for text\n",
" text_embedding = Embedding(vocab_size, embedding_dim, name=\"text_embedding\")\n",
"\n",
" # Embedding untuk NER dan SRL\n",
" ner_embedding = Embedding(ner_vocab_size, ner_embedding_dim, name=\"ner_embedding\")(\n",
" ner_input\n",
" )\n",
" srl_embedding = Embedding(srl_vocab_size, srl_embedding_dim, name=\"srl_embedding\")(\n",
" srl_input\n",
" )\n",
"\n",
" # Apply embeddings\n",
" context_embed = text_embedding(context_input)\n",
" token_embed = text_embedding(token_input)\n",
"\n",
" # Bi-directional LSTM untuk context dan token-level features\n",
" context_lstm = Bidirectional(\n",
" LSTM(lstm_units, return_sequences=True, name=\"context_lstm\")\n",
" )(context_embed)\n",
"\n",
" # Concat token features (tokens, NER, SRL)\n",
" token_features = Concatenate(name=\"token_features\")(\n",
" [token_embed, ner_embedding, srl_embedding]\n",
" )\n",
" token_lstm = Bidirectional(\n",
" LSTM(lstm_units, return_sequences=True, name=\"token_lstm\")\n",
" )(token_features)\n",
"\n",
" # Apply attention to context LSTM\n",
" context_attention = tf.keras.layers.Attention(name=\"context_attention\")(\n",
" [context_lstm, context_lstm]\n",
" )\n",
"\n",
" # Pool attention outputs\n",
" context_att_pool = tf.keras.layers.GlobalMaxPooling1D(name=\"context_att_pool\")(\n",
" context_attention\n",
" )\n",
" token_pool = tf.keras.layers.GlobalMaxPooling1D(name=\"token_pool\")(token_lstm)\n",
"\n",
" # Concat all features (tidak ada answer feature)\n",
" all_features = Concatenate(name=\"all_features\")(\n",
" [context_att_pool, token_pool, q_type_input]\n",
" )\n",
"\n",
" # Dense layers with expanded capacity for sequence generation\n",
" x = Dense(512, activation=\"relu\", name=\"dense_1\")(all_features)\n",
" x = Dropout(dropout_rate)(x)\n",
" x = Dense(256, activation=\"relu\", name=\"dense_2\")(x)\n",
" x = Dropout(dropout_rate)(x)\n",
"\n",
" # Reshape untuk sequence decoder\n",
" decoder_dense = Dense(vocab_size, activation=\"softmax\", name=\"decoder_dense\")\n",
"\n",
" # Many-to-many architecture for sequence generation\n",
" # Decoder LSTM\n",
" decoder_lstm = LSTM(lstm_units * 2, return_sequences=True, name=\"decoder_lstm\")\n",
"\n",
" # Reshape untuk input ke decoder\n",
" decoder_input = Dense(lstm_units * 2, activation=\"relu\", name=\"decoder_input\")(x)\n",
"\n",
" # Decoder sequence with teacher forcing\n",
" # Expand dimensionality to match expected sequence length\n",
" repeated_vector = tf.keras.layers.RepeatVector(max_question_len)(decoder_input)\n",
"\n",
" # Process through decoder LSTM\n",
" decoder_outputs = decoder_lstm(repeated_vector)\n",
"\n",
" # Apply dense layer to each timestep\n",
" question_output_seq = tf.keras.layers.TimeDistributed(decoder_dense)(\n",
" decoder_outputs\n",
" )\n",
"\n",
" # Create model\n",
" model = Model(\n",
" inputs=[\n",
" context_input,\n",
" token_input,\n",
" ner_input,\n",
" srl_input,\n",
" q_type_input,\n",
" ],\n",
" outputs=question_output_seq,\n",
" )\n",
"\n",
" # Compile model with categorical crossentropy for sequence prediction\n",
" model.compile(\n",
" optimizer=\"adam\", loss=\"sparse_categorical_crossentropy\", metrics=[\"accuracy\"]\n",
" )\n",
"\n",
" return model\n",
"\n",
"\n",
"# Buat model\n",
"model = create_question_prediction_model()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "6ba404db",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 35ms/step - accuracy: 0.5274 - loss: 4.3786 - val_accuracy: 0.5680 - val_loss: 3.4016\n",
"Epoch 2/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 33ms/step - accuracy: 0.5615 - loss: 3.3640 - val_accuracy: 0.5680 - val_loss: 3.3180\n",
"Epoch 3/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 35ms/step - accuracy: 0.5558 - loss: 3.1577 - val_accuracy: 0.5770 - val_loss: 2.9719\n",
"Epoch 4/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 31ms/step - accuracy: 0.5658 - loss: 2.9229 - val_accuracy: 0.5833 - val_loss: 2.8770\n",
"Epoch 5/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 32ms/step - accuracy: 0.5684 - loss: 2.7838 - val_accuracy: 0.5883 - val_loss: 2.8030\n",
"Epoch 6/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 32ms/step - accuracy: 0.5678 - loss: 2.7217 - val_accuracy: 0.5889 - val_loss: 2.7586\n",
"Epoch 7/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 32ms/step - accuracy: 0.5832 - loss: 2.5477 - val_accuracy: 0.5920 - val_loss: 2.7165\n",
"Epoch 8/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 32ms/step - accuracy: 0.5817 - loss: 2.5231 - val_accuracy: 0.5979 - val_loss: 2.6560\n",
"Epoch 9/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 34ms/step - accuracy: 0.5866 - loss: 2.4224 - val_accuracy: 0.5886 - val_loss: 2.6570\n",
"Epoch 10/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 34ms/step - accuracy: 0.5896 - loss: 2.3792 - val_accuracy: 0.5998 - val_loss: 2.5975\n",
"Epoch 11/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 33ms/step - accuracy: 0.5894 - loss: 2.3246 - val_accuracy: 0.5998 - val_loss: 2.5801\n",
"Epoch 12/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 33ms/step - accuracy: 0.5967 - loss: 2.2601 - val_accuracy: 0.6039 - val_loss: 2.5552\n",
"Epoch 13/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 35ms/step - accuracy: 0.5894 - loss: 2.2631 - val_accuracy: 0.6042 - val_loss: 2.5384\n",
"Epoch 14/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 35ms/step - accuracy: 0.6062 - loss: 2.1692 - val_accuracy: 0.6104 - val_loss: 2.5017\n",
"Epoch 15/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 35ms/step - accuracy: 0.6087 - loss: 2.1076 - val_accuracy: 0.6135 - val_loss: 2.4925\n",
"Epoch 16/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 36ms/step - accuracy: 0.6048 - loss: 2.0866 - val_accuracy: 0.6098 - val_loss: 2.4662\n",
"Epoch 17/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 36ms/step - accuracy: 0.6097 - loss: 2.0495 - val_accuracy: 0.6123 - val_loss: 2.4682\n",
"Epoch 18/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 37ms/step - accuracy: 0.6120 - loss: 1.9992 - val_accuracy: 0.6126 - val_loss: 2.4572\n",
"Epoch 19/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 39ms/step - accuracy: 0.6111 - loss: 1.9984 - val_accuracy: 0.6188 - val_loss: 2.4301\n",
"Epoch 20/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 49ms/step - accuracy: 0.6121 - loss: 1.9544 - val_accuracy: 0.6138 - val_loss: 2.4277\n",
"Epoch 21/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 56ms/step - accuracy: 0.6210 - loss: 1.8891 - val_accuracy: 0.6138 - val_loss: 2.4257\n",
"Epoch 22/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 45ms/step - accuracy: 0.6174 - loss: 1.8878 - val_accuracy: 0.6120 - val_loss: 2.4424\n",
"Epoch 23/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 42ms/step - accuracy: 0.6179 - loss: 1.8573 - val_accuracy: 0.6216 - val_loss: 2.4079\n",
"Epoch 24/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 41ms/step - accuracy: 0.6195 - loss: 1.8307 - val_accuracy: 0.6176 - val_loss: 2.4069\n",
"Epoch 25/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 43ms/step - accuracy: 0.6215 - loss: 1.8256 - val_accuracy: 0.6185 - val_loss: 2.4048\n",
"Epoch 26/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 47ms/step - accuracy: 0.6215 - loss: 1.7727 - val_accuracy: 0.6198 - val_loss: 2.4082\n",
"Epoch 27/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 51ms/step - accuracy: 0.6348 - loss: 1.7045 - val_accuracy: 0.6176 - val_loss: 2.4146\n",
"Epoch 28/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 40ms/step - accuracy: 0.6256 - loss: 1.7402 - val_accuracy: 0.6232 - val_loss: 2.4063\n",
"Epoch 29/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 38ms/step - accuracy: 0.6301 - loss: 1.6912 - val_accuracy: 0.6201 - val_loss: 2.3900\n",
"Epoch 30/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 38ms/step - accuracy: 0.6426 - loss: 1.5917 - val_accuracy: 0.6210 - val_loss: 2.3955\n",
"Epoch 31/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 42ms/step - accuracy: 0.6291 - loss: 1.6419 - val_accuracy: 0.6229 - val_loss: 2.3927\n",
"Epoch 32/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 48ms/step - accuracy: 0.6394 - loss: 1.6025 - val_accuracy: 0.6235 - val_loss: 2.3868\n",
"Epoch 33/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 44ms/step - accuracy: 0.6279 - loss: 1.6680 - val_accuracy: 0.6198 - val_loss: 2.3958\n",
"Epoch 34/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 35ms/step - accuracy: 0.6429 - loss: 1.5414 - val_accuracy: 0.6188 - val_loss: 2.3630\n",
"Epoch 35/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 36ms/step - accuracy: 0.6388 - loss: 1.5534 - val_accuracy: 0.6229 - val_loss: 2.3805\n",
"Epoch 36/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 38ms/step - accuracy: 0.6534 - loss: 1.4814 - val_accuracy: 0.6279 - val_loss: 2.3940\n",
"Epoch 37/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 40ms/step - accuracy: 0.6500 - loss: 1.4601 - val_accuracy: 0.6241 - val_loss: 2.3974\n",
"Epoch 38/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 47ms/step - accuracy: 0.6500 - loss: 1.4935 - val_accuracy: 0.6304 - val_loss: 2.3958\n",
"Epoch 39/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 52ms/step - accuracy: 0.6485 - loss: 1.4417 - val_accuracy: 0.6269 - val_loss: 2.4015\n",
"Epoch 40/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 41ms/step - accuracy: 0.6451 - loss: 1.4721 - val_accuracy: 0.6251 - val_loss: 2.3890\n",
"Epoch 41/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 39ms/step - accuracy: 0.6618 - loss: 1.3730 - val_accuracy: 0.6220 - val_loss: 2.4143\n",
"Epoch 42/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 39ms/step - accuracy: 0.6575 - loss: 1.3942 - val_accuracy: 0.6298 - val_loss: 2.3959\n",
"Epoch 43/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 39ms/step - accuracy: 0.6506 - loss: 1.4094 - val_accuracy: 0.6269 - val_loss: 2.4056\n",
"Epoch 44/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 41ms/step - accuracy: 0.6509 - loss: 1.3967 - val_accuracy: 0.6216 - val_loss: 2.4178\n",
"Epoch 45/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 46ms/step - accuracy: 0.6629 - loss: 1.3460 - val_accuracy: 0.6344 - val_loss: 2.3807\n",
"Epoch 46/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 36ms/step - accuracy: 0.6677 - loss: 1.3198 - val_accuracy: 0.6269 - val_loss: 2.3913\n",
"Epoch 47/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 35ms/step - accuracy: 0.6653 - loss: 1.3295 - val_accuracy: 0.6298 - val_loss: 2.3925\n",
"Epoch 48/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 38ms/step - accuracy: 0.6637 - loss: 1.3107 - val_accuracy: 0.6298 - val_loss: 2.3655\n",
"Epoch 49/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 36ms/step - accuracy: 0.6593 - loss: 1.3313 - val_accuracy: 0.6369 - val_loss: 2.3792\n",
"Epoch 50/50\n",
"\u001b[1m115/115\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 43ms/step - accuracy: 0.6654 - loss: 1.3019 - val_accuracy: 0.6248 - val_loss: 2.4192\n"
]
}
],
"source": [
"checkpoint = ModelCheckpoint(\n",
" \"question_prediction_model.h5\",\n",
" monitor=\"val_accuracy\",\n",
" save_best_only=True,\n",
" verbose=1,\n",
")\n",
"\n",
"early_stop = EarlyStopping(monitor=\"val_accuracy\", patience=10, verbose=1)\n",
"\n",
"# Reshaping question data for sequence-to-sequence training\n",
"# We need to reshape to (samples, max_question_len, 1) for sparse categorical crossentropy\n",
"train_question_target = np.expand_dims(train_question, -1)\n",
"test_question_target = np.expand_dims(test_question, -1)\n",
"\n",
"# Training parameters\n",
"batch_size = 8\n",
"epochs = 50\n",
"\n",
"# Train model\n",
"history = model.fit(\n",
" [train_context, train_token, train_ner, train_srl, train_q_type],\n",
" train_question_target,\n",
" batch_size=batch_size,\n",
" epochs=epochs,\n",
" validation_data=(\n",
" [test_context, test_token, test_ner, test_srl, test_q_type],\n",
" test_question_target,\n",
" ),\n",
" callbacks=[\n",
" # checkpoint,\n",
" early_stop,\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "184209bc",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1200x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
}
],
"source": [
"# Plot training history\n",
"plt.figure(figsize=(12, 4))\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(history.history[\"accuracy\"])\n",
"plt.plot(history.history[\"val_accuracy\"])\n",
"plt.title(\"Model Accuracy\")\n",
"plt.ylabel(\"Accuracy\")\n",
"plt.xlabel(\"Epoch\")\n",
"plt.legend([\"Train\", \"Validation\"], loc=\"upper left\")\n",
"\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(history.history[\"loss\"])\n",
"plt.plot(history.history[\"val_loss\"])\n",
"plt.title(\"Model Loss\")\n",
"plt.ylabel(\"Loss\")\n",
"plt.xlabel(\"Epoch\")\n",
"plt.legend([\"Train\", \"Validation\"], loc=\"upper left\")\n",
"plt.tight_layout()\n",
"plt.savefig(\"question_prediction_training_history.png\")\n",
"plt.show()\n",
"\n",
"# Simpan model dan tokenizer\n",
"model.save(\"question_prediction_model_final.h5\")\n",
"\n",
"# Simpan tokenizer\n",
"tokenizer_data = {\n",
" \"word_tokenizer\": tokenizer.to_json(),\n",
" \"ner_tokenizer\": ner_tokenizer.to_json(),\n",
" \"srl_tokenizer\": srl_tokenizer.to_json(),\n",
" \"q_type_tokenizer\": q_type_tokenizer.to_json(),\n",
" \"max_context_len\": max_context_len,\n",
" \"max_question_len\": max_question_len,\n",
" \"max_token_len\": max_token_len,\n",
"}\n",
"\n",
"with open(\"question_prediction_tokenizers.json\", \"w\") as f:\n",
" json.dump(tokenizer_data, f)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "71ec455a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hasil disimpan di: bleu_calculation.xlsx\n",
"\n",
"Model Performance Metrics:\n",
"Average BLEU Score: 2.04%\n",
"Average ROUGE-1 Score: 26.22%\n",
"Average ROUGE-2 Score: 9.07%\n",
"Average ROUGE-L Score: 25.70%\n"
]
}
],
"source": [
"from collections import Counter\n",
"import pandas as pd\n",
"# Fungsi untuk memprediksi pertanyaan\n",
"def predict_question(context, tokens, ner, srl, q_type):\n",
" context = preprocess_text(context)\n",
"\n",
" context_seq = tokenizer.texts_to_sequences([context])[0]\n",
" token_seq = tokenizer.texts_to_sequences([\" \".join(tokens)])[0]\n",
" ner_seq = ner_tokenizer.texts_to_sequences([\" \".join(ner)])[0]\n",
" srl_seq = srl_tokenizer.texts_to_sequences([\" \".join(srl)])[0]\n",
"\n",
" context_padded = pad_sequences(\n",
" [context_seq], maxlen=max_context_len, padding=\"post\"\n",
" )\n",
" token_padded = pad_sequences([token_seq], maxlen=max_token_len, padding=\"post\")\n",
" ner_padded = pad_sequences([ner_seq], maxlen=max_token_len, padding=\"post\")\n",
" srl_padded = pad_sequences([srl_seq], maxlen=max_token_len, padding=\"post\")\n",
"\n",
" # Q-type one-hot encoding\n",
" q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)\n",
" q_type_one_hot = tf.keras.utils.to_categorical(\n",
" [q_type_idx], num_classes=q_type_vocab_size\n",
" )\n",
"\n",
" # Predict\n",
" pred = model.predict(\n",
" [context_padded, token_padded, ner_padded, srl_padded, q_type_one_hot],\n",
" verbose=0,\n",
" )\n",
"\n",
" # Convert prediction to words\n",
" pred_seq = np.argmax(pred[0], axis=1)\n",
"\n",
" # Convert indices to words\n",
" reverse_word_map = {v: k for k, v in tokenizer.word_index.items()}\n",
" pred_words = [reverse_word_map.get(i, \"\") for i in pred_seq if i != 0]\n",
"\n",
" return \" \".join(pred_words)\n",
"\n",
"\n",
"\n",
"\n",
"def evaluate_model_performance(test_data):\n",
" output_path = \"bleu_calculation.xlsx\"\n",
" scorer = rouge_scorer.RougeScorer([\"rouge1\", \"rouge2\", \"rougeL\"], use_stemmer=True)\n",
"\n",
" bleu_scores = []\n",
" rouge1_scores = []\n",
" rouge2_scores = []\n",
" rougel_scores = []\n",
"\n",
" rows = []\n",
"\n",
" for i in range(len(test_data)):\n",
" sample_context = contexts[test_data[i]]\n",
" sample_tokens = tokens_list[test_data[i]]\n",
" sample_ner = ner_list[test_data[i]]\n",
" sample_srl = srl_list[test_data[i]]\n",
" sample_q_type = q_types[test_data[i]]\n",
" actual_question = questions[test_data[i]]\n",
"\n",
" pred_question = predict_question(\n",
" sample_context, sample_tokens, sample_ner, sample_srl, sample_q_type\n",
" )\n",
"\n",
" actual_tokens = actual_question.split()\n",
" pred_tokens = pred_question.split()\n",
"\n",
" max_n = 4\n",
" weights = [1 / max_n] * max_n\n",
" clipped_counts = []\n",
" total_counts = []\n",
" precisions = []\n",
"\n",
" log_text = f\"Sample {i+1}:\\n\"\n",
" log_text += f\"Actual Tokens: {actual_tokens}\\n\"\n",
" log_text += f\"Predicted Tokens: {pred_tokens}\\n\"\n",
"\n",
" for n in range(1, max_n + 1):\n",
" ref_ngrams = Counter(tuple(actual_tokens[j:j + n]) for j in range(len(actual_tokens) - n + 1))\n",
" cand_ngrams = Counter(tuple(pred_tokens[j:j + n]) for j in range(len(pred_tokens) - n + 1))\n",
"\n",
" clip_sum = sum(min(cnt, ref_ngrams.get(ng, 0)) for ng, cnt in cand_ngrams.items())\n",
" total = sum(cand_ngrams.values())\n",
" p_n = clip_sum / total if total > 0 else 0\n",
"\n",
" clipped_counts.append(clip_sum)\n",
" total_counts.append(total)\n",
" precisions.append(p_n)\n",
"\n",
" log_text += f\"{n}-gram: clipped count = {clip_sum}, total candidate = {total}, precision = {p_n:.4f}\\n\"\n",
"\n",
" c = len(pred_tokens)\n",
" r = len(actual_tokens)\n",
" bp = 1 if c > r else np.exp(1 - r / c)\n",
" log_text += f\"Brevity Penalty: BP = {bp:.4f} (c={c}, r={r})\\n\"\n",
"\n",
" if all(p > 0 for p in precisions):\n",
" bleu = bp * np.exp(sum(w * np.log(p) for w, p in zip(weights, precisions)))\n",
" else:\n",
" bleu = 0.0\n",
"\n",
" log_text += f\"BLEU score = {bleu:.4f}\\n\"\n",
"\n",
" bleu_scores.append(bleu)\n",
" rows.append({\"Result\": log_text})\n",
"\n",
" try:\n",
" rouge_scores = scorer.score(actual_question, pred_question)\n",
" rouge1_scores.append(rouge_scores[\"rouge1\"].fmeasure)\n",
" rouge2_scores.append(rouge_scores[\"rouge2\"].fmeasure)\n",
" rougel_scores.append(rouge_scores[\"rougeL\"].fmeasure)\n",
" except Exception as e:\n",
" print(f\"Error calculating ROUGE score: {e}\")\n",
"\n",
" df = pd.DataFrame(rows)\n",
" df.to_excel(output_path, index=False)\n",
" print(f\"Hasil disimpan di: {output_path}\")\n",
"\n",
" results = {\n",
" \"avg_bleu_score\": np.mean(bleu_scores),\n",
" \"avg_rouge1\": np.mean(rouge1_scores),\n",
" \"avg_rouge2\": np.mean(rouge2_scores),\n",
" \"avg_rougel\": np.mean(rougel_scores),\n",
" }\n",
"\n",
" return results\n",
"\n",
"\n",
"loaded_model = load_model(\"question_prediction_model_final.h5\")\n",
"\n",
"with open(\"question_prediction_tokenizers.json\", \"r\") as f:\n",
" tokenizer_data = json.load(f)\n",
"\n",
"# Ambil beberapa sampel dari data test\n",
"# sample_idx = random.randint(0, len(test_indices) - 1)\n",
"# sample_context = contexts[test_indices[sample_idx]]\n",
"# sample_tokens = tokens_list[test_indices[sample_idx]]\n",
"# sample_ner = ner_list[test_indices[sample_idx]]\n",
"# sample_srl = srl_list[test_indices[sample_idx]]\n",
"# sample_q_type = q_types[test_indices[sample_idx]]\n",
"\n",
"performance_metrics = evaluate_model_performance(test_indices)\n",
"\n",
"print(\"\\nModel Performance Metrics:\")\n",
"print(f\"Average BLEU Score: {performance_metrics['avg_bleu_score'] * 100:.2f}%\")\n",
"print(f\"Average ROUGE-1 Score: {performance_metrics['avg_rouge1'] * 100:.2f}%\")\n",
"print(f\"Average ROUGE-2 Score: {performance_metrics['avg_rouge2'] * 100:.2f}%\")\n",
"print(f\"Average ROUGE-L Score: {performance_metrics['avg_rougel'] * 100:.2f}%\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}