{ "cells": [ { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025-02-05 01:57:25.675154: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" ] }, { "data": { "text/html": [ "
Model: \"functional\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"functional\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)         Output Shape          Param #  Connected to      ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
       "│ encoder_inputs      │ (None, None)      │          0 │ -                 │\n",
       "│ (InputLayer)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_inputs      │ (None, None)      │          0 │ -                 │\n",
       "│ (InputLayer)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ embedding           │ (None, None, 128) │      1,280 │ encoder_inputs[0… │\n",
       "│ (Embedding)         │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ not_equal           │ (None, None)      │          0 │ encoder_inputs[0… │\n",
       "│ (NotEqual)          │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_embedding   │ (None, None, 128) │      1,024 │ decoder_inputs[0… │\n",
       "│ (Embedding)         │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ encoder_lstm (LSTM) │ [(None, 256),     │    394,240 │ embedding[0][0],  │\n",
       "│                     │ (None, 256),      │            │ not_equal[0][0]   │\n",
       "│                     │ (None, 256)]      │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_lstm (LSTM) │ [(None, None,     │    394,240 │ decoder_embeddin… │\n",
       "│                     │ 256), (None,      │            │ encoder_lstm[0][ │\n",
       "│                     │ 256), (None,      │            │ encoder_lstm[0][ │\n",
       "│                     │ 256)]             │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_dense       │ (None, None, 8)   │      2,056 │ decoder_lstm[0][ │\n",
       "│ (Dense)             │                   │            │                   │\n",
       "└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n", "│ encoder_inputs │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n", "│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ decoder_inputs │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n", "│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m1,280\u001b[0m │ encoder_inputs[\u001b[38;5;34m0\u001b[0m… │\n", "│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ not_equal │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ encoder_inputs[\u001b[38;5;34m0\u001b[0m… │\n", "│ (\u001b[38;5;33mNotEqual\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ decoder_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m1,024\u001b[0m │ decoder_inputs[\u001b[38;5;34m0\u001b[0m… │\n", "│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ encoder_lstm (\u001b[38;5;33mLSTM\u001b[0m) │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ \u001b[38;5;34m394,240\u001b[0m │ embedding[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n", "│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ │ not_equal[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n", "│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)] │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ decoder_lstm (\u001b[38;5;33mLSTM\u001b[0m) │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, │ \u001b[38;5;34m394,240\u001b[0m │ decoder_embeddin… │\n", "│ │ \u001b[38;5;34m256\u001b[0m), (\u001b[38;5;45mNone\u001b[0m, │ │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n", "│ │ \u001b[38;5;34m256\u001b[0m), (\u001b[38;5;45mNone\u001b[0m, │ │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n", "│ │ \u001b[38;5;34m256\u001b[0m)] │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ decoder_dense │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m) │ \u001b[38;5;34m2,056\u001b[0m │ decoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n", "│ (\u001b[38;5;33mDense\u001b[0m) │ │ │ │\n", "└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 792,840 (3.02 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m792,840\u001b[0m (3.02 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 792,840 (3.02 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m792,840\u001b[0m (3.02 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "None\n", "Epoch 1/10\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025-02-05 01:57:27.530017: E tensorflow/core/util/util.cc:131] oneDNN supports DT_BOOL only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.\n", "2025-02-05 01:57:27.593630: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence\n", "\t [[{{node IteratorGetNext}}]]\n", "/mnt/disc1/code/lstm-quiz/.venv/lib64/python3.10/site-packages/keras/src/trainers/epoch_iterator.py:151: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.\n", " self._interrupted_warning()\n" ] }, { "ename": "ValueError", "evalue": "math domain error", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[6], line 118\u001b[0m\n\u001b[1;32m 113\u001b[0m target_val \u001b[38;5;241m=\u001b[39m decoder_target_data[split_index:]\n\u001b[1;32m 115\u001b[0m \u001b[38;5;66;03m# ==========================\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;66;03m# 6) Fit the Model\u001b[39;00m\n\u001b[1;32m 117\u001b[0m \u001b[38;5;66;03m# ==========================\u001b[39;00m\n\u001b[0;32m--> 118\u001b[0m history \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mencoder_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecoder_train\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 120\u001b[0m \u001b[43m \u001b[49m\u001b[43mtarget_train\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 121\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m32\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 122\u001b[0m \u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 123\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mencoder_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecoder_val\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_val\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 124\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;66;03m# The accuracy reported is \"sparse_categorical_accuracy\" at the token level.\u001b[39;00m\n\u001b[1;32m 127\u001b[0m \n\u001b[1;32m 128\u001b[0m \u001b[38;5;66;03m# ==========================\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;66;03m# 7) Evaluate the Model\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;66;03m# ==========================\u001b[39;00m\n\u001b[1;32m 131\u001b[0m \u001b[38;5;66;03m# If you want a quick evaluation on the validation set:\u001b[39;00m\n\u001b[1;32m 132\u001b[0m val_loss, val_accuracy \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mevaluate([encoder_val, decoder_val], target_val)\n", "File \u001b[0;32m/mnt/disc1/code/lstm-quiz/.venv/lib64/python3.10/site-packages/keras/src/utils/traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 120\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", "File \u001b[0;32m/mnt/disc1/code/lstm-quiz/.venv/lib64/python3.10/site-packages/keras/src/utils/progbar.py:119\u001b[0m, in \u001b[0;36mProgbar.update\u001b[0;34m(self, current, values, finalize)\u001b[0m\n\u001b[1;32m 116\u001b[0m message \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 119\u001b[0m numdigits \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(\u001b[43mmath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog10\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget\u001b[49m\u001b[43m)\u001b[49m) \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 120\u001b[0m bar \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(numdigits) \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124md/\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m%\u001b[39m (current, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget)\n\u001b[1;32m 121\u001b[0m bar \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\x1b\u001b[39;00m\u001b[38;5;124m[1m\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbar\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\x1b\u001b[39;00m\u001b[38;5;124m[0m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n", "\u001b[0;31mValueError\u001b[0m: math domain error" ] } ], "source": [ "# ==========================\n", "# 1) Install/Import Dependencies\n", "# ==========================\n", "# If you are in a brand new environment, uncomment the following line:\n", "# %pip install tensorflow pandas\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import tensorflow as tf\n", "from tensorflow.keras.preprocessing.text import Tokenizer\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from tensorflow.keras.layers import Input, LSTM, Embedding, Dense\n", "from tensorflow.keras.models import Model\n", "\n", "# ==========================\n", "# 2) Load Dataset (CSV)\n", "# ==========================\n", "# Adjust the file path to your CSV file\n", "df = pd.read_csv(\"quiz_questions.csv\")\n", "\n", "# Extract the paragraphs and questions\n", "paragraphs = df['paragraph'].astype(str).tolist()\n", "questions = df['question'].astype(str).tolist()\n", "\n", "# (Optional) For demonstration, let's ignore question_type, answer, distractors in this example\n", "# but you can incorporate them as extra signals if you wish.\n", "\n", "# ==========================\n", "# 3) Tokenize Text\n", "# ==========================\n", "# Create two tokenizers: one for paragraphs, one for questions\n", "num_words = 10000 # Maximum vocabulary size\n", "\n", "tokenizer_paragraph = Tokenizer(num_words=num_words, oov_token=\"\")\n", "tokenizer_paragraph.fit_on_texts(paragraphs)\n", "paragraph_sequences = tokenizer_paragraph.texts_to_sequences(paragraphs)\n", "\n", "tokenizer_question = Tokenizer(num_words=num_words, oov_token=\"\")\n", "tokenizer_question.fit_on_texts(questions)\n", "question_sequences = tokenizer_question.texts_to_sequences(questions)\n", "\n", "# Get max lengths (for padding)\n", "max_paragraph_len = max(len(seq) for seq in paragraph_sequences)\n", "max_question_len = max(len(seq) for seq in question_sequences)\n", "\n", "# Pad sequences\n", "encoder_input_data = pad_sequences(paragraph_sequences, maxlen=max_paragraph_len, padding='post')\n", "# For decoder data, we usually do teacher forcing:\n", "# We'll keep one version as input, one version shifted as the target\n", "decoder_input_data_full = pad_sequences(question_sequences, maxlen=max_question_len, padding='post')\n", "\n", "# We create decoder_target_data by shifting to the left by 1 token\n", "decoder_target_data = np.copy(decoder_input_data_full[:, 1:])\n", "decoder_input_data = np.copy(decoder_input_data_full[:, :-1])\n", "\n", "# Expand target dimension for sparse_categorical_crossentropy\n", "decoder_target_data = np.expand_dims(decoder_target_data, -1)\n", "\n", "# Calculate vocab sizes\n", "vocab_size_paragraph = min(len(tokenizer_paragraph.word_index) + 1, num_words)\n", "vocab_size_question = min(len(tokenizer_question.word_index) + 1, num_words)\n", "\n", "# ==========================\n", "# 4) Build Seq2Seq Model\n", "# ==========================\n", "embedding_dim = 128\n", "latent_dim = 256 # LSTM hidden dimension\n", "\n", "# ----- Encoder -----\n", "encoder_inputs = Input(shape=(None,), name=\"encoder_inputs\")\n", "encoder_embedding = Embedding(input_dim=vocab_size_paragraph,\n", " output_dim=embedding_dim,\n", " mask_zero=True)(encoder_inputs)\n", "\n", "encoder_lstm = LSTM(latent_dim, return_state=True, name=\"encoder_lstm\")\n", "_, state_h, state_c = encoder_lstm(encoder_embedding)\n", "\n", "encoder_states = [state_h, state_c]\n", "\n", "# ----- Decoder -----\n", "decoder_inputs = Input(shape=(None,), name=\"decoder_inputs\")\n", "decoder_embedding = Embedding(input_dim=vocab_size_question,\n", " output_dim=embedding_dim,\n", " mask_zero=True,\n", " name=\"decoder_embedding\")(decoder_inputs)\n", "\n", "decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True, name=\"decoder_lstm\")\n", "decoder_outputs, _, _ = decoder_lstm(decoder_embedding,\n", " initial_state=encoder_states)\n", "decoder_dense = Dense(vocab_size_question, activation='softmax', name=\"decoder_dense\")\n", "decoder_outputs = decoder_dense(decoder_outputs)\n", "\n", "# Combine into a training model\n", "model = Model([encoder_inputs, decoder_inputs], decoder_outputs)\n", "model.compile(optimizer='adam',\n", " loss='sparse_categorical_crossentropy',\n", " metrics=['sparse_categorical_accuracy'])\n", "\n", "print(model.summary())\n", "\n", "# ==========================\n", "# 5) Train/Test Split (Optional)\n", "# ==========================\n", "# For simplicity, let's do a quick train/validation split\n", "# Adjust split size or do a separate test set for production usage.\n", "split_index = int(0.8 * len(encoder_input_data))\n", "encoder_train = encoder_input_data[:split_index]\n", "decoder_train = decoder_input_data[:split_index]\n", "target_train = decoder_target_data[:split_index]\n", "\n", "encoder_val = encoder_input_data[split_index:]\n", "decoder_val = decoder_input_data[split_index:]\n", "target_val = decoder_target_data[split_index:]\n", "\n", "# ==========================\n", "# 6) Fit the Model\n", "# ==========================\n", "history = model.fit(\n", " [encoder_train, decoder_train],\n", " target_train,\n", " batch_size=32,\n", " epochs=10,\n", " validation_data=([encoder_val, decoder_val], target_val)\n", ")\n", "\n", "# The accuracy reported is \"sparse_categorical_accuracy\" at the token level.\n", "\n", "# ==========================\n", "# 7) Evaluate the Model\n", "# ==========================\n", "# If you want a quick evaluation on the validation set:\n", "val_loss, val_accuracy = model.evaluate([encoder_val, decoder_val], target_val)\n", "print(f\"Validation Loss: {val_loss:.4f}\")\n", "print(f\"Validation Accuracy (token-level): {val_accuracy:.4f}\")\n", "\n", "# ==========================\n", "# 8) Build Inference Models\n", "# ==========================\n", "# Encoder model for inference\n", "encoder_model_inf = Model(encoder_inputs, encoder_states)\n", "\n", "# Decoder model for inference\n", "decoder_state_input_h = Input(shape=(latent_dim,), name=\"inference_state_h\")\n", "decoder_state_input_c = Input(shape=(latent_dim,), name=\"inference_state_c\")\n", "decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]\n", "\n", "dec_emb_inf = decoder_embedding(decoder_inputs)\n", "decoder_inf_outputs, state_h_inf, state_c_inf = decoder_lstm(\n", " dec_emb_inf, initial_state=decoder_states_inputs\n", ")\n", "decoder_inf_states = [state_h_inf, state_c_inf]\n", "decoder_inf_outputs = decoder_dense(decoder_inf_outputs)\n", "\n", "decoder_model_inf = Model(\n", " [decoder_inputs] + decoder_states_inputs,\n", " [decoder_inf_outputs] + decoder_inf_states\n", ")\n", "\n", "# Create index-to-word mapping for the question tokenizer\n", "index_to_word_question = {idx: word for word, idx in tokenizer_question.word_index.items()}\n", "# If you used an OOV token, might want to handle that as well.\n", "\n", "def generate_question(paragraph_text, max_length=50, start_token=None, end_token=None):\n", " \"\"\"\n", " Generate a question from a paragraph using the trained seq2seq model.\n", " Token-level decoding with greedy search.\n", " \"\"\"\n", " # 1) Encode the paragraph\n", " seq = tokenizer_paragraph.texts_to_sequences([paragraph_text])\n", " seq = pad_sequences(seq, maxlen=max_paragraph_len, padding='post')\n", " states_value = encoder_model_inf.predict(seq)\n", "\n", " # 2) Start token\n", " target_seq = np.zeros((1, 1), dtype='int32')\n", " # If you have a token, set it here\n", " # e.g., target_seq[0, 0] = tokenizer_question.word_index[\"\"]\n", "\n", " decoded_words = []\n", "\n", " for _ in range(max_length):\n", " output_tokens, h, c = decoder_model_inf.predict([target_seq] + states_value)\n", "\n", " sampled_token_index = np.argmax(output_tokens[0, -1, :])\n", " sampled_word = index_to_word_question.get(sampled_token_index, '')\n", "\n", " # Stop if we encounter an token or a special index\n", " if end_token and (sampled_word == end_token):\n", " break\n", "\n", " decoded_words.append(sampled_word)\n", "\n", " # Next target\n", " target_seq = np.zeros((1, 1), dtype='int32')\n", " target_seq[0, 0] = sampled_token_index\n", "\n", " states_value = [h, c]\n", "\n", " return ' '.join(decoded_words)\n", "\n", "# ==========================\n", "# 9) Test Inference on a Paragraph\n", "# ==========================\n", "test_paragraph = \"Albert Einstein was a theoretical physicist born in Germany...\"\n", "generated = generate_question(test_paragraph)\n", "print(\"Generated question:\", generated)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "yups 0\n", "yups 1\n", "yups 2\n", "yups 3\n", "yups 4\n", "yups 5\n", "yups 6\n", "yups 7\n", "yups 8\n", "yups 9\n", "yups 10\n", "yups 11\n", "yups 12\n", "yups 13\n", "yups 14\n", "yups 15\n", "yups 16\n", "yups 17\n", "yups 18\n", "yups 19\n" ] } ], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 2 }