{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-02-05 01:57:25.675154: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional\"</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1mModel: \"functional\"\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
       "┃<span style=\"font-weight: bold\"> Layer (type)        </span>┃<span style=\"font-weight: bold\"> Output Shape      </span>┃<span style=\"font-weight: bold\">    Param # </span>┃<span style=\"font-weight: bold\"> Connected to      </span>┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
       "│ encoder_inputs      │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>)      │          <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ -                 │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_inputs      │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>)      │          <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ -                 │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ embedding           │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │      <span style=\"color: #00af00; text-decoration-color: #00af00\">1,280</span> │ encoder_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>)         │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ not_equal           │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>)      │          <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ encoder_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">NotEqual</span>)          │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_embedding   │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │      <span style=\"color: #00af00; text-decoration-color: #00af00\">1,024</span> │ decoder_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>)         │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ encoder_lstm (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ [(<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>),     │    <span style=\"color: #00af00; text-decoration-color: #00af00\">394,240</span> │ embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>],  │\n",
       "│                     │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>),      │            │ not_equal[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]   │\n",
       "│                     │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>)]      │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_lstm (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ [(<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>,     │    <span style=\"color: #00af00; text-decoration-color: #00af00\">394,240</span> │ decoder_embeddin… │\n",
       "│                     │ <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>), (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>,      │            │ encoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
       "│                     │ <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>), (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>,      │            │ encoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
       "│                     │ <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>)]             │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_dense       │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">8</span>)   │      <span style=\"color: #00af00; text-decoration-color: #00af00\">2,056</span> │ decoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)             │                   │            │                   │\n",
       "└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
       "</pre>\n"
      ],
      "text/plain": [
       "┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
       "┃\u001b[1m \u001b[0m\u001b[1mLayer (type)       \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape     \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m   Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to     \u001b[0m\u001b[1m \u001b[0m┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
       "│ encoder_inputs      │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m)      │          \u001b[38;5;34m0\u001b[0m │ -                 │\n",
       "│ (\u001b[38;5;33mInputLayer\u001b[0m)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_inputs      │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m)      │          \u001b[38;5;34m0\u001b[0m │ -                 │\n",
       "│ (\u001b[38;5;33mInputLayer\u001b[0m)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ embedding           │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │      \u001b[38;5;34m1,280\u001b[0m │ encoder_inputs[\u001b[38;5;34m0\u001b[0m… │\n",
       "│ (\u001b[38;5;33mEmbedding\u001b[0m)         │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ not_equal           │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m)      │          \u001b[38;5;34m0\u001b[0m │ encoder_inputs[\u001b[38;5;34m0\u001b[0m… │\n",
       "│ (\u001b[38;5;33mNotEqual\u001b[0m)          │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_embedding   │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │      \u001b[38;5;34m1,024\u001b[0m │ decoder_inputs[\u001b[38;5;34m0\u001b[0m… │\n",
       "│ (\u001b[38;5;33mEmbedding\u001b[0m)         │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ encoder_lstm (\u001b[38;5;33mLSTM\u001b[0m) │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m),     │    \u001b[38;5;34m394,240\u001b[0m │ embedding[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m],  │\n",
       "│                     │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m),      │            │ not_equal[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m]   │\n",
       "│                     │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)]      │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_lstm (\u001b[38;5;33mLSTM\u001b[0m) │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m,     │    \u001b[38;5;34m394,240\u001b[0m │ decoder_embeddin… │\n",
       "│                     │ \u001b[38;5;34m256\u001b[0m), (\u001b[38;5;45mNone\u001b[0m,      │            │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
       "│                     │ \u001b[38;5;34m256\u001b[0m), (\u001b[38;5;45mNone\u001b[0m,      │            │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
       "│                     │ \u001b[38;5;34m256\u001b[0m)]             │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ decoder_dense       │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m)   │      \u001b[38;5;34m2,056\u001b[0m │ decoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
       "│ (\u001b[38;5;33mDense\u001b[0m)             │                   │            │                   │\n",
       "└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">792,840</span> (3.02 MB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m792,840\u001b[0m (3.02 MB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">792,840</span> (3.02 MB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m792,840\u001b[0m (3.02 MB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n",
      "Epoch 1/10\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-02-05 01:57:27.530017: E tensorflow/core/util/util.cc:131] oneDNN supports DT_BOOL only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.\n",
      "2025-02-05 01:57:27.593630: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence\n",
      "\t [[{{node IteratorGetNext}}]]\n",
      "/mnt/disc1/code/lstm-quiz/.venv/lib64/python3.10/site-packages/keras/src/trainers/epoch_iterator.py:151: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.\n",
      "  self._interrupted_warning()\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "math domain error",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[6], line 118\u001b[0m\n\u001b[1;32m    113\u001b[0m target_val  \u001b[38;5;241m=\u001b[39m decoder_target_data[split_index:]\n\u001b[1;32m    115\u001b[0m \u001b[38;5;66;03m# ==========================\u001b[39;00m\n\u001b[1;32m    116\u001b[0m \u001b[38;5;66;03m# 6) Fit the Model\u001b[39;00m\n\u001b[1;32m    117\u001b[0m \u001b[38;5;66;03m# ==========================\u001b[39;00m\n\u001b[0;32m--> 118\u001b[0m history \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    119\u001b[0m \u001b[43m    \u001b[49m\u001b[43m[\u001b[49m\u001b[43mencoder_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecoder_train\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    120\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtarget_train\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    121\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m32\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    122\u001b[0m \u001b[43m    \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    123\u001b[0m \u001b[43m    \u001b[49m\u001b[43mvalidation_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mencoder_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecoder_val\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_val\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    124\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m    126\u001b[0m \u001b[38;5;66;03m# The accuracy reported is \"sparse_categorical_accuracy\" at the token level.\u001b[39;00m\n\u001b[1;32m    127\u001b[0m \n\u001b[1;32m    128\u001b[0m \u001b[38;5;66;03m# ==========================\u001b[39;00m\n\u001b[1;32m    129\u001b[0m \u001b[38;5;66;03m# 7) Evaluate the Model\u001b[39;00m\n\u001b[1;32m    130\u001b[0m \u001b[38;5;66;03m# ==========================\u001b[39;00m\n\u001b[1;32m    131\u001b[0m \u001b[38;5;66;03m# If you want a quick evaluation on the validation set:\u001b[39;00m\n\u001b[1;32m    132\u001b[0m val_loss, val_accuracy \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mevaluate([encoder_val, decoder_val], target_val)\n",
      "File \u001b[0;32m/mnt/disc1/code/lstm-quiz/.venv/lib64/python3.10/site-packages/keras/src/utils/traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    119\u001b[0m     filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m    120\u001b[0m     \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m    121\u001b[0m     \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m    124\u001b[0m     \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n",
      "File \u001b[0;32m/mnt/disc1/code/lstm-quiz/.venv/lib64/python3.10/site-packages/keras/src/utils/progbar.py:119\u001b[0m, in \u001b[0;36mProgbar.update\u001b[0;34m(self, current, values, finalize)\u001b[0m\n\u001b[1;32m    116\u001b[0m     message \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    118\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 119\u001b[0m     numdigits \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(\u001b[43mmath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog10\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget\u001b[49m\u001b[43m)\u001b[49m) \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m    120\u001b[0m     bar \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(numdigits) \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124md/\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m%\u001b[39m (current, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget)\n\u001b[1;32m    121\u001b[0m     bar \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\x1b\u001b[39;00m\u001b[38;5;124m[1m\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbar\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\x1b\u001b[39;00m\u001b[38;5;124m[0m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
      "\u001b[0;31mValueError\u001b[0m: math domain error"
     ]
    }
   ],
   "source": [
    "# ==========================\n",
    "# 1) Install/Import Dependencies\n",
    "# ==========================\n",
    "# If you are in a brand new environment, uncomment the following line:\n",
    "# %pip install tensorflow pandas\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "from tensorflow.keras.layers import Input, LSTM, Embedding, Dense\n",
    "from tensorflow.keras.models import Model\n",
    "\n",
    "# ==========================\n",
    "# 2) Load Dataset (CSV)\n",
    "# ==========================\n",
    "# Adjust the file path to your CSV file\n",
    "df = pd.read_csv(\"quiz_questions.csv\")\n",
    "\n",
    "# Extract the paragraphs and questions\n",
    "paragraphs = df['paragraph'].astype(str).tolist()\n",
    "questions  = df['question'].astype(str).tolist()\n",
    "\n",
    "# (Optional) For demonstration, let's ignore question_type, answer, distractors in this example\n",
    "# but you can incorporate them as extra signals if you wish.\n",
    "\n",
    "# ==========================\n",
    "# 3) Tokenize Text\n",
    "# ==========================\n",
    "# Create two tokenizers: one for paragraphs, one for questions\n",
    "num_words = 10000  # Maximum vocabulary size\n",
    "\n",
    "tokenizer_paragraph = Tokenizer(num_words=num_words, oov_token=\"<OOV>\")\n",
    "tokenizer_paragraph.fit_on_texts(paragraphs)\n",
    "paragraph_sequences = tokenizer_paragraph.texts_to_sequences(paragraphs)\n",
    "\n",
    "tokenizer_question = Tokenizer(num_words=num_words, oov_token=\"<OOV>\")\n",
    "tokenizer_question.fit_on_texts(questions)\n",
    "question_sequences = tokenizer_question.texts_to_sequences(questions)\n",
    "\n",
    "# Get max lengths (for padding)\n",
    "max_paragraph_len = max(len(seq) for seq in paragraph_sequences)\n",
    "max_question_len  = max(len(seq) for seq in question_sequences)\n",
    "\n",
    "# Pad sequences\n",
    "encoder_input_data = pad_sequences(paragraph_sequences, maxlen=max_paragraph_len, padding='post')\n",
    "# For decoder data, we usually do teacher forcing:\n",
    "# We'll keep one version as input, one version shifted as the target\n",
    "decoder_input_data_full = pad_sequences(question_sequences, maxlen=max_question_len, padding='post')\n",
    "\n",
    "# We create decoder_target_data by shifting to the left by 1 token\n",
    "decoder_target_data = np.copy(decoder_input_data_full[:, 1:])\n",
    "decoder_input_data  = np.copy(decoder_input_data_full[:, :-1])\n",
    "\n",
    "# Expand target dimension for sparse_categorical_crossentropy\n",
    "decoder_target_data = np.expand_dims(decoder_target_data, -1)\n",
    "\n",
    "# Calculate vocab sizes\n",
    "vocab_size_paragraph = min(len(tokenizer_paragraph.word_index) + 1, num_words)\n",
    "vocab_size_question  = min(len(tokenizer_question.word_index)  + 1, num_words)\n",
    "\n",
    "# ==========================\n",
    "# 4) Build Seq2Seq Model\n",
    "# ==========================\n",
    "embedding_dim = 128\n",
    "latent_dim    = 256  # LSTM hidden dimension\n",
    "\n",
    "# ----- Encoder -----\n",
    "encoder_inputs = Input(shape=(None,), name=\"encoder_inputs\")\n",
    "encoder_embedding = Embedding(input_dim=vocab_size_paragraph,\n",
    "                              output_dim=embedding_dim,\n",
    "                              mask_zero=True)(encoder_inputs)\n",
    "\n",
    "encoder_lstm = LSTM(latent_dim, return_state=True, name=\"encoder_lstm\")\n",
    "_, state_h, state_c = encoder_lstm(encoder_embedding)\n",
    "\n",
    "encoder_states = [state_h, state_c]\n",
    "\n",
    "# ----- Decoder -----\n",
    "decoder_inputs = Input(shape=(None,), name=\"decoder_inputs\")\n",
    "decoder_embedding = Embedding(input_dim=vocab_size_question,\n",
    "                              output_dim=embedding_dim,\n",
    "                              mask_zero=True,\n",
    "                              name=\"decoder_embedding\")(decoder_inputs)\n",
    "\n",
    "decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True, name=\"decoder_lstm\")\n",
    "decoder_outputs, _, _ = decoder_lstm(decoder_embedding,\n",
    "                                     initial_state=encoder_states)\n",
    "decoder_dense = Dense(vocab_size_question, activation='softmax', name=\"decoder_dense\")\n",
    "decoder_outputs = decoder_dense(decoder_outputs)\n",
    "\n",
    "# Combine into a training model\n",
    "model = Model([encoder_inputs, decoder_inputs], decoder_outputs)\n",
    "model.compile(optimizer='adam',\n",
    "              loss='sparse_categorical_crossentropy',\n",
    "              metrics=['sparse_categorical_accuracy'])\n",
    "\n",
    "print(model.summary())\n",
    "\n",
    "# ==========================\n",
    "# 5) Train/Test Split (Optional)\n",
    "# ==========================\n",
    "# For simplicity, let's do a quick train/validation split\n",
    "# Adjust split size or do a separate test set for production usage.\n",
    "split_index = int(0.8 * len(encoder_input_data))\n",
    "encoder_train = encoder_input_data[:split_index]\n",
    "decoder_train = decoder_input_data[:split_index]\n",
    "target_train  = decoder_target_data[:split_index]\n",
    "\n",
    "encoder_val = encoder_input_data[split_index:]\n",
    "decoder_val = decoder_input_data[split_index:]\n",
    "target_val  = decoder_target_data[split_index:]\n",
    "\n",
    "# ==========================\n",
    "# 6) Fit the Model\n",
    "# ==========================\n",
    "history = model.fit(\n",
    "    [encoder_train, decoder_train],\n",
    "    target_train,\n",
    "    batch_size=32,\n",
    "    epochs=10,\n",
    "    validation_data=([encoder_val, decoder_val], target_val)\n",
    ")\n",
    "\n",
    "# The accuracy reported is \"sparse_categorical_accuracy\" at the token level.\n",
    "\n",
    "# ==========================\n",
    "# 7) Evaluate the Model\n",
    "# ==========================\n",
    "# If you want a quick evaluation on the validation set:\n",
    "val_loss, val_accuracy = model.evaluate([encoder_val, decoder_val], target_val)\n",
    "print(f\"Validation Loss: {val_loss:.4f}\")\n",
    "print(f\"Validation Accuracy (token-level): {val_accuracy:.4f}\")\n",
    "\n",
    "# ==========================\n",
    "# 8) Build Inference Models\n",
    "# ==========================\n",
    "# Encoder model for inference\n",
    "encoder_model_inf = Model(encoder_inputs, encoder_states)\n",
    "\n",
    "# Decoder model for inference\n",
    "decoder_state_input_h = Input(shape=(latent_dim,), name=\"inference_state_h\")\n",
    "decoder_state_input_c = Input(shape=(latent_dim,), name=\"inference_state_c\")\n",
    "decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]\n",
    "\n",
    "dec_emb_inf = decoder_embedding(decoder_inputs)\n",
    "decoder_inf_outputs, state_h_inf, state_c_inf = decoder_lstm(\n",
    "    dec_emb_inf, initial_state=decoder_states_inputs\n",
    ")\n",
    "decoder_inf_states = [state_h_inf, state_c_inf]\n",
    "decoder_inf_outputs = decoder_dense(decoder_inf_outputs)\n",
    "\n",
    "decoder_model_inf = Model(\n",
    "    [decoder_inputs] + decoder_states_inputs,\n",
    "    [decoder_inf_outputs] + decoder_inf_states\n",
    ")\n",
    "\n",
    "# Create index-to-word mapping for the question tokenizer\n",
    "index_to_word_question = {idx: word for word, idx in tokenizer_question.word_index.items()}\n",
    "# If you used an OOV token, might want to handle that as well.\n",
    "\n",
    "def generate_question(paragraph_text, max_length=50, start_token=None, end_token=None):\n",
    "    \"\"\"\n",
    "    Generate a question from a paragraph using the trained seq2seq model.\n",
    "    Token-level decoding with greedy search.\n",
    "    \"\"\"\n",
    "    # 1) Encode the paragraph\n",
    "    seq = tokenizer_paragraph.texts_to_sequences([paragraph_text])\n",
    "    seq = pad_sequences(seq, maxlen=max_paragraph_len, padding='post')\n",
    "    states_value = encoder_model_inf.predict(seq)\n",
    "\n",
    "    # 2) Start token\n",
    "    target_seq = np.zeros((1, 1), dtype='int32')\n",
    "    # If you have a <START> token, set it here\n",
    "    # e.g., target_seq[0, 0] = tokenizer_question.word_index[\"<start>\"]\n",
    "\n",
    "    decoded_words = []\n",
    "\n",
    "    for _ in range(max_length):\n",
    "        output_tokens, h, c = decoder_model_inf.predict([target_seq] + states_value)\n",
    "\n",
    "        sampled_token_index = np.argmax(output_tokens[0, -1, :])\n",
    "        sampled_word = index_to_word_question.get(sampled_token_index, '<UNK>')\n",
    "\n",
    "        # Stop if we encounter an <end> token or a special index\n",
    "        if end_token and (sampled_word == end_token):\n",
    "            break\n",
    "\n",
    "        decoded_words.append(sampled_word)\n",
    "\n",
    "        # Next target\n",
    "        target_seq = np.zeros((1, 1), dtype='int32')\n",
    "        target_seq[0, 0] = sampled_token_index\n",
    "\n",
    "        states_value = [h, c]\n",
    "\n",
    "    return ' '.join(decoded_words)\n",
    "\n",
    "# ==========================\n",
    "# 9) Test Inference on a Paragraph\n",
    "# ==========================\n",
    "test_paragraph = \"Albert Einstein was a theoretical physicist born in Germany...\"\n",
    "generated = generate_question(test_paragraph)\n",
    "print(\"Generated question:\", generated)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "yups 0\n",
      "yups 1\n",
      "yups 2\n",
      "yups 3\n",
      "yups 4\n",
      "yups 5\n",
      "yups 6\n",
      "yups 7\n",
      "yups 8\n",
      "yups 9\n",
      "yups 10\n",
      "yups 11\n",
      "yups 12\n",
      "yups 13\n",
      "yups 14\n",
      "yups 15\n",
      "yups 16\n",
      "yups 17\n",
      "yups 18\n",
      "yups 19\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}