feat: add QC model and dataset

This commit is contained in:
akhdanre 2025-04-22 22:53:18 +07:00
parent 3a04f94fb3
commit 1c270d4e75
7 changed files with 602 additions and 65 deletions

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 118,
"execution_count": 13,
"id": "fb106e20",
"metadata": {},
"outputs": [],
@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 119,
"execution_count": 14,
"id": "00347a5f",
"metadata": {},
"outputs": [
@ -54,7 +54,6 @@
" for line in f:\n",
" line = line.strip()\n",
" if not line:\n",
" # Jika baris kosong → akhir kalimat\n",
" if tokens:\n",
" data.append({\n",
" \"tokens\": tokens,\n",
@ -82,7 +81,15 @@
},
{
"cell_type": "code",
"execution_count": 120,
"execution_count": null,
"id": "3793950a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 15,
"id": "ac8eb374",
"metadata": {},
"outputs": [],
@ -103,7 +110,7 @@
},
{
"cell_type": "code",
"execution_count": 121,
"execution_count": 16,
"id": "80356f1f",
"metadata": {},
"outputs": [],
@ -130,7 +137,7 @@
},
{
"cell_type": "code",
"execution_count": 122,
"execution_count": 17,
"id": "fe219c96",
"metadata": {},
"outputs": [],
@ -138,25 +145,25 @@
"X_train, X_test, y_ner_train, y_ner_test, y_srl_train, y_srl_test = train_test_split(\n",
" X, y_ner, y_srl, \n",
" test_size=0.20, \n",
" random_state=42, # supaya reproducible\n",
" shuffle=True # acak baris\n",
" random_state=42,\n",
" shuffle=True \n",
")"
]
},
{
"cell_type": "code",
"execution_count": 123,
"execution_count": 18,
"id": "7a9636b6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_13\"</span>\n",
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_1\"</span>\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1mModel: \"functional_13\"\u001b[0m\n"
"\u001b[1mModel: \"functional_1\"\u001b[0m\n"
]
},
"metadata": {},
@ -168,19 +175,19 @@
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃<span style=\"font-weight: bold\"> Connected to </span>┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
"│ input_layer_13 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ input_layer_1 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ embedding_13 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">44,544</span> │ input_layer_13[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
"│ embedding_1 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">44,544</span> │ input_layer_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_13 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">66,048</span> │ embedding_13[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"│ bidirectional_1 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">66,048</span> │ embedding_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">25</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">3,225</span> │ bidirectional_13… │\n",
"│ ner_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">25</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">3,225</span> │ bidirectional_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">TimeDistributed</span>) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">20</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">2,580</span> │ bidirectional_13… │\n",
"│ srl_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">18</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">2,322</span> │ bidirectional_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">TimeDistributed</span>) │ │ │ │\n",
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
"</pre>\n"
@ -189,19 +196,19 @@
"┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
"│ input_layer_13 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ input_layer_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ embedding_13 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m44,544\u001b[0m │ input_layer_13[\u001b[38;5;34m0\u001b[0m… │\n",
"│ embedding_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m44,544\u001b[0m │ input_layer_1[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ bidirectional_13 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m66,048\u001b[0m │ embedding_13[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
"│ bidirectional_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m66,048\u001b[0m │ embedding_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ner_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m25\u001b[0m) │ \u001b[38;5;34m3,225\u001b[0m │ bidirectional_13… │\n",
"│ ner_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m25\u001b[0m) │ \u001b[38;5;34m3,225\u001b[0m │ bidirectional_1[\u001b[38;5;34m\u001b[0m │\n",
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m20\u001b[0m) │ \u001b[38;5;34m2,580\u001b[0m │ bidirectional_13… │\n",
"│ srl_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m18\u001b[0m) │ \u001b[38;5;34m2,322\u001b[0m │ bidirectional_1[\u001b[38;5;34m…\u001b[0m │\n",
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ │\n",
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
]
@ -212,11 +219,11 @@
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">116,397</span> (454.68 KB)\n",
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">116,139</span> (453.67 KB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m116,397\u001b[0m (454.68 KB)\n"
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m116,139\u001b[0m (453.67 KB)\n"
]
},
"metadata": {},
@ -225,11 +232,11 @@
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">116,397</span> (454.68 KB)\n",
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">116,139</span> (453.67 KB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m116,397\u001b[0m (454.68 KB)\n"
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m116,139\u001b[0m (453.67 KB)\n"
]
},
"metadata": {},
@ -253,25 +260,25 @@
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 19ms/step - loss: 3.3010 - ner_output_accuracy: 0.8807 - ner_output_loss: 1.5617 - srl_output_accuracy: 0.7456 - srl_output_loss: 1.7393 - val_loss: 0.7284 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2466 - val_srl_output_accuracy: 0.8300 - val_srl_output_loss: 0.4818\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 19ms/step - loss: 3.2850 - ner_output_accuracy: 0.8700 - ner_output_loss: 1.6767 - srl_output_accuracy: 0.7518 - srl_output_loss: 1.6083 - val_loss: 0.7275 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2555 - val_srl_output_accuracy: 0.8450 - val_srl_output_loss: 0.4720\n",
"Epoch 2/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.7355 - ner_output_accuracy: 0.9569 - ner_output_loss: 0.2279 - srl_output_accuracy: 0.8297 - srl_output_loss: 0.5076 - val_loss: 0.6655 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2323 - val_srl_output_accuracy: 0.8506 - val_srl_output_loss: 0.4332\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.7622 - ner_output_accuracy: 0.9528 - ner_output_loss: 0.2458 - srl_output_accuracy: 0.8296 - srl_output_loss: 0.5163 - val_loss: 0.6534 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2296 - val_srl_output_accuracy: 0.8531 - val_srl_output_loss: 0.4238\n",
"Epoch 3/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.7041 - ner_output_accuracy: 0.9522 - ner_output_loss: 0.2219 - srl_output_accuracy: 0.8488 - srl_output_loss: 0.4822 - val_loss: 0.6368 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2232 - val_srl_output_accuracy: 0.8744 - val_srl_output_loss: 0.4135\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.6875 - ner_output_accuracy: 0.9572 - ner_output_loss: 0.2126 - srl_output_accuracy: 0.8496 - srl_output_loss: 0.4750 - val_loss: 0.6327 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2273 - val_srl_output_accuracy: 0.8688 - val_srl_output_loss: 0.4054\n",
"Epoch 4/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.6864 - ner_output_accuracy: 0.9520 - ner_output_loss: 0.2184 - srl_output_accuracy: 0.8548 - srl_output_loss: 0.4680 - val_loss: 0.6078 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2193 - val_srl_output_accuracy: 0.8769 - val_srl_output_loss: 0.3885\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 0.6103 - ner_output_accuracy: 0.9533 - ner_output_loss: 0.2114 - srl_output_accuracy: 0.8772 - srl_output_loss: 0.3988 - val_loss: 0.6009 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2137 - val_srl_output_accuracy: 0.8662 - val_srl_output_loss: 0.3872\n",
"Epoch 5/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.6304 - ner_output_accuracy: 0.9545 - ner_output_loss: 0.2009 - srl_output_accuracy: 0.8675 - srl_output_loss: 0.4295 - val_loss: 0.5727 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2015 - val_srl_output_accuracy: 0.8812 - val_srl_output_loss: 0.3711\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.6757 - ner_output_accuracy: 0.9486 - ner_output_loss: 0.2281 - srl_output_accuracy: 0.8582 - srl_output_loss: 0.4476 - val_loss: 0.5690 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2040 - val_srl_output_accuracy: 0.8781 - val_srl_output_loss: 0.3650\n",
"Epoch 6/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 0.5679 - ner_output_accuracy: 0.9557 - ner_output_loss: 0.1749 - srl_output_accuracy: 0.8783 - srl_output_loss: 0.3930 - val_loss: 0.5471 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.1956 - val_srl_output_accuracy: 0.8831 - val_srl_output_loss: 0.3515\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 0.5864 - ner_output_accuracy: 0.9477 - ner_output_loss: 0.2198 - srl_output_accuracy: 0.8898 - srl_output_loss: 0.3666 - val_loss: 0.5458 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.1961 - val_srl_output_accuracy: 0.8875 - val_srl_output_loss: 0.3497\n",
"Epoch 7/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.5000 - ner_output_accuracy: 0.9587 - ner_output_loss: 0.1634 - srl_output_accuracy: 0.8917 - srl_output_loss: 0.3366 - val_loss: 0.5364 - val_ner_output_accuracy: 0.9513 - val_ner_output_loss: 0.1899 - val_srl_output_accuracy: 0.8850 - val_srl_output_loss: 0.3465\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.5877 - ner_output_accuracy: 0.9506 - ner_output_loss: 0.1914 - srl_output_accuracy: 0.8773 - srl_output_loss: 0.3963 - val_loss: 0.5260 - val_ner_output_accuracy: 0.9525 - val_ner_output_loss: 0.1898 - val_srl_output_accuracy: 0.8875 - val_srl_output_loss: 0.3362\n",
"Epoch 8/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.5526 - ner_output_accuracy: 0.9541 - ner_output_loss: 0.1791 - srl_output_accuracy: 0.8840 - srl_output_loss: 0.3735 - val_loss: 0.5054 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.1799 - val_srl_output_accuracy: 0.8963 - val_srl_output_loss: 0.3256\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 0.5046 - ner_output_accuracy: 0.9536 - ner_output_loss: 0.1756 - srl_output_accuracy: 0.8912 - srl_output_loss: 0.3290 - val_loss: 0.5094 - val_ner_output_accuracy: 0.9531 - val_ner_output_loss: 0.1829 - val_srl_output_accuracy: 0.8881 - val_srl_output_loss: 0.3265\n",
"Epoch 9/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.5094 - ner_output_accuracy: 0.9561 - ner_output_loss: 0.1701 - srl_output_accuracy: 0.8915 - srl_output_loss: 0.3393 - val_loss: 0.4881 - val_ner_output_accuracy: 0.9512 - val_ner_output_loss: 0.1707 - val_srl_output_accuracy: 0.9013 - val_srl_output_loss: 0.3174\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.4807 - ner_output_accuracy: 0.9539 - ner_output_loss: 0.1704 - srl_output_accuracy: 0.9021 - srl_output_loss: 0.3103 - val_loss: 0.4876 - val_ner_output_accuracy: 0.9531 - val_ner_output_loss: 0.1719 - val_srl_output_accuracy: 0.9025 - val_srl_output_loss: 0.3156\n",
"Epoch 10/10\n",
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.4633 - ner_output_accuracy: 0.9524 - ner_output_loss: 0.1675 - srl_output_accuracy: 0.9092 - srl_output_loss: 0.2959 - val_loss: 0.4804 - val_ner_output_accuracy: 0.9531 - val_ner_output_loss: 0.1597 - val_srl_output_accuracy: 0.9050 - val_srl_output_loss: 0.3206\n"
"\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.4134 - ner_output_accuracy: 0.9634 - ner_output_loss: 0.1350 - srl_output_accuracy: 0.9245 - srl_output_loss: 0.2784 - val_loss: 0.4587 - val_ner_output_accuracy: 0.9550 - val_ner_output_loss: 0.1598 - val_srl_output_accuracy: 0.9087 - val_srl_output_loss: 0.2989\n"
]
}
],
@ -317,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": 124,
"execution_count": 19,
"id": "3a55990b",
"metadata": {},
"outputs": [
@ -325,23 +332,32 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.48035523295402527, 'compile_metrics': 0.15973526239395142, 'ner_output_loss': 0.32061997056007385, 'srl_output_loss': 0.953125}\n",
"{'loss': 0.45865434408187866, 'compile_metrics': 0.159775510430336, 'ner_output_loss': 0.29887881875038147, 'srl_output_loss': 0.9550000429153442}\n",
"{0: 'B-DATE', 1: 'B-ETH', 2: 'B-EVENT', 3: 'B-LOC', 4: 'B-MIN', 5: 'B-MISC', 6: 'B-ORG', 7: 'B-PER', 8: 'B-QUANT', 9: 'B-REL', 10: 'B-RES', 11: 'B-TERM', 12: 'B-TIME', 13: 'I-DATE', 14: 'I-ETH', 15: 'I-EVENT', 16: 'I-LOC', 17: 'I-MISC', 18: 'I-ORG', 19: 'I-PER', 20: 'I-QUANT', 21: 'I-RES', 22: 'I-TERM', 23: 'I-TIME', 24: 'O'}\n",
"\n",
"📊 [NER] Classification Report (test set):\n",
" precision recall f1-score support\n",
"\n",
" DATE 0.25 0.12 0.17 8\n",
" DATE 0.33 0.12 0.18 8\n",
" EVENT 0.00 0.00 0.00 1\n",
" LOC 0.50 0.04 0.07 28\n",
" LOC 1.00 0.04 0.07 28\n",
" ORG 0.00 0.00 0.00 4\n",
" PER 0.00 0.00 0.00 2\n",
" TIME 0.20 0.10 0.13 10\n",
" TIME 0.50 0.30 0.37 10\n",
"\n",
" micro avg 0.27 0.06 0.09 53\n",
" macro avg 0.16 0.04 0.06 53\n",
"weighted avg 0.34 0.06 0.09 53\n",
" micro avg 0.50 0.09 0.16 53\n",
" macro avg 0.31 0.08 0.10 53\n",
"weighted avg 0.67 0.09 0.13 53\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
]
}
],
"source": [
@ -375,7 +391,7 @@
"y_pred_ner, y_pred_srl = model.predict(X_test, verbose=0)\n",
"\n",
"true_ner, pred_ner = decode(y_pred_ner, y_ner_test, idx2tag_ner)\n",
"\n",
"print(idx2tag_ner)\n",
"print(\"\\n📊 [NER] Classification Report (test set):\")\n",
"print(classification_report(true_ner, pred_ner, digits=2))\n",
"\n",
@ -400,7 +416,7 @@
},
{
"cell_type": "code",
"execution_count": 125,
"execution_count": 20,
"id": "547d1533",
"metadata": {},
"outputs": [
@ -408,28 +424,53 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{0: 'ARG0', 1: 'ARG1', 2: 'ARG2', 3: 'ARG3', 4: 'ARGM-BNF', 5: 'ARGM-CAU', 6: 'ARGM-COM', 7: 'ARGM-FRQ', 8: 'ARGM-LOC', 9: 'ARGM-MNR', 10: 'ARGM-MOD', 11: 'ARGM-NEG', 12: 'ARGM-PNC', 13: 'ARGM-PRD', 14: 'ARGM-PRP', 15: 'ARGM-SRC', 16: 'ARGM-TMP', 17: 'O', 18: 'R-ARG1', 19: 'V'}\n",
"{0: 'ARG0', 1: 'ARG1', 2: 'ARG2', 3: 'ARG3', 4: 'ARGM-BNF', 5: 'ARGM-CAU', 6: 'ARGM-COM', 7: 'ARGM-FRQ', 8: 'ARGM-LOC', 9: 'ARGM-MNR', 10: 'ARGM-MOD', 11: 'ARGM-NEG', 12: 'ARGM-PRP', 13: 'ARGM-SRC', 14: 'ARGM-TMP', 15: 'O', 16: 'R-ARG1', 17: 'V'}\n",
"\n",
"📊 [SRL] Classification Report (test set):\n",
" precision recall f1-score support\n",
"\n",
" CAU 0.00 0.00 0.00 1\n",
" FRQ 0.00 0.00 0.00 1\n",
" LOC 0.36 0.40 0.38 10\n",
" LOC 0.31 0.50 0.38 10\n",
" MNR 0.00 0.00 0.00 4\n",
" PNC 0.00 0.00 0.00 1\n",
" PRP 0.00 0.00 0.00 1\n",
" RG0 0.31 0.21 0.25 19\n",
" RG1 0.21 0.15 0.17 46\n",
" RG2 0.19 0.40 0.26 10\n",
" TMP 0.41 0.53 0.46 17\n",
" _ 0.10 0.06 0.07 33\n",
" RG0 0.50 0.11 0.17 19\n",
" RG1 0.18 0.20 0.19 46\n",
" RG2 0.27 0.40 0.32 10\n",
" TMP 0.50 0.59 0.54 17\n",
" _ 0.12 0.03 0.05 33\n",
"\n",
" micro avg 0.25 0.21 0.23 143\n",
" macro avg 0.14 0.16 0.15 143\n",
"weighted avg 0.22 0.21 0.21 143\n",
" micro avg 0.28 0.22 0.24 142\n",
" macro avg 0.19 0.18 0.17 142\n",
"weighted avg 0.26 0.22 0.21 142\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARG1 seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: V seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-TMP seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARG0 seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-LOC seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-MNR seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-FRQ seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARG2 seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-PRP seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n",
"/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-CAU seems not to be NE tag.\n",
" warnings.warn('{} seems not to be NE tag.'.format(chunk))\n"
]
}
],
"source": [

Binary file not shown.

View File

@ -0,0 +1,270 @@
"""
qg_pipeline_static.py
~~~~~~~~~~~~~~~~~~~~~
Question Generation with an Encoder-Decoder LSTM
and symbolic NER & SRL features (static pipeline).
Dataset:
    TRAIN_FILE (JSON / JSON Lines), split into train/valid in-script; see `load_jsonl` for the record format.
"""
import json, random, numpy as np, tensorflow as tf
from collections import Counter
from pathlib import Path
from sklearn.model_selection import train_test_split
# ------------------------------------------------------------------------------
# 1. BASIC UTILITIES
# ------------------------------------------------------------------------------
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
TRAIN_FILE = "../dataset/dataset_qc.json"
VALID_RATIO = 0.10
MAX_CTX_LEN = 50
MAX_Q_LEN = 30
WORD_EMB_DIM = 128
BATCH = 32
EPOCHS = 15
SPECIALS_WORD = ("<pad>", "<unk>", "<bos>", "<eos>")
SPECIALS_TAG = ("<pad>",)
def load_jsonl(path):
    """Load records from a JSON Lines file or a plain JSON array (one dict per record)."""
    with open(path, encoding="utf-8") as f:
        text = f.read().strip()
    # dataset_qc.json in this commit is a pretty-printed JSON array, so accept
    # both that form and true JSONL (one JSON object per line).
    if text.startswith("["):
        return json.loads(text)
    records = []
    for line in text.splitlines():
        line = line.strip()
        if line:
            records.append(json.loads(line))
    return records
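# For reference, each record is expected to look like the entries committed in
# dataset/dataset_qc.json, e.g.:
#   {"tokens": ["Ia", "lantas", "diterima", "belajar", "di", "STOVIA", "."],
#    "ner":    ["O", "O", "O", "O", "O", "B-ORG", "O"],
#    "srl":    ["ARG0", "O", "V", "ARG1", "O", "ARGM-LOC", "O"],
#    "question": "Ia diterima belajar di ___.",
#    "answer": "STOVIA",
#    "type": "isian"}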
def build_vocab(list_of_seq, specials):
"""Bangun (token->id, id->token) dict dari kumpulan sekuens."""
counter = Counter(tok for seq in list_of_seq for tok in seq)
itos = list(specials) + [tok for tok, _ in counter.most_common()]
stoi = {tok: i for i, tok in enumerate(itos)}
return stoi, itos
def encode(seq, tbl, max_len):
ids = [tbl.get(tok, tbl["<unk>"]) for tok in seq]
return (ids + [tbl["<pad>"]] * max_len)[:max_len]
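# A minimal sketch of encode() (illustrative values, not from the dataset):
#   tbl = {"<pad>": 0, "<unk>": 1, "ia": 2, "makan": 3}
#   encode(["ia", "makan", "nasi"], tbl, 5)  ->  [2, 3, 1, 0, 0]
# Unknown tokens fall back to <unk>; the result is right-padded with <pad>
# (or truncated) to exactly max_len ids.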
# ------------------------------------------------------------------------------
# 2. DATA PREP
# ------------------------------------------------------------------------------
# def prepare_training_data(file_path):
# """Load → build vocab → encode to numpy arrays."""
# recs = load_jsonl(file_path)
# ctx, ner, srl, ques = [], [], [], []
# for r in recs:
# ctx.append(r["context_tokens"])
# ner.append(r["ner_tags"])
# srl.append(r["srl_tags"])
# # add <bos>, <eos>
# ques.append(["<bos>"] + r["question_tokens"] + ["<eos>"])
# # 2.1 vocab
# w2i_ctx, i2w_ctx = build_vocab(ctx, SPECIALS_WORD[:2]) # <pad>,<unk>
# w2i_q, i2w_q = build_vocab(ques, SPECIALS_WORD) # 4 specials
# t2i_ner, _ = build_vocab(ner, SPECIALS_TAG)
# t2i_srl, _ = build_vocab(srl, SPECIALS_TAG)
# # 2.2 encode & pad
# X_tok = np.array([encode(s, w2i_ctx, MAX_CTX_LEN) for s in ctx])
# X_ner = np.array([encode(s, t2i_ner, MAX_CTX_LEN) for s in ner])
# X_srl = np.array([encode(s, t2i_srl, MAX_CTX_LEN) for s in srl])
# Y_in = np.array([encode(s[:-1], w2i_q, MAX_Q_LEN) for s in ques]) # bos..last-1
# Y_out = np.array([encode(s[1:], w2i_q, MAX_Q_LEN) for s in ques]) # 2..eos
# return (
# X_tok,
# X_ner,
# X_srl,
# Y_in,
# Y_out,
# w2i_ctx,
# i2w_ctx,
# w2i_q,
# i2w_q,
# t2i_ner,
# t2i_srl,
# )
# --- replaces the old function above ---
def prepare_training_data(file_path):
recs = load_jsonl(file_path)
ctx, ner, srl, ques, span_st, span_ed = [], [], [], [], [], []
for r in recs:
tokens = r["tokens"]
ctx.append(tokens) # context_tokens
ner.append(r["ner"])
srl.append(r["srl"])
# --- compute the answer span automatically ---
ans_toks = r["answer"].split()
try:
start = next(
i
for i in range(len(tokens))
if tokens[i : i + len(ans_toks)] == ans_toks
)
end = start + len(ans_toks) - 1
except StopIteration:
raise ValueError(
f"Jawaban '{r['answer']}' tidak cocok dengan tokens {tokens}"
)
span_st.append(start)
span_ed.append(end)
# question tokens: simple whitespace tokenization
ques.append(["<bos>"] + r["question"].split() + ["<eos>"])
# ---------- build vocab exactly as before ----------
w2i_ctx, i2w_ctx = build_vocab(ctx, SPECIALS_WORD[:2])
w2i_q, i2w_q = build_vocab(ques, SPECIALS_WORD)
t2i_ner, _ = build_vocab(ner, SPECIALS_TAG)
t2i_srl, _ = build_vocab(srl, SPECIALS_TAG)
# ---------- encode ----------
X_tok = np.array([encode(s, w2i_ctx, MAX_CTX_LEN) for s in ctx])
X_ner = np.array([encode(s, t2i_ner, MAX_CTX_LEN) for s in ner])
X_srl = np.array([encode(s, t2i_srl, MAX_CTX_LEN) for s in srl])
Y_in = np.array([encode(s[:-1], w2i_q, MAX_Q_LEN) for s in ques])
Y_out = np.array([encode(s[1:], w2i_q, MAX_Q_LEN) for s in ques])
# keep the spans in case a copy mechanism is added later
spans = np.array(list(zip(span_st, span_ed))) # (N, 2)
return (
X_tok,
X_ner,
X_srl,
Y_in,
Y_out,
spans,
w2i_ctx,
i2w_ctx,
w2i_q,
i2w_q,
t2i_ner,
t2i_srl,
)
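# Worked example of the span search above, using a record from dataset_qc.json:
# for tokens ["Indische", "Partij", "didirikan", "pada", "25", "Desember", "1912", "."]
# and answer "Indische Partij", the answer splits into 2 tokens and the first
# window of length 2 that matches starts at index 0, so (start, end) = (0, 1).
# Note the match relies on the answer being an exact, whitespace-tokenized
# subsequence of `tokens`; otherwise the ValueError above is raised.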
print("> Loading dataset …")
(X_tok, X_ner, X_srl, Y_in, Y_out, w2i_ctx, i2w_ctx, w2i_q, i2w_q, t2i_ner, t2i_srl) = (
prepare_training_data(TRAIN_FILE)
)
train_idx, valid_idx = train_test_split(
np.arange(len(X_tok)), test_size=VALID_RATIO, random_state=SEED
)
def pick(arr, idx):
return arr[idx]
train_data = [pick(a, train_idx) for a in (X_tok, X_ner, X_srl, Y_in, Y_out)]
valid_data = [pick(a, valid_idx) for a in (X_tok, X_ner, X_srl, Y_in, Y_out)]
# ------------------------------------------------------------------------------
# 3. MODEL
# ------------------------------------------------------------------------------
def build_model(vocab_ctx, vocab_q, n_ner, n_srl):
tok_in = tf.keras.layers.Input((MAX_CTX_LEN,), name="tok")
ner_in = tf.keras.layers.Input((MAX_CTX_LEN,), name="ner")
srl_in = tf.keras.layers.Input((MAX_CTX_LEN,), name="srl")
dec_in = tf.keras.layers.Input((MAX_Q_LEN,), name="dec")
tok_emb = tf.keras.layers.Embedding(vocab_ctx, WORD_EMB_DIM, mask_zero=True)(tok_in)
ner_emb = tf.keras.layers.Embedding(n_ner, 32, mask_zero=True)(ner_in)
srl_emb = tf.keras.layers.Embedding(n_srl, 32, mask_zero=True)(srl_in)
enc_in = tf.keras.layers.Concatenate()([tok_emb, ner_emb, srl_emb])
enc_out, fwd_h, fwd_c, bwd_h, bwd_c = tf.keras.layers.Bidirectional(
tf.keras.layers.LSTM(WORD_EMB_DIM, return_sequences=True, return_state=True)
)(enc_in)
state_h = tf.keras.layers.Concatenate()([fwd_h, bwd_h])
state_c = tf.keras.layers.Concatenate()([fwd_c, bwd_c])
dec_emb = tf.keras.layers.Embedding(vocab_q, WORD_EMB_DIM, mask_zero=True)(dec_in)
dec_lstm = tf.keras.layers.LSTM(
WORD_EMB_DIM * 2, return_sequences=True, return_state=True
)
dec_out, _, _ = dec_lstm(dec_emb, initial_state=[state_h, state_c])
# Attention (dot)
score = tf.keras.layers.Dot(axes=[2, 2])([dec_out, enc_out])
attn_weights = tf.keras.layers.Activation("softmax")(score)
context_vec = tf.keras.layers.Dot(axes=[2, 1])([attn_weights, enc_out])
dec_cat = tf.keras.layers.Concatenate()([dec_out, context_vec])
outputs = tf.keras.layers.TimeDistributed(
tf.keras.layers.Dense(vocab_q, activation="softmax")
)(dec_cat)
mdl = tf.keras.Model([tok_in, ner_in, srl_in, dec_in], outputs)
mdl.compile(
optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
return mdl
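# Shape walk-through of the dot attention above (a sketch; 256 = 2 * WORD_EMB_DIM):
#   enc_out: (batch, MAX_CTX_LEN, 256)    dec_out: (batch, MAX_Q_LEN, 256)
#   score   = Dot([dec_out, enc_out])  -> (batch, MAX_Q_LEN, MAX_CTX_LEN)
#   attn    = softmax(score) over the encoder positions
#   context = Dot([attn, enc_out])     -> (batch, MAX_Q_LEN, 256)
#   dec_cat = concat(dec_out, context) -> (batch, MAX_Q_LEN, 512)
# so every decoder step attends over all encoder positions before the softmax
# projection onto the question vocabulary.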
print("> Building model …")
model = build_model(len(w2i_ctx), len(w2i_q), len(t2i_ner), len(t2i_srl))
model.summary(line_length=120)
# ------------------------------------------------------------------------------
# 4. DATA GENERATOR
# ------------------------------------------------------------------------------
def generator(data, batch=BATCH):
X_tok, X_ner, X_srl, Y_inp, Y_outp = data
n = len(X_tok)
while True:
idx = np.random.permutation(n)
for i in range(0, n, batch):
b = idx[i : i + batch]
yield [X_tok[b], X_ner[b], X_srl[b], Y_inp[b]], Y_outp[b][..., None]
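# Note: the trailing [..., None] in the yield above reshapes the targets to
# (batch, MAX_Q_LEN, 1) so each target id lines up with the
# (batch, MAX_Q_LEN, vocab_q) softmax output under sparse_categorical_crossentropy.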
steps_train = len(train_idx) // BATCH
steps_valid = len(valid_idx) // BATCH
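# Integer division drops any final partial batch from the per-epoch step count;
# since the generator keeps cycling (and reshuffles on each full pass), the
# leftover samples are still consumed on later epochs.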
# ------------------------------------------------------------------------------
# 5. TRAIN
# ------------------------------------------------------------------------------
print("> Training …")
_ = model.fit(
generator(train_data),
steps_per_epoch=steps_train,
validation_data=generator(valid_data),
validation_steps=steps_valid,
epochs=EPOCHS,
)
model.save("qg_lstm_static.h5")
print("✓ Model saved to qg_lstm_static.h5")

58
QC/test_model_qc.py Normal file
View File

@ -0,0 +1,58 @@
import numpy as np

MAX_CTX_LEN = 50
MAX_Q_LEN = 30
# NOTE: this script assumes `model`, `encode`, and the vocab tables
# (w2i_ctx, w2i_q, i2w_q, t2i_ner, t2i_srl) from the training pipeline are in scope.
# -- dummy placeholders for your NER/SRL models --------------------------------
def predict_ner(tokens):  # replace with your implementation
    return ["O"] * len(tokens)
def predict_srl(tokens):  # replace with your implementation
    return ["O"] * len(tokens)
# ------------------------------------------------------------------------------
def greedy_decode(context_tokens):
    """Generate one question (greedy decoding)."""
    # 6.1 Tagging
    ner_tags = predict_ner(context_tokens)
    srl_tags = predict_srl(context_tokens)
    # 6.2 Encode and add a batch dimension (encode returns a plain list,
    # so wrap it in np.array before indexing with [None])
    ctx_ids = np.array(encode(context_tokens, w2i_ctx, MAX_CTX_LEN))[None]
    ner_ids = np.array(encode(ner_tags, t2i_ner, MAX_CTX_LEN))[None]
    srl_ids = np.array(encode(srl_tags, t2i_srl, MAX_CTX_LEN))[None]
dec_seq = [w2i_q["<bos>"]]
for _ in range(MAX_Q_LEN - 1):
dec_pad = dec_seq + [w2i_q["<pad>"]] * (MAX_Q_LEN - len(dec_seq))
pred = model.predict(
[ctx_ids, ner_ids, srl_ids, np.array([dec_pad])], verbose=0
)
next_id = int(pred[0, len(dec_seq) - 1].argmax())
if i2w_q[next_id] == "<eos>":
break
dec_seq.append(next_id)
tokens_q = [i2w_q[t] for t in dec_seq[1:]]
return " ".join(tokens_q)
if __name__ == "__main__":
sample = [
"Keberagaman",
"potensi",
"sumber",
"daya",
"alam",
"Indonesia",
"tidak",
"lepas",
"dari",
"proses",
"geografis",
".",
]
print("\n[CTX]", " ".join(sample))
print("[Q] ", greedy_decode(sample))

View File

@ -1,8 +1,136 @@
{
"tokens": ["Barack", "Obama", "lahir", "di", "Hawaii", "."],
"ner": ["B-PER", "I-PER", "O", "O", "B-LOC", "O"],
"srl": ["B-ARG0", "I-ARG0", "B-V", "B-ARGM-LOC", "I-ARGM-LOC", "O"],
"question": "___ lahir di Hawaii.",
"answer": "Barack Obama",
"type": "isian"
}
[
{
"tokens": [
"R.",
"Soewardi",
"Soerjaningrat",
"adalah",
"putra",
"GPH",
"Soerjaningrat",
"dan",
"cucu",
"Pakualam",
"III",
"."
],
"ner": [
"B-PER",
"I-PER",
"I-PER",
"O",
"O",
"B-PER",
"I-PER",
"O",
"O",
"B-PER",
"I-PER",
"O"
],
"srl": [
"ARG0",
"ARG0",
"ARG0",
"V",
"ARG1",
"ARG1",
"ARG1",
"ARG1",
"ARG1",
"ARG1",
"ARG1",
"O"
],
"question": "___ adalah putra GPH Soerjaningrat dan cucu Pakualam III.",
"answer": "R. Soewardi Soerjaningrat",
"type": "isian"
},
{
"tokens": ["Ia", "lantas", "diterima", "belajar", "di", "STOVIA", "."],
"ner": ["O", "O", "O", "O", "O", "B-ORG", "O"],
"srl": ["ARG0", "O", "V", "ARG1", "O", "ARGM-LOC", "O"],
"question": "Ia diterima belajar di ___.",
"answer": "STOVIA",
"type": "isian"
},
{
"tokens": [
"Ia",
"bersama",
"Douwes",
"Dekker",
"dan",
"dr.",
"Cipto",
"Mangoenkoesoemo",
"lantas",
"mendirikan",
"Indische",
"Partij",
"pada",
"25",
"Desember",
"1912",
"."
],
"ner": [
"O",
"O",
"B-PER",
"I-PER",
"O",
"B-PER",
"I-PER",
"I-PER",
"O",
"O",
"B-ORG",
"I-ORG",
"O",
"B-DATE",
"I-DATE",
"I-DATE",
"O"
],
"srl": [
"ARG0",
"ARG0",
"ARG0",
"ARG0",
"ARG0",
"ARG0",
"ARG0",
"ARG0",
"O",
"V",
"ARG1",
"ARG1",
"O",
"ARGM-TMP",
"ARGM-TMP",
"ARGM-TMP",
"O"
],
"question": "Ia bersama Douwes Dekker dan dr. Cipto Mangoenkoesoemo lantas mendirikan ___ pada 25 Desember 1912.",
"answer": "Indische Partij",
"type": "isian"
},
{
"tokens": [
"Indische",
"Partij",
"didirikan",
"pada",
"25",
"Desember",
"1912",
"."
],
"ner": ["B-ORG", "I-ORG", "O", "O", "B-DATE", "I-DATE", "I-DATE", "O"],
"srl": ["ARG1", "ARG1", "V", "O", "ARGM-TMP", "ARGM-TMP", "ARGM-TMP", "O"],
"question": "Indische Partij didirikan pada tanggal ___.",
"answer": "25 Desember 1912",
"type": "isian"
}
]

View File

@ -2009,4 +2009,44 @@ memasak O V
nasi O ARG1
di O O
dapur B-LOC ARGM-LOC
. O O
. O O
R. B-PER ARG0
Soewardi I-PER ARG0
Soerjaningrat I-PER ARG0
adalah O V
putra O ARG1
GPH B-PER ARG1
Soerjaningrat I-PER ARG1
dan O ARG1
cucu O ARG1
Pakualam B-PER ARG1
III I-PER ARG1
. O O
Ia O ARG0
bersama O ARG0
Douwes B-PER ARG0
Dekker I-PER ARG0
dan O ARG0
dr. B-PER ARG0
Cipto I-PER ARG0
Mangoenkoesoemo I-PER ARG0
lantas O O
mendirikan O V
Indische B-ORG ARG1
Partij I-ORG ARG1
pada O O
25 B-DATE ARGM-TMP
Desember I-DATE ARGM-TMP
1912 I-DATE ARGM-TMP
. O O
Indische B-ORG ARG1
Partij I-ORG ARG1
didirikan O V
pada O O
25 B-DATE ARGM-TMP
Desember I-DATE ARGM-TMP
1912 I-DATE ARGM-TMP
. O O