799 lines
60 KiB
Plaintext
799 lines
60 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 93,
|
||
"id": "fb283f23",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Total flattened samples: 342\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import json\n",
|
||
"from pathlib import Path\n",
|
||
"from itertools import chain\n",
|
||
"\n",
|
||
"RAW = json.loads(\n",
|
||
" Path(\"../dataset/dev_dataset_qg.json\").read_text()\n",
|
||
") # ← your example dataset file\n",
|
||
"\n",
|
||
"samples = []\n",
|
||
"for item in RAW:\n",
|
||
" for qp in item[\"quiz_posibility\"]:\n",
|
||
" samp = {\n",
|
||
" \"tokens\": [tok.lower() for tok in item[\"tokens\"]],\n",
|
||
" \"ner\": item[\"ner\"],\n",
|
||
" \"srl\": item[\"srl\"],\n",
|
||
" \"q_type\": qp[\"type\"], # isian / opsi / true_false\n",
|
||
" \"q_toks\": [tok.lower() for tok in qp[\"question\"]]\n",
|
||
" + [\"<eos>\"], # append <eos>\n",
|
||
" }\n",
|
||
" # The answer may consist of multiple tokens\n",
|
||
" if isinstance(qp[\"answer\"], list):\n",
|
||
" samp[\"a_toks\"] = [tok.lower() for tok in qp[\"answer\"]] + [\"<eos>\"]\n",
|
||
" else:\n",
|
||
" samp[\"a_toks\"] = [qp[\"answer\"].lower(), \"<eos>\"]\n",
|
||
" samples.append(samp)\n",
|
||
"\n",
|
||
"print(\"Total flattened samples:\", len(samples))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"id": "fa4f979d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{'<pad>': 0, '<unk>': 1, '<sos>': 2, '<eos>': 3, 'jepara': 4, 'false': 5, 'trowulan': 6, '17': 7, 'agustus': 8, '1945': 9, 'soekarno': 10, 'mohammad hatta': 11, '365': 12, 'hari': 13, 'merkurius': 14, 'true': 15, 'mars': 16, 'jupiter': 17, 'saturnus': 18, 'uranus': 19, 'neptunus': 20, '5': 21, 'januari': 22, '2020': 23, '12': 24, 'februari': 25, '2019': 26, '23': 27, 'maret': 28, '2021': 29, '1': 30, 'april': 31, '2022': 32, '15': 33, 'mei': 34, '2023': 35, 'gunung': 36, 'everest': 37, 'amazon': 38, 'piramida': 39, 'giza': 40, 'benua': 41, 'asia': 42, 'colosseum': 43, 'taj': 44, 'mahal': 45, 'petra': 46, 'tembok': 47, 'cina': 48, 'chichen': 49, 'itza': 50, 'patung': 51, 'yesus': 52, 'penebus': 53, 'machu': 54, 'picchu': 55, 'stonehenge': 56, 'menara': 57, 'pisa': 58, 'angkot': 59, 'wat': 60, '8848': 61, 'meter': 62, '17 agustus 1945': 63, 'albert': 64, 'einstein': 65, 'jantung': 66, 'memompa darah': 67, 'tokyo': 68, '100': 69, 'derajat': 70, 'celsius': 71, 'thomas': 72, 'alva': 73, 'edison': 74, '1879': 75, 'ketiga': 76, 'leonardo': 77, 'da': 78, 'vinci': 79, 'leonardo da vinci': 80, '9,46': 81, 'triliun': 82, 'kilometer': 83, 'mahatma': 84, 'gandhi': 85, '1958': 86, 'kornea': 87, 'waterloo': 88, '1815': 89, 'indonesia': 90, 'marie': 91, 'curie': 92, 'fisika dan kimia': 93, 'inka': 94, 'oksigen': 95, 'karbon dioksida dan air': 96, 'vincent': 97, 'van': 98, 'gogh': 99, 'double': 100, 'helix': 101, 'double helix': 102, 'alexander': 103, 'fleming': 104, 'jeruk': 105, 'dan': 106, 'kiwi': 107, 'vitamin c': 108, 'nikola': 109, 'tesla': 110, 'sungai': 111, 'nil': 112, '6650 kilometer': 113, 'paus': 114, 'biru': 115, 'pankreas': 116, 'mengatur gula darah': 117, 'charles': 118, 'darwin': 119, 'shah': 120, 'jahan': 121, 'mumtaz mahal': 122, '44.58 juta km²': 123, '54': 124, 'di selatan laut mediterania': 125, 'eropa': 126, '10.18 juta km²': 127, 'atlantik': 128, 'pasifik': 129, 'hutan amazon': 130, 'australia': 131, 'belahan bumi selatan': 132, 'antartika': 133, 'kutub 
selatan': 134, '4.7 miliar': 135, 'kilimanjaro': 136, '5,895 meter': 137, 'sahara': 138, 'afrika': 139, 'alpen': 140, '8': 141, 'superior': 142, 'danau superior': 143, 'amerika selatan': 144, 'ali': 145, 'turnamen': 146, 'nina': 147, 'rapat': 148, 'farhan': 149, 'andi': 150, 'workshop': 151, 'lina': 152, 'pameran': 153, 'iqbal': 154, 'siti': 155, 'perlombaan': 156, 'konser': 157, 'fajar': 158, 'dina': 159, 'festival': 160, 'rian': 161, 'bazar': 162, 'tari': 163, 'seminar': 164, 'kompetisi': 165, 'rudi': 166, 'putri': 167, 'budi': 168, 'hana': 169, 'raka': 170, 'dewi': 171, 'surabaya': 172, 'yogyakarta': 173, 'kota': 174, 'jakarta': 175, 'bandung': 176, 'malang': 177, 'bali': 178, 'padang': 179, 'ibukota': 180, 'makassar': 181, 'medan': 182}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"def build_vocab(seq_iter, reserved=[\"<pad>\", \"<unk>\", \"<sos>\", \"<eos>\"]):\n",
|
||
" vocab = {tok: idx for idx, tok in enumerate(reserved)}\n",
|
||
" for tok in chain.from_iterable(seq_iter):\n",
|
||
" if tok not in vocab:\n",
|
||
" vocab[tok] = len(vocab)\n",
|
||
" return vocab\n",
|
||
"\n",
|
||
"\n",
|
||
"vocab_tok = build_vocab((s[\"tokens\"] for s in samples))\n",
|
||
"vocab_ner = build_vocab((s[\"ner\"] for s in samples), reserved=[\"<pad>\", \"<unk>\"])\n",
|
||
"vocab_srl = build_vocab((s[\"srl\"] for s in samples), reserved=[\"<pad>\", \"<unk>\"])\n",
|
||
"vocab_q = build_vocab((s[\"q_toks\"] for s in samples))\n",
|
||
"vocab_a = build_vocab((s[\"a_toks\"] for s in samples))\n",
|
||
"\n",
|
||
"vocab_typ = {\"isian\": 0, \"opsi\": 1, \"true_false\": 2}\n",
|
||
"\n",
|
||
"print(vocab_a)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 95,
|
||
"id": "d1a5b324",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
|
||
"\n",
|
||
"\n",
|
||
"def encode(seq, vmap): # token → id\n",
|
||
" return [vmap.get(t, vmap[\"<unk>\"]) for t in seq]\n",
|
||
"\n",
|
||
"\n",
|
||
"MAX_SENT = max(len(s[\"tokens\"]) for s in samples)\n",
|
||
"MAX_Q = max(len(s[\"q_toks\"]) for s in samples)\n",
|
||
"MAX_A = max(len(s[\"a_toks\"]) for s in samples)\n",
|
||
"\n",
|
||
"X_tok = pad_sequences(\n",
|
||
" [encode(s[\"tokens\"], vocab_tok) for s in samples], maxlen=MAX_SENT, padding=\"post\"\n",
|
||
")\n",
|
||
"X_ner = pad_sequences(\n",
|
||
" [encode(s[\"ner\"], vocab_ner) for s in samples], maxlen=MAX_SENT, padding=\"post\"\n",
|
||
")\n",
|
||
"X_srl = pad_sequences(\n",
|
||
" [encode(s[\"srl\"], vocab_srl) for s in samples], maxlen=MAX_SENT, padding=\"post\"\n",
|
||
")\n",
|
||
"\n",
|
||
"# Decoder input = <sos> + target[:-1]\n",
|
||
"dec_q_in = pad_sequences(\n",
|
||
" [[vocab_q[\"<sos>\"], *encode(s[\"q_toks\"][:-1], vocab_q)] for s in samples],\n",
|
||
" maxlen=MAX_Q,\n",
|
||
" padding=\"post\",\n",
|
||
")\n",
|
||
"dec_q_out = pad_sequences(\n",
|
||
" [encode(s[\"q_toks\"], vocab_q) for s in samples], maxlen=MAX_Q, padding=\"post\"\n",
|
||
")\n",
|
||
"\n",
|
||
"dec_a_in = pad_sequences(\n",
|
||
" [[vocab_a[\"<sos>\"], *encode(s[\"a_toks\"][:-1], vocab_a)] for s in samples],\n",
|
||
" maxlen=MAX_A,\n",
|
||
" padding=\"post\",\n",
|
||
")\n",
|
||
"dec_a_out = pad_sequences(\n",
|
||
" [encode(s[\"a_toks\"], vocab_a) for s in samples], maxlen=MAX_A, padding=\"post\"\n",
|
||
")\n",
|
||
"\n",
|
||
"MAX_SENT = max(len(s[\"tokens\"]) for s in samples)\n",
|
||
"MAX_Q = max(len(s[\"q_toks\"]) for s in samples)\n",
|
||
"MAX_A = max(len(s[\"a_toks\"]) for s in samples)\n",
|
||
"y_type = np.array([vocab_typ[s[\"q_type\"]] for s in samples])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"id": "ff5bd85f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_8\"</span>\n",
|
||
"</pre>\n"
|
||
],
|
||
"text/plain": [
|
||
"\u001b[1mModel: \"functional_8\"\u001b[0m\n"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
|
||
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃<span style=\"font-weight: bold\"> Connected to </span>┃\n",
|
||
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
|
||
"│ tok_in (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ ner_in (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ srl_in (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_tok │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">57,856</span> │ tok_in[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_ner │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">32</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,248</span> │ ner_in[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_srl │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">32</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">448</span> │ srl_in[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ dec_q_in │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">13</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ concatenate_8 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">192</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ embedding_tok[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) │ │ │ embedding_ner[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
|
||
"│ │ │ │ embedding_srl[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ dec_a_in │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">4</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_q_decoder │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">13</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">52,096</span> │ dec_q_in[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ encoder_lstm (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ [(<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>), │ <span style=\"color: #00af00; text-decoration-color: #00af00\">459,776</span> │ concatenate_8[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
|
||
"│ │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>), │ │ │\n",
|
||
"│ │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>)] │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_a_decoder │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">4</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">23,424</span> │ dec_a_in[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ lstm_q_decoder │ [(<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">13</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>), │ <span style=\"color: #00af00; text-decoration-color: #00af00\">394,240</span> │ embedding_q_deco… │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>), │ │ encoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
||
"│ │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>)] │ │ encoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ not_equal_32 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">13</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ dec_q_in[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">NotEqual</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ lstm_a_decoder │ [(<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">4</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>), │ <span style=\"color: #00af00; text-decoration-color: #00af00\">394,240</span> │ embedding_a_deco… │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>), │ │ encoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
||
"│ │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>)] │ │ encoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ not_equal_33 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">4</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ dec_a_in[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">NotEqual</span>) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ q_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">13</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">407</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">104,599</span> │ lstm_q_decoder[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">TimeDistributed</span>) │ │ │ not_equal_32[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ a_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">4</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">183</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">47,031</span> │ lstm_a_decoder[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
|
||
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">TimeDistributed</span>) │ │ │ not_equal_33[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ type_output (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">3</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">771</span> │ encoder_lstm[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
||
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
|
||
"</pre>\n"
|
||
],
|
||
"text/plain": [
|
||
"┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
|
||
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n",
|
||
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
|
||
"│ tok_in (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ ner_in (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ srl_in (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_tok │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m57,856\u001b[0m │ tok_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
||
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_ner │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m1,248\u001b[0m │ ner_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
||
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_srl │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m448\u001b[0m │ srl_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
||
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ dec_q_in │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m13\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
||
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ concatenate_8 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m, \u001b[38;5;34m192\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ embedding_tok[\u001b[38;5;34m0\u001b[0m]… │\n",
|
||
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ embedding_ner[\u001b[38;5;34m0\u001b[0m]… │\n",
|
||
"│ │ │ │ embedding_srl[\u001b[38;5;34m0\u001b[0m]… │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ dec_a_in │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
||
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_q_decoder │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m13\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m52,096\u001b[0m │ dec_q_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
||
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ encoder_lstm (\u001b[38;5;33mLSTM\u001b[0m) │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ \u001b[38;5;34m459,776\u001b[0m │ concatenate_8[\u001b[38;5;34m0\u001b[0m]… │\n",
|
||
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ │ │\n",
|
||
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)] │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ embedding_a_decoder │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m23,424\u001b[0m │ dec_a_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
||
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ lstm_q_decoder │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m13\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ \u001b[38;5;34m394,240\u001b[0m │ embedding_q_deco… │\n",
|
||
"│ (\u001b[38;5;33mLSTM\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
||
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)] │ │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ not_equal_32 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m13\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dec_q_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
||
"│ (\u001b[38;5;33mNotEqual\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ lstm_a_decoder │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ \u001b[38;5;34m394,240\u001b[0m │ embedding_a_deco… │\n",
|
||
"│ (\u001b[38;5;33mLSTM\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
||
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)] │ │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ not_equal_33 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dec_a_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
||
"│ (\u001b[38;5;33mNotEqual\u001b[0m) │ │ │ │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ q_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m13\u001b[0m, \u001b[38;5;34m407\u001b[0m) │ \u001b[38;5;34m104,599\u001b[0m │ lstm_q_decoder[\u001b[38;5;34m0\u001b[0m… │\n",
|
||
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ not_equal_32[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ a_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m183\u001b[0m) │ \u001b[38;5;34m47,031\u001b[0m │ lstm_a_decoder[\u001b[38;5;34m0\u001b[0m… │\n",
|
||
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ not_equal_33[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
||
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
||
"│ type_output (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m771\u001b[0m │ encoder_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
||
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,535,729</span> (5.86 MB)\n",
|
||
"</pre>\n"
|
||
],
|
||
"text/plain": [
|
||
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m1,535,729\u001b[0m (5.86 MB)\n"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,535,729</span> (5.86 MB)\n",
|
||
"</pre>\n"
|
||
],
|
||
"text/plain": [
|
||
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m1,535,729\u001b[0m (5.86 MB)\n"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
|
||
"</pre>\n"
|
||
],
|
||
"text/plain": [
|
||
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"import tensorflow as tf\n",
|
||
"from tensorflow.keras.layers import (\n",
|
||
" Input,\n",
|
||
" Embedding,\n",
|
||
" LSTM,\n",
|
||
" Concatenate,\n",
|
||
" Dense,\n",
|
||
" TimeDistributed,\n",
|
||
")\n",
|
||
"from tensorflow.keras.models import Model\n",
|
||
"\n",
|
||
"# ---- constants ---------------------------------------------------\n",
|
||
"d_tok = 128 # token embedding dim\n",
|
||
"d_tag = 32 # NER / SRL embedding dim\n",
|
||
"units = 256\n",
|
||
"\n",
|
||
"# ---- encoder -----------------------------------------------------\n",
|
||
"inp_tok = Input((MAX_SENT,), name=\"tok_in\")\n",
|
||
"inp_ner = Input((MAX_SENT,), name=\"ner_in\")\n",
|
||
"inp_srl = Input((MAX_SENT,), name=\"srl_in\")\n",
|
||
"\n",
|
||
"# All three encoder embeddings use mask_zero=False so no stream carries a mask.\n",
|
||
"# Caveat: without a mask the encoder LSTM also steps over the trailing 0-padding.\n",
|
||
"emb_tok = Embedding(len(vocab_tok), d_tok, mask_zero=False, name=\"embedding_tok\")(\n",
|
||
" inp_tok\n",
|
||
")\n",
|
||
"emb_ner = Embedding(len(vocab_ner), d_tag, mask_zero=False, name=\"embedding_ner\")(\n",
|
||
" inp_ner\n",
|
||
")\n",
|
||
"emb_srl = Embedding(len(vocab_srl), d_tag, mask_zero=False, name=\"embedding_srl\")(\n",
|
||
" inp_srl\n",
|
||
")\n",
|
||
"\n",
|
||
"enc_concat = Concatenate()([emb_tok, emb_ner, emb_srl])\n",
|
||
"enc_out, state_h, state_c = LSTM(units, return_state=True, name=\"encoder_lstm\")(\n",
|
||
" enc_concat\n",
|
||
")\n",
|
||
"\n",
|
||
"\n",
|
||
"# ---------- DECODER : Question ----------\n",
|
||
"dec_q_inp = Input(shape=(MAX_Q,), name=\"dec_q_in\")\n",
|
||
"dec_emb_q = Embedding(len(vocab_q), d_tok, mask_zero=True, name=\"embedding_q_decoder\")(\n",
|
||
" dec_q_inp\n",
|
||
")\n",
|
||
"dec_q, _, _ = LSTM(\n",
|
||
" units, return_state=True, return_sequences=True, name=\"lstm_q_decoder\"\n",
|
||
")(dec_emb_q, initial_state=[state_h, state_c])\n",
|
||
"q_out = TimeDistributed(\n",
|
||
" Dense(len(vocab_q), activation=\"softmax\", name=\"dense_q_output\"), name=\"q_output\"\n",
|
||
")(dec_q)\n",
|
||
"\n",
|
||
"# ---------- DECODER : Answer ----------\n",
|
||
"dec_a_inp = Input(shape=(MAX_A,), name=\"dec_a_in\")\n",
|
||
"dec_emb_a = Embedding(len(vocab_a), d_tok, mask_zero=True, name=\"embedding_a_decoder\")(\n",
|
||
" dec_a_inp\n",
|
||
")\n",
|
||
"dec_a, _, _ = LSTM(\n",
|
||
" units, return_state=True, return_sequences=True, name=\"lstm_a_decoder\"\n",
|
||
")(dec_emb_a, initial_state=[state_h, state_c])\n",
|
||
"a_out = TimeDistributed(\n",
|
||
" Dense(len(vocab_a), activation=\"softmax\", name=\"dense_a_output\"), name=\"a_output\"\n",
|
||
")(dec_a)\n",
|
||
"\n",
|
||
"# ---------- CLASSIFIER : Question Type ----------\n",
|
||
"type_out = Dense(len(vocab_typ), activation=\"softmax\", name=\"type_output\")(enc_out)\n",
|
||
"\n",
|
||
"model = Model(\n",
|
||
" inputs=[inp_tok, inp_ner, inp_srl, dec_q_inp, dec_a_inp],\n",
|
||
" outputs=[q_out, a_out, type_out],\n",
|
||
")\n",
|
||
"\n",
|
||
"model.summary()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"id": "fece1ae9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Epoch 1/30\n",
|
||
"\u001b[1m5/5\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 161ms/step - a_output_loss: 5.1540 - a_output_sparse_categorical_accuracy: 0.1507 - loss: 11.4761 - q_output_loss: 5.9970 - q_output_sparse_categorical_accuracy: 0.0600 - type_output_accuracy: 0.4506 - type_output_loss: 1.0728 - val_a_output_loss: 4.5900 - val_a_output_sparse_categorical_accuracy: 0.2500 - val_loss: 10.8292 - val_q_output_loss: 5.9316 - val_q_output_sparse_categorical_accuracy: 0.0769 - val_type_output_accuracy: 0.5143 - val_type_output_loss: 1.0253\n",
|
||
"Epoch 2/30\n",
|
||
"\u001b[1m5/5\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 48ms/step - a_output_loss: 4.2365 - a_output_sparse_categorical_accuracy: 0.2500 - loss: 10.2493 - q_output_loss: 5.6397 - q_output_sparse_categorical_accuracy: 0.1183 - type_output_accuracy: 0.5209 - type_output_loss: 1.2188 - val_a_output_loss: 3.2588 - val_a_output_sparse_categorical_accuracy: 0.2500 - val_loss: 9.0808 - val_q_output_loss: 5.4082 - val_q_output_sparse_categorical_accuracy: 0.0923 - val_type_output_accuracy: 0.5143 - val_type_output_loss: 1.3791\n",
|
||
"Epoch 3/30\n",
|
||
"\u001b[1m5/5\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 48ms/step - a_output_loss: 3.5259 - a_output_sparse_categorical_accuracy: 0.2500 - loss: 8.4974 - q_output_loss: 4.6444 - q_output_sparse_categorical_accuracy: 0.1174 - type_output_accuracy: 0.5233 - type_output_loss: 1.0788 - val_a_output_loss: 3.3879 - val_a_output_sparse_categorical_accuracy: 0.2500 - val_loss: 9.5209 - val_q_output_loss: 5.7546 - val_q_output_sparse_categorical_accuracy: 0.0769 - val_type_output_accuracy: 0.2000 - val_type_output_loss: 1.2615\n",
|
||
"Epoch 4/30\n",
|
||
"\u001b[1m5/5\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 48ms/step - a_output_loss: 3.3147 - a_output_sparse_categorical_accuracy: 0.2500 - loss: 8.1027 - q_output_loss: 4.4209 - q_output_sparse_categorical_accuracy: 0.1099 - type_output_accuracy: 0.3256 - type_output_loss: 1.2069 - val_a_output_loss: 3.0792 - val_a_output_sparse_categorical_accuracy: 0.2500 - val_loss: 9.2232 - val_q_output_loss: 5.8382 - val_q_output_sparse_categorical_accuracy: 0.0769 - val_type_output_accuracy: 0.5143 - val_type_output_loss: 1.0193\n",
|
||
"Epoch 5/30\n",
|
||
"\u001b[1m5/5\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 48ms/step - a_output_loss: 3.1559 - a_output_sparse_categorical_accuracy: 0.2500 - loss: 7.7733 - q_output_loss: 4.3048 - q_output_sparse_categorical_accuracy: 0.1120 - type_output_accuracy: 0.5160 - type_output_loss: 1.0414 - val_a_output_loss: 3.0450 - val_a_output_sparse_categorical_accuracy: 0.2500 - val_loss: 9.1657 - val_q_output_loss: 5.7943 - val_q_output_sparse_categorical_accuracy: 0.0923 - val_type_output_accuracy: 0.5143 - val_type_output_loss: 1.0881\n",
|
||
"Epoch 6/30\n",
|
||
"\u001b[1m5/5\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 48ms/step - a_output_loss: 3.0962 - a_output_sparse_categorical_accuracy: 0.2569 - loss: 7.6096 - q_output_loss: 4.1973 - q_output_sparse_categorical_accuracy: 0.1121 - type_output_accuracy: 0.5318 - type_output_loss: 1.0492 - val_a_output_loss: 3.1428 - val_a_output_sparse_categorical_accuracy: 0.3214 - val_loss: 9.2982 - val_q_output_loss: 5.8475 - val_q_output_sparse_categorical_accuracy: 0.0769 - val_type_output_accuracy: 0.5143 - val_type_output_loss: 1.0265\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Every head is trained with sparse cross-entropy.\n",
"LOSS_NAME = \"sparse_categorical_crossentropy\"\n",
"losses = {\n",
"    \"q_output\": LOSS_NAME,\n",
"    \"a_output\": LOSS_NAME,\n",
"    \"type_output\": LOSS_NAME,\n",
"}\n",
"# The question-type head is down-weighted relative to the two decoders.\n",
"loss_weights = {\"q_output\": 1.0, \"a_output\": 1.0, \"type_output\": 0.3}\n",
"\n",
"head_metrics = {\n",
"    \"q_output\": \"sparse_categorical_accuracy\",\n",
"    \"a_output\": \"sparse_categorical_accuracy\",\n",
"    \"type_output\": \"accuracy\",\n",
"}\n",
"\n",
"model.compile(\n",
"    optimizer=\"adam\",\n",
"    loss=losses,\n",
"    loss_weights=loss_weights,\n",
"    metrics=head_metrics,\n",
")\n",
"\n",
"# Stop after 4 epochs without improvement and roll back to the best weights.\n",
"early_stopping = tf.keras.callbacks.EarlyStopping(\n",
"    patience=4, restore_best_weights=True\n",
")\n",
"\n",
"train_inputs = [X_tok, X_ner, X_srl, dec_q_in, dec_a_in]\n",
"train_targets = [dec_q_out, dec_a_out, y_type]\n",
"\n",
"history = model.fit(\n",
"    train_inputs,\n",
"    train_targets,\n",
"    validation_split=0.1,\n",
"    epochs=30,\n",
"    batch_size=64,\n",
"    callbacks=[early_stopping],\n",
"    verbose=1,\n",
")\n",
"\n",
"# Persist the trained multi-head model for the inference cells below.\n",
"model.save(\"full_seq2seq.keras\")\n",
|
||
"\n",
|
||
"import json\n",
|
||
"import pickle\n",
|
||
"\n",
|
||
"# def save_vocab(vocab, path):\n",
|
||
"# with open(path, \"w\", encoding=\"utf-8\") as f:\n",
|
||
"# json.dump(vocab, f, ensure_ascii=False, indent=2)\n",
|
||
"\n",
|
||
"# # Simpan semua vocab\n",
|
||
"# save_vocab(vocab_tok, \"vocab_tok.json\")\n",
|
||
"# save_vocab(vocab_ner, \"vocab_ner.json\")\n",
|
||
"# save_vocab(vocab_srl, \"vocab_srl.json\")\n",
|
||
"# save_vocab(vocab_q, \"vocab_q.json\")\n",
|
||
"# save_vocab(vocab_a, \"vocab_a.json\")\n",
|
||
"# save_vocab(vocab_typ, \"vocab_typ.json\")\n",
|
||
"\n",
|
||
"\n",
|
"def save_vocab_pkl(vocab, path):\n",
"    \"\"\"Pickle ``vocab`` to the file at ``path``, overwriting any existing file.\"\"\"\n",
"    with open(path, \"wb\") as out_file:\n",
"        pickle.dump(vocab, out_file)\n",
|
||
"\n",
|
||
"\n",
|
"# Save every vocabulary as <name>.pkl in the working directory\n",
"for vocab_name, vocab_map in (\n",
"    (\"vocab_tok\", vocab_tok),\n",
"    (\"vocab_ner\", vocab_ner),\n",
"    (\"vocab_srl\", vocab_srl),\n",
"    (\"vocab_q\", vocab_q),\n",
"    (\"vocab_a\", vocab_a),\n",
"    (\"vocab_typ\", vocab_typ),\n",
"):\n",
"    save_vocab_pkl(vocab_map, f\"{vocab_name}.pkl\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"id": "3355c0c7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import tensorflow as tf\n",
|
||
"import numpy as np\n",
|
||
"import pickle\n",
|
||
"from tensorflow.keras.models import load_model, Model\n",
|
||
"from tensorflow.keras.layers import Input, Concatenate\n",
|
||
"\n",
|
||
"# === Load Model Utama ===\n",
|
||
"model = load_model(\"full_seq2seq.keras\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# === Load Vocabulary dari .pkl ===\n",
|
"def load_vocab(path):\n",
"    \"\"\"Return the object unpickled from the file at ``path``.\n",
"\n",
"    Unpickling can execute arbitrary code, so only use this on files that\n",
"    this notebook wrote itself.\n",
"    \"\"\"\n",
"    with open(path, \"rb\") as in_file:\n",
"        loaded = pickle.load(in_file)\n",
"    return loaded\n",
|
||
"\n",
|
||
"\n",
|
"vocab_tok = load_vocab(\"vocab_tok.pkl\")\n",
"vocab_ner = load_vocab(\"vocab_ner.pkl\")\n",
"vocab_srl = load_vocab(\"vocab_srl.pkl\")\n",
"vocab_q = load_vocab(\"vocab_q.pkl\")\n",
"vocab_a = load_vocab(\"vocab_a.pkl\")\n",
"vocab_typ = load_vocab(\"vocab_typ.pkl\")\n",
"\n",
"# Reverse maps (id -> token) for turning decoder output back into words\n",
"inv_vocab_q = {v: k for k, v in vocab_q.items()}\n",
"inv_vocab_a = {v: k for k, v in vocab_a.items()}\n",
"\n",
"# === Build Encoder Model ===\n",
"# Sequence lengths are read back from the loaded model's input shapes\n",
"MAX_SENT = model.input_shape[0][1]  # length of the first (token) input\n",
"MAX_Q = model.input_shape[3][1]  # max length for question\n",
"MAX_A = model.input_shape[4][1]  # max length for answer\n",
"\n",
"inp_tok_g = Input(shape=(MAX_SENT,), name=\"tok_in_g\")\n",
"inp_ner_g = Input(shape=(MAX_SENT,), name=\"ner_in_g\")\n",
"inp_srl_g = Input(shape=(MAX_SENT,), name=\"srl_in_g\")\n",
"\n",
"# The trained embedding layers are reused through their .call() method,\n",
"# exactly as in the original graph construction.\n",
"emb_tok = model.get_layer(\"embedding_tok\").call(inp_tok_g)\n",
"emb_ner = model.get_layer(\"embedding_ner\").call(inp_ner_g)\n",
"emb_srl = model.get_layer(\"embedding_srl\").call(inp_srl_g)\n",
"\n",
"enc_concat = Concatenate(name=\"concat_encoder\")([emb_tok, emb_ner, emb_srl])\n",
"\n",
"encoder_lstm = model.get_layer(\"encoder_lstm\")\n",
"enc_out, state_h, state_c = encoder_lstm(enc_concat)\n",
"\n",
"# Encoder model exposing the final output plus both LSTM states\n",
"encoder_model = Model(\n",
"    inputs=[inp_tok_g, inp_ner_g, inp_srl_g],\n",
"    outputs=[enc_out, state_h, state_c],\n",
"    name=\"encoder_model\",\n",
")\n",
"\n",
"# === Build Decoder for Question ===\n",
"# Step-wise decoder: one token id plus the previous states per call\n",
"dec_q_inp = Input(shape=(1,), name=\"dec_q_in\")\n",
"dec_emb_q = model.get_layer(\"embedding_q_decoder\").call(dec_q_inp)\n",
"\n",
"lstm_decoder_q = model.get_layer(\"lstm_q_decoder\")\n",
"\n",
"# The state width is read from the trained layer instead of a hard-coded\n",
"# 256, so retraining with a different `units` keeps this graph valid.\n",
"state_h_dec = Input(shape=(lstm_decoder_q.units,), name=\"state_h_dec\")\n",
"state_c_dec = Input(shape=(lstm_decoder_q.units,), name=\"state_c_dec\")\n",
"\n",
"dec_out_q, state_h_q, state_c_q = lstm_decoder_q(\n",
"    dec_emb_q, initial_state=[state_h_dec, state_c_dec]\n",
")\n",
"\n",
"# Apply the Dense layer wrapped by the TimeDistributed output layer\n",
"q_time_dist_layer = model.get_layer(\"q_output\")\n",
"dense_q = q_time_dist_layer.layer\n",
"q_output = dense_q(dec_out_q)\n",
"\n",
"decoder_q = Model(\n",
"    inputs=[dec_q_inp, state_h_dec, state_c_dec],\n",
"    outputs=[q_output, state_h_q, state_c_q],\n",
"    name=\"decoder_question_model\",\n",
")\n",
"\n",
"# === Build Decoder for Answer ===\n",
"dec_a_inp = Input(shape=(1,), name=\"dec_a_in\")\n",
"dec_emb_a = model.get_layer(\"embedding_a_decoder\").call(dec_a_inp)\n",
"\n",
"lstm_decoder_a = model.get_layer(\"lstm_a_decoder\")\n",
"\n",
"# Same as above: state width follows the trained answer decoder\n",
"state_h_a = Input(shape=(lstm_decoder_a.units,), name=\"state_h_a\")\n",
"state_c_a = Input(shape=(lstm_decoder_a.units,), name=\"state_c_a\")\n",
"\n",
"dec_out_a, state_h_a_out, state_c_a_out = lstm_decoder_a(\n",
"    dec_emb_a, initial_state=[state_h_a, state_c_a]\n",
")\n",
"\n",
"a_time_dist_layer = model.get_layer(\"a_output\")\n",
"dense_a = a_time_dist_layer.layer\n",
"a_output = dense_a(dec_out_a)\n",
"\n",
"decoder_a = Model(\n",
"    inputs=[dec_a_inp, state_h_a, state_c_a],\n",
"    outputs=[a_output, state_h_a_out, state_c_a_out],\n",
"    name=\"decoder_answer_model\",\n",
")\n",
"\n",
"# === Build Classifier for Question Type ===\n",
"# Reuses the trained type head on top of the rebuilt encoder output\n",
"type_dense = model.get_layer(\"type_output\")\n",
"type_out = type_dense(enc_out)\n",
"\n",
"classifier_model = Model(\n",
"    inputs=[inp_tok_g, inp_ner_g, inp_srl_g], outputs=type_out, name=\"classifier_model\"\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"id": "d406e6ff",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Generated Question: menghadiri menghadiri ___ ___\n",
|
||
"Generated Answer : \n",
|
||
"Question Type : isian\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"def encode(seq, vmap):\n",
"    \"\"\"Map each token of ``seq`` to its id in ``vmap``; unknown tokens get the ``<unk>`` id.\"\"\"\n",
"    ids = []\n",
"    for token in seq:\n",
"        ids.append(vmap.get(token, vmap[\"<unk>\"]))\n",
"    return ids\n",
|
||
"\n",
|
||
"\n",
|
"def encode_and_pad(seq, vmap, max_len=MAX_SENT):\n",
"    \"\"\"Encode ``seq`` with ``vmap`` and return exactly ``max_len`` ids.\n",
"\n",
"    Unknown tokens map to ``<unk>``. Short sequences are right-padded with\n",
"    the ``<pad>`` id; long ones are truncated on the right.\n",
"    \"\"\"\n",
"    ids = []\n",
"    for token in seq:\n",
"        ids.append(vmap.get(token, vmap[\"<unk>\"]))\n",
"    pad_id = vmap[\"<pad>\"]\n",
"    # A negative count yields an empty list, so over-long input is not padded\n",
"    ids.extend([pad_id] * (max_len - len(ids)))\n",
"    return ids[:max_len]\n",
|
||
"\n",
|
||
"\n",
|
"def _greedy_steps(decoder, h, c, sos_id, eos_id, max_len):\n",
"    \"\"\"Greedily run a one-step decoder model starting from states (h, c).\n",
"\n",
"    Feeds ``sos_id`` first, then feeds back the arg-max id of every step.\n",
"    Stops when ``eos_id`` is predicted or after ``max_len`` steps and returns\n",
"    the emitted ids (``eos_id`` excluded).\n",
"    \"\"\"\n",
"    tgt = np.array([[sos_id]])\n",
"    out_ids = []\n",
"    for _ in range(max_len):\n",
"        probs, h, c = decoder.predict([tgt, h, c], verbose=0)\n",
"        next_id = int(probs[0, 0].argmax())  # most likely token of this step\n",
"        if next_id == eos_id:\n",
"            break\n",
"        out_ids.append(next_id)\n",
"        tgt = np.array([[next_id]])  # feed the prediction back as next input\n",
"    return out_ids\n",
"\n",
"\n",
"def greedy_decode(tokens, ner, srl, max_q=20, max_a=10):\n",
"    \"\"\"Generate a question, an answer and a question type for one sentence.\n",
"\n",
"    Args:\n",
"        tokens, ner, srl: either already-encoded numpy arrays (passed to the\n",
"            models unchanged) or parallel lists of raw tokens / NER tags /\n",
"            SRL tags, which are encoded and padded to MAX_SENT here.\n",
"        max_q: maximum number of question tokens to generate.\n",
"        max_a: maximum number of answer tokens to generate.\n",
"\n",
"    Returns:\n",
"        (question, answer, q_type): two lists of token strings and the name\n",
"        of the predicted question type.\n",
"    \"\"\"\n",
"    # --- encode encoder inputs -------------------------------------------\n",
"    if isinstance(tokens, np.ndarray):\n",
"        enc_tok, enc_ner, enc_srl = tokens, ner, srl\n",
"    else:\n",
"        # The preprocessing cell lower-cases sentence tokens, so raw input is\n",
"        # lower-cased too; otherwise capitalised words map to <unk>.\n",
"        # Tags are left unchanged.\n",
"        lowered = [tok.lower() for tok in tokens]\n",
"        enc_tok = np.array([encode_and_pad(lowered, vocab_tok, MAX_SENT)])\n",
"        enc_ner = np.array([encode_and_pad(ner, vocab_ner, MAX_SENT)])\n",
"        enc_srl = np.array([encode_and_pad(srl, vocab_srl, MAX_SENT)])\n",
"    enc_inputs = [enc_tok, enc_ner, enc_srl]\n",
"\n",
"    # --- single encoder pass; both decoders start from these states ------\n",
"    _, h0, c0 = encoder_model.predict(enc_inputs, verbose=0)\n",
"\n",
"    question_ids = _greedy_steps(\n",
"        decoder_q, h0, c0, vocab_q[\"<sos>\"], vocab_q[\"<eos>\"], max_q\n",
"    )\n",
"    answer_ids = _greedy_steps(\n",
"        decoder_a, h0, c0, vocab_a[\"<sos>\"], vocab_a[\"<eos>\"], max_a\n",
"    )\n",
"\n",
"    # --- question type (first sample of the batch) -----------------------\n",
"    qtype_probs = classifier_model.predict(enc_inputs, verbose=0)\n",
"    qtype_id = int(qtype_probs[0].argmax())\n",
"\n",
"    # --- ids back to strings ---------------------------------------------\n",
"    question = [inv_vocab_q.get(i, \"<unk>\") for i in question_ids]\n",
"    answer = [inv_vocab_a.get(i, \"<unk>\") for i in answer_ids]\n",
"    q_type = [k for k, v in vocab_typ.items() if v == qtype_id][0]\n",
"\n",
"    return question, answer, q_type\n",
|
||
"\n",
|
||
"\n",
|
"def test_model():\n",
"    \"\"\"Run greedy_decode on one hand-written sentence and print the result.\"\"\"\n",
"    # Alternative sample kept for manual experiments:\n",
"    #   tokens: soekarno membacakan teks proklamasi pada 17 agustus 1945\n",
"    #   ner   : B-PER O O O O B-DATE I-DATE I-DATE\n",
"    #   srl   : ARG0 V ARG1 ARG1 O ARGM-TMP ARGM-TMP ARGM-TMP\n",
"    test_data = dict(\n",
"        tokens=[\"nama\", \"lengkap\", \"saya\", \"Maya\", \"Maya\"],\n",
"        ner=[\"O\", \"O\", \"O\", \"B-PER\", \"B-PER\"],\n",
"        srl=[\"O\", \"O\", \"ARG0\", \"ARG0\", \"ARG0\"],\n",
"    )\n",
"\n",
"    # The dict keys match greedy_decode's parameter names\n",
"    question, answer, q_type = greedy_decode(**test_data)\n",
"\n",
"    for line in (\n",
"        f\"Generated Question: {' '.join(question)}\",\n",
"        f\"Generated Answer : {' '.join(answer)}\",\n",
"        f\"Question Type : {q_type}\",\n",
"    ):\n",
"        print(line)\n",
|
||
"\n",
|
||
"\n",
|
||
"test_model()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 100,
|
||
"id": "5adde3c3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"BLEU : 0.0385\n",
|
||
"ROUGE1: 0.1052 | ROUGE-L: 0.1052\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction\n",
|
||
"from rouge_score import rouge_scorer\n",
|
||
"\n",
|
"# BLEU smoothing function (NLTK method4) passed to sentence_bleu below\n",
"smoothie = SmoothingFunction().method4\n",
"# rouge_score's stemmer is the English Porter stemmer; the questions are\n",
"# Indonesian, so stemming is switched off to avoid mangling the tokens.\n",
"scorer = rouge_scorer.RougeScorer([\"rouge1\", \"rougeL\"], use_stemmer=False)\n",
|
||
"\n",
|
||
"\n",
|
||
"# Helper to strip special ids\n",
|
"def strip_special(ids, vocab):\n",
"    \"\"\"Return ``ids`` without the ``<pad>`` and ``<eos>`` ids of ``vocab``.\n",
"\n",
"    ``<eos>`` must be present in ``vocab``; ``<pad>`` is optional.\n",
"    \"\"\"\n",
"    blocked = (vocab.get(\"<pad>\"), vocab[\"<eos>\"])\n",
"    kept = []\n",
"    for token_id in ids:\n",
"        if token_id in blocked:\n",
"            continue\n",
"        kept.append(token_id)\n",
"    return kept\n",
|
||
"\n",
|
||
"\n",
|
"def ids_to_text(ids, inv_vocab):\n",
"    \"\"\"Look every id up in ``inv_vocab`` and join the tokens with single spaces.\"\"\"\n",
"    words = [inv_vocab[token_id] for token_id in ids]\n",
"    return \" \".join(words)\n",
|
||
"\n",
|
||
"\n",
|
||
"# ---- evaluation over a set of indices ----\n",
|
||
"import random\n",
|
||
"\n",
|
||
"\n",
|
"def evaluate(indices=None, seed=None):\n",
"    \"\"\"Print mean BLEU / ROUGE-1 / ROUGE-L of generated questions.\n",
"\n",
"    Args:\n",
"        indices: positions in the module-level arrays (X_tok, dec_q_out, ...)\n",
"            to score. When None, up to 100 positions are sampled at random.\n",
"        seed: optional seed for that random sample, for repeatable scores.\n",
"            The default (None) keeps the sampling unseeded.\n",
"\n",
"    Only the question is scored; the generated answer is not evaluated.\n",
"    \"\"\"\n",
"    if indices is None:\n",
"        rng = random.Random(seed)\n",
"        indices = rng.sample(range(len(X_tok)), k=min(100, len(X_tok)))\n",
"    indices = list(indices)\n",
"    if not indices:\n",
"        # Averaging empty score lists would only print NaN with a warning\n",
"        print(\"No samples to evaluate.\")\n",
"        return\n",
"\n",
"    bleu_scores, rou1, rouL = [], [], []\n",
"    for idx in indices:\n",
"        # Ground-truth question ids without <pad>/<eos>\n",
"        gt_q = strip_special(dec_q_out[idx], vocab_q)\n",
"        # Prediction: slices keep the batch dimension expected by the models\n",
"        q_pred, _, _ = greedy_decode(\n",
"            X_tok[idx : idx + 1], X_ner[idx : idx + 1], X_srl[idx : idx + 1]\n",
"        )\n",
"\n",
"        # BLEU on question tokens\n",
"        bleu_scores.append(\n",
"            sentence_bleu(\n",
"                [[inv_vocab_q[i] for i in gt_q]], q_pred, smoothing_function=smoothie\n",
"            )\n",
"        )\n",
"        # ROUGE on question strings\n",
"        r = scorer.score(ids_to_text(gt_q, inv_vocab_q), \" \".join(q_pred))\n",
"        rou1.append(r[\"rouge1\"].fmeasure)\n",
"        rouL.append(r[\"rougeL\"].fmeasure)\n",
"\n",
"    print(f\"BLEU : {np.mean(bleu_scores):.4f}\")\n",
"    print(f\"ROUGE1: {np.mean(rou1):.4f} | ROUGE-L: {np.mean(rouL):.4f}\")\n",
|
||
"\n",
|
||
"\n",
|
||
"evaluate()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "myenv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.16"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|