582 lines
44 KiB
Plaintext
582 lines
44 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 92,
|
|
"id": "9bf2159a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import json\n",
|
|
"import numpy as np\n",
|
|
"from pathlib import Path\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
|
|
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
|
|
"from tensorflow.keras.utils import to_categorical\n",
|
|
"\n",
|
|
"from tensorflow.keras.models import Model\n",
|
|
"from tensorflow.keras.layers import (\n",
|
|
" Input,\n",
|
|
" Embedding,\n",
|
|
" LSTM,\n",
|
|
" Concatenate,\n",
|
|
" Dense,\n",
|
|
" TimeDistributed,\n",
|
|
")\n",
|
|
"from tensorflow.keras.callbacks import EarlyStopping\n",
|
|
"from sklearn.metrics import classification_report"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 104,
|
|
"id": "50118278",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
" Jumlah data valid: 732 / 732\n",
|
|
" Jumlah data tidak valid: 0\n",
|
|
"\n",
|
|
"Contoh pertanyaan dari data valid:\n",
|
|
"[\n",
|
|
" [\n",
|
|
" \"___\",\n",
|
|
" \"adalah\",\n",
|
|
" \"putra\",\n",
|
|
" \"gph\",\n",
|
|
" \"soerjaningrat\",\n",
|
|
" \"dan\",\n",
|
|
" \"cucu\",\n",
|
|
" \"pakualam\",\n",
|
|
" \"iii.\"\n",
|
|
" ],\n",
|
|
" [\n",
|
|
" \"ia\",\n",
|
|
" \"diterima\",\n",
|
|
" \"belajar\",\n",
|
|
" \"di\",\n",
|
|
" \"___.\"\n",
|
|
" ],\n",
|
|
" [\n",
|
|
" \"ia\",\n",
|
|
" \"bersama\",\n",
|
|
" \"douwes\",\n",
|
|
" \"dekker\",\n",
|
|
" \"dan\",\n",
|
|
" \"dr.\",\n",
|
|
" \"cipto\",\n",
|
|
" \"mangoenkoesoemo\",\n",
|
|
" \"lantas\",\n",
|
|
" \"mendirikan\",\n",
|
|
" \"___\",\n",
|
|
" \"pada\",\n",
|
|
" \"25\",\n",
|
|
" \"desember\",\n",
|
|
" \"1912.\"\n",
|
|
" ]\n",
|
|
"]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load the raw dataset from disk.\n",
|
|
"with open(\"normalized_dataset.json\", encoding=\"utf-8\") as f:\n",
|
|
"    raw_data = json.load(f)\n",
|
|
"\n",
|
|
"# Full validation: every record must be a dict that carries all required\n",
|
|
"# keys, with list-valued sequence fields and a string-valued type.\n",
|
|
"required_keys = {\"tokens\", \"ner\", \"srl\", \"question\", \"answer\", \"type\"}\n",
|
|
"list_fields = (\"tokens\", \"ner\", \"srl\", \"question\", \"answer\")\n",
|
|
"valid_data = []\n",
|
|
"invalid_data = []\n",
|
|
"\n",
|
|
"for idx, item in enumerate(raw_data):\n",
|
|
"    error_messages = []\n",
|
|
"\n",
|
|
"    if not isinstance(item, dict):\n",
|
|
"        # A non-dict record has no .keys(); report it and skip the key check\n",
|
|
"        # instead of crashing with AttributeError.\n",
|
|
"        error_messages.append(\"bukan dictionary\")\n",
|
|
"    else:\n",
|
|
"        missing_keys = required_keys - item.keys()\n",
|
|
"        if missing_keys:\n",
|
|
"            error_messages.append(f\"missing keys: {missing_keys}\")\n",
|
|
"\n",
|
|
"    if not error_messages:\n",
|
|
"        # Check field types; a None value fails these isinstance checks too.\n",
|
|
"        lists_ok = all(isinstance(item[key], list) for key in list_fields)\n",
|
|
"        if not (lists_ok and isinstance(item[\"type\"], str)):\n",
|
|
"            error_messages.append(\"field type tidak sesuai\")\n",
|
|
"\n",
|
|
"    if error_messages:\n",
|
|
"        # Show the offending record and keep it aside; it is not used below.\n",
|
|
"        print(f\"\\n Index {idx} | Masalah: {', '.join(error_messages)}\")\n",
|
|
"        print(json.dumps(item, indent=2, ensure_ascii=False))\n",
|
|
"        invalid_data.append(item)\n",
|
|
"        continue\n",
|
|
"\n",
|
|
"    valid_data.append(item)\n",
|
|
"\n",
|
|
"# Statistics\n",
|
|
"print(f\"\\n Jumlah data valid: {len(valid_data)} / {len(raw_data)}\")\n",
|
|
"print(f\" Jumlah data tidak valid: {len(invalid_data)}\")\n",
|
|
"\n",
|
|
"# Build the per-field lists from the valid records. Text fields are\n",
|
|
"# lower-cased and stripped; tag fields are taken as they are.\n",
|
|
"tokens = [[t.lower().strip() for t in item[\"tokens\"]] for item in valid_data]\n",
|
|
"ner_tags = [item[\"ner\"] for item in valid_data]\n",
|
|
"srl_tags = [item[\"srl\"] for item in valid_data]\n",
|
|
"questions = [[token.lower().strip() for token in item[\"question\"]] for item in valid_data]\n",
|
|
"answers = [[token.lower().strip() for token in item[\"answer\"]] for item in valid_data]\n",
|
|
"types = [item[\"type\"] for item in valid_data]\n",
|
|
"\n",
|
|
"print(\"\\nContoh pertanyaan dari data valid:\")\n",
|
|
"print(json.dumps(questions[:3], indent=2, ensure_ascii=False))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 94,
|
|
"id": "4e3a0088",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Build one vocabulary per stream; lower=False keeps the casing produced\n",
|
|
"# by the loading step.\n",
|
|
"\n",
|
|
"# Context tokens: the only tokenizer with an out-of-vocabulary entry (\"UNK\").\n",
|
|
"token_tok = Tokenizer(lower=False, oov_token=\"UNK\")\n",
|
|
"token_tok.fit_on_texts(tokens)\n",
|
|
"\n",
|
|
"# Tag vocabularies (NER and SRL labels).\n",
|
|
"token_ner = Tokenizer(lower=False)\n",
|
|
"token_ner.fit_on_texts(ner_tags)\n",
|
|
"token_srl = Tokenizer(lower=False)\n",
|
|
"token_srl.fit_on_texts(srl_tags)\n",
|
|
"\n",
|
|
"# Target vocabularies (question tokens, answer tokens, question type).\n",
|
|
"token_q = Tokenizer(lower=False)\n",
|
|
"token_q.fit_on_texts(questions)\n",
|
|
"token_a = Tokenizer(lower=False)\n",
|
|
"token_a.fit_on_texts(answers)\n",
|
|
"token_type = Tokenizer(lower=False)\n",
|
|
"token_type.fit_on_texts(types)\n",
|
|
"\n",
|
|
"# Fixed sequence length used when padding every stream.\n",
|
|
"maxlen = 20"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 95,
|
|
"id": "555f9e22",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{'tof', 'none', 'ftb'}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"def _encode_padded(tokenizer, sequences):\n",
|
|
"    \"\"\"Convert token lists to integer ids, post-padded to `maxlen`.\"\"\"\n",
|
|
"    ids = tokenizer.texts_to_sequences(sequences)\n",
|
|
"    return pad_sequences(ids, padding=\"post\", maxlen=maxlen)\n",
|
|
"\n",
|
|
"\n",
|
|
"# NOTE(review): `truncating` is left at the pad_sequences default, so\n",
|
|
"# sequences longer than maxlen lose tokens from the front - confirm intended.\n",
|
|
"X_tok = _encode_padded(token_tok, tokens)\n",
|
|
"X_ner = _encode_padded(token_ner, ner_tags)\n",
|
|
"X_srl = _encode_padded(token_srl, srl_tags)\n",
|
|
"y_q = _encode_padded(token_q, questions)\n",
|
|
"y_a = _encode_padded(token_a, answers)\n",
|
|
"\n",
|
|
"print(set(types))\n",
|
|
"\n",
|
|
"# One id per record (first token of each type string), shifted to start at 0\n",
|
|
"# and then one-hot encoded.\n",
|
|
"type_ids = np.array([seq[0] for seq in token_type.texts_to_sequences(types)])\n",
|
|
"y_type = to_categorical(type_ids - 1, num_classes=len(token_type.word_index))\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 96,
|
|
"id": "f530cfe7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Split all six arrays with the same shuffle so rows stay aligned\n",
|
|
"# (80% train / 20% test, fixed seed).\n",
|
|
"(\n",
|
|
"    X_tok_train, X_tok_test,\n",
|
|
"    X_ner_train, X_ner_test,\n",
|
|
"    X_srl_train, X_srl_test,\n",
|
|
"    y_q_train, y_q_test,\n",
|
|
"    y_a_train, y_a_test,\n",
|
|
"    y_type_train, y_type_test,\n",
|
|
") = train_test_split(\n",
|
|
"    X_tok, X_ner, X_srl, y_q, y_a, y_type, test_size=0.2, random_state=42\n",
|
|
")\n",
|
|
"\n",
|
|
"# Model inputs, in the order the model's input layers are declared.\n",
|
|
"X_train = [X_tok_train, X_ner_train, X_srl_train]\n",
|
|
"X_test = [X_tok_test, X_ner_test, X_srl_test]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 97,
|
|
"id": "255e2a9a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_10\"</span>\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1mModel: \"functional_10\"\u001b[0m\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
|
|
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃<span style=\"font-weight: bold\"> Connected to </span>┃\n",
|
|
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
|
|
"│ tok_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ ner_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ srl_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ embedding_30 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">354,560</span> │ tok_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ embedding_31 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">672</span> │ ner_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ embedding_32 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">16</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">448</span> │ srl_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ concatenate_10 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">160</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ embedding_30[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) │ │ │ embedding_31[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"│ │ │ │ embedding_32[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ lstm_10 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">427,008</span> │ concatenate_10[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ get_item_10 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ lstm_10[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GetItem</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ question_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">727</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">186,839</span> │ lstm_10[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">TimeDistributed</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ answer_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">367</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">94,319</span> │ lstm_10[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">TimeDistributed</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ type_output (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">3</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">771</span> │ get_item_10[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
|
|
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n",
|
|
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
|
|
"│ tok_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ ner_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ srl_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ embedding_30 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m354,560\u001b[0m │ tok_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ embedding_31 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m) │ \u001b[38;5;34m672\u001b[0m │ ner_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ embedding_32 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m16\u001b[0m) │ \u001b[38;5;34m448\u001b[0m │ srl_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ concatenate_10 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m160\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ embedding_30[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
|
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ embedding_31[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
|
"│ │ │ │ embedding_32[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ lstm_10 (\u001b[38;5;33mLSTM\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m427,008\u001b[0m │ concatenate_10[\u001b[38;5;34m0\u001b[0m… │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ get_item_10 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ lstm_10[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mGetItem\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ question_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m727\u001b[0m) │ \u001b[38;5;34m186,839\u001b[0m │ lstm_10[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ answer_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m367\u001b[0m) │ \u001b[38;5;34m94,319\u001b[0m │ lstm_10[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ type_output (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m771\u001b[0m │ get_item_10[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,064,617</span> (4.06 MB)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m1,064,617\u001b[0m (4.06 MB)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,064,617</span> (4.06 MB)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m1,064,617\u001b[0m (4.06 MB)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Epoch 1/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 31ms/step - answer_output_accuracy: 0.9421 - answer_output_loss: 1.0839 - loss: 3.7461 - question_output_accuracy: 0.8187 - question_output_loss: 1.8269 - type_output_accuracy: 0.5154 - type_output_loss: 0.8354 - val_answer_output_accuracy: 0.9683 - val_answer_output_loss: 0.1305 - val_loss: 1.3997 - val_question_output_accuracy: 0.8508 - val_question_output_loss: 0.5916 - val_type_output_accuracy: 0.5451 - val_type_output_loss: 0.6777\n",
|
|
"Epoch 2/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 28ms/step - answer_output_accuracy: 0.9690 - answer_output_loss: 0.1140 - loss: 1.3590 - question_output_accuracy: 0.8490 - question_output_loss: 0.5613 - type_output_accuracy: 0.5599 - type_output_loss: 0.6837 - val_answer_output_accuracy: 0.9695 - val_answer_output_loss: 0.1072 - val_loss: 1.2185 - val_question_output_accuracy: 0.8523 - val_question_output_loss: 0.4397 - val_type_output_accuracy: 0.5484 - val_type_output_loss: 0.6715\n",
|
|
"Epoch 3/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 28ms/step - answer_output_accuracy: 0.9704 - answer_output_loss: 0.0929 - loss: 1.1687 - question_output_accuracy: 0.8545 - question_output_loss: 0.4132 - type_output_accuracy: 0.5641 - type_output_loss: 0.6626 - val_answer_output_accuracy: 0.9699 - val_answer_output_loss: 0.0893 - val_loss: 1.1270 - val_question_output_accuracy: 0.8511 - val_question_output_loss: 0.3662 - val_type_output_accuracy: 0.5206 - val_type_output_loss: 0.6712\n",
|
|
"Epoch 4/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 30ms/step - answer_output_accuracy: 0.9721 - answer_output_loss: 0.0755 - loss: 1.0676 - question_output_accuracy: 0.8573 - question_output_loss: 0.3360 - type_output_accuracy: 0.5583 - type_output_loss: 0.6562 - val_answer_output_accuracy: 0.9725 - val_answer_output_loss: 0.0819 - val_loss: 1.0924 - val_question_output_accuracy: 0.8538 - val_question_output_loss: 0.3355 - val_type_output_accuracy: 0.5217 - val_type_output_loss: 0.6720\n",
|
|
"Epoch 5/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 29ms/step - answer_output_accuracy: 0.9736 - answer_output_loss: 0.0687 - loss: 1.0458 - question_output_accuracy: 0.8604 - question_output_loss: 0.3208 - type_output_accuracy: 0.5582 - type_output_loss: 0.6564 - val_answer_output_accuracy: 0.9727 - val_answer_output_loss: 0.0785 - val_loss: 1.0680 - val_question_output_accuracy: 0.8557 - val_question_output_loss: 0.3196 - val_type_output_accuracy: 0.5406 - val_type_output_loss: 0.6702\n",
|
|
"Epoch 6/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 38ms/step - answer_output_accuracy: 0.9744 - answer_output_loss: 0.0606 - loss: 1.0098 - question_output_accuracy: 0.8582 - question_output_loss: 0.2944 - type_output_accuracy: 0.5536 - type_output_loss: 0.6548 - val_answer_output_accuracy: 0.9730 - val_answer_output_loss: 0.0761 - val_loss: 1.0590 - val_question_output_accuracy: 0.8529 - val_question_output_loss: 0.3082 - val_type_output_accuracy: 0.5106 - val_type_output_loss: 0.6747\n",
|
|
"Epoch 7/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 44ms/step - answer_output_accuracy: 0.9743 - answer_output_loss: 0.0602 - loss: 0.9826 - question_output_accuracy: 0.8617 - question_output_loss: 0.2767 - type_output_accuracy: 0.5678 - type_output_loss: 0.6457 - val_answer_output_accuracy: 0.9737 - val_answer_output_loss: 0.0735 - val_loss: 1.0377 - val_question_output_accuracy: 0.8632 - val_question_output_loss: 0.2956 - val_type_output_accuracy: 0.5617 - val_type_output_loss: 0.6667\n",
|
|
"Epoch 8/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 42ms/step - answer_output_accuracy: 0.9738 - answer_output_loss: 0.0577 - loss: 0.9710 - question_output_accuracy: 0.8606 - question_output_loss: 0.2633 - type_output_accuracy: 0.5602 - type_output_loss: 0.6501 - val_answer_output_accuracy: 0.9739 - val_answer_output_loss: 0.0702 - val_loss: 1.0265 - val_question_output_accuracy: 0.8609 - val_question_output_loss: 0.2860 - val_type_output_accuracy: 0.5462 - val_type_output_loss: 0.6695\n",
|
|
"Epoch 9/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 42ms/step - answer_output_accuracy: 0.9743 - answer_output_loss: 0.0522 - loss: 0.9559 - question_output_accuracy: 0.8590 - question_output_loss: 0.2549 - type_output_accuracy: 0.5509 - type_output_loss: 0.6488 - val_answer_output_accuracy: 0.9715 - val_answer_output_loss: 0.0709 - val_loss: 1.0313 - val_question_output_accuracy: 0.8511 - val_question_output_loss: 0.2829 - val_type_output_accuracy: 0.5128 - val_type_output_loss: 0.6767\n",
|
|
"Epoch 10/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 42ms/step - answer_output_accuracy: 0.9747 - answer_output_loss: 0.0512 - loss: 0.9363 - question_output_accuracy: 0.8622 - question_output_loss: 0.2406 - type_output_accuracy: 0.5592 - type_output_loss: 0.6445 - val_answer_output_accuracy: 0.9737 - val_answer_output_loss: 0.0704 - val_loss: 1.0202 - val_question_output_accuracy: 0.8548 - val_question_output_loss: 0.2788 - val_type_output_accuracy: 0.5495 - val_type_output_loss: 0.6721\n",
|
|
"Epoch 11/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 42ms/step - answer_output_accuracy: 0.9748 - answer_output_loss: 0.0520 - loss: 0.9421 - question_output_accuracy: 0.8594 - question_output_loss: 0.2446 - type_output_accuracy: 0.5581 - type_output_loss: 0.6454 - val_answer_output_accuracy: 0.9735 - val_answer_output_loss: 0.0696 - val_loss: 1.0231 - val_question_output_accuracy: 0.8625 - val_question_output_loss: 0.2759 - val_type_output_accuracy: 0.5484 - val_type_output_loss: 0.6794\n",
|
|
"Epoch 12/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 42ms/step - answer_output_accuracy: 0.9750 - answer_output_loss: 0.0499 - loss: 0.9247 - question_output_accuracy: 0.8637 - question_output_loss: 0.2316 - type_output_accuracy: 0.5730 - type_output_loss: 0.6433 - val_answer_output_accuracy: 0.9736 - val_answer_output_loss: 0.0693 - val_loss: 1.0144 - val_question_output_accuracy: 0.8623 - val_question_output_loss: 0.2735 - val_type_output_accuracy: 0.5584 - val_type_output_loss: 0.6716\n",
|
|
"Epoch 13/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 45ms/step - answer_output_accuracy: 0.9743 - answer_output_loss: 0.0546 - loss: 0.9290 - question_output_accuracy: 0.8637 - question_output_loss: 0.2325 - type_output_accuracy: 0.5665 - type_output_loss: 0.6419 - val_answer_output_accuracy: 0.9721 - val_answer_output_loss: 0.0683 - val_loss: 1.0072 - val_question_output_accuracy: 0.8631 - val_question_output_loss: 0.2707 - val_type_output_accuracy: 0.5595 - val_type_output_loss: 0.6674\n",
|
|
"Epoch 14/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 47ms/step - answer_output_accuracy: 0.9749 - answer_output_loss: 0.0482 - loss: 0.9212 - question_output_accuracy: 0.8620 - question_output_loss: 0.2314 - type_output_accuracy: 0.5596 - type_output_loss: 0.6417 - val_answer_output_accuracy: 0.9725 - val_answer_output_loss: 0.0687 - val_loss: 1.0154 - val_question_output_accuracy: 0.8514 - val_question_output_loss: 0.2748 - val_type_output_accuracy: 0.5339 - val_type_output_loss: 0.6714\n",
|
|
"Epoch 15/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 39ms/step - answer_output_accuracy: 0.9742 - answer_output_loss: 0.0493 - loss: 0.9149 - question_output_accuracy: 0.8617 - question_output_loss: 0.2254 - type_output_accuracy: 0.5621 - type_output_loss: 0.6402 - val_answer_output_accuracy: 0.9719 - val_answer_output_loss: 0.0692 - val_loss: 1.0235 - val_question_output_accuracy: 0.8524 - val_question_output_loss: 0.2776 - val_type_output_accuracy: 0.5317 - val_type_output_loss: 0.6747\n",
|
|
"Epoch 16/30\n",
|
|
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 31ms/step - answer_output_accuracy: 0.9752 - answer_output_loss: 0.0441 - loss: 0.9078 - question_output_accuracy: 0.8643 - question_output_loss: 0.2212 - type_output_accuracy: 0.5728 - type_output_loss: 0.6425 - val_answer_output_accuracy: 0.9722 - val_answer_output_loss: 0.0681 - val_loss: 1.0137 - val_question_output_accuracy: 0.8541 - val_question_output_loss: 0.2713 - val_type_output_accuracy: 0.5306 - val_type_output_loss: 0.6736\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"# Three parallel integer-id inputs (token, NER tag, SRL tag) of variable length.\n",
|
|
"inp_tok = Input(shape=(None,), name=\"tok_input\")\n",
|
|
"inp_ner = Input(shape=(None,), name=\"ner_input\")\n",
|
|
"inp_srl = Input(shape=(None,), name=\"srl_input\")\n",
|
|
"\n",
|
|
"# Vocabulary size + 1: Tokenizer ids start at 1 and 0 is the padding value.\n",
|
|
"emb_tok = Embedding(input_dim=len(token_tok.word_index) + 1, output_dim=128)(inp_tok)\n",
|
|
"emb_ner = Embedding(input_dim=len(token_ner.word_index) + 1, output_dim=16)(inp_ner)\n",
|
|
"emb_srl = Embedding(input_dim=len(token_srl.word_index) + 1, output_dim=16)(inp_srl)\n",
|
|
"\n",
|
|
"# NOTE(review): mask_zero is not enabled on the embeddings, so padded\n",
|
|
"# positions count toward the sequence losses and accuracies reported below.\n",
|
|
"\n",
|
|
"merged = Concatenate()([emb_tok, emb_ner, emb_srl])\n",
|
|
"\n",
|
|
"# return_state also yields the final hidden state, which has read the whole\n",
|
|
"# sequence; the per-step outputs in `x` feed the two sequence heads.\n",
|
|
"x, last_hidden, last_cell = LSTM(256, return_sequences=True, return_state=True)(merged)\n",
|
|
"\n",
|
|
"out_question = TimeDistributed(Dense(len(token_q.word_index) + 1, activation=\"softmax\"), name=\"question_output\")(x)\n",
|
|
"out_answer = TimeDistributed(Dense(len(token_a.word_index) + 1, activation=\"softmax\"), name=\"answer_output\")(x)\n",
|
|
"# Classify the question type from the final hidden state. The first step\n",
|
|
"# (x[:, 0, :]) of a forward-only LSTM has only read the first token.\n",
|
|
"out_type = Dense(len(token_type.word_index), activation=\"softmax\", name=\"type_output\")(\n",
|
|
"    last_hidden\n",
|
|
")\n",
|
|
"\n",
|
|
"model = Model(\n",
|
|
"    inputs=[inp_tok, inp_ner, inp_srl], outputs=[out_question, out_answer, out_type]\n",
|
|
")\n",
|
|
"# Sequence heads take integer targets (sparse loss); the type head takes the\n",
|
|
"# one-hot y_type, hence the non-sparse loss.\n",
|
|
"model.compile(\n",
|
|
"    optimizer=\"adam\",\n",
|
|
"    loss={\n",
|
|
"        \"question_output\": \"sparse_categorical_crossentropy\",\n",
|
|
"        \"answer_output\": \"sparse_categorical_crossentropy\",\n",
|
|
"        \"type_output\": \"categorical_crossentropy\",\n",
|
|
"    },\n",
|
|
"    metrics={\n",
|
|
"        \"question_output\": \"accuracy\",\n",
|
|
"        \"answer_output\": \"accuracy\",\n",
|
|
"        \"type_output\": \"accuracy\",\n",
|
|
"    },\n",
|
|
")\n",
|
|
"\n",
|
|
"model.summary()\n",
|
|
"\n",
|
|
"# ----------------------------------------------------------------------------\n",
|
|
"# 5. TRAINING\n",
|
|
"# ----------------------------------------------------------------------------\n",
|
|
"# The sequence targets get a trailing axis of size 1 via expand_dims.\n",
|
|
"# 10% of the training rows are held out for validation; training stops after\n",
|
|
"# 3 epochs without improvement and the best weights are restored.\n",
|
|
"model.fit(\n",
|
|
"    X_train,\n",
|
|
"    {\n",
|
|
"        \"question_output\": np.expand_dims(y_q_train, -1),\n",
|
|
"        \"answer_output\": np.expand_dims(y_a_train, -1),\n",
|
|
"        \"type_output\": y_type_train,\n",
|
|
"    },\n",
|
|
"    batch_size=32,\n",
|
|
"    epochs=30,\n",
|
|
"    validation_split=0.1,\n",
|
|
"    callbacks=[EarlyStopping(patience=3, restore_best_weights=True)],\n",
|
|
")\n",
|
|
"\n",
|
|
"import pickle\n",
|
|
"\n",
|
|
"\n",
|
|
"# Persist the trained model together with the fitted tokenizers.\n",
|
|
"model.save(\"new_model_lstm_qg.keras\")\n",
|
|
"with open(\"tokenizers.pkl\", \"wb\") as f:\n",
|
|
"    pickle.dump({\n",
|
|
"        \"token\": token_tok,\n",
|
|
"        \"ner\": token_ner,\n",
|
|
"        \"srl\": token_srl,\n",
|
|
"        \"question\": token_q,\n",
|
|
"        \"answer\": token_a,\n",
|
|
"        \"type\": token_type\n",
|
|
"    }, f)\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 98,
|
|
"id": "06fd86c7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[1m71/71\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 13ms/step\n",
|
|
"\n",
|
|
"=== Akurasi Detail ===\n",
|
|
"Question Accuracy (Token-level): 0.3\n",
|
|
"Answer Accuracy (Token-level) : 0.7\n",
|
|
"Type Accuracy (Class-level) : 0.55\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"def token_level_accuracy(y_true, y_pred):\n",
|
|
"    \"\"\"Return the share of non-padding positions whose predicted id matches the true id.\n",
|
|
"\n",
|
|
"    Sequences are paired with ``zip`` and positions whose true id is 0 are skipped.\n",
|
|
"    Returns 0 when there is no position left to score.\n",
|
|
"    \"\"\"\n",
|
|
"    # One boolean per scored position, flattened across all sequences.\n",
|
|
"    scored = [\n",
|
|
"        gold == guess\n",
|
|
"        for gold_row, guess_row in zip(y_true, y_pred)\n",
|
|
"        for gold, guess in zip(gold_row, guess_row)\n",
|
|
"        if gold != 0\n",
|
|
"    ]\n",
|
|
"    if not scored:\n",
|
|
"        return 0\n",
|
|
"    return sum(1 for hit in scored if hit) / len(scored)\n",
|
|
"\n",
|
|
"\n",
|
|
"# Predict on the test set; the model returns one probability array per output head.\n",
|
|
"prob_q, prob_a, prob_type = model.predict(X_test)\n",
|
|
"\n",
|
|
"# Reduce the class axis to the most likely class index.\n",
|
|
"y_pred_q = np.argmax(prob_q, axis=-1)\n",
|
|
"y_pred_a = np.argmax(prob_a, axis=-1)\n",
|
|
"y_pred_type = np.argmax(prob_type, axis=-1)\n",
|
|
"# Presumably y_type_test is one-hot encoded -- confirm against the cell that builds it.\n",
|
|
"y_true_type = np.argmax(y_type_test, axis=-1)\n",
|
|
"\n",
|
|
"# Token-level accuracy over positions whose true id is non-zero.\n",
|
|
"acc_q = token_level_accuracy(y_q_test, y_pred_q)\n",
|
|
"acc_a = token_level_accuracy(y_a_test, y_pred_a)\n",
|
|
"\n",
|
|
"# Per-class report for the type head (its printing is left disabled below).\n",
|
|
"report_type = classification_report(y_true_type, y_pred_type, zero_division=0)\n",
|
|
"\n",
|
|
"# Print results; two decimals on every line so the three figures are comparable.\n",
|
|
"print(\"\\n=== Akurasi Detail ===\")\n",
|
|
"print(f\"Question Accuracy (Token-level): {acc_q:.2f}\")\n",
|
|
"print(f\"Answer Accuracy (Token-level) : {acc_a:.2f}\")\n",
|
|
"print(f\"Type Accuracy (Class-level) : {np.mean(y_true_type == y_pred_type):.2f}\")\n",
|
|
"# print(\"\\n=== Classification Report (TYPE) ===\")\n",
|
|
"# print(report_type)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 99,
|
|
"id": "d5ed106c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"# flat_true_a, flat_pred_a = flatten_valid(y_a_test, y_pred_a_class)\n",
|
|
"# print(\"\\n=== Classification Report: ANSWER ===\")\n",
|
|
"# print(classification_report(flat_true_a, flat_pred_a))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 100,
|
|
"id": "aa3860de",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"# print(\"\\n=== Classification Report: TYPE ===\")\n",
|
|
"# print(classification_report(y_true_type_class, y_pred_type_class))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "myenv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.16"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|