{ "cells": [ { "cell_type": "code", "execution_count": 13, "id": "fb106e20", "metadata": {}, "outputs": [], "source": [ "import json, pickle\n", "import numpy as np\n", "from keras.models import Model\n", "from keras.layers import Input, Embedding, Bidirectional, LSTM, TimeDistributed, Dense\n", "from keras.preprocessing.sequence import pad_sequences\n", "from keras.utils import to_categorical\n", "from seqeval.metrics import classification_report\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 14, "id": "00347a5f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total kalimat: 156\n", "Total token: 1850\n" ] } ], "source": [ "#load the data\n", "# with open(\"../dataset/dataset_ner_srl.json\", encoding=\"utf-8\") as f:\n", "# data = json.load(f)\n", "\n", "# sentences = [[tok.lower() for tok in item[\"tokens\"]] for item in data]\n", "# labels_ner = [item[\"labels_ner\"] for item in data]\n", "# labels_srl = [item[\"labels_srl\"] for item in data]\n", "\n", "# for i, label_seq in enumerate(labels_ner):\n", "# if \"V\" in label_seq:\n", "# print(f\"Label 'V' ditemukan di index {i}: {label_seq}\")\n", " \n", " \n", "data = []\n", "\n", "with open(\"../dataset/dataset_ner_srl.tsv\", encoding=\"utf-8\") as f:\n", " tokens, ner_labels, srl_labels = [], [], []\n", " \n", " for line in f:\n", " line = line.strip()\n", " if not line:\n", " if tokens:\n", " data.append({\n", " \"tokens\": tokens,\n", " \"labels_ner\": ner_labels,\n", " \"labels_srl\": srl_labels\n", " })\n", " tokens, ner_labels, srl_labels = [], [], []\n", " else:\n", " token, ner, srl = line.split(\"\\t\")\n", " tokens.append(token)\n", " ner_labels.append(ner)\n", " srl_labels.append(srl)\n", "\n", "# Preprocessing sama seperti sebelumnya\n", "sentences = [[tok.lower() for tok in item[\"tokens\"]] for item in data]\n", "labels_ner = [item[\"labels_ner\"] for item in data]\n", "labels_srl = [item[\"labels_srl\"] for item in data]\n", "\n", "total_kalimat = len(data)\n", "total_token = sum(len(item[\"tokens\"]) for item in data)\n", "\n", "print(\"Total kalimat:\", total_kalimat)\n", "print(\"Total token:\", total_token)" ] }, { "cell_type": "code", "execution_count": null, "id": "3793950a", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 15, "id": "ac8eb374", "metadata": {}, "outputs": [], "source": [ "# tagging \n", "words = sorted({w for s in sentences for w in s})\n", "ner_tags = sorted({t for seq in labels_ner for t in seq})\n", "srl_tags = sorted({t for seq in labels_srl for t in seq})\n", "\n", "word2idx = {w: i + 2 for i, w in enumerate(words)}\n", "word2idx[\"PAD\"], word2idx[\"UNK\"] = 0, 1\n", "\n", "tag2idx_ner = {t: i for i, t in enumerate(ner_tags)}\n", "tag2idx_srl = {t: i for i, t in enumerate(srl_tags)}\n", "idx2tag_ner = {i: t for t, i in tag2idx_ner.items()}\n", "idx2tag_srl = {i: t for t, i in tag2idx_srl.items()}" ] }, { "cell_type": "code", "execution_count": 16, "id": "80356f1f", "metadata": {}, "outputs": [], "source": [ "# encoding\n", "\n", "X = [[word2idx.get(w, word2idx[\"UNK\"]) for w in s] for s in sentences]\n", "y_ner = [[tag2idx_ner[t] for t in seq] for seq in labels_ner]\n", "y_srl = [[tag2idx_srl[t] for t in seq] for seq in labels_srl]\n", "\n", "maxlen = 50 \n", "\n", "X = pad_sequences(X, maxlen=maxlen, padding=\"post\", value=word2idx[\"PAD\"])\n", "y_ner = pad_sequences(y_ner, maxlen=maxlen, padding=\"post\", value=tag2idx_ner[\"O\"])\n", "y_srl = pad_sequences(y_srl, maxlen=maxlen, padding=\"post\", value=tag2idx_srl[\"O\"])\n", "\n", "y_ner = [to_categorical(seq, num_classes=len(tag2idx_ner)) for seq in y_ner]\n", "y_srl = [to_categorical(seq, num_classes=len(tag2idx_srl)) for seq in y_srl]\n", "\n", "X = np.array(X)\n", "y_ner = np.array(y_ner)\n", "y_srl = np.array(y_srl)" ] }, { "cell_type": "code", "execution_count": 17, "id": "fe219c96", "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_ner_train, y_ner_test, y_srl_train, y_srl_test = train_test_split(\n", " X, y_ner, y_srl, \n", " test_size=0.20, \n", " random_state=42,\n", " shuffle=True \n", ")" ] }, { "cell_type": "code", "execution_count": 18, "id": "7a9636b6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"functional_1\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"functional_1\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)         Output Shape          Param #  Connected to      ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
       "│ input_layer_1       │ (None, 50)        │          0 │ -                 │\n",
       "│ (InputLayer)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ embedding_1         │ (None, 50, 64)    │     44,544 │ input_layer_1[0]… │\n",
       "│ (Embedding)         │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ bidirectional_1     │ (None, 50, 128)   │     66,048 │ embedding_1[0][0] │\n",
       "│ (Bidirectional)     │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ ner_output          │ (None, 50, 25)    │      3,225 │ bidirectional_1[ │\n",
       "│ (TimeDistributed)   │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ srl_output          │ (None, 50, 18)    │      2,322 │ bidirectional_1[ │\n",
       "│ (TimeDistributed)   │                   │            │                   │\n",
       "└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n", "│ input_layer_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n", "│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ embedding_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m44,544\u001b[0m │ input_layer_1[\u001b[38;5;34m0\u001b[0m]… │\n", "│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ bidirectional_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m66,048\u001b[0m │ embedding_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n", "│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ ner_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m25\u001b[0m) │ \u001b[38;5;34m3,225\u001b[0m │ bidirectional_1[\u001b[38;5;34m…\u001b[0m │\n", "│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ │\n", "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n", "│ srl_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50\u001b[0m, \u001b[38;5;34m18\u001b[0m) │ \u001b[38;5;34m2,322\u001b[0m │ bidirectional_1[\u001b[38;5;34m…\u001b[0m │\n", "│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ │\n", "└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 116,139 (453.67 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m116,139\u001b[0m (453.67 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 116,139 (453.67 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m116,139\u001b[0m (453.67 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 19ms/step - loss: 3.2850 - ner_output_accuracy: 0.8700 - ner_output_loss: 1.6767 - srl_output_accuracy: 0.7518 - srl_output_loss: 1.6083 - val_loss: 0.7275 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2555 - val_srl_output_accuracy: 0.8450 - val_srl_output_loss: 0.4720\n", "Epoch 2/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.7622 - ner_output_accuracy: 0.9528 - ner_output_loss: 0.2458 - srl_output_accuracy: 0.8296 - srl_output_loss: 0.5163 - val_loss: 0.6534 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2296 - val_srl_output_accuracy: 0.8531 - val_srl_output_loss: 0.4238\n", "Epoch 3/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.6875 - ner_output_accuracy: 0.9572 - ner_output_loss: 0.2126 - srl_output_accuracy: 0.8496 - srl_output_loss: 0.4750 - val_loss: 0.6327 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2273 - val_srl_output_accuracy: 0.8688 - val_srl_output_loss: 0.4054\n", "Epoch 4/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 0.6103 - ner_output_accuracy: 0.9533 - ner_output_loss: 0.2114 - srl_output_accuracy: 0.8772 - srl_output_loss: 0.3988 - val_loss: 0.6009 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2137 - val_srl_output_accuracy: 0.8662 - val_srl_output_loss: 0.3872\n", "Epoch 5/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.6757 - ner_output_accuracy: 0.9486 - ner_output_loss: 0.2281 - srl_output_accuracy: 0.8582 - srl_output_loss: 0.4476 - val_loss: 0.5690 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.2040 - val_srl_output_accuracy: 0.8781 - val_srl_output_loss: 0.3650\n", "Epoch 6/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 0.5864 - ner_output_accuracy: 0.9477 - ner_output_loss: 0.2198 - srl_output_accuracy: 0.8898 - srl_output_loss: 0.3666 - val_loss: 0.5458 - val_ner_output_accuracy: 0.9519 - val_ner_output_loss: 0.1961 - val_srl_output_accuracy: 0.8875 - val_srl_output_loss: 0.3497\n", "Epoch 7/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.5877 - ner_output_accuracy: 0.9506 - ner_output_loss: 0.1914 - srl_output_accuracy: 0.8773 - srl_output_loss: 0.3963 - val_loss: 0.5260 - val_ner_output_accuracy: 0.9525 - val_ner_output_loss: 0.1898 - val_srl_output_accuracy: 0.8875 - val_srl_output_loss: 0.3362\n", "Epoch 8/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 0.5046 - ner_output_accuracy: 0.9536 - ner_output_loss: 0.1756 - srl_output_accuracy: 0.8912 - srl_output_loss: 0.3290 - val_loss: 0.5094 - val_ner_output_accuracy: 0.9531 - val_ner_output_loss: 0.1829 - val_srl_output_accuracy: 0.8881 - val_srl_output_loss: 0.3265\n", "Epoch 9/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.4807 - ner_output_accuracy: 0.9539 - ner_output_loss: 0.1704 - srl_output_accuracy: 0.9021 - srl_output_loss: 0.3103 - val_loss: 0.4876 - val_ner_output_accuracy: 0.9531 - val_ner_output_loss: 0.1719 - val_srl_output_accuracy: 0.9025 - val_srl_output_loss: 0.3156\n", "Epoch 10/10\n", "\u001b[1m62/62\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 0.4134 - ner_output_accuracy: 0.9634 - ner_output_loss: 0.1350 - srl_output_accuracy: 0.9245 - srl_output_loss: 0.2784 - val_loss: 0.4587 - val_ner_output_accuracy: 0.9550 - val_ner_output_loss: 0.1598 - val_srl_output_accuracy: 0.9087 - val_srl_output_loss: 0.2989\n" ] } ], "source": [ "input_layer = Input(shape=(maxlen,))\n", "embed = Embedding(len(word2idx), 64)(input_layer)\n", "bilstm = Bidirectional(LSTM(64, return_sequences=True))(embed)\n", "\n", "ner_output = TimeDistributed(\n", " Dense(len(tag2idx_ner), activation=\"softmax\"), name=\"ner_output\"\n", ")(bilstm)\n", "srl_output = TimeDistributed(\n", " Dense(len(tag2idx_srl), activation=\"softmax\"), name=\"srl_output\"\n", ")(bilstm)\n", "\n", "model = Model(inputs=input_layer, outputs=[ner_output, srl_output])\n", "model.compile(\n", " optimizer=\"adam\",\n", " loss={\n", " \"ner_output\": \"categorical_crossentropy\",\n", " \"srl_output\": \"categorical_crossentropy\",\n", " },\n", " metrics={\"ner_output\": \"accuracy\", \"srl_output\": \"accuracy\"},\n", ")\n", "model.summary()\n", "model.fit(\n", " X_train, {\"ner_output\": y_ner_train, \"srl_output\": y_srl_train}, \n", " validation_data=(X_test, {\"ner_output\": y_ner_test, \"srl_output\": y_srl_test}),\n", " batch_size=2,\n", " epochs=10,\n", " verbose=1\n", ")\n", "\n", "# ---------- 6. Simpan artefak ----------\n", "model.save(\"multi_task_lstm_ner_srl_model.keras\")\n", "with open(\"word2idx.pkl\", \"wb\") as f:\n", " pickle.dump(word2idx, f)\n", "with open(\"tag2idx_ner.pkl\", \"wb\") as f:\n", " pickle.dump(tag2idx_ner, f)\n", "with open(\"tag2idx_srl.pkl\", \"wb\") as f:\n", " pickle.dump(tag2idx_srl, f)\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "3a55990b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'loss': 0.45865434408187866, 'compile_metrics': 0.159775510430336, 'ner_output_loss': 0.29887881875038147, 'srl_output_loss': 0.9550000429153442}\n", "{0: 'B-DATE', 1: 'B-ETH', 2: 'B-EVENT', 3: 'B-LOC', 4: 'B-MIN', 5: 'B-MISC', 6: 'B-ORG', 7: 'B-PER', 8: 'B-QUANT', 9: 'B-REL', 10: 'B-RES', 11: 'B-TERM', 12: 'B-TIME', 13: 'I-DATE', 14: 'I-ETH', 15: 'I-EVENT', 16: 'I-LOC', 17: 'I-MISC', 18: 'I-ORG', 19: 'I-PER', 20: 'I-QUANT', 21: 'I-RES', 22: 'I-TERM', 23: 'I-TIME', 24: 'O'}\n", "\n", "📊 [NER] Classification Report (test set):\n", " precision recall f1-score support\n", "\n", " DATE 0.33 0.12 0.18 8\n", " EVENT 0.00 0.00 0.00 1\n", " LOC 1.00 0.04 0.07 28\n", " ORG 0.00 0.00 0.00 4\n", " PER 0.00 0.00 0.00 2\n", " TIME 0.50 0.30 0.37 10\n", "\n", " micro avg 0.50 0.09 0.16 53\n", " macro avg 0.31 0.08 0.10 53\n", "weighted avg 0.67 0.09 0.13 53\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] } ], "source": [ "# evaluation\n", "\n", "results = model.evaluate(\n", " X_test,\n", " {\"ner_output\": y_ner_test, \"srl_output\": y_srl_test},\n", " verbose=0\n", ")\n", "\n", "# `metrics_names` = [\"loss\",\n", "# \"ner_output_loss\", \"srl_output_loss\",\n", "# \"ner_output_accuracy\", \"srl_output_accuracy\"]\n", "print(dict(zip(model.metrics_names, results)))\n", "\n", "def decode(pred, true, idx2tag):\n", " out_true, out_pred = [], []\n", " for p_seq, t_seq in zip(pred, true):\n", " t_labels, p_labels = [], []\n", " for p_tok, t_tok in zip(p_seq, t_seq):\n", " if t_tok.sum() == 0: # token PAD → lewati\n", " continue\n", " t_labels.append(idx2tag[t_tok.argmax()])\n", " p_labels.append(idx2tag[p_tok.argmax()])\n", " out_true.append(t_labels)\n", " out_pred.append(p_labels)\n", " return out_true, out_pred\n", "\n", "# prediksi hanya pada test set\n", "y_pred_ner, y_pred_srl = model.predict(X_test, verbose=0)\n", "\n", "true_ner, pred_ner = decode(y_pred_ner, y_ner_test, idx2tag_ner)\n", "print(idx2tag_ner)\n", "print(\"\\n📊 [NER] Classification Report (test set):\")\n", "print(classification_report(true_ner, pred_ner, digits=2))\n", "\n", "\n", "\n", "\n", "# y_pred_ner, y_pred_srl = model.predict(X, verbose=0)\n", "\n", "\n", "# def decode(pred, true, idx2tag):\n", "# true_tags = [[idx2tag[np.argmax(tok)] for tok in seq] for seq in true]\n", "# pred_tags = [[idx2tag[np.argmax(tok)] for tok in seq] for seq in pred]\n", "# return true_tags, pred_tags\n", "\n", "\n", "# true_ner, pred_ner = decode(y_pred_ner, y_ner, idx2tag_ner)\n", "\n", "# print(\"\\n📊 [NER] Classification Report:\")\n", "# print(classification_report(true_ner, pred_ner))\n", "\n" ] }, { "cell_type": "code", "execution_count": 20, "id": "547d1533", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{0: 'ARG0', 1: 'ARG1', 2: 'ARG2', 3: 'ARG3', 4: 'ARGM-BNF', 5: 'ARGM-CAU', 6: 'ARGM-COM', 7: 'ARGM-FRQ', 8: 'ARGM-LOC', 9: 'ARGM-MNR', 10: 'ARGM-MOD', 11: 'ARGM-NEG', 12: 'ARGM-PRP', 13: 'ARGM-SRC', 14: 'ARGM-TMP', 15: 'O', 16: 'R-ARG1', 17: 'V'}\n", "\n", "📊 [SRL] Classification Report (test set):\n", " precision recall f1-score support\n", "\n", " CAU 0.00 0.00 0.00 1\n", " FRQ 0.00 0.00 0.00 1\n", " LOC 0.31 0.50 0.38 10\n", " MNR 0.00 0.00 0.00 4\n", " PRP 0.00 0.00 0.00 1\n", " RG0 0.50 0.11 0.17 19\n", " RG1 0.18 0.20 0.19 46\n", " RG2 0.27 0.40 0.32 10\n", " TMP 0.50 0.59 0.54 17\n", " _ 0.12 0.03 0.05 33\n", "\n", " micro avg 0.28 0.22 0.24 142\n", " macro avg 0.19 0.18 0.17 142\n", "weighted avg 0.26 0.22 0.21 142\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARG1 seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: V seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-TMP seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARG0 seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-LOC seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-MNR seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-FRQ seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARG2 seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-PRP seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", "/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: ARGM-CAU seems not to be NE tag.\n", " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n" ] } ], "source": [ "# true_srl, pred_srl = decode(y_pred_srl, y_srl, idx2tag_srl)\n", "# print(\"\\n📊 [SRL] Classification Report:\")\n", "# print(classification_report(true_srl, pred_srl))\n", "\n", "true_srl, pred_srl = decode(y_pred_srl, y_srl_test, idx2tag_srl)\n", "print(idx2tag_srl)\n", "print(\"\\n📊 [SRL] Classification Report (test set):\")\n", "print(classification_report(true_srl, pred_srl, digits=2))" ] } ], "metadata": { "kernelspec": { "display_name": "myenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 5 }