From ed1a3df0d860755b2dd8fdbc3d0a6c2551fdbac0 Mon Sep 17 00:00:00 2001 From: akhdanre Date: Wed, 12 Mar 2025 11:37:17 +0700 Subject: [PATCH] feat: training model change name --- dataset/training_dataset.json | 8 ++++---- training_model.ipynb | 28 ++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/dataset/training_dataset.json b/dataset/training_dataset.json index e6cec4d..c3f8f61 100644 --- a/dataset/training_dataset.json +++ b/dataset/training_dataset.json @@ -176,19 +176,19 @@ "qa_pairs": [ { "type": "fill_in_the_blank", - "question": "Apa Kepanjangan dari BPUPKI?", - "answer": "Badan Penyelidik Usaha Usaha Persiapan Kemerdekaan Indonesia" + "question": "Apa kepanjangan dari BPUPKI?", + "answer": "Badan Penyelidik Usaha-Usaha Persiapan Kemerdekaan Indonesia" }, { "type": "multiple_choice", - "question": "BPUPKI dibentuk pada ", + "question": "BPUPKI dibentuk pada tanggal?", "options": [ "20 April 1945", "29 April 1945", "10 April 1945", "20 Mei 1945" ], - "answer": "20 Mei 1945" + "answer": "29 April 1945" } ] } diff --git a/training_model.ipynb b/training_model.ipynb index 201fa7a..2a2efb9 100644 --- a/training_model.ipynb +++ b/training_model.ipynb @@ -2,30 +2,37 @@ "cells": [ { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# import library\n", "\n", + "# Data manipulation and visualization\n", "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "import json\n", - "from tensorflow.keras.preprocessing.text import Tokenizer\n", - "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", + "import matplotlib.pyplot as plt\n", "\n", + "# Natural language processing\n", "import re\n", "import string\n", "import nltk\n", "from nltk.corpus import stopwords\n", "from nltk.tokenize import word_tokenize\n", "from nltk.stem import WordNetLemmatizer\n", - "import pickle\n", "\n", + "# Deep learning\n", + "from tensorflow.keras.preprocessing.text import Tokenizer\n", + "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate\n", - "from sklearn.metrics import classification_report, precision_score, recall_score, accuracy_score\n" + "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n", + "\n", + "# Metrics for model evaluation\n", + "from sklearn.metrics import classification_report, precision_score, recall_score, accuracy_score\n", + "\n", + "# Utility for serialization\n", + "import pickle\n" ] }, { @@ -346,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -467,6 +474,11 @@ " },\n", ")\n", "\n", + "\n", + "early_stop = EarlyStopping(monitor='val_loss', patience=3)\n", + "checkpoint = ModelCheckpoint(\"best_model.h5\", monitor='val_loss', save_best_only=True)\n", + "\n", + "\n", "# === Training Model === #\n", "model.fit(\n", " [context_padded, question_padded],\n",