{ "cells": [ { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "# import library\n", "\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import json\n", "from tensorflow.keras.preprocessing.text import Tokenizer\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "\n", "import re\n", "import string\n", "import nltk\n", "from nltk.corpus import stopwords\n", "from nltk.tokenize import word_tokenize\n", "from nltk.stem import WordNetLemmatizer\n", "import pickle\n", "\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate\n", "from sklearn.metrics import classification_report, precision_score, recall_score, accuracy_score\n" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to /home/akeon/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n", "[nltk_data] Downloading package punkt to /home/akeon/nltk_data...\n", "[nltk_data] Package punkt is already up-to-date!\n", "[nltk_data] Downloading package punkt_tab to /home/akeon/nltk_data...\n", "[nltk_data] Package punkt_tab is already up-to-date!\n", "[nltk_data] Downloading package wordnet to /home/akeon/nltk_data...\n", "[nltk_data] Package wordnet is already up-to-date!\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Download the NLTK packages: stopwords, punkt, punkt_tab and wordnet (same order as before).\n", "for resource_id in (\"stopwords\", \"punkt\", \"punkt_tab\"):\n", "    nltk.download(resource_id)\n", "\n", "# Kept as the cell's final expression so its return value is still displayed as the cell output.\n", "nltk.download(\"wordnet\")" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | context | \n", "qa_pairs | \n", "
---|---|---|
0 | \n", "Albert Einstein adalah fisikawan teoretis kela... | \n", "[{'type': 'fill_in_the_blank', 'question': '__... | \n", "
1 | \n", "Samudra Pasifik adalah yang terbesar dan terda... | \n", "[{'type': 'fill_in_the_blank', 'question': 'Sa... | \n", "
2 | \n", "Proklamasi Kemerdekaan Indonesia dibacakan pad... | \n", "[{'type': 'fill_in_the_blank', 'question': 'Pr... | \n", "
3 | \n", "Hukum Newton adalah tiga hukum fisika yang men... | \n", "[{'type': 'fill_in_the_blank', 'question': 'Hu... | \n", "
4 | \n", "Budi Utomo adalah organisasi pemuda yang didir... | \n", "[{'type': 'fill_in_the_blank', 'question': 'Bu... | \n", "