2266 lines
159 KiB
Plaintext
2266 lines
159 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"id": "58e41ccb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import json\n",
|
|
"import random\n",
|
|
"import tensorflow as tf\n",
|
|
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
|
|
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
|
|
"from tensorflow.keras.models import Model, load_model\n",
|
|
"from tensorflow.keras.layers import (\n",
|
|
" Input,\n",
|
|
" LSTM,\n",
|
|
" Dense,\n",
|
|
" Embedding,\n",
|
|
" Bidirectional,\n",
|
|
" Concatenate,\n",
|
|
" Attention,\n",
|
|
" Dropout,\n",
|
|
")\n",
|
|
"from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import re"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 35,
|
|
"id": "9f22b5d1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# JSON text is UTF-8 by specification; decode it explicitly instead of relying\n",
"# on the platform's locale encoding, which differs between operating systems.\n",
"with open(\n",
"    \"../dataset/stable_qg_qa_train_dataset.json\", \"r\", encoding=\"utf-8\"\n",
") as f:\n",
"    data = json.load(f)\n",
|
|
"\n",
|
|
"\n",
|
|
"# Preprocessing function\n",
|
"def preprocess_text(text):\n",
"    \"\"\"Normalize raw text: lowercase it and collapse whitespace runs.\n",
"\n",
"    Every run of whitespace becomes a single space, and leading/trailing\n",
"    whitespace is removed.\n",
"    \"\"\"\n",
"    lowered = text.lower()\n",
"    collapsed = re.sub(r\"\\s+\", \" \", lowered)\n",
"    return collapsed.strip()\n",
|
|
"\n",
|
|
"\n",
|
"# Flatten the dataset into parallel per-question lists\n",
"def prepare_data(data):\n",
"    \"\"\"Flatten ``data`` into seven parallel lists with one entry per QA pair.\n",
"\n",
"    Each element of ``data`` must provide ``context``, ``tokens``, ``ner``,\n",
"    ``srl`` and ``qas``; each entry of ``qas`` must provide ``question``,\n",
"    ``answer`` and ``type``. Item-level fields are repeated for every QA pair\n",
"    of that item. Context and question go through ``preprocess_text``; the\n",
"    other fields are stored as-is.\n",
"\n",
"    Returns:\n",
"        (contexts, tokens_list, ner_list, srl_list, questions, answers, q_types)\n",
"    \"\"\"\n",
"    # One (item, qa) pair per question, in dataset order.\n",
"    pairs = [(item, qa) for item in data for qa in item[\"qas\"]]\n",
"\n",
"    contexts = [preprocess_text(item[\"context\"]) for item, _ in pairs]\n",
"    tokens_list = [item[\"tokens\"] for item, _ in pairs]\n",
"    ner_list = [item[\"ner\"] for item, _ in pairs]\n",
"    srl_list = [item[\"srl\"] for item, _ in pairs]\n",
"    questions = [preprocess_text(qa[\"question\"]) for _, qa in pairs]\n",
"    answers = [qa[\"answer\"] for _, qa in pairs]\n",
"    q_types = [qa[\"type\"] for _, qa in pairs]\n",
"\n",
"    return contexts, tokens_list, ner_list, srl_list, questions, answers, q_types\n",
|
|
"\n",
|
|
"\n",
|
"# Flatten the loaded dataset into the parallel lists used by the later cells\n",
"(\n",
"    contexts,\n",
"    tokens_list,\n",
"    ner_list,\n",
"    srl_list,\n",
"    questions,\n",
"    answers,\n",
"    q_types,\n",
") = prepare_data(data)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"id": "c703ec2a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"max_vocab_size = 10000\n",
"tokenizer = Tokenizer(num_words=max_vocab_size, oov_token=\"<OOV>\")\n",
"tokenizer.fit_on_texts(contexts + questions + [\" \".join(item) for item in tokens_list])\n",
"# word_index keeps every word seen, but texts_to_sequences only emits indices\n",
"# below num_words, so the embedding never needs more rows than that cap.\n",
"vocab_size = min(max_vocab_size, len(tokenizer.word_index) + 1)\n",
"\n",
"# NER tag encoding. filters=\"\" keeps each tag as one token: the default filters\n",
"# strip characters such as '-' and '_', which would split a tag like \"B-PER\"\n",
"# into two tokens and break the one-tag-per-token alignment.\n",
"# NOTE(review): tags without such characters encode exactly as before.\n",
"ner_tokenizer = Tokenizer(filters=\"\", oov_token=\"<OOV>\")\n",
"ner_tokenizer.fit_on_texts([\" \".join(ner) for ner in ner_list])\n",
"ner_vocab_size = len(ner_tokenizer.word_index) + 1\n",
"\n",
"# SRL tag encoding (same reasoning as for the NER tags)\n",
"srl_tokenizer = Tokenizer(filters=\"\", oov_token=\"<OOV>\")\n",
"srl_tokenizer.fit_on_texts([\" \".join(srl) for srl in srl_list])\n",
"srl_vocab_size = len(srl_tokenizer.word_index) + 1\n",
"\n",
"# Question-type encoding. Keep each label whole so that it can be looked up in\n",
"# word_index by its (lowercased) name; index 0 stays unused by the tokenizer.\n",
"q_type_tokenizer = Tokenizer(filters=\"\")\n",
"q_type_tokenizer.fit_on_texts(q_types)\n",
"q_type_vocab_size = len(q_type_tokenizer.word_index) + 1\n",
|
|
"\n",
|
|
"\n",
|
"# Convert token, NER and SRL lists to integer sequences\n",
"def tokens_to_sequences(tokens, ner, srl):\n",
"    \"\"\"Encode per-item token, NER-tag and SRL-tag lists as index sequences.\n",
"\n",
"    Each inner list is joined with single spaces and encoded by the matching\n",
"    module-level tokenizer (``tokenizer``, ``ner_tokenizer``, ``srl_tokenizer``).\n",
"\n",
"    Returns:\n",
"        (token_seqs, ner_seqs, srl_seqs), one sequence per input item.\n",
"    \"\"\"\n",
"\n",
"    def encode(fitted_tokenizer, items):\n",
"        # Encode the whole batch in one call; the result has one list per item.\n",
"        joined = [\" \".join(parts) for parts in items]\n",
"        return fitted_tokenizer.texts_to_sequences(joined)\n",
"\n",
"    return (\n",
"        encode(tokenizer, tokens),\n",
"        encode(ner_tokenizer, ner),\n",
"        encode(srl_tokenizer, srl),\n",
"    )\n",
|
|
"\n",
|
|
"\n",
|
"# Encode the text inputs and determine the maximum lengths used for padding\n",
"context_seqs = tokenizer.texts_to_sequences(contexts)\n",
"question_seqs = tokenizer.texts_to_sequences(questions)\n",
"token_seqs, ner_seqs, srl_seqs = tokens_to_sequences(tokens_list, ner_list, srl_list)\n",
"\n",
"# NER and SRL sequences are padded to max_token_len as well (see pad_all_sequences)\n",
"max_context_len = max(map(len, context_seqs))\n",
"max_question_len = max(map(len, question_seqs))\n",
"max_token_len = max(map(len, token_seqs))\n",
|
|
"\n",
|
|
"\n",
|
"# Pad sequences so that every input has a fixed length\n",
"def pad_all_sequences(context_seqs, question_seqs, token_seqs, ner_seqs, srl_seqs):\n",
"    \"\"\"Pad the five sequence groups to the module-level maximum lengths.\n",
"\n",
"    Contexts use ``max_context_len``, questions use ``max_question_len``, and\n",
"    tokens, NER tags and SRL tags all use ``max_token_len``.\n",
"\n",
"    Sequences are padded at the end and, when longer than the target length,\n",
"    also truncated at the end. The default ``truncating=\"pre\"`` would drop the\n",
"    leading entries instead, shifting tags relative to the tokens they belong\n",
"    to whenever a tag sequence is longer than ``max_token_len``.\n",
"\n",
"    Returns:\n",
"        (context_padded, question_padded, token_padded, ner_padded, srl_padded)\n",
"    \"\"\"\n",
"\n",
"    def pad(seqs, maxlen):\n",
"        return pad_sequences(seqs, maxlen=maxlen, padding=\"post\", truncating=\"post\")\n",
"\n",
"    context_padded = pad(context_seqs, max_context_len)\n",
"    question_padded = pad(question_seqs, max_question_len)\n",
"    token_padded = pad(token_seqs, max_token_len)\n",
"    ner_padded = pad(ner_seqs, max_token_len)\n",
"    srl_padded = pad(srl_seqs, max_token_len)\n",
"    return context_padded, question_padded, token_padded, ner_padded, srl_padded\n",
|
|
"\n",
|
|
"\n",
|
"# Tokenizer for the answers\n",
"answer_tokenizer = Tokenizer(oov_token=\"<OOV>\")\n",
"answer_tokenizer.fit_on_texts(answers)\n",
"answer_vocab_size = len(answer_tokenizer.word_index) + 1\n",
"\n",
"# Encode each question type as a single class index (not a sequence).\n",
"# Tokenizer indices start at 1, so 0 is free to act as the fallback class for\n",
"# labels missing from word_index. The tokenizer lowercases while fitting, so\n",
"# labels are looked up in lowercase; otherwise a mixed-case label would\n",
"# silently fall back to 0.\n",
"q_type_indices = np.array(\n",
"    [q_type_tokenizer.word_index.get(q_type.lower(), 0) for q_type in q_types]\n",
")\n",
"\n",
"# One-hot encode the question types\n",
"q_type_categorical = tf.keras.utils.to_categorical(\n",
"    q_type_indices, num_classes=q_type_vocab_size\n",
")\n",
|
|
"\n",
|
"# Pad every input sequence group\n",
"padded_inputs = pad_all_sequences(\n",
"    context_seqs, question_seqs, token_seqs, ner_seqs, srl_seqs\n",
")\n",
"context_padded, question_padded, token_padded, ner_padded, srl_padded = padded_inputs\n",
"\n",
"# Encode and pad the answers\n",
"answer_seqs = answer_tokenizer.texts_to_sequences(answers)\n",
"max_answer_len = max(map(len, answer_seqs))\n",
"answer_padded = pad_sequences(answer_seqs, maxlen=max_answer_len, padding=\"post\")\n",
"\n",
"# Split the sample indices into train and test sets (80/20, fixed seed)\n",
"indices = list(range(len(context_padded)))\n",
"train_indices, test_indices = train_test_split(\n",
"    indices, test_size=0.2, random_state=42\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"id": "f5e6a6b4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Select the rows of an array that belong to a split\n",
"def get_subset(data, indices):\n",
"    \"\"\"Return the rows of ``data`` at ``indices`` as a new NumPy array.\n",
"\n",
"    Args:\n",
"        data: array-like indexed along its first axis.\n",
"        indices: sequence of integer row positions.\n",
"\n",
"    Returns:\n",
"        Array holding the selected rows in the order given by ``indices``.\n",
"        An empty ``indices`` yields an empty array that keeps the trailing\n",
"        dimensions of ``data``.\n",
"    \"\"\"\n",
"    # The explicit integer dtype keeps an empty index list usable as an index\n",
"    # (a bare empty list would convert to a float array).\n",
"    rows = np.asarray(indices, dtype=np.intp)\n",
"    # Integer-array indexing copies the selected rows without a Python loop.\n",
"    return np.asarray(data)[rows]\n",
|
|
"\n",
|
|
"\n",
|
"# Every padded input/target array, in the order the split variables are unpacked\n",
"split_sources = (\n",
"    context_padded,\n",
"    question_padded,\n",
"    token_padded,\n",
"    ner_padded,\n",
"    srl_padded,\n",
"    q_type_categorical,\n",
"    answer_padded,\n",
")\n",
"\n",
"# Train data\n",
"(\n",
"    train_context,\n",
"    train_question,\n",
"    train_token,\n",
"    train_ner,\n",
"    train_srl,\n",
"    train_q_type,\n",
"    train_answer,\n",
") = (get_subset(source, train_indices) for source in split_sources)\n",
"\n",
"# Test data\n",
"(\n",
"    test_context,\n",
"    test_question,\n",
"    test_token,\n",
"    test_ner,\n",
"    test_srl,\n",
"    test_q_type,\n",
"    test_answer,\n",
") = (get_subset(source, test_indices) for source in split_sources)\n",
"\n",
"# Hyperparameters\n",
"embedding_dim = 100\n",
"ner_embedding_dim = 50\n",
"srl_embedding_dim = 50\n",
"lstm_units = 128\n",
"dropout_rate = 0.3"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"id": "00a25f78",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_2\"</span>\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1mModel: \"functional_2\"\u001b[0m\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
|
|
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃<span style=\"font-weight: bold\"> Connected to </span>┃\n",
|
|
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
|
|
"│ context_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ question_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">14</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ token_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ ner_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ srl_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ text_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">100</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">213,900</span> │ context_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ question_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
|
|
"│ │ │ │ token_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ ner_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,700</span> │ ner_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ srl_embedding │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,600</span> │ srl_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ bidirectional_6 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">234,496</span> │ text_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ bidirectional_7 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">14</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">234,496</span> │ text_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>… │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ token_features │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">200</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ text_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">2</span>… │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) │ │ │ ner_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
|
|
"│ │ │ │ srl_embedding[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ context_attention │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ bidirectional_6[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Attention</span>) │ │ │ bidirectional_7[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ bidirectional_8 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">54</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">336,896</span> │ token_features[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ context_att_pool │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ context_attentio… │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalMaxPooling1…</span> │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ question_pool │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ bidirectional_7[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalMaxPooling1…</span> │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ token_pool │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ bidirectional_8[<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalMaxPooling1…</span> │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ q_type_input │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">5</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ - │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ all_features │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">773</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ context_att_pool… │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) │ │ │ question_pool[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… │\n",
|
|
"│ │ │ │ token_pool[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>], │\n",
|
|
"│ │ │ │ q_type_input[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ dense_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">198,144</span> │ all_features[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ dropout_4 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ dense_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ dense_2 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">32,896</span> │ dropout_4[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ dropout_5 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ dense_2[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ answer_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">939</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">121,131</span> │ dropout_5[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
|
|
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ │ │ │\n",
|
|
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
|
|
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n",
|
|
"┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
|
|
"│ context_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ question_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m14\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ token_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ ner_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ srl_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ text_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m100\u001b[0m) │ \u001b[38;5;34m213,900\u001b[0m │ context_input[\u001b[38;5;34m0\u001b[0m]… │\n",
|
|
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ question_input[\u001b[38;5;34m0\u001b[0m… │\n",
|
|
"│ │ │ │ token_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ ner_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m1,700\u001b[0m │ ner_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ srl_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m50\u001b[0m) │ \u001b[38;5;34m1,600\u001b[0m │ srl_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mEmbedding\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ bidirectional_6 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m234,496\u001b[0m │ text_embedding[\u001b[38;5;34m0\u001b[0m… │\n",
|
|
"│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ bidirectional_7 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m14\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m234,496\u001b[0m │ text_embedding[\u001b[38;5;34m1\u001b[0m… │\n",
|
|
"│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ token_features │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m200\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ text_embedding[\u001b[38;5;34m2\u001b[0m… │\n",
|
|
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ ner_embedding[\u001b[38;5;34m0\u001b[0m]… │\n",
|
|
"│ │ │ │ srl_embedding[\u001b[38;5;34m0\u001b[0m]… │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ context_attention │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ bidirectional_6[\u001b[38;5;34m…\u001b[0m │\n",
|
|
"│ (\u001b[38;5;33mAttention\u001b[0m) │ │ │ bidirectional_7[\u001b[38;5;34m…\u001b[0m │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ bidirectional_8 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m336,896\u001b[0m │ token_features[\u001b[38;5;34m0\u001b[0m… │\n",
|
|
"│ (\u001b[38;5;33mBidirectional\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ context_att_pool │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ context_attentio… │\n",
|
|
"│ (\u001b[38;5;33mGlobalMaxPooling1…\u001b[0m │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ question_pool │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ bidirectional_7[\u001b[38;5;34m…\u001b[0m │\n",
|
|
"│ (\u001b[38;5;33mGlobalMaxPooling1…\u001b[0m │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ token_pool │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ bidirectional_8[\u001b[38;5;34m…\u001b[0m │\n",
|
|
"│ (\u001b[38;5;33mGlobalMaxPooling1…\u001b[0m │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ q_type_input │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m5\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
|
|
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ all_features │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m773\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ context_att_pool… │\n",
|
|
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ question_pool[\u001b[38;5;34m0\u001b[0m]… │\n",
|
|
"│ │ │ │ token_pool[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
|
|
"│ │ │ │ q_type_input[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m198,144\u001b[0m │ all_features[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ dropout_4 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dense_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m32,896\u001b[0m │ dropout_4[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ dropout_5 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dense_2[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
|
|
"│ answer_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m939\u001b[0m) │ \u001b[38;5;34m121,131\u001b[0m │ dropout_5[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
|
|
"│ (\u001b[38;5;33mDense\u001b[0m) │ │ │ │\n",
|
|
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,375,259</span> (5.25 MB)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m1,375,259\u001b[0m (5.25 MB)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,375,259</span> (5.25 MB)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m1,375,259\u001b[0m (5.25 MB)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Function untuk membuat model\n",
|
|
"def create_qa_model():\n",
|
|
" # Input layers\n",
|
|
" context_input = Input(shape=(max_context_len,), name=\"context_input\")\n",
|
|
" question_input = Input(shape=(max_question_len,), name=\"question_input\")\n",
|
|
" token_input = Input(shape=(max_token_len,), name=\"token_input\")\n",
|
|
" ner_input = Input(shape=(max_token_len,), name=\"ner_input\")\n",
|
|
" srl_input = Input(shape=(max_token_len,), name=\"srl_input\")\n",
|
|
" q_type_input = Input(shape=(q_type_vocab_size,), name=\"q_type_input\")\n",
|
|
"\n",
|
|
" # Shared embedding layer for text\n",
|
|
" text_embedding = Embedding(vocab_size, embedding_dim, name=\"text_embedding\")\n",
|
|
"\n",
|
|
" # Embedding untuk NER dan SRL\n",
|
|
" ner_embedding = Embedding(ner_vocab_size, ner_embedding_dim, name=\"ner_embedding\")(\n",
|
|
" ner_input\n",
|
|
" )\n",
|
|
" srl_embedding = Embedding(srl_vocab_size, srl_embedding_dim, name=\"srl_embedding\")(\n",
|
|
" srl_input\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Apply embeddings\n",
|
|
" context_embed = text_embedding(context_input)\n",
|
|
" question_embed = text_embedding(question_input)\n",
|
|
" token_embed = text_embedding(token_input)\n",
|
|
"\n",
|
|
" # Bi-directional LSTM untuk context dan token-level features\n",
|
|
" context_lstm = Bidirectional(\n",
|
|
" LSTM(lstm_units, return_sequences=True, name=\"context_lstm\")\n",
|
|
" )(context_embed)\n",
|
|
" question_lstm = Bidirectional(\n",
|
|
" LSTM(lstm_units, return_sequences=True, name=\"question_lstm\")\n",
|
|
" )(question_embed)\n",
|
|
"\n",
|
|
" # Concat token features (tokens, NER, SRL)\n",
|
|
" token_features = Concatenate(name=\"token_features\")(\n",
|
|
" [token_embed, ner_embedding, srl_embedding]\n",
|
|
" )\n",
|
|
" token_lstm = Bidirectional(\n",
|
|
" LSTM(lstm_units, return_sequences=True, name=\"token_lstm\")\n",
|
|
" )(token_features)\n",
|
|
"\n",
|
|
" # Attention mechanism untuk context dengan memperhatikan question\n",
|
|
" context_attention = tf.keras.layers.Attention(name=\"context_attention\")(\n",
|
|
" [context_lstm, question_lstm]\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Pool attention outputs\n",
|
|
" context_att_pool = tf.keras.layers.GlobalMaxPooling1D(name=\"context_att_pool\")(\n",
|
|
" context_attention\n",
|
|
" )\n",
|
|
" question_pool = tf.keras.layers.GlobalMaxPooling1D(name=\"question_pool\")(\n",
|
|
" question_lstm\n",
|
|
" )\n",
|
|
" token_pool = tf.keras.layers.GlobalMaxPooling1D(name=\"token_pool\")(token_lstm)\n",
|
|
"\n",
|
|
" # Concat all features\n",
|
|
" all_features = Concatenate(name=\"all_features\")(\n",
|
|
" [context_att_pool, question_pool, token_pool, q_type_input]\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Dense layers\n",
|
|
" x = Dense(256, activation=\"relu\", name=\"dense_1\")(all_features)\n",
|
|
" x = Dropout(dropout_rate)(x)\n",
|
|
" x = Dense(128, activation=\"relu\", name=\"dense_2\")(x)\n",
|
|
" x = Dropout(dropout_rate)(x)\n",
|
|
"\n",
|
|
" # Output layer untuk jawaban\n",
|
|
" answer_output = Dense(\n",
|
|
" answer_vocab_size, activation=\"softmax\", name=\"answer_output\"\n",
|
|
" )(x)\n",
|
|
"\n",
|
|
" # Create model\n",
|
|
" model = Model(\n",
|
|
" inputs=[\n",
|
|
" context_input,\n",
|
|
" question_input,\n",
|
|
" token_input,\n",
|
|
" ner_input,\n",
|
|
" srl_input,\n",
|
|
" q_type_input,\n",
|
|
" ],\n",
|
|
" outputs=answer_output,\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Compile model\n",
|
|
" model.compile(\n",
|
|
" optimizer=\"adam\", loss=\"sparse_categorical_crossentropy\", metrics=[\"accuracy\"]\n",
|
|
" )\n",
|
|
"\n",
|
|
" return model\n",
|
|
"\n",
|
|
"\n",
|
|
"# Buat model\n",
|
|
"model = create_qa_model()\n",
|
|
"model.summary()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 39,
|
|
"id": "88e9f158",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Epoch 1/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 47ms/step - accuracy: 0.0914 - loss: 6.2711 - val_accuracy: 0.1368 - val_loss: 5.4982\n",
|
|
"Epoch 2/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 42ms/step - accuracy: 0.1016 - loss: 5.0131 - val_accuracy: 0.0596 - val_loss: 5.4249\n",
|
|
"Epoch 3/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.0930 - loss: 4.8057 - val_accuracy: 0.0877 - val_loss: 5.5286\n",
|
|
"Epoch 4/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.1550 - loss: 4.4755 - val_accuracy: 0.1509 - val_loss: 5.2432\n",
|
|
"Epoch 5/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 45ms/step - accuracy: 0.2023 - loss: 4.2949 - val_accuracy: 0.1895 - val_loss: 5.3838\n",
|
|
"Epoch 6/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.2297 - loss: 4.1641 - val_accuracy: 0.2070 - val_loss: 5.4800\n",
|
|
"Epoch 7/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.2747 - loss: 3.7958 - val_accuracy: 0.2140 - val_loss: 5.4369\n",
|
|
"Epoch 8/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.2835 - loss: 3.6250 - val_accuracy: 0.2140 - val_loss: 5.5970\n",
|
|
"Epoch 9/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 45ms/step - accuracy: 0.2955 - loss: 3.4545 - val_accuracy: 0.2035 - val_loss: 5.6533\n",
|
|
"Epoch 10/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.3261 - loss: 3.3813 - val_accuracy: 0.2456 - val_loss: 5.4599\n",
|
|
"Epoch 11/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.3392 - loss: 3.0959 - val_accuracy: 0.2386 - val_loss: 5.8937\n",
|
|
"Epoch 12/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.3257 - loss: 3.0641 - val_accuracy: 0.2561 - val_loss: 6.0506\n",
|
|
"Epoch 13/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.3680 - loss: 2.8640 - val_accuracy: 0.2737 - val_loss: 6.1670\n",
|
|
"Epoch 14/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.3826 - loss: 2.7676 - val_accuracy: 0.2737 - val_loss: 6.2903\n",
|
|
"Epoch 15/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 45ms/step - accuracy: 0.4369 - loss: 2.4490 - val_accuracy: 0.2877 - val_loss: 6.2423\n",
|
|
"Epoch 16/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.4493 - loss: 2.3650 - val_accuracy: 0.2877 - val_loss: 6.1958\n",
|
|
"Epoch 17/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.4788 - loss: 2.2109 - val_accuracy: 0.2842 - val_loss: 6.8556\n",
|
|
"Epoch 18/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.4830 - loss: 2.1169 - val_accuracy: 0.3158 - val_loss: 6.6652\n",
|
|
"Epoch 19/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.5301 - loss: 1.9123 - val_accuracy: 0.3018 - val_loss: 7.4072\n",
|
|
"Epoch 20/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.5356 - loss: 1.8835 - val_accuracy: 0.3228 - val_loss: 6.8462\n",
|
|
"Epoch 21/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.5506 - loss: 1.7196 - val_accuracy: 0.3228 - val_loss: 7.6676\n",
|
|
"Epoch 22/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.5761 - loss: 1.6448 - val_accuracy: 0.3333 - val_loss: 7.6882\n",
|
|
"Epoch 23/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.5701 - loss: 1.6007 - val_accuracy: 0.3509 - val_loss: 7.6626\n",
|
|
"Epoch 24/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.5669 - loss: 1.5955 - val_accuracy: 0.3649 - val_loss: 7.5473\n",
|
|
"Epoch 25/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.6115 - loss: 1.4500 - val_accuracy: 0.3684 - val_loss: 7.9749\n",
|
|
"Epoch 26/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.6328 - loss: 1.3238 - val_accuracy: 0.3614 - val_loss: 8.6771\n",
|
|
"Epoch 27/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.6521 - loss: 1.2637 - val_accuracy: 0.3789 - val_loss: 8.3299\n",
|
|
"Epoch 28/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 42ms/step - accuracy: 0.6325 - loss: 1.3356 - val_accuracy: 0.3895 - val_loss: 8.8868\n",
|
|
"Epoch 29/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.7215 - loss: 1.0663 - val_accuracy: 0.3965 - val_loss: 9.2352\n",
|
|
"Epoch 30/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.6951 - loss: 1.0896 - val_accuracy: 0.3825 - val_loss: 8.6609\n",
|
|
"Epoch 31/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.6828 - loss: 1.0719 - val_accuracy: 0.3895 - val_loss: 9.0799\n",
|
|
"Epoch 32/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 42ms/step - accuracy: 0.7450 - loss: 0.9185 - val_accuracy: 0.3860 - val_loss: 9.4576\n",
|
|
"Epoch 33/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.7480 - loss: 0.8708 - val_accuracy: 0.4211 - val_loss: 10.2996\n",
|
|
"Epoch 34/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.7707 - loss: 0.7947 - val_accuracy: 0.3965 - val_loss: 10.0710\n",
|
|
"Epoch 35/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 45ms/step - accuracy: 0.7821 - loss: 0.7821 - val_accuracy: 0.4070 - val_loss: 9.7857\n",
|
|
"Epoch 36/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 44ms/step - accuracy: 0.7868 - loss: 0.7426 - val_accuracy: 0.3895 - val_loss: 10.3897\n",
|
|
"Epoch 37/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 43ms/step - accuracy: 0.7889 - loss: 0.6774 - val_accuracy: 0.4175 - val_loss: 10.3458\n",
|
|
"Epoch 38/50\n",
|
|
"\u001b[1m143/143\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 45ms/step - accuracy: 0.8259 - loss: 0.6240 - val_accuracy: 0.4070 - val_loss: 10.7760\n",
|
|
"Epoch 38: early stopping\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Callback untuk menyimpan model terbaik\n",
|
|
"checkpoint = ModelCheckpoint(\n",
|
|
" \"qa_lstm_model.h5\", monitor=\"val_accuracy\", save_best_only=True, verbose=1\n",
|
|
")\n",
|
|
"\n",
|
|
"early_stop = EarlyStopping(monitor=\"val_accuracy\", patience=5, verbose=1)\n",
|
|
"\n",
|
|
"# Training\n",
|
|
"batch_size = 8\n",
|
|
"epochs = 50\n",
|
|
"\n",
|
|
"# Ubah format jawaban untuk sparse categorical crossentropy\n",
|
|
"train_answer_labels = train_answer[:, 0] # Ambil indeks pertama dari jawaban\n",
|
|
"test_answer_labels = test_answer[:, 0]\n",
|
|
"\n",
|
|
"# Train model\n",
|
|
"history = model.fit(\n",
|
|
" [train_context, train_question, train_token, train_ner, train_srl, train_q_type],\n",
|
|
" train_answer_labels,\n",
|
|
" batch_size=batch_size,\n",
|
|
" epochs=epochs,\n",
|
|
" validation_data=(\n",
|
|
" [test_context, test_question, test_token, test_ner, test_srl, test_q_type],\n",
|
|
" test_answer_labels,\n",
|
|
" ),\n",
|
|
" callbacks=[\n",
|
|
" # checkpoint,\n",
|
|
" early_stop,\n",
|
|
" ],\n",
|
|
")\n",
|
|
"\n",
|
|
"model.save(\"qa_lstm_model_final.keras\")\n",
|
|
"\n",
|
|
"# Simpan tokenizer\n",
|
|
"tokenizer_data = {\n",
|
|
" \"word_tokenizer\": tokenizer.to_json(),\n",
|
|
" \"ner_tokenizer\": ner_tokenizer.to_json(),\n",
|
|
" \"srl_tokenizer\": srl_tokenizer.to_json(),\n",
|
|
" \"answer_tokenizer\": answer_tokenizer.to_json(),\n",
|
|
" \"q_type_tokenizer\": q_type_tokenizer.to_json(),\n",
|
|
" \"max_context_len\": max_context_len,\n",
|
|
" \"max_question_len\": max_question_len,\n",
|
|
" \"max_token_len\": max_token_len,\n",
|
|
"}\n",
|
|
"\n",
|
|
"with open(\"qa_tokenizers.json\", \"w\") as f:\n",
|
|
" json.dump(tokenizer_data, f)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"id": "426ad763",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[1m36/36\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - accuracy: 0.3886 - loss: 10.5154\n",
|
|
"Test Loss: 10.776042938232422\n",
|
|
"Test Accuracy: 0.4070175290107727\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"model = load_model(\"qa_lstm_model_final.keras\")\n",
|
|
"results = model.evaluate(\n",
|
|
" [test_context, test_question, test_token, test_ner, test_srl, test_q_type],\n",
|
|
" test_answer_labels,\n",
|
|
" batch_size=batch_size,\n",
|
|
")\n",
|
|
"\n",
|
|
"print(\"Test Loss:\", results[0])\n",
|
|
"print(\"Test Accuracy:\", results[1])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 41,
|
|
"id": "3cbe7470",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def predict_answer(context, question, tokens, ner, srl, q_type):\n",
|
|
" # Preprocess\n",
|
|
" context_seq = tokenizer.texts_to_sequences([preprocess_text(context)])\n",
|
|
" question_seq = tokenizer.texts_to_sequences([preprocess_text(question)])\n",
|
|
"\n",
|
|
" # Convert token, ner, srl dengan benar (memperhatikan format yang sama dengan data training)\n",
|
|
" token_seq = [tokenizer.texts_to_sequences([\" \".join(tokens)])[0]]\n",
|
|
" ner_seq = [ner_tokenizer.texts_to_sequences([\" \".join(ner)])[0]]\n",
|
|
" srl_seq = [srl_tokenizer.texts_to_sequences([\" \".join(srl)])[0]]\n",
|
|
"\n",
|
|
" # Handle tipe pertanyaan\n",
|
|
" q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)\n",
|
|
" q_type_cat = tf.keras.utils.to_categorical(\n",
|
|
" [q_type_idx], num_classes=q_type_vocab_size\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Pad sequences\n",
|
|
" context_pad = pad_sequences(context_seq, maxlen=max_context_len, padding=\"post\")\n",
|
|
" question_pad = pad_sequences(question_seq, maxlen=max_question_len, padding=\"post\")\n",
|
|
" token_pad = pad_sequences(token_seq, maxlen=max_token_len, padding=\"post\")\n",
|
|
" ner_pad = pad_sequences(ner_seq, maxlen=max_token_len, padding=\"post\")\n",
|
|
" srl_pad = pad_sequences(srl_seq, maxlen=max_token_len, padding=\"post\")\n",
|
|
"\n",
|
|
" # Predict\n",
|
|
" prediction = model.predict(\n",
|
|
" [context_pad, question_pad, token_pad, ner_pad, srl_pad, q_type_cat]\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Get answer index\n",
|
|
" answer_idx = np.argmax(prediction[0])\n",
|
|
"\n",
|
|
" # Convert to answer text\n",
|
|
" for word, idx in answer_tokenizer.word_index.items():\n",
|
|
" if idx == answer_idx:\n",
|
|
" return word\n",
|
|
"\n",
|
|
" return \"Unknown\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 45,
|
|
"id": "4b0d6d42",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Processing sample 1/285 (index 628)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 2/285 (index 680)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 3/285 (index 1085)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 4/285 (index 578)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 5/285 (index 1010)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 6/285 (index 759)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 7/285 (index 931)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 8/285 (index 727)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 9/285 (index 70)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 10/285 (index 123)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 11/285 (index 847)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 12/285 (index 203)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 13/285 (index 584)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 14/285 (index 259)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 35ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 15/285 (index 843)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 16/285 (index 1299)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 17/285 (index 361)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 18/285 (index 78)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 44ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 19/285 (index 486)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 20/285 (index 51)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 21/285 (index 67)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.3679\n",
|
|
"Full BLEU (1-gram): 0.3679\n",
|
|
"Processing sample 22/285 (index 617)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 23/285 (index 1214)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 24/285 (index 590)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 25/285 (index 76)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 26/285 (index 942)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 27/285 (index 610)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 28/285 (index 354)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 29/285 (index 1108)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 30/285 (index 247)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 31/285 (index 1222)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 32/285 (index 163)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 33/285 (index 1007)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 34/285 (index 906)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 35/285 (index 1413)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 36/285 (index 168)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 37/285 (index 44)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 38/285 (index 668)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 39/285 (index 988)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 40/285 (index 1206)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0183\n",
|
|
"Full BLEU (1-gram): 0.0183\n",
|
|
"Processing sample 41/285 (index 1234)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 42/285 (index 1415)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 43/285 (index 1250)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 44/285 (index 1383)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 45/285 (index 350)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 46/285 (index 1172)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 47/285 (index 29)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 48/285 (index 1355)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 49/285 (index 710)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 50/285 (index 208)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 51/285 (index 432)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 52/285 (index 49)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 53/285 (index 570)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 54/285 (index 915)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 55/285 (index 261)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 56/285 (index 1212)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 57/285 (index 471)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 58/285 (index 861)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 59/285 (index 978)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 60/285 (index 812)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 61/285 (index 1311)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 62/285 (index 1306)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 63/285 (index 184)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 64/285 (index 218)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 65/285 (index 1204)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 66/285 (index 901)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.3679\n",
|
|
"Full BLEU (1-gram): 0.3679\n",
|
|
"Processing sample 67/285 (index 1036)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 68/285 (index 1144)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 69/285 (index 1096)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 40ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 70/285 (index 1273)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 71/285 (index 864)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 72/285 (index 724)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 73/285 (index 560)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 74/285 (index 892)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 75/285 (index 1419)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 76/285 (index 377)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 77/285 (index 101)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 78/285 (index 695)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 79/285 (index 297)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 80/285 (index 1380)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 81/285 (index 1081)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 82/285 (index 798)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 83/285 (index 1407)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 84/285 (index 382)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 85/285 (index 394)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 86/285 (index 846)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 87/285 (index 240)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 88/285 (index 479)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 89/285 (index 198)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 90/285 (index 1225)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 91/285 (index 351)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 92/285 (index 277)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 93/285 (index 451)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 94/285 (index 554)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 95/285 (index 1268)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 96/285 (index 1146)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 97/285 (index 756)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 98/285 (index 43)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 99/285 (index 1323)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 100/285 (index 885)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 101/285 (index 1314)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 102/285 (index 1221)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 103/285 (index 720)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 104/285 (index 908)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 105/285 (index 636)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 106/285 (index 650)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 107/285 (index 585)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 108/285 (index 506)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 109/285 (index 310)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 110/285 (index 353)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 111/285 (index 1181)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 112/285 (index 65)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.3679\n",
|
|
"Full BLEU (1-gram): 0.3679\n",
|
|
"Processing sample 113/285 (index 289)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 114/285 (index 620)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 115/285 (index 1061)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 116/285 (index 1270)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 117/285 (index 1356)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 118/285 (index 909)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 119/285 (index 411)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 120/285 (index 1420)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 121/285 (index 115)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 122/285 (index 752)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 123/285 (index 1259)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 124/285 (index 1188)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 125/285 (index 631)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 126/285 (index 435)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 127/285 (index 365)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 128/285 (index 32)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 129/285 (index 630)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 130/285 (index 482)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.3679\n",
|
|
"Full BLEU (1-gram): 0.3679\n",
|
|
"Processing sample 131/285 (index 772)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 132/285 (index 380)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 133/285 (index 1112)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 134/285 (index 233)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 135/285 (index 428)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 136/285 (index 551)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 137/285 (index 1395)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 138/285 (index 1066)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 35ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 139/285 (index 237)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 140/285 (index 605)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 141/285 (index 529)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 142/285 (index 810)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 143/285 (index 274)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 144/285 (index 743)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 145/285 (index 994)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 146/285 (index 745)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 147/285 (index 1087)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 148/285 (index 63)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 149/285 (index 903)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 150/285 (index 974)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 151/285 (index 485)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 152/285 (index 549)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 153/285 (index 796)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 154/285 (index 1260)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 155/285 (index 371)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 156/285 (index 661)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 157/285 (index 30)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 158/285 (index 56)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 159/285 (index 1151)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 160/285 (index 367)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 161/285 (index 1371)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 162/285 (index 923)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 163/285 (index 1030)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 164/285 (index 478)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 165/285 (index 767)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 166/285 (index 528)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 167/285 (index 239)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 168/285 (index 355)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 169/285 (index 904)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 170/285 (index 649)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 171/285 (index 757)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 172/285 (index 99)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 173/285 (index 422)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 174/285 (index 792)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 175/285 (index 192)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 176/285 (index 888)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 177/285 (index 838)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 178/285 (index 332)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 179/285 (index 1361)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 180/285 (index 672)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 181/285 (index 1084)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 182/285 (index 220)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 183/285 (index 481)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 184/285 (index 535)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 185/285 (index 1398)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 186/285 (index 765)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 187/285 (index 309)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 188/285 (index 543)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 189/285 (index 1347)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 190/285 (index 1424)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 191/285 (index 381)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 192/285 (index 275)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 193/285 (index 839)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 194/285 (index 58)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 195/285 (index 244)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 196/285 (index 1220)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 197/285 (index 660)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 198/285 (index 23)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 199/285 (index 107)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 200/285 (index 807)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 201/285 (index 722)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 202/285 (index 1177)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 203/285 (index 651)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 204/285 (index 1113)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 205/285 (index 589)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 206/285 (index 936)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 207/285 (index 579)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 208/285 (index 1098)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 209/285 (index 1328)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 210/285 (index 736)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 211/285 (index 638)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 212/285 (index 905)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 213/285 (index 643)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 214/285 (index 342)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 215/285 (index 196)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 216/285 (index 1376)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 217/285 (index 128)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 218/285 (index 1198)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 219/285 (index 81)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 220/285 (index 1335)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 221/285 (index 344)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 222/285 (index 1100)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 223/285 (index 1150)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 224/285 (index 532)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 225/285 (index 111)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 226/285 (index 1404)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 227/285 (index 707)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 228/285 (index 894)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 229/285 (index 306)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 230/285 (index 231)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 231/285 (index 869)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 232/285 (index 744)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 233/285 (index 316)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 234/285 (index 175)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 235/285 (index 1063)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 236/285 (index 1278)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 237/285 (index 141)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 238/285 (index 461)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 239/285 (index 594)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 240/285 (index 1303)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0183\n",
|
|
"Full BLEU (1-gram): 0.0183\n",
|
|
"Processing sample 241/285 (index 817)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 242/285 (index 1148)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 243/285 (index 875)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 244/285 (index 682)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 27ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 245/285 (index 1277)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.3679\n",
|
|
"Full BLEU (1-gram): 0.3679\n",
|
|
"Processing sample 246/285 (index 346)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.3679\n",
|
|
"Full BLEU (1-gram): 0.3679\n",
|
|
"Processing sample 247/285 (index 1132)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 248/285 (index 1414)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 249/285 (index 941)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 250/285 (index 477)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 251/285 (index 1161)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 252/285 (index 271)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 253/285 (index 514)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 254/285 (index 762)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 255/285 (index 413)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 256/285 (index 1048)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 257/285 (index 425)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 27ms/step\n",
|
|
"Unigram BLEU: 0.1353\n",
|
|
"Full BLEU (1-gram): 0.1353\n",
|
|
"Processing sample 258/285 (index 1417)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 259/285 (index 615)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 260/285 (index 619)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 261/285 (index 1387)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 262/285 (index 254)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 263/285 (index 1043)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 264/285 (index 982)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 265/285 (index 952)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 266/285 (index 270)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 267/285 (index 86)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 268/285 (index 1352)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 269/285 (index 265)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 270/285 (index 1410)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 271/285 (index 1364)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 272/285 (index 1075)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 273/285 (index 453)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step\n",
|
|
"Unigram BLEU: 0.3679\n",
|
|
"Full BLEU (1-gram): 0.3679\n",
|
|
"Processing sample 274/285 (index 666)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 275/285 (index 221)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 276/285 (index 113)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 277/285 (index 1026)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 27ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 278/285 (index 1055)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 279/285 (index 174)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"Processing sample 280/285 (index 705)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 281/285 (index 1000)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 282/285 (index 1119)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 283/285 (index 917)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 284/285 (index 889)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step\n",
|
|
"Unigram BLEU: 1.0000\n",
|
|
"Full BLEU (1-gram): 1.0000\n",
|
|
"Processing sample 285/285 (index 490)\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step\n",
|
|
"Unigram BLEU: 0.0000\n",
|
|
"Full BLEU (1-gram): 0.0000\n",
|
|
"\n",
|
|
"DEBUG INFORMATION:\n",
|
|
"Total samples processed: 285\n",
|
|
"Samples with perfect score (1.0): 62\n",
|
|
"Samples with zero score (0.0): 199\n",
|
|
"Number of scores collected: 285\n",
|
|
"First 10 unigram scores: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(1.0), np.float64(1.0), np.float64(0.0), np.float64(1.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]\n",
|
|
"Last 10 unigram scores: [np.float64(0.0), np.float64(0.0), np.float64(1.0), np.float64(0.0), np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(0.0)]\n",
|
|
"Sum of all unigram scores: 66.6419\n",
|
|
"Mean of all unigram scores: 0.2338\n",
|
|
"Results saved to: bleu_answer_calculation.xlsx\n",
|
|
"Summary of scores saved to: bleu_scores_summary.xlsx\n",
|
|
"\n",
|
|
"Model Performance Metrics:\n",
|
|
"Average Unigram BLEU Score: 23.38%\n",
|
|
"Perfect matches: 62 out of 285 (21.75%)\n",
|
|
"Zero scores: 199 out of 285 (69.82%)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from collections import Counter\n",
|
|
"\n",
|
|
"# def evaluate_model_performance(test_data):\n",
|
|
"# output_path = \"bleu_answer_calculation.xlsx\"\n",
|
|
" \n",
|
|
"# bleu_scores = []\n",
|
|
"# rows = []\n",
|
|
"\n",
|
|
"# for i in range(len(test_data)):\n",
|
|
"# idx = test_data[i]\n",
|
|
" \n",
|
|
"# sample_context = contexts[idx]\n",
|
|
"# sample_question = questions[idx] # Get the actual question\n",
|
|
"# sample_tokens = tokens_list[idx]\n",
|
|
"# sample_ner = ner_list[idx]\n",
|
|
"# sample_srl = srl_list[idx]\n",
|
|
"# sample_q_type = q_types[idx]\n",
|
|
"# actual_answer = answers[idx]\n",
|
|
" \n",
|
|
"# print(f\"Processing sample {i+1}/{len(test_data)} (index {idx})\")\n",
|
|
" \n",
|
|
"# # Call predict_answer with parameters in the correct order\n",
|
|
"# pred_answer = predict_answer(\n",
|
|
"# sample_context, # context\n",
|
|
"# sample_question, # question (string)\n",
|
|
"# sample_tokens, # tokens (list)\n",
|
|
"# sample_ner, # ner\n",
|
|
"# sample_srl, # srl\n",
|
|
"# sample_q_type # q_type\n",
|
|
"# )\n",
|
|
"\n",
|
|
"# actual_tokens = actual_answer.split()\n",
|
|
"# pred_tokens = pred_answer.split()\n",
|
|
"\n",
|
|
"# max_n = 4\n",
|
|
"# weights = [1 / max_n] * max_n\n",
|
|
"# clipped_counts = []\n",
|
|
"# total_counts = []\n",
|
|
"# precisions = []\n",
|
|
"\n",
|
|
"# log_text = f\"Sample {i+1}:\\n\"\n",
|
|
"# log_text += f\"Context: {sample_context}\\n\"\n",
|
|
"# log_text += f\"Question: {sample_question}\\n\"\n",
|
|
"# log_text += f\"Actual Answer: {actual_answer}\\n\"\n",
|
|
"# log_text += f\"Predicted Answer: {pred_answer}\\n\"\n",
|
|
"# log_text += f\"Actual Tokens: {actual_tokens}\\n\"\n",
|
|
"# log_text += f\"Predicted Tokens: {pred_tokens}\\n\"\n",
|
|
" \n",
|
|
"# print(log_text)\n",
|
|
"\n",
|
|
"# for n in range(1, max_n + 1):\n",
|
|
"# # Skip if not enough tokens for n-gram\n",
|
|
"# if len(actual_tokens) < n or len(pred_tokens) < n:\n",
|
|
"# clipped_counts.append(0)\n",
|
|
"# total_counts.append(0 if len(pred_tokens) < n else sum(1 for _ in range(len(pred_tokens) - n + 1)))\n",
|
|
"# precisions.append(0)\n",
|
|
"# log_text += f\"{n}-gram: clipped count = 0, total candidate = {total_counts[-1]}, precision = 0.0000\\n\"\n",
|
|
"# continue\n",
|
|
" \n",
|
|
"# ref_ngrams = Counter(tuple(actual_tokens[j:j + n]) for j in range(len(actual_tokens) - n + 1))\n",
|
|
"# cand_ngrams = Counter(tuple(pred_tokens[j:j + n]) for j in range(len(pred_tokens) - n + 1))\n",
|
|
"\n",
|
|
"# clip_sum = sum(min(cnt, ref_ngrams.get(ng, 0)) for ng, cnt in cand_ngrams.items())\n",
|
|
"# total = sum(cand_ngrams.values())\n",
|
|
"# p_n = clip_sum / total if total > 0 else 0\n",
|
|
"\n",
|
|
"# clipped_counts.append(clip_sum)\n",
|
|
"# total_counts.append(total)\n",
|
|
"# precisions.append(p_n)\n",
|
|
"\n",
|
|
"# log_text += f\"{n}-gram: clipped count = {clip_sum}, total candidate = {total}, precision = {p_n:.4f}\\n\"\n",
|
|
"\n",
|
|
"# c = len(pred_tokens)\n",
|
|
"# r = len(actual_tokens)\n",
|
|
"\n",
|
|
"# if c == 0:\n",
|
|
"# bp = 0\n",
|
|
"# log_text += f\"Brevity Penalty: BP = {bp:.4f} (c={c}, r={r}) - No predicted tokens.\\n\"\n",
|
|
"# else:\n",
|
|
"# bp = 1 if c > r else np.exp(1 - r / c)\n",
|
|
"# log_text += f\"Brevity Penalty: BP = {bp:.4f} (c={c}, r={r})\\n\"\n",
|
|
"\n",
|
|
"# # Avoid math domain error with log(0)\n",
|
|
"# filtered_precisions = [max(p, 1e-10) for p in precisions] # Replace 0 with small value\n",
|
|
" \n",
|
|
"# if all(p > 0 for p in precisions):\n",
|
|
"# bleu = bp * np.exp(sum(w * np.log(p) for w, p in zip(weights, filtered_precisions)))\n",
|
|
"# else:\n",
|
|
"# bleu = 0.0\n",
|
|
"\n",
|
|
"# log_text += f\"BLEU score = {bleu:.4f}\\n\"\n",
|
|
"# print(f\"BLEU score = {bleu:.4f}\")\n",
|
|
"\n",
|
|
"# bleu_scores.append(bleu)\n",
|
|
"# rows.append({\"Result\": log_text})\n",
|
|
"\n",
|
|
"# try:\n",
|
|
"# df = pd.DataFrame(rows)\n",
|
|
"# df.to_excel(output_path, index=False)\n",
|
|
"# print(f\"Results saved to: {output_path}\")\n",
|
|
"# except Exception as e:\n",
|
|
"# print(f\"Error saving to Excel: {e}\")\n",
|
|
"\n",
|
|
"# # Handle empty bleu_scores\n",
|
|
"# avg_bleu = np.mean(bleu_scores) if bleu_scores else 0.0\n",
|
|
" \n",
|
|
"# results = {\n",
|
|
"# \"avg_bleu_score\": avg_bleu,\n",
|
|
"# }\n",
|
|
"\n",
|
|
"# return results\n",
|
|
"\n",
|
|
"def evaluate_model_performance(test_data, use_unigram_only=True, include_bigram=False):\n",
|
|
" \"\"\"\n",
|
|
" Evaluate model performance using BLEU score.\n",
|
|
" \n",
|
|
" Args:\n",
|
|
" test_data: List of indices for test samples\n",
|
|
" use_unigram_only: If True, calculate BLEU using only unigrams\n",
|
|
" include_bigram: If True and use_unigram_only is False, include up to bigrams\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" Dictionary with evaluation metrics\n",
|
|
" \"\"\"\n",
|
|
" output_path = \"bleu_answer_calculation.xlsx\"\n",
|
|
" \n",
|
|
" unigram_scores = []\n",
|
|
" bigram_scores = []\n",
|
|
" full_bleu_scores = []\n",
|
|
" rows = []\n",
|
|
" \n",
|
|
" # Add counters to track stats\n",
|
|
" total_samples = len(test_data)\n",
|
|
" perfect_matches = 0\n",
|
|
" zero_scores = 0\n",
|
|
"\n",
|
|
" for i in range(total_samples):\n",
|
|
" idx = test_data[i]\n",
|
|
" \n",
|
|
" sample_context = contexts[idx]\n",
|
|
" sample_question = questions[idx]\n",
|
|
" sample_tokens = tokens_list[idx]\n",
|
|
" sample_ner = ner_list[idx]\n",
|
|
" sample_srl = srl_list[idx]\n",
|
|
" sample_q_type = q_types[idx]\n",
|
|
" actual_answer = answers[idx]\n",
|
|
" \n",
|
|
" print(f\"Processing sample {i+1}/{total_samples} (index {idx})\")\n",
|
|
" \n",
|
|
" # Call predict_answer with parameters in the correct order\n",
|
|
" pred_answer = predict_answer(\n",
|
|
" sample_context,\n",
|
|
" sample_question,\n",
|
|
" sample_tokens,\n",
|
|
" sample_ner,\n",
|
|
" sample_srl,\n",
|
|
" sample_q_type\n",
|
|
" )\n",
|
|
"\n",
|
|
" actual_tokens = actual_answer.split()\n",
|
|
" pred_tokens = pred_answer.split()\n",
|
|
"\n",
|
|
" # Define max_n based on parameters\n",
|
|
" if use_unigram_only:\n",
|
|
" max_n = 1\n",
|
|
" elif include_bigram:\n",
|
|
" max_n = 2\n",
|
|
" else:\n",
|
|
" max_n = 4 # Original implementation with up to 4-grams\n",
|
|
" \n",
|
|
" weights = [1 / max_n] * max_n\n",
|
|
" clipped_counts = []\n",
|
|
" total_counts = []\n",
|
|
" precisions = []\n",
|
|
"\n",
|
|
" log_text = f\"Sample {i+1}:\\n\"\n",
|
|
" log_text += f\"Context: {sample_context}\\n\"\n",
|
|
" log_text += f\"Question: {sample_question}\\n\"\n",
|
|
" log_text += f\"Actual Answer: {actual_answer}\\n\"\n",
|
|
" log_text += f\"Predicted Answer: {pred_answer}\\n\"\n",
|
|
" log_text += f\"Actual Tokens: {actual_tokens}\\n\"\n",
|
|
" log_text += f\"Predicted Tokens: {pred_tokens}\\n\"\n",
|
|
"\n",
|
|
" # Calculate precision for each n-gram level\n",
|
|
" for n in range(1, max_n + 1):\n",
|
|
" # Skip if not enough tokens for n-gram\n",
|
|
" if len(actual_tokens) < n or len(pred_tokens) < n:\n",
|
|
" clipped_counts.append(0)\n",
|
|
" total_counts.append(0 if len(pred_tokens) < n else sum(1 for _ in range(len(pred_tokens) - n + 1)))\n",
|
|
" precisions.append(0)\n",
|
|
" log_text += f\"{n}-gram: clipped count = 0, total candidate = {total_counts[-1]}, precision = 0.0000\\n\"\n",
|
|
" continue\n",
|
|
" \n",
|
|
" ref_ngrams = Counter(tuple(actual_tokens[j:j + n]) for j in range(len(actual_tokens) - n + 1))\n",
|
|
" cand_ngrams = Counter(tuple(pred_tokens[j:j + n]) for j in range(len(pred_tokens) - n + 1))\n",
|
|
"\n",
|
|
" clip_sum = sum(min(cnt, ref_ngrams.get(ng, 0)) for ng, cnt in cand_ngrams.items())\n",
|
|
" total = sum(cand_ngrams.values())\n",
|
|
" p_n = clip_sum / total if total > 0 else 0\n",
|
|
"\n",
|
|
" clipped_counts.append(clip_sum)\n",
|
|
" total_counts.append(total)\n",
|
|
" precisions.append(p_n)\n",
|
|
"\n",
|
|
" log_text += f\"{n}-gram: clipped count = {clip_sum}, total candidate = {total}, precision = {p_n:.4f}\\n\"\n",
|
|
" \n",
|
|
" # Save unigram and bigram scores separately\n",
|
|
" if n == 1:\n",
|
|
" unigram_precision = p_n\n",
|
|
" elif n == 2:\n",
|
|
" bigram_precision = p_n\n",
|
|
"\n",
|
|
" c = len(pred_tokens)\n",
|
|
" r = len(actual_tokens)\n",
|
|
"\n",
|
|
" if c == 0:\n",
|
|
" bp = 0\n",
|
|
" log_text += f\"Brevity Penalty: BP = {bp:.4f} (c={c}, r={r}) - No predicted tokens.\\n\"\n",
|
|
" else:\n",
|
|
" bp = 1 if c > r else np.exp(1 - r / c)\n",
|
|
" log_text += f\"Brevity Penalty: BP = {bp:.4f} (c={c}, r={r})\\n\"\n",
|
|
"\n",
|
|
" # Calculate unigram BLEU (just precision with brevity penalty)\n",
|
|
" unigram_bleu = bp * unigram_precision if 'unigram_precision' in locals() else 0\n",
|
|
" unigram_scores.append(unigram_bleu)\n",
|
|
" log_text += f\"Unigram BLEU score = {unigram_bleu:.4f}\\n\"\n",
|
|
" \n",
|
|
" # Track perfect matches and zero scores\n",
|
|
" if unigram_bleu >= 0.9999: # Close enough to 1.0\n",
|
|
" perfect_matches += 1\n",
|
|
" if unigram_bleu <= 0.0001: # Close enough to 0.0\n",
|
|
" zero_scores += 1\n",
|
|
" \n",
|
|
" # Calculate bigram BLEU if needed\n",
|
|
" if include_bigram and 'bigram_precision' in locals():\n",
|
|
" # Geometric mean of unigram and bigram precision\n",
|
|
" bigram_bleu = bp * np.sqrt(unigram_precision * bigram_precision) if unigram_precision > 0 and bigram_precision > 0 else 0\n",
|
|
" bigram_scores.append(bigram_bleu)\n",
|
|
" log_text += f\"Bigram BLEU score = {bigram_bleu:.4f}\\n\"\n",
|
|
"\n",
|
|
" # Calculate full BLEU with all n-grams\n",
|
|
" if all(p > 0 for p in precisions):\n",
|
|
" # Avoid math domain error with log(0)\n",
|
|
" filtered_precisions = [max(p, 1e-10) for p in precisions]\n",
|
|
" full_bleu = bp * np.exp(sum(w * np.log(p) for w, p in zip(weights, filtered_precisions)))\n",
|
|
" else:\n",
|
|
" full_bleu = 0.0\n",
|
|
" \n",
|
|
" full_bleu_scores.append(full_bleu)\n",
|
|
" log_text += f\"Full BLEU score ({max_n}-gram) = {full_bleu:.4f}\\n\"\n",
|
|
" \n",
|
|
" print(f\"Unigram BLEU: {unigram_bleu:.4f}\")\n",
|
|
" if include_bigram:\n",
|
|
" print(f\"Bigram BLEU: {bigram_scores[-1]:.4f}\")\n",
|
|
" print(f\"Full BLEU ({max_n}-gram): {full_bleu:.4f}\")\n",
|
|
"\n",
|
|
" rows.append({\"Result\": log_text})\n",
|
|
"\n",
|
|
" # Debug information\n",
|
|
" print(\"\\nDEBUG INFORMATION:\")\n",
|
|
" print(f\"Total samples processed: {total_samples}\")\n",
|
|
" print(f\"Samples with perfect score (1.0): {perfect_matches}\")\n",
|
|
" print(f\"Samples with zero score (0.0): {zero_scores}\")\n",
|
|
" print(f\"Number of scores collected: {len(unigram_scores)}\")\n",
|
|
" \n",
|
|
" # Print all scores for verification\n",
|
|
" if len(unigram_scores) <= 20: # Only if reasonably short\n",
|
|
" print(\"All unigram scores:\", unigram_scores)\n",
|
|
" else:\n",
|
|
" print(\"First 10 unigram scores:\", unigram_scores[:10])\n",
|
|
" print(\"Last 10 unigram scores:\", unigram_scores[-10:])\n",
|
|
" \n",
|
|
" # Calculate average scores with extra checks\n",
|
|
" if unigram_scores:\n",
|
|
" avg_unigram = sum(unigram_scores) / len(unigram_scores)\n",
|
|
" print(f\"Sum of all unigram scores: {sum(unigram_scores):.4f}\")\n",
|
|
" print(f\"Mean of all unigram scores: {avg_unigram:.4f}\")\n",
|
|
" else:\n",
|
|
" avg_unigram = 0.0\n",
|
|
" print(\"Warning: No unigram scores to average!\")\n",
|
|
" \n",
|
|
" avg_bigram = sum(bigram_scores) / len(bigram_scores) if bigram_scores else 0.0\n",
|
|
" avg_full = sum(full_bleu_scores) / len(full_bleu_scores) if full_bleu_scores else 0.0\n",
|
|
" \n",
|
|
" # Save results to Excel\n",
|
|
" try:\n",
|
|
" df = pd.DataFrame(rows)\n",
|
|
" df.to_excel(output_path, index=False)\n",
|
|
" print(f\"Results saved to: {output_path}\")\n",
|
|
" \n",
|
|
" # Also save a summary of scores\n",
|
|
" summary_data = []\n",
|
|
" for i in range(len(unigram_scores)):\n",
|
|
" summary_row = {\n",
|
|
" \"Sample\": i+1, \n",
|
|
" \"Index\": test_data[i],\n",
|
|
" \"Unigram BLEU\": unigram_scores[i]\n",
|
|
" }\n",
|
|
" if include_bigram and i < len(bigram_scores):\n",
|
|
" summary_row[\"Bigram BLEU\"] = bigram_scores[i]\n",
|
|
" summary_row[\"Full BLEU\"] = full_bleu_scores[i]\n",
|
|
" summary_data.append(summary_row)\n",
|
|
" \n",
|
|
" summary_df = pd.DataFrame(summary_data)\n",
|
|
" summary_df.to_excel(\"bleu_scores_summary.xlsx\", index=False)\n",
|
|
" print(\"Summary of scores saved to: bleu_scores_summary.xlsx\")\n",
|
|
" \n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Error saving to Excel: {e}\")\n",
|
|
" \n",
|
|
" results = {\n",
|
|
" \"avg_unigram_bleu\": avg_unigram,\n",
|
|
" \"avg_bigram_bleu\": avg_bigram if include_bigram else None,\n",
|
|
" \"avg_full_bleu\": avg_full,\n",
|
|
" \"perfect_matches\": perfect_matches,\n",
|
|
" \"zero_scores\": zero_scores,\n",
|
|
" \"total_samples\": total_samples\n",
|
|
" }\n",
|
|
"\n",
|
|
" return results\n",
|
|
"\n",
|
|
"# performance_metrics = evaluate_model_performance(test_indices)\n",
|
|
"performance_metrics = evaluate_model_performance(test_indices, use_unigram_only=True)\n",
|
|
"\n",
|
|
"print(\"\\nModel Performance Metrics:\")\n",
|
|
"print(f\"Average Unigram BLEU Score: {performance_metrics['avg_unigram_bleu'] * 100:.2f}%\")\n",
|
|
"print(f\"Perfect matches: {performance_metrics['perfect_matches']} out of {performance_metrics['total_samples']} ({performance_metrics['perfect_matches']/performance_metrics['total_samples']*100:.2f}%)\")\n",
|
|
"print(f\"Zero scores: {performance_metrics['zero_scores']} out of {performance_metrics['total_samples']} ({performance_metrics['zero_scores']/performance_metrics['total_samples']*100:.2f}%)\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "myenv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.16"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|