feat: add new model training

akhdanre 2025-05-14 23:07:52 +07:00
parent ad4b6d6137
commit f0f6f412bb
23 changed files with 42320 additions and 739 deletions

View File

@ -0,0 +1,424 @@
import numpy as np
import pandas as pd
import json
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input,
LSTM,
Dense,
Embedding,
Bidirectional,
Concatenate,
Attention,
Dropout,
)
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import re
import string
from collections import Counter
# Example data provided (kept for reference)
# data = [
# {
# "context": "raden ajeng kartini lahir pada 21 april 1879 di jepara",
# "tokens": [
# "raden", "ajeng", "kartini", "lahir", "pada", "21", "april", "1879", "di", "jepara"
# ],
# "ner": [
# "PER", "PER", "PER", "O", "O", "DATE", "DATE", "DATE", "O", "LOC"
# ],
# "srl": [
# "ARG0", "ARG0", "ARG0", "V", "O", "ARGM-TMP", "ARGM-TMP", "ARGM-TMP", "O", "ARGM-LOC"
# ],
# "qas": [
# {
# "type": "isian",
# "question": "Dimana kartini lahir ___",
# "answer": "jepara",
# "id": "qa_0_q1"
# },
# {
# "type": "true_false",
# "question": "Kartini lahir pada tanggal 21 mei 1879 ___",
# "options": ["true", "false"],
# "answer": "false",
# "id": "qa_0_q2"
# }
# ]
# },
# {
# "context": "kerajaan majapahit berdiri pada tahun 1293 di trowulan",
# "tokens": [
# "kerajaan", "majapahit", "berdiri", "pada", "tahun", "1293", "di", "trowulan"
# ],
# "ner": [
# "O", "ORG", "O", "O", "O", "DATE", "O", "LOC"
# ],
# "srl": [
# "ARG1", "ARG1", "V", "O", "O", "ARGM-TMP", "O", "ARGM-LOC"
# ],
# "qas": [
# {
# "type": "opsi",
# "question": "Dimana kerajaan majapahit berdiri ___",
# "options": ["trowulan", "singasari", "kuta", "banten"],
# "answer": "trowulan",
# "id": "qa_1_q1"
# },
# {
# "type": "true_false",
# "question": "Kerajaan majapahit berdiri pada tahun 1300 ___",
# "options": ["true", "false"],
# "answer": "false",
# "id": "qa_1_q2"
# }
# ]
# },
# {
# "context": "soekarno dan mohammad hatta memproklamasikan kemerdekaan indonesia pada 17 agustus 1945",
# "tokens": [
# "soekarno", "dan", "mohammad", "hatta", "memproklamasikan", "kemerdekaan", "indonesia", "pada", "17", "agustus", "1945"
# ],
# "ner": [
# "PER", "O", "PER", "PER", "O", "O", "LOC", "O", "DATE", "DATE", "DATE"
# ],
# "srl": [
# "ARG0", "O", "ARG0", "ARG0", "V", "ARG1", "ARGM-LOC", "O", "ARGM-TMP", "ARGM-TMP", "ARGM-TMP"
# ],
# "qas": [
# {
# "type": "isian",
# "question": "Pada tanggal berapa kemerdekaan indonesia diproklamasikan ___",
# "answer": "17 agustus 1945",
# "id": "qa_2_q1"
# },
# {
# "type": "opsi",
# "question": "Siapa yang memproklamasikan kemerdekaan indonesia ___",
# "options": ["soekarno", "mohammad hatta", "sudirman", "ahmad yani"],
# "answer": "soekarno mohammad hatta",
# "id": "qa_2_q2"
# }
# ]
# }
# ]
with open("data_converted.json", "r") as f:
data = json.load(f)
# # Save to a JSON file for future use
# with open('qa_dataset.json', 'w', encoding='utf-8') as f:
#     json.dump(data, f, ensure_ascii=False, indent=2)
# Preprocessing function
def preprocess_text(text):
"""Melakukan preprocessing teks dasar"""
text = text.lower()
text = re.sub(r"\s+", " ", text).strip()
return text
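# Illustrative sanity check: preprocess_text lowercases and collapses whitespace
assert preprocess_text("Raden  Ajeng   KARTINI ") == "raden ajeng kartini"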
# Prepare the data for the model
def prepare_data(data):
"""Prepare the data for the model"""
contexts = []
tokens_list = []
ner_list = []
srl_list = []
questions = []
answers = []
q_types = []
for item in data:
for qa in item["qas"]:
contexts.append(preprocess_text(item["context"]))
tokens_list.append(item["tokens"])
ner_list.append(item["ner"])
srl_list.append(item["srl"])
questions.append(preprocess_text(qa["question"]))
answers.append(qa["answer"])
q_types.append(qa["type"])
return contexts, tokens_list, ner_list, srl_list, questions, answers, q_types
# Prepare the data
contexts, tokens_list, ner_list, srl_list, questions, answers, q_types = prepare_data(
data
)
# Tokenizer for text (context and question)
max_vocab_size = 10000
tokenizer = Tokenizer(num_words=max_vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(contexts + questions + [" ".join(item) for item in tokens_list])
vocab_size = len(tokenizer.word_index) + 1
# Encoding for NER
ner_tokenizer = Tokenizer(oov_token="<OOV>")
ner_tokenizer.fit_on_texts([" ".join(ner) for ner in ner_list])
ner_vocab_size = len(ner_tokenizer.word_index) + 1
# Encoding for SRL
srl_tokenizer = Tokenizer(oov_token="<OOV>")
srl_tokenizer.fit_on_texts([" ".join(srl) for srl in srl_list])
srl_vocab_size = len(srl_tokenizer.word_index) + 1
# Encoding for question types
q_type_tokenizer = Tokenizer()
q_type_tokenizer.fit_on_texts(q_types)
q_type_vocab_size = len(q_type_tokenizer.word_index) + 1
# Convert tokens, NER, and SRL to sequences
def tokens_to_sequences(tokens, ner, srl):
"""Convert tokens, NER, and SRL tags to integer sequences"""
token_seqs = [tokenizer.texts_to_sequences([" ".join(t)])[0] for t in tokens]
ner_seqs = [ner_tokenizer.texts_to_sequences([" ".join(n)])[0] for n in ner]
srl_seqs = [srl_tokenizer.texts_to_sequences([" ".join(s)])[0] for s in srl]
return token_seqs, ner_seqs, srl_seqs
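# Note: joining tokens with spaces and re-tokenizing assumes the Keras Tokenizer
# splits them back into the same number of pieces; tokens that are pure
# punctuation are stripped by the default `filters`, which can shift the
# alignment with the NER/SRL tags.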
# Determine maximum lengths for padding
context_seqs = tokenizer.texts_to_sequences(contexts)
question_seqs = tokenizer.texts_to_sequences(questions)
token_seqs, ner_seqs, srl_seqs = tokens_to_sequences(tokens_list, ner_list, srl_list)
max_context_len = max([len(seq) for seq in context_seqs])
max_question_len = max([len(seq) for seq in question_seqs])
max_token_len = max([len(seq) for seq in token_seqs])
# Pad sequences so that all inputs have the same length
def pad_all_sequences(context_seqs, question_seqs, token_seqs, ner_seqs, srl_seqs):
"""Pad all sequences"""
context_padded = pad_sequences(context_seqs, maxlen=max_context_len, padding="post")
question_padded = pad_sequences(
question_seqs, maxlen=max_question_len, padding="post"
)
token_padded = pad_sequences(token_seqs, maxlen=max_token_len, padding="post")
ner_padded = pad_sequences(ner_seqs, maxlen=max_token_len, padding="post")
srl_padded = pad_sequences(srl_seqs, maxlen=max_token_len, padding="post")
return context_padded, question_padded, token_padded, ner_padded, srl_padded
# Prepare the encoder for answers
answer_tokenizer = Tokenizer(oov_token="<OOV>")
answer_tokenizer.fit_on_texts(answers)
answer_vocab_size = len(answer_tokenizer.word_index) + 1
# Encode question types - FIX - use direct indices instead of sequences
q_type_indices = []
for q_type in q_types:
# Look up the question-type index (0 is used for unseen types)
q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)
q_type_indices.append(q_type_idx)
# Convert to a numpy array
q_type_indices = np.array(q_type_indices)
# One-hot encode the question types
q_type_categorical = tf.keras.utils.to_categorical(
q_type_indices, num_classes=q_type_vocab_size
)
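# q_type_categorical has shape (num_samples, q_type_vocab_size); e.g. with three
# question types a row is a one-hot vector such as [0., 1., 0., 0.] (the exact
# position depends on the fitted word_index, and index 0 is never used).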
# Pad sequences
context_padded, question_padded, token_padded, ner_padded, srl_padded = (
pad_all_sequences(context_seqs, question_seqs, token_seqs, ner_seqs, srl_seqs)
)
# Encode the answers
answer_seqs = answer_tokenizer.texts_to_sequences(answers)
max_answer_len = max([len(seq) for seq in answer_seqs])
answer_padded = pad_sequences(answer_seqs, maxlen=max_answer_len, padding="post")
# Split the data into train and test sets
indices = list(range(len(context_padded)))
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)
# Helper to get a subset of the data by indices
def get_subset(data, indices):
return np.array([data[i] for i in indices])
# Train data
train_context = get_subset(context_padded, train_indices)
train_question = get_subset(question_padded, train_indices)
train_token = get_subset(token_padded, train_indices)
train_ner = get_subset(ner_padded, train_indices)
train_srl = get_subset(srl_padded, train_indices)
train_q_type = get_subset(q_type_categorical, train_indices)
train_answer = get_subset(answer_padded, train_indices)
# Test data
test_context = get_subset(context_padded, test_indices)
test_question = get_subset(question_padded, test_indices)
test_token = get_subset(token_padded, test_indices)
test_ner = get_subset(ner_padded, test_indices)
test_srl = get_subset(srl_padded, test_indices)
test_q_type = get_subset(q_type_categorical, test_indices)
test_answer = get_subset(answer_padded, test_indices)
# Hyperparameters
embedding_dim = 100
lstm_units = 128
ner_embedding_dim = 50
srl_embedding_dim = 50
dropout_rate = 0.3
# Function to build the model
def create_qa_model():
# Input layers
context_input = Input(shape=(max_context_len,), name="context_input")
question_input = Input(shape=(max_question_len,), name="question_input")
token_input = Input(shape=(max_token_len,), name="token_input")
ner_input = Input(shape=(max_token_len,), name="ner_input")
srl_input = Input(shape=(max_token_len,), name="srl_input")
q_type_input = Input(shape=(q_type_vocab_size,), name="q_type_input")
# Shared embedding layer for text
text_embedding = Embedding(vocab_size, embedding_dim, name="text_embedding")
# Embeddings for NER and SRL
ner_embedding = Embedding(ner_vocab_size, ner_embedding_dim, name="ner_embedding")(
ner_input
)
srl_embedding = Embedding(srl_vocab_size, srl_embedding_dim, name="srl_embedding")(
srl_input
)
# Apply embeddings
context_embed = text_embedding(context_input)
question_embed = text_embedding(question_input)
token_embed = text_embedding(token_input)
# Bidirectional LSTMs for context and token-level features
context_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="context_lstm")
)(context_embed)
question_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="question_lstm")
)(question_embed)
# Concat token features (tokens, NER, SRL)
token_features = Concatenate(name="token_features")(
[token_embed, ner_embedding, srl_embedding]
)
token_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="token_lstm")
)(token_features)
# Attention mechanism over the context, attending to the question
context_attention = tf.keras.layers.Attention(name="context_attention")(
[context_lstm, question_lstm]
)
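# Keras' Attention layer takes [query, value]: the context states act as the
# query over the question states, so the output keeps the context's time axis.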
# Pool attention outputs
context_att_pool = tf.keras.layers.GlobalMaxPooling1D(name="context_att_pool")(
context_attention
)
question_pool = tf.keras.layers.GlobalMaxPooling1D(name="question_pool")(
question_lstm
)
token_pool = tf.keras.layers.GlobalMaxPooling1D(name="token_pool")(token_lstm)
# Concat all features
all_features = Concatenate(name="all_features")(
[context_att_pool, question_pool, token_pool, q_type_input]
)
# Dense layers
x = Dense(256, activation="relu", name="dense_1")(all_features)
x = Dropout(dropout_rate)(x)
x = Dense(128, activation="relu", name="dense_2")(x)
x = Dropout(dropout_rate)(x)
# Output layer for the answer
answer_output = Dense(
answer_vocab_size, activation="softmax", name="answer_output"
)(x)
# Create model
model = Model(
inputs=[
context_input,
question_input,
token_input,
ner_input,
srl_input,
q_type_input,
],
outputs=answer_output,
)
# Compile model
model.compile(
optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
return model
# Build the model
model = create_qa_model()
model.summary()
# Callback to save the best model
checkpoint = ModelCheckpoint(
"qa_lstm_model.h5", monitor="val_accuracy", save_best_only=True, verbose=1
)
early_stop = EarlyStopping(monitor="val_accuracy", patience=5, verbose=1)
# Training
batch_size = 8
epochs = 50
# Reshape the answers for sparse categorical crossentropy
train_answer_labels = train_answer[:, 0]  # Take the first token index of each answer
test_answer_labels = test_answer[:, 0]
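# Note: only the first answer token is used as the label, so multi-token answers
# such as "17 agustus 1945" are truncated to their first word during training.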
# Train model
history = model.fit(
[train_context, train_question, train_token, train_ner, train_srl, train_q_type],
train_answer_labels,
batch_size=batch_size,
epochs=epochs,
validation_data=(
[test_context, test_question, test_token, test_ner, test_srl, test_q_type],
test_answer_labels,
),
callbacks=[checkpoint, early_stop],
)
# Save the model and tokenizers
model.save("qa_lstm_model_final.h5")
# Save the tokenizers
tokenizer_data = {
"word_tokenizer": tokenizer.to_json(),
"ner_tokenizer": ner_tokenizer.to_json(),
"srl_tokenizer": srl_tokenizer.to_json(),
"answer_tokenizer": answer_tokenizer.to_json(),
"q_type_tokenizer": q_type_tokenizer.to_json(),
"max_context_len": max_context_len,
"max_question_len": max_question_len,
"max_token_len": max_token_len,
}
with open("qa_tokenizers.json", "w") as f:
json.dump(tokenizer_data, f)
print("Model dan tokenizer berhasil disimpan!")

View File

@ -0,0 +1,151 @@
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import re
import random
# Load tokenizers and model configurations
with open("qa_tokenizers.json", "r") as f:
tokenizer_data = json.load(f)
tokenizer = tokenizer_from_json(tokenizer_data["word_tokenizer"])
ner_tokenizer = tokenizer_from_json(tokenizer_data["ner_tokenizer"])
srl_tokenizer = tokenizer_from_json(tokenizer_data["srl_tokenizer"])
answer_tokenizer = tokenizer_from_json(tokenizer_data["answer_tokenizer"])
q_type_tokenizer = tokenizer_from_json(tokenizer_data["q_type_tokenizer"])
max_context_len = tokenizer_data["max_context_len"]
max_question_len = tokenizer_data["max_question_len"]
max_token_len = tokenizer_data["max_token_len"]
q_type_vocab_size = len(q_type_tokenizer.word_index) + 1
# Load trained model
model = load_model("qa_lstm_model_final.h5")
def preprocess_text(text):
text = text.lower()
text = re.sub(r"\s+", " ", text).strip()
return text
def predict_answer(context, question, tokens, ner, srl, q_type):
context_seq = tokenizer.texts_to_sequences([preprocess_text(context)])
question_seq = tokenizer.texts_to_sequences([preprocess_text(question)])
token_seq = [tokenizer.texts_to_sequences([" ".join(tokens)])[0]]
ner_seq = [ner_tokenizer.texts_to_sequences([" ".join(ner)])[0]]
srl_seq = [srl_tokenizer.texts_to_sequences([" ".join(srl)])[0]]
q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)
q_type_cat = tf.keras.utils.to_categorical([q_type_idx], num_classes=q_type_vocab_size)
# Pad sequences
context_pad = pad_sequences(context_seq, maxlen=max_context_len, padding="post")
question_pad = pad_sequences(question_seq, maxlen=max_question_len, padding="post")
token_pad = pad_sequences(token_seq, maxlen=max_token_len, padding="post")
ner_pad = pad_sequences(ner_seq, maxlen=max_token_len, padding="post")
srl_pad = pad_sequences(srl_seq, maxlen=max_token_len, padding="post")
# Predict
prediction = model.predict([context_pad, question_pad, token_pad, ner_pad, srl_pad, q_type_cat], verbose=0)
answer_idx = np.argmax(prediction[0])
# Retrieve predicted answer word
for word, idx in answer_tokenizer.word_index.items():
if idx == answer_idx:
return word
return "Unknown"
def generate_question_answer(context, tokens, ner, srl, question_type="isian"):
entities = {}
predicate = ""
for i, token in enumerate(tokens):
if ner[i] != "O":
entities.setdefault(ner[i], []).append(token)
if srl[i] == "V":
predicate = token
elif srl[i].startswith("ARG"):
entities.setdefault(srl[i], []).append(token)
subject = " ".join(entities.get("ARG0", [""]))
if question_type == "isian":
if "LOC" in entities:
location = " ".join(entities["LOC"])
return f"Dimana {subject} {predicate} ___", location
elif "DATE" in entities:
date = " ".join(entities["DATE"])
return f"Kapan {subject} {predicate} ___", date
elif question_type == "true_false":
if "DATE" in entities:
original_date = " ".join(entities["DATE"])
try:
# Assumes a 3-token DATE entity (day month year); shift the year to make the statement false
modified_year = str(int(entities['DATE'][-1]) + random.randint(1, 5))
modified_date = f"{entities['DATE'][0]} {entities['DATE'][1]} {modified_year}"
except (ValueError, IndexError):
modified_date = original_date  # Fallback if parsing fails
return f"{subject} {predicate} pada {modified_date} ___", "false"
elif question_type == "opsi":
if "LOC" in entities:
correct_location = " ".join(entities["LOC"])
distractors = ["singasari", "kuta", "banten", "kediri", "makassar"]
distractors = [d for d in distractors if d != correct_location]
options = random.sample(distractors, 3) + [correct_location]
random.shuffle(options)
return f"Dimana {subject} {predicate} ___", options, correct_location
return "Apa yang terjadi dalam teks ini ___", context
# ✅ Example Usage with Random Sampling
if __name__ == "__main__":
with open("data_converted.json", "r") as f:
data = json.load(f)
# Randomly select an example for testing
test_item = random.choice(data)
test_qa = random.choice(test_item["qas"])
predicted_answer = predict_answer(
test_item["context"],
test_qa["question"],
test_item["tokens"],
test_item["ner"],
test_item["srl"],
test_qa["type"]
)
print(f"Context: {test_item['context']}")
print(f"Question: {test_qa['question']}")
print(f"True Answer: {test_qa['answer']}")
print(f"Predicted Answer: {predicted_answer}")
# Generate Random Question Example
example_context = test_item["context"]
example_tokens = test_item["tokens"]
example_ner = test_item["ner"]
example_srl = test_item["srl"]
random_question_type = random.choice(["isian", "true_false", "opsi"])
result = generate_question_answer(
example_context, example_tokens, example_ner, example_srl, random_question_type
)
print("\nGenerated Question Example:")
print(f"Context: {example_context}")
print(f"Question Type: {random_question_type}")
if random_question_type == "opsi":
question, options, correct_answer = result
print(f"Generated Question: {question}")
print(f"Options: {options}")
print(f"Correct Answer: {correct_answer}")
else:
question, answer = result
print(f"Generated Question: {question}")
print(f"Answer: {answer}")

View File

@ -0,0 +1,54 @@
import json
import re
from collections import OrderedDict
def normalize_question(text):
text = re.sub(r'\s+([?.!,])', r'\1', text)
return text.capitalize()
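# e.g. normalize_question("dimana kartini lahir ?") -> "Dimana kartini lahir?"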
# Load data
with open('../dataset/dev_dataset_qg.json', 'r', encoding='utf-8') as file:
data = json.load(file)
processed_data = []
for idx_entry, entry in enumerate(data):
if not isinstance(entry, dict):
continue
if "context" not in entry:
entry["context"] = " ".join(entry.get("tokens", []))
# Update NER tags: change 'V' to 'O'
ner_tags = entry.get("ner", [])
entry["ner"] = ["O" if tag == "V" else tag for tag in ner_tags]
for idx_qa, qa in enumerate(entry.get("qas", [])):
if "id" not in qa:
qa["id"] = f"qa_{idx_entry}_q{idx_qa + 1}"
answer = qa.get("answer")
if isinstance(answer, list):
qa["answer"] = " ".join(answer)
question = qa.get("question")
if isinstance(question, list):
question_str = " ".join(question)
qa["question"] = normalize_question(question_str)
# Reorder fields: context first, then the rest
ordered_entry = OrderedDict()
if "context" in entry:
ordered_entry["context"] = entry.pop("context")
# Add remaining fields in their original order
for key, value in entry.items():
ordered_entry[key] = value
processed_data.append(ordered_entry)
# Save result
with open('data_converted.json', 'w', encoding='utf-8') as file:
json.dump(processed_data, file, indent=2, ensure_ascii=False)
# Optional: Print first 2 entries for quick verification
print(json.dumps(processed_data[:2], indent=2, ensure_ascii=False))

View File

@ -0,0 +1,53 @@
[
{
"context": "Raden Ajeng Kartini lahir pada 21 April 1879 di Jepara.",
"tokens": [
"raden",
"ajeng",
"kartini",
"lahir",
"pada",
"21",
"april",
"1879",
"di",
"jepara"
],
"ner_tags": [
"PER",
"PER",
"PER",
"V",
"O",
"DATE",
"DATE",
"DATE",
"O",
"LOC"
],
"srl_tags": [
"ARG0",
"ARG0",
"ARG0",
"V",
"O",
"ARGM-TMP",
"ARGM-TMP",
"ARGM-TMP",
"O",
"ARGM-LOC"
],
"qas": [
{
"id": "kartini_001_q1",
"question": "Dimana Kartini lahir?",
"answers": [{ "text": "Jepara", "answer_start": 10 }]
},
{
"id": "kartini_001_q2",
"question": "Kartini lahir pada tanggal ___?",
"answers": [{ "text": "21 April 1879", "answer_start": 6 }]
}
]
}
]

File diff suppressed because it is too large

View File

@ -0,0 +1,3 @@
BLEU Score: 0.0585
Validation Accuracy: 0.6740
Validation Loss: 1.8080
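For reference, the BLEU figure above is a corpus-level score; below is a minimal sketch of how such a number might be computed with nltk (the variable names and the smoothing choice are assumptions, not taken from this repository):

# Hypothetical sketch: corpus-level BLEU over predicted vs. reference token lists
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

references = [[["jepara"]], [["trowulan"]]]  # one list of reference token lists per sample
hypotheses = [["jepara"], ["singasari"]]     # predicted token lists
smooth = SmoothingFunction().method1         # smoothing avoids zero scores on short outputs
print(f"BLEU Score: {corpus_bleu(references, hypotheses, smoothing_function=smooth):.4f}")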

View File

@ -0,0 +1,178 @@
[
{
"context": "raden ajeng kartini lahir pada 21 april 1879 di jepara",
"tokens": [
"raden",
"ajeng",
"kartini",
"lahir",
"pada",
"21",
"april",
"1879",
"di",
"jepara"
],
"ner": [
"PER",
"PER",
"PER",
"O",
"O",
"DATE",
"DATE",
"DATE",
"O",
"LOC"
],
"srl": [
"ARG0",
"ARG0",
"ARG0",
"V",
"O",
"ARGM-TMP",
"ARGM-TMP",
"ARGM-TMP",
"O",
"ARGM-LOC"
],
"qas": [
{
"type": "isian",
"question": "Dimana kartini lahir ___",
"answer": "jepara",
"id": "qa_0_q1"
},
{
"type": "true_false",
"question": "Kartini lahir pada tanggal 21 mei 1879 ___",
"options": [
"true",
"false"
],
"answer": "false",
"id": "qa_0_q2"
}
]
},
{
"context": "kerajaan majapahit berdiri pada tahun 1293 di trowulan",
"tokens": [
"kerajaan",
"majapahit",
"berdiri",
"pada",
"tahun",
"1293",
"di",
"trowulan"
],
"ner": [
"O",
"ORG",
"O",
"O",
"O",
"DATE",
"O",
"LOC"
],
"srl": [
"ARG1",
"ARG1",
"V",
"O",
"O",
"ARGM-TMP",
"O",
"ARGM-LOC"
],
"qas": [
{
"type": "opsi",
"question": "Dimana kerajaan majapahit berdiri ___",
"options": [
"trowulan",
"singasari",
"kuta",
"banten"
],
"answer": "trowulan",
"id": "qa_1_q1"
},
{
"type": "true_false",
"question": "Kerajaan majapahit berdiri pada tahun 1300 ___",
"options": [
"true",
"false"
],
"answer": "false",
"id": "qa_1_q2"
}
]
},
{
"context": "soekarno dan mohammad hatta memproklamasikan kemerdekaan indonesia pada 17 agustus 1945",
"tokens": [
"soekarno",
"dan",
"mohammad",
"hatta",
"memproklamasikan",
"kemerdekaan",
"indonesia",
"pada",
"17",
"agustus",
"1945"
],
"ner": [
"PER",
"O",
"PER",
"PER",
"O",
"O",
"LOC",
"O",
"DATE",
"DATE",
"DATE"
],
"srl": [
"ARG0",
"O",
"ARG0",
"ARG0",
"V",
"ARG1",
"ARGM-LOC",
"O",
"ARGM-TMP",
"ARGM-TMP",
"ARGM-TMP"
],
"qas": [
{
"type": "isian",
"question": "Pada tanggal berapa kemerdekaan indonesia diproklamasikan ___",
"answer": "17 agustus 1945",
"id": "qa_2_q1"
},
{
"type": "opsi",
"question": "Siapa yang memproklamasikan kemerdekaan indonesia ___",
"options": [
"soekarno",
"mohammad hatta",
"sudirman",
"ahmad yani"
],
"answer": "soekarno mohammad hatta",
"id": "qa_2_q2"
}
]
}
]

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,490 @@
import numpy as np
import pandas as pd
import json
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input,
LSTM,
Dense,
Embedding,
Bidirectional,
Concatenate,
Attention,
Dropout,
)
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import re
with open("data_converted.json", "r") as f:
data = json.load(f)
# Preprocessing function
def preprocess_text(text):
"""Melakukan preprocessing teks dasar"""
text = text.lower()
text = re.sub(r"\s+", " ", text).strip()
return text
# Prepare the data for the model
def prepare_data(data):
"""Prepare the data for the model"""
contexts = []
tokens_list = []
ner_list = []
srl_list = []
questions = []
answers = []
q_types = []
for item in data:
for qa in item["qas"]:
contexts.append(preprocess_text(item["context"]))
tokens_list.append(item["tokens"])
ner_list.append(item["ner"])
srl_list.append(item["srl"])
questions.append(preprocess_text(qa["question"]))
answers.append(qa["answer"])
q_types.append(qa["type"])
return contexts, tokens_list, ner_list, srl_list, questions, answers, q_types
contexts, tokens_list, ner_list, srl_list, questions, answers, q_types = prepare_data(
data
)
max_vocab_size = 10000
tokenizer = Tokenizer(num_words=max_vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(contexts + questions + [" ".join(item) for item in tokens_list])
vocab_size = len(tokenizer.word_index) + 1
# Encoding for NER
ner_tokenizer = Tokenizer(oov_token="<OOV>")
ner_tokenizer.fit_on_texts([" ".join(ner) for ner in ner_list])
ner_vocab_size = len(ner_tokenizer.word_index) + 1
# Encoding for SRL
srl_tokenizer = Tokenizer(oov_token="<OOV>")
srl_tokenizer.fit_on_texts([" ".join(srl) for srl in srl_list])
srl_vocab_size = len(srl_tokenizer.word_index) + 1
# Encoding for question types
q_type_tokenizer = Tokenizer()
q_type_tokenizer.fit_on_texts(q_types)
q_type_vocab_size = len(q_type_tokenizer.word_index) + 1
# Convert tokens, NER, and SRL to sequences
def tokens_to_sequences(tokens, ner, srl):
"""Convert tokens, NER, and SRL tags to integer sequences"""
token_seqs = [tokenizer.texts_to_sequences([" ".join(t)])[0] for t in tokens]
ner_seqs = [ner_tokenizer.texts_to_sequences([" ".join(n)])[0] for n in ner]
srl_seqs = [srl_tokenizer.texts_to_sequences([" ".join(s)])[0] for s in srl]
return token_seqs, ner_seqs, srl_seqs
# Determine maximum lengths for padding
context_seqs = tokenizer.texts_to_sequences(contexts)
question_seqs = tokenizer.texts_to_sequences(questions)
token_seqs, ner_seqs, srl_seqs = tokens_to_sequences(tokens_list, ner_list, srl_list)
max_context_len = max([len(seq) for seq in context_seqs])
max_question_len = max([len(seq) for seq in question_seqs])
max_token_len = max([len(seq) for seq in token_seqs])
# Pad sequences so that all inputs have the same length
def pad_all_sequences(context_seqs, question_seqs, token_seqs, ner_seqs, srl_seqs):
"""Pad all sequences"""
context_padded = pad_sequences(context_seqs, maxlen=max_context_len, padding="post")
question_padded = pad_sequences(
question_seqs, maxlen=max_question_len, padding="post"
)
token_padded = pad_sequences(token_seqs, maxlen=max_token_len, padding="post")
ner_padded = pad_sequences(ner_seqs, maxlen=max_token_len, padding="post")
srl_padded = pad_sequences(srl_seqs, maxlen=max_token_len, padding="post")
return context_padded, question_padded, token_padded, ner_padded, srl_padded
# Prepare the encoder for answers
answer_tokenizer = Tokenizer(oov_token="<OOV>")
answer_tokenizer.fit_on_texts(answers)
answer_vocab_size = len(answer_tokenizer.word_index) + 1
# Encode question types - FIX - use direct indices instead of sequences
q_type_indices = []
for q_type in q_types:
# Look up the question-type index (0 is used for unseen types)
q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)
q_type_indices.append(q_type_idx)
# Convert to a numpy array
q_type_indices = np.array(q_type_indices)
# One-hot encode the question types
q_type_categorical = tf.keras.utils.to_categorical(
q_type_indices, num_classes=q_type_vocab_size
)
# Pad sequences
context_padded, question_padded, token_padded, ner_padded, srl_padded = (
pad_all_sequences(context_seqs, question_seqs, token_seqs, ner_seqs, srl_seqs)
)
# Encode the answers
answer_seqs = answer_tokenizer.texts_to_sequences(answers)
max_answer_len = max([len(seq) for seq in answer_seqs])
answer_padded = pad_sequences(answer_seqs, maxlen=max_answer_len, padding="post")
# Split the data into train and test sets
indices = list(range(len(context_padded)))
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)
# Helper to get a subset of the data by indices
def get_subset(data, indices):
return np.array([data[i] for i in indices])
# Train data
train_context = get_subset(context_padded, train_indices)
train_question = get_subset(question_padded, train_indices)
train_token = get_subset(token_padded, train_indices)
train_ner = get_subset(ner_padded, train_indices)
train_srl = get_subset(srl_padded, train_indices)
train_q_type = get_subset(q_type_categorical, train_indices)
train_answer = get_subset(answer_padded, train_indices)
# Test data
test_context = get_subset(context_padded, test_indices)
test_question = get_subset(question_padded, test_indices)
test_token = get_subset(token_padded, test_indices)
test_ner = get_subset(ner_padded, test_indices)
test_srl = get_subset(srl_padded, test_indices)
test_q_type = get_subset(q_type_categorical, test_indices)
test_answer = get_subset(answer_padded, test_indices)
# Hyperparameters
embedding_dim = 100
lstm_units = 128
ner_embedding_dim = 50
srl_embedding_dim = 50
dropout_rate = 0.3
# Function to build a model with two outputs: question and answer
def create_qa_generator_model():
# Input layers
context_input = Input(shape=(max_context_len,), name="context_input")
token_input = Input(shape=(max_token_len,), name="token_input")
ner_input = Input(shape=(max_token_len,), name="ner_input")
srl_input = Input(shape=(max_token_len,), name="srl_input")
# question_input and q_type_input are not needed for generation
# because they are what the model produces as output
# Shared embedding layer for text
text_embedding = Embedding(vocab_size, embedding_dim, name="text_embedding")
# Embeddings for NER and SRL
ner_embedding = Embedding(ner_vocab_size, ner_embedding_dim, name="ner_embedding")(
ner_input
)
srl_embedding = Embedding(srl_vocab_size, srl_embedding_dim, name="srl_embedding")(
srl_input
)
# Apply embeddings
context_embed = text_embedding(context_input)
token_embed = text_embedding(token_input)
# Bidirectional LSTMs for context and token-level features
context_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="context_lstm")
)(context_embed)
# Concat token features (tokens, NER, SRL)
token_features = Concatenate(name="token_features")(
[token_embed, ner_embedding, srl_embedding]
)
token_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="token_lstm")
)(token_features)
# Pool outputs
context_pool = tf.keras.layers.GlobalMaxPooling1D(name="context_pool")(context_lstm)
token_pool = tf.keras.layers.GlobalMaxPooling1D(name="token_pool")(token_lstm)
# Concat all features
all_features = Concatenate(name="all_features")([context_pool, token_pool])
# Shared layers
shared = Dense(256, activation="relu", name="shared_dense_1")(all_features)
shared = Dropout(dropout_rate)(shared)
shared = Dense(128, activation="relu", name="shared_dense_2")(shared)
shared = Dropout(dropout_rate)(shared)
# Question branch
question_branch = Dense(256, activation="relu", name="question_dense")(shared)
question_branch = Dropout(dropout_rate)(question_branch)
# Answer branch
answer_branch = Dense(256, activation="relu", name="answer_dense")(shared)
answer_branch = Dropout(dropout_rate)(answer_branch)
# Output layers
# For the question, build an LSTM-based decoder that generates a word sequence
# as the question
question_decoder = LSTM(lstm_units, return_sequences=True, name="question_decoder")(
tf.keras.layers.RepeatVector(max_question_len)(question_branch)
)
question_output = Dense(vocab_size, activation="softmax", name="question_output")(
question_decoder
)
# Output layer for the answer
answer_output = Dense(
answer_vocab_size, activation="softmax", name="answer_output"
)(answer_branch)
# Create model
model = Model(
inputs=[
context_input,
token_input,
ner_input,
srl_input,
],
outputs=[question_output, answer_output],
)
# Compile the model with losses and metrics for both outputs
model.compile(
optimizer="adam",
loss={
"question_output": "categorical_crossentropy",
"answer_output": "sparse_categorical_crossentropy",
},
metrics={"question_output": "accuracy", "answer_output": "accuracy"},
loss_weights={"question_output": 1.0, "answer_output": 1.0},
)
return model
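# Design note: RepeatVector + LSTM decodes the question from a single pooled
# vector with no teacher forcing, so every output position is predicted from
# the same summary; an attention-based or autoregressive decoder would likely
# be stronger.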
# Prepare the question targets (one-hot encoded)
# The question targets must be converted to categorical form because every
# word in the sequence is predicted simultaneously
def prepare_question_target(question_padded):
question_target = []
for question in question_padded:
# One-hot encode every token in the sequence
sequence_target = []
for token in question:
# Build the one-hot vector for this token
token_target = tf.keras.utils.to_categorical(token, num_classes=vocab_size)
sequence_target.append(token_target)
question_target.append(sequence_target)
return np.array(question_target)
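# Caution: this materializes a (num_samples, max_question_len, vocab_size)
# float array; with a large vocabulary this can exhaust memory, and sparse
# categorical targets would be considerably cheaper.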
# Prepare the targets for the question output
train_question_target = prepare_question_target(train_question)
test_question_target = prepare_question_target(test_question)
# Reshape the answers for sparse categorical crossentropy
train_answer_labels = train_answer[:, 0]  # Take the first token index of each answer
test_answer_labels = test_answer[:, 0]
# Build the model
model = create_qa_generator_model()
model.summary()
# Callback to save the best model
checkpoint = ModelCheckpoint(
"qa_generator_model.h5",
monitor="val_question_output_accuracy",
save_best_only=True,
verbose=1,
mode="max",
)
early_stop = EarlyStopping(
monitor="val_question_output_accuracy", patience=5, verbose=1, mode="max"
)
# Training
batch_size = 8
epochs = 50
# Train model
history = model.fit(
[train_context, train_token, train_ner, train_srl],
{"question_output": train_question_target, "answer_output": train_answer_labels},
batch_size=batch_size,
epochs=epochs,
validation_data=(
[test_context, test_token, test_ner, test_srl],
{"question_output": test_question_target, "answer_output": test_answer_labels},
),
callbacks=[checkpoint, early_stop],
)
model.save("qa_generator_model_final.keras")
# Save the tokenizers
tokenizer_data = {
"word_tokenizer": tokenizer.to_json(),
"ner_tokenizer": ner_tokenizer.to_json(),
"srl_tokenizer": srl_tokenizer.to_json(),
"answer_tokenizer": answer_tokenizer.to_json(),
"q_type_tokenizer": q_type_tokenizer.to_json(),
"max_context_len": max_context_len,
"max_question_len": max_question_len,
"max_token_len": max_token_len,
}
with open("qa_generator_tokenizers.json", "w") as f:
json.dump(tokenizer_data, f)
# Prediction function
def predict_question_and_answer(model, context, tokens, ner, srl):
"""
Predict a question and an answer from the context, tokens, NER, and SRL tags
"""
# Preprocess input
context_seq = tokenizer.texts_to_sequences([preprocess_text(context)])
context_padded = pad_sequences(context_seq, maxlen=max_context_len, padding="post")
token_seq = tokenizer.texts_to_sequences([" ".join(tokens)])
token_padded = pad_sequences(token_seq, maxlen=max_token_len, padding="post")
ner_seq = ner_tokenizer.texts_to_sequences([" ".join(ner)])
ner_padded = pad_sequences(ner_seq, maxlen=max_token_len, padding="post")
srl_seq = srl_tokenizer.texts_to_sequences([" ".join(srl)])
srl_padded = pad_sequences(srl_seq, maxlen=max_token_len, padding="post")
# Predict
question_pred, answer_pred = model.predict(
[context_padded, token_padded, ner_padded, srl_padded]
)
# Decode the question (take the index with the highest probability at each position)
question_indices = np.argmax(question_pred[0], axis=1)
question_words = []
# Reverse the word index to map indices back to words
word_index = tokenizer.word_index
index_word = {v: k for k, v in word_index.items()}
# Decode the question
for idx in question_indices:
if idx != 0: # Skip padding (index 0)
word = index_word.get(idx, "<UNK>")
question_words.append(word)
else:
break # Stop at padding
# Decode the answer
answer_idx = np.argmax(answer_pred[0])
# Reverse word index for the answers
answer_word_index = answer_tokenizer.word_index
answer_index_word = {v: k for k, v in answer_word_index.items()}
answer = answer_index_word.get(answer_idx, "<UNK>")
# Assemble the question
question = " ".join(question_words)
return question, answer
# Example usage
# Note: this is only an example; actual data is needed in a real run
"""
sample_context = "Selamat pagi, sekarang adalah hari Senin."
sample_tokens = ["selamat", "pagi", "sekarang", "adalah", "hari", "senin"]
sample_ner = ["O", "O", "O", "O", "O", "B-TIME"]
sample_srl = ["B-V", "B-ARG1", "B-ARGM-TMP", "B-ARGM-PRD", "I-ARGM-PRD", "I-ARGM-PRD"]
# Load model yang sudah dilatih
loaded_model = load_model("qa_generator_model_final.keras")
# Prediksi
question, answer = predict_question_and_answer(
loaded_model, sample_context, sample_tokens, sample_ner, sample_srl
)
print("Konteks:", sample_context)
print("Pertanyaan yang dihasilkan:", question)
print("Jawaban yang dihasilkan:", answer)
"""
sample = {
"context": "kerajaan majapahit berdiri pada tahun 1293 di trowulan",
"tokens": [
"kerajaan",
"majapahit",
"berdiri",
"pada",
"tahun",
"1293",
"di",
"trowulan",
],
"ner": ["O", "ORG", "O", "O", "O", "DATE", "O", "LOC"],
"srl": ["ARG1", "ARG1", "V", "O", "O", "ARGM-TMP", "O", "ARGM-LOC"],
}
question, answer = predict_question_and_answer(
model, sample["context"], sample["tokens"], sample["ner"], sample["srl"]
)
print("Konteks:", sample["context"])
print("Pertanyaan yang dihasilkan:", question)
print("Jawaban yang dihasilkan:", answer)
# Plot the training history
# plt.figure(figsize=(12, 8))
# # Plot loss
# plt.subplot(2, 2, 1)
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('Model Loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Validation'], loc='upper right')
# # Plot question output accuracy
# plt.subplot(2, 2, 2)
# plt.plot(history.history['question_output_accuracy'])
# plt.plot(history.history['val_question_output_accuracy'])
# plt.title('Question Output Accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Validation'], loc='lower right')
# # Plot answer output accuracy
# plt.subplot(2, 2, 3)
# plt.plot(history.history['answer_output_accuracy'])
# plt.plot(history.history['val_answer_output_accuracy'])
# plt.title('Answer Output Accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Validation'], loc='lower right')
# plt.tight_layout()
# plt.savefig("training_history.png")
# plt.show()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,615 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"id": "58e41ccb",
"metadata": {},
"outputs": [],
"source": [
"import json, pickle, random\n",
"from pathlib import Path\n",
"from itertools import chain\n",
"\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"from tensorflow.keras.layers import (\n",
" Input, Embedding, LSTM, Concatenate,\n",
" Dense, TimeDistributed\n",
")\n",
"from tensorflow.keras.models import Model\n",
"from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction\n",
"from rouge_score import rouge_scorer, scoring\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "a94dd46a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"flattened samples : 8\n"
]
}
],
"source": [
"RAW = json.loads(Path(\"../dataset/dev_dataset_qg.json\").read_text())\n",
"\n",
"samples = []\n",
"for item in RAW:\n",
" for qp in item[\"quiz_posibility\"]:\n",
" samples.append({\n",
" \"tokens\" : item[\"tokens\"],\n",
" \"ner\" : item[\"ner\"],\n",
" \"srl\" : item[\"srl\"],\n",
" \"q_type\" : qp[\"type\"], # isian / opsi / benar_salah\n",
" \"q_toks\" : qp[\"question\"] + [\"<eos>\"],\n",
" \"a_toks\" : (qp[\"answer\"] if isinstance(qp[\"answer\"], list)\n",
" else [qp[\"answer\"]]) + [\"<eos>\"]\n",
" })\n",
"\n",
"print(\"flattened samples :\", len(samples))\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "852fb9a8",
"metadata": {},
"outputs": [],
"source": [
"def build_vocab(seq_iter, reserved=(\"<pad>\", \"<unk>\", \"<sos>\", \"<eos>\")):\n",
" vocab = {tok: idx for idx, tok in enumerate(reserved)}\n",
" for tok in chain.from_iterable(seq_iter):\n",
" vocab.setdefault(tok, len(vocab))\n",
" return vocab\n",
"\n",
"vocab_tok = build_vocab((s[\"tokens\"] for s in samples))\n",
"vocab_ner = build_vocab((s[\"ner\"] for s in samples), reserved=(\"<pad>\",\"<unk>\"))\n",
"vocab_srl = build_vocab((s[\"srl\"] for s in samples), reserved=(\"<pad>\",\"<unk>\"))\n",
"vocab_q = build_vocab((s[\"q_toks\"] for s in samples))\n",
"vocab_a = build_vocab((s[\"a_toks\"] for s in samples))\n",
"vocab_typ = {\"isian\":0, \"opsi\":1, \"benar_salah\":2}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "fdf696cf",
"metadata": {},
"outputs": [],
"source": [
"def enc(seq, v): return [v.get(t, v[\"<unk>\"]) for t in seq]\n",
"\n",
"MAX_SENT = max(len(s[\"tokens\"]) for s in samples)\n",
"MAX_Q = max(len(s[\"q_toks\"]) for s in samples)\n",
"MAX_A = max(len(s[\"a_toks\"]) for s in samples)\n",
"\n",
"def pad_batch(seqs, vmap, maxlen):\n",
" return tf.keras.preprocessing.sequence.pad_sequences(\n",
" [enc(s, vmap) for s in seqs], maxlen=maxlen, padding=\"post\"\n",
" )\n",
"\n",
"X_tok = pad_batch((s[\"tokens\"] for s in samples), vocab_tok, MAX_SENT)\n",
"X_ner = pad_batch((s[\"ner\"] for s in samples), vocab_ner, MAX_SENT)\n",
"X_srl = pad_batch((s[\"srl\"] for s in samples), vocab_srl, MAX_SENT)\n",
"\n",
"dec_q_in = pad_batch(\n",
" ([[\"<sos>\"]+s[\"q_toks\"][:-1] for s in samples]), vocab_q, MAX_Q)\n",
"dec_q_out = pad_batch((s[\"q_toks\"] for s in samples), vocab_q, MAX_Q)\n",
"\n",
"dec_a_in = pad_batch(\n",
" ([[\"<sos>\"]+s[\"a_toks\"][:-1] for s in samples]), vocab_a, MAX_A)\n",
"dec_a_out = pad_batch((s[\"a_toks\"] for s in samples), vocab_a, MAX_A)\n",
"\n",
"y_type = np.array([vocab_typ[s[\"q_type\"]] for s in samples])\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "33074619",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Model: \"functional_2\"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"Model summary (rendered table condensed to plain text):\n",
"tok_in / ner_in / srl_in (InputLayer): (None, 11), 0 params\n",
"emb_tok (Embedding): (None, 11, 128), 4,992 params\n",
"emb_ner (Embedding): (None, 11, 32), 352 params\n",
"emb_srl (Embedding): (None, 11, 32), 288 params\n",
"concatenate_4 (Concatenate): (None, 11, 192); enc_lstm (LSTM): [(None, 256) x 3], 459,776 params\n",
"dec_q_in (InputLayer): (None, 9); emb_q (Embedding): (None, 9, 128), 3,968 params\n",
"dec_a_in (InputLayer): (None, 4); emb_a (Embedding): (None, 4, 128), 1,792 params\n",
"lstm_q (LSTM): [(None, 9, 256), (None, 256), (None, 256)], 394,240 params\n",
"lstm_a (LSTM): [(None, 4, 256), (None, 256), (None, 256)], 394,240 params\n",
"q_out (TimeDistributed): (None, 9, 31), 7,967 params\n",
"a_out (TimeDistributed): (None, 4, 14), 3,598 params\n",
"type_out (Dense): (None, 3), 771 params\n",
"(plus auto-generated masking ops: not_equal, expand_dims, broadcast_to, ones_like, any; 0 params)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
"│ ner_in (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ srl_in (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ not_equal_8 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ tok_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mNotEqual\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ emb_ner (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m352\u001b[0m │ ner_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ emb_srl (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m288\u001b[0m │ srl_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ expand_dims_4 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ not_equal_8[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mExpandDims\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ broadcast_to_4 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ expand_dims_4[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ (\u001b[38;5;33mBroadcastTo\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ones_like_2 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ emb_ner[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mOnesLike\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ ones_like_3 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ emb_srl[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mOnesLike\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ emb_tok (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m4,992\u001b[0m │ tok_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ concatenate_5 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m192\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ broadcast_to_4[\u001b[38;5;34m0\u001b[0m… │\n",
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ ones_like_2[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m… │\n",
"│ │ │ │ ones_like_3[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dec_q_in │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m9\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ concatenate_4 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m, \u001b[38;5;34m192\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ emb_tok[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
"│ (\u001b[38;5;33mConcatenate\u001b[0m) │ │ │ emb_ner[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
"│ │ │ │ emb_srl[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ any_2 (\u001b[38;5;33mAny\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m11\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ concatenate_5[\u001b[38;5;34m0\u001b[0m]… │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ dec_a_in │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n",
"│ (\u001b[38;5;33mInputLayer\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ emb_q (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m9\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m3,968\u001b[0m │ dec_q_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ enc_lstm (\u001b[38;5;33mLSTM\u001b[0m) │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ \u001b[38;5;34m459,776\u001b[0m │ concatenate_4[\u001b[38;5;34m0\u001b[0m]… │\n",
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ │ any_2[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)] │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ emb_a (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m1,792\u001b[0m │ dec_a_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ lstm_q (\u001b[38;5;33mLSTM\u001b[0m) │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m9\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ \u001b[38;5;34m394,240\u001b[0m │ emb_q[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ │ enc_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m1\u001b[0m], │\n",
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)] │ │ enc_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m2\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ not_equal_9 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m9\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dec_q_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mNotEqual\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ lstm_a (\u001b[38;5;33mLSTM\u001b[0m) │ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ \u001b[38;5;34m394,240\u001b[0m │ emb_a[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m), │ │ enc_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m1\u001b[0m], │\n",
"│ │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)] │ │ enc_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m2\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ not_equal_10 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ dec_a_in[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"│ (\u001b[38;5;33mNotEqual\u001b[0m) │ │ │ │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ q_out │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m9\u001b[0m, \u001b[38;5;34m31\u001b[0m) │ \u001b[38;5;34m7,967\u001b[0m │ lstm_q[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ not_equal_9[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ a_out │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m14\u001b[0m) │ \u001b[38;5;34m3,598\u001b[0m │ lstm_a[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n",
"│ (\u001b[38;5;33mTimeDistributed\u001b[0m) │ │ │ not_equal_10[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
"├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
"│ type_out (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m771\u001b[0m │ enc_lstm[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
"└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,271,984</span> (4.85 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m1,271,984\u001b[0m (4.85 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,271,984</span> (4.85 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m1,271,984\u001b[0m (4.85 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"d_tok, d_tag, units = 128, 32, 256\n",
"pad_tok, pad_q, pad_a = vocab_tok[\"<pad>\"], vocab_q[\"<pad>\"], vocab_a[\"<pad>\"]\n",
"\n",
"# ---- Encoder ----------------------------------------------------\n",
"inp_tok = Input((MAX_SENT,), name=\"tok_in\")\n",
"inp_ner = Input((MAX_SENT,), name=\"ner_in\")\n",
"inp_srl = Input((MAX_SENT,), name=\"srl_in\")\n",
"\n",
"emb_tok = Embedding(len(vocab_tok), d_tok, mask_zero=True, name=\"emb_tok\")(inp_tok)\n",
"emb_ner = Embedding(len(vocab_ner), d_tag, mask_zero=False, name=\"emb_ner\")(inp_ner)\n",
"emb_srl = Embedding(len(vocab_srl), d_tag, mask_zero=False, name=\"emb_srl\")(inp_srl)\n",
"\n",
"enc_concat = Concatenate()([emb_tok, emb_ner, emb_srl])\n",
"enc_out, state_h, state_c = LSTM(units, return_state=True, name=\"enc_lstm\")(enc_concat)\n",
"\n",
"# ---- Decoder : Question ----------------------------------------\n",
"dec_q_inp = Input((MAX_Q,), name=\"dec_q_in\")\n",
"dec_emb_q = Embedding(len(vocab_q), d_tok, mask_zero=True, name=\"emb_q\")(dec_q_inp)\n",
"dec_q_seq, _, _ = LSTM(units, return_sequences=True, return_state=True,\n",
" name=\"lstm_q\")(dec_emb_q, initial_state=[state_h, state_c])\n",
"q_out = TimeDistributed(Dense(len(vocab_q), activation=\"softmax\"), name=\"q_out\")(dec_q_seq)\n",
"\n",
"# ---- Decoder : Answer ------------------------------------------\n",
"dec_a_inp = Input((MAX_A,), name=\"dec_a_in\")\n",
"dec_emb_a = Embedding(len(vocab_a), d_tok, mask_zero=True, name=\"emb_a\")(dec_a_inp)\n",
"dec_a_seq, _, _ = LSTM(units, return_sequences=True, return_state=True,\n",
" name=\"lstm_a\")(dec_emb_a, initial_state=[state_h, state_c])\n",
"a_out = TimeDistributed(Dense(len(vocab_a), activation=\"softmax\"), name=\"a_out\")(dec_a_seq)\n",
"\n",
"# ---- Classifier -------------------------------------------------\n",
"type_out = Dense(len(vocab_typ), activation=\"softmax\", name=\"type_out\")(enc_out)\n",
"\n",
"model = Model(\n",
" [inp_tok, inp_ner, inp_srl, dec_q_inp, dec_a_inp],\n",
" [q_out, a_out, type_out]\n",
")\n",
"\n",
"# ---- Masked loss helpers ---------------------------------------\n",
"scce = tf.keras.losses.SparseCategoricalCrossentropy(reduction=\"none\")\n",
"def masked_loss_factory(pad_id):\n",
" def loss(y_true, y_pred):\n",
" l = scce(y_true, y_pred)\n",
" mask = tf.cast(tf.not_equal(y_true, pad_id), tf.float32)\n",
" return tf.reduce_sum(l*mask) / tf.reduce_sum(mask)\n",
" return loss\n",
"\n",
"model.compile(\n",
" optimizer=\"adam\",\n",
" loss = {\"q_out\":masked_loss_factory(pad_q),\n",
" \"a_out\":masked_loss_factory(pad_a),\n",
" \"type_out\":\"sparse_categorical_crossentropy\"},\n",
" loss_weights={\"q_out\":1.0, \"a_out\":1.0, \"type_out\":0.3},\n",
" metrics={\"q_out\":\"sparse_categorical_accuracy\",\n",
" \"a_out\":\"sparse_categorical_accuracy\",\n",
" \"type_out\":tf.keras.metrics.SparseCategoricalAccuracy(name=\"type_acc\")}\n",
")\n",
"model.summary()\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "44d36899",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/30\n"
]
},
{
"ename": "TypeError",
"evalue": "Exception encountered when calling BroadcastTo.call().\n\n\u001b[1mFailed to convert elements of (None, 11, 128) to Tensor. Consider casting elements to a supported type. See https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes.\u001b[0m\n\nArguments received by BroadcastTo.call():\n • x=tf.Tensor(shape=(None, 11, 1), dtype=bool)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[18], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m history \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mX_tok\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_ner\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_srl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdec_q_in\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdec_a_in\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mdec_q_out\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdec_a_out\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_type\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_split\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m30\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m64\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mtf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkeras\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mEarlyStopping\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpatience\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrestore_best_weights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\n\u001b[1;32m 9\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 10\u001b[0m model\u001b[38;5;241m.\u001b[39msave(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfull_seq2seq.keras\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# 5. SAVE VOCABS (.pkl keeps python dict intact)\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------\u001b[39;00m\n",
"File \u001b[0;32m/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/keras/src/utils/traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 120\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n",
"File \u001b[0;32m/mnt/disc1/code/thesis_quiz_project/lstm-quiz/myenv/lib64/python3.10/site-packages/keras/src/utils/traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 120\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n",
"\u001b[0;31mTypeError\u001b[0m: Exception encountered when calling BroadcastTo.call().\n\n\u001b[1mFailed to convert elements of (None, 11, 128) to Tensor. Consider casting elements to a supported type. See https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes.\u001b[0m\n\nArguments received by BroadcastTo.call():\n • x=tf.Tensor(shape=(None, 11, 1), dtype=bool)"
]
}
],
"source": [
"history = model.fit(\n",
" [X_tok, X_ner, X_srl, dec_q_in, dec_a_in],\n",
" [dec_q_out, dec_a_out, y_type],\n",
" validation_split=0.1,\n",
" epochs=30,\n",
" batch_size=64,\n",
" callbacks=[tf.keras.callbacks.EarlyStopping(patience=4, restore_best_weights=True)],\n",
" verbose=2\n",
")\n",
"model.save(\"full_seq2seq.keras\")\n",
"\n",
"\n",
"# -----------------------------------------------------------------\n",
"# 5. SAVE VOCABS (.pkl keeps python dict intact)\n",
"# -----------------------------------------------------------------\n",
"def save_vocab(v, name): pickle.dump(v, open(name,\"wb\"))\n",
"save_vocab(vocab_tok,\"vocab_tok.pkl\"); save_vocab(vocab_ner,\"vocab_ner.pkl\")\n",
"save_vocab(vocab_srl,\"vocab_srl.pkl\"); save_vocab(vocab_q, \"vocab_q.pkl\")\n",
"save_vocab(vocab_a, \"vocab_a.pkl\"); save_vocab(vocab_typ,\"vocab_typ.pkl\")"
]
},
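   {
   "cell_type": "code",
   "execution_count": null,
   "id": "maskfix01",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A minimal sketch of one workaround for the BroadcastTo TypeError above,\n",
    "# assuming the custom masked losses already handle padding: rebuild the\n",
    "# token embedding with mask_zero=False so Concatenate no longer has to\n",
    "# broadcast a partial mask across the merged features. Names suffixed\n",
    "# \"_nm\" are hypothetical and not part of the original model.\n",
    "emb_tok_nm = Embedding(len(vocab_tok), d_tok, mask_zero=False, name=\"emb_tok_nm\")(inp_tok)\n",
    "enc_concat_nm = Concatenate()([emb_tok_nm, emb_ner, emb_srl])\n",
    "enc_out_nm, h_nm, c_nm = LSTM(units, return_state=True, name=\"enc_lstm_nm\")(enc_concat_nm)\n",
    "print(enc_out_nm.shape)  # (None, 256), matching the summary above\n"
   ]
  },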
{
"cell_type": "code",
"execution_count": null,
"id": "61003de5",
"metadata": {},
"outputs": [],
"source": [
"def build_inference_models(trained):\n",
" # encoder\n",
" t_in = Input((MAX_SENT,), name=\"t_in\")\n",
" n_in = Input((MAX_SENT,), name=\"n_in\")\n",
" s_in = Input((MAX_SENT,), name=\"s_in\")\n",
" e_t = trained.get_layer(\"emb_tok\")(t_in)\n",
" e_n = trained.get_layer(\"emb_ner\")(n_in)\n",
" e_s = trained.get_layer(\"emb_srl\")(s_in)\n",
" concat = Concatenate()([e_t,e_n,e_s])\n",
" _, h, c = trained.get_layer(\"enc_lstm\")(concat)\n",
" enc_model = Model([t_in,n_in,s_in],[h,c])\n",
"\n",
" # questiondecoder\n",
" dq_in = Input((1,), name=\"dq_tok\")\n",
" dh = Input((units,), name=\"dh\"); dc = Input((units,), name=\"dc\")\n",
" dq_emb = trained.get_layer(\"emb_q\")(dq_in)\n",
" dq_lstm, nh, nc = trained.get_layer(\"lstm_q\")(dq_emb, initial_state=[dh,dc])\n",
" dq_out = trained.get_layer(\"q_out\").layer(dq_lstm)\n",
" dec_q_model = Model([dq_in, dh, dc], [dq_out, nh, nc])\n",
"\n",
" # answerdecoder\n",
" da_in = Input((1,), name=\"da_tok\")\n",
" ah = Input((units,), name=\"ah\"); ac = Input((units,), name=\"ac\")\n",
" da_emb = trained.get_layer(\"emb_a\")(da_in)\n",
" da_lstm, nh2, nc2 = trained.get_layer(\"lstm_a\")(da_emb, initial_state=[ah,ac])\n",
" da_out = trained.get_layer(\"a_out\").layer(da_lstm)\n",
" dec_a_model = Model([da_in, ah, ac], [da_out, nh2, nc2])\n",
"\n",
" # type classifier\n",
" type_dense = trained.get_layer(\"type_out\")\n",
" type_model = Model([t_in,n_in,s_in], type_dense(_)) # use _ = enc_lstm output\n",
"\n",
" return enc_model, dec_q_model, dec_a_model, type_model\n",
"\n",
"encoder_model, decoder_q, decoder_a, classifier_model = build_inference_models(model)\n",
"\n",
"inv_q = {v:k for k,v in vocab_q.items()}\n",
"inv_a = {v:k for k,v in vocab_a.items()}\n",
"\n",
"def enc_pad(seq, vmap, maxlen):\n",
" x = [vmap.get(t, vmap[\"<unk>\"]) for t in seq]\n",
" return x + [vmap[\"<pad>\"]] * (maxlen-len(x))\n",
"\n",
"def greedy_decode(tokens, ner, srl, max_q=20, max_a=10):\n",
" et = np.array([enc_pad(tokens, vocab_tok, MAX_SENT)])\n",
" en = np.array([enc_pad(ner, vocab_ner, MAX_SENT)])\n",
" es = np.array([enc_pad(srl, vocab_srl, MAX_SENT)])\n",
"\n",
" h,c = encoder_model.predict([et,en,es], verbose=0)\n",
"\n",
" # --- question\n",
" q_ids = []\n",
" tgt = np.array([[vocab_q[\"<sos>\"]]])\n",
" for _ in range(max_q):\n",
" logits,h,c = decoder_q.predict([tgt,h,c], verbose=0)\n",
" nxt = int(logits[0,-1].argmax())\n",
" if nxt==vocab_q[\"<eos>\"]: break\n",
" q_ids.append(nxt)\n",
" tgt = np.array([[nxt]])\n",
"\n",
" # --- answer (reuse fresh h,c)\n",
" h,c = encoder_model.predict([et,en,es], verbose=0)\n",
" a_ids = []\n",
" tgt = np.array([[vocab_a[\"<sos>\"]]])\n",
" for _ in range(max_a):\n",
" logits,h,c = decoder_a.predict([tgt,h,c], verbose=0)\n",
" nxt = int(logits[0,-1].argmax())\n",
" if nxt==vocab_a[\"<eos>\"]: break\n",
" a_ids.append(nxt)\n",
" tgt = np.array([[nxt]])\n",
"\n",
" # --- type\n",
" t_id = int(classifier_model.predict([et,en,es], verbose=0).argmax())\n",
"\n",
" return [inv_q[i] for i in q_ids], [inv_a[i] for i in a_ids], \\\n",
" [k for k,v in vocab_typ.items() if v==t_id][0]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5279b631",
"metadata": {},
"outputs": [],
"source": [
"test_tokens = [\"soekarno\",\"membacakan\",\"teks\",\"proklamasi\",\"pada\",\n",
" \"17\",\"agustus\",\"1945\"]\n",
"test_ner = [\"B-PER\",\"O\",\"O\",\"O\",\"O\",\"B-DATE\",\"I-DATE\",\"I-DATE\"]\n",
"test_srl = [\"ARG0\",\"V\",\"ARG1\",\"ARG1\",\"O\",\"ARGM-TMP\",\"ARGM-TMP\",\"ARGM-TMP\"]\n",
"\n",
"q,a,t = greedy_decode(test_tokens,test_ner,test_srl,max_q=MAX_Q,max_a=MAX_A)\n",
"print(\"\\nDEMO\\n----\")\n",
"print(\"Q :\", \" \".join(q))\n",
"print(\"A :\", \" \".join(a))\n",
"print(\"T :\", t)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "850d4905",
"metadata": {},
"outputs": [],
"source": [
"smooth = SmoothingFunction().method4\n",
"r_scorer = rouge_scorer.RougeScorer([\"rouge1\",\"rougeL\"], use_stemmer=True)\n",
"\n",
"def strip_special(seq, pad_id, eos_id):\n",
" return [x for x in seq if x not in (pad_id, eos_id)]\n",
"\n",
"def ids_to_text(ids, inv):\n",
" return \" \".join(inv[i] for i in ids)\n",
"\n",
"def evaluate(n=200):\n",
" idxs = random.sample(range(len(samples)), n)\n",
" refs, hyps = [], []\n",
" agg = scoring.BootstrapAggregator()\n",
"\n",
" for i in idxs:\n",
" gt_ids = strip_special(dec_q_out[i], pad_q, vocab_q[\"<eos>\"])\n",
" ref = ids_to_text(gt_ids, inv_q)\n",
" pred = \" \".join(greedy_decode(\n",
" samples[i][\"tokens\"],\n",
" samples[i][\"ner\"],\n",
" samples[i][\"srl\"]\n",
" )[0])\n",
" refs.append([ref.split()])\n",
" hyps.append(pred.split())\n",
" agg.add_scores(r_scorer.score(ref, pred))\n",
"\n",
" bleu = corpus_bleu(refs, hyps, smoothing_function=smooth)\n",
" r1 = agg.aggregate()[\"rouge1\"].mid\n",
" rL = agg.aggregate()[\"rougeL\"].mid\n",
"\n",
" print(f\"\\nEVAL (n={n})\")\n",
" print(f\"BLEU4 : {bleu:.4f}\")\n",
" print(f\"ROUGE1 : P={r1.precision:.3f} R={r1.recall:.3f} F1={r1.fmeasure:.3f}\")\n",
" print(f\"ROUGEL : P={rL.precision:.3f} R={rL.recall:.3f} F1={rL.fmeasure:.3f}\")\n",
"\n",
"evaluate(2) "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,357 @@
import numpy as np
import pandas as pd
import json
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input,
LSTM,
Dense,
Embedding,
Bidirectional,
Concatenate,
Attention,
Dropout,
)
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import re
import string
from collections import Counter
# Load data
with open("data_converted.json", "r") as f:
data = json.load(f)
# Preprocessing function
def preprocess_text(text):
"""Melakukan preprocessing teks dasar"""
text = text.lower()
text = re.sub(r"\s+", " ", text).strip()
return text
# Prepare data for the question prediction model
def prepare_question_prediction_data(data):
"""Prepare data for the question prediction model"""
contexts = []
tokens_list = []
ner_list = []
srl_list = []
questions = []
q_types = []
for item in data:
for qa in item["qas"]:
contexts.append(preprocess_text(item["context"]))
tokens_list.append(item["tokens"])
ner_list.append(item["ner"])
srl_list.append(item["srl"])
questions.append(preprocess_text(qa["question"]))
q_types.append(qa["type"])
return contexts, tokens_list, ner_list, srl_list, questions, q_types
# Prepare the data
contexts, tokens_list, ner_list, srl_list, questions, q_types = (
prepare_question_prediction_data(data)
)
# Tokenizer for text (context, question, answer)
max_vocab_size = 10000
tokenizer = Tokenizer(num_words=max_vocab_size, oov_token="<OOV>")
all_texts = contexts + questions + [" ".join(item) for item in tokens_list]
tokenizer.fit_on_texts(all_texts)
vocab_size = len(tokenizer.word_index) + 1
# Encoding for NER
ner_tokenizer = Tokenizer(oov_token="<OOV>")
ner_tokenizer.fit_on_texts([" ".join(ner) for ner in ner_list])
ner_vocab_size = len(ner_tokenizer.word_index) + 1
# Encoding for SRL
srl_tokenizer = Tokenizer(oov_token="<OOV>")
srl_tokenizer.fit_on_texts([" ".join(srl) for srl in srl_list])
srl_vocab_size = len(srl_tokenizer.word_index) + 1
# Encoding for question types
q_type_tokenizer = Tokenizer()
q_type_tokenizer.fit_on_texts(q_types)
q_type_vocab_size = len(q_type_tokenizer.word_index) + 1
# Convert tokens, NER, and SRL to sequences
def tokens_to_sequences(tokens, ner, srl):
"""Convert tokens, NER, and SRL to sequences"""
token_seqs = [tokenizer.texts_to_sequences([" ".join(t)])[0] for t in tokens]
ner_seqs = [ner_tokenizer.texts_to_sequences([" ".join(n)])[0] for n in ner]
srl_seqs = [srl_tokenizer.texts_to_sequences([" ".join(s)])[0] for s in srl]
return token_seqs, ner_seqs, srl_seqs
# Sequences
context_seqs = tokenizer.texts_to_sequences(contexts)
question_seqs = tokenizer.texts_to_sequences(questions)
token_seqs, ner_seqs, srl_seqs = tokens_to_sequences(tokens_list, ner_list, srl_list)
# Determine the maximum lengths for padding
max_context_len = max([len(seq) for seq in context_seqs])
max_question_len = max([len(seq) for seq in question_seqs])
max_token_len = max([len(seq) for seq in token_seqs])
# Pad sequences so that all inputs have the same length
def pad_all_sequences(context_seqs, token_seqs, ner_seqs, srl_seqs, question_seqs):
"""Pad all sequences"""
context_padded = pad_sequences(context_seqs, maxlen=max_context_len, padding="post")
token_padded = pad_sequences(token_seqs, maxlen=max_token_len, padding="post")
ner_padded = pad_sequences(ner_seqs, maxlen=max_token_len, padding="post")
srl_padded = pad_sequences(srl_seqs, maxlen=max_token_len, padding="post")
question_padded = pad_sequences(
question_seqs, maxlen=max_question_len, padding="post"
)
return (
context_padded,
token_padded,
ner_padded,
srl_padded,
question_padded,
)
# Encode the question types
q_type_indices = []
for q_type in q_types:
q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)
q_type_indices.append(q_type_idx)
# Convert to a numpy array
q_type_indices = np.array(q_type_indices)
# One-hot encode the question types
q_type_categorical = tf.keras.utils.to_categorical(
q_type_indices, num_classes=q_type_vocab_size
)
# Pad sequences
context_padded, token_padded, ner_padded, srl_padded, question_padded = (
pad_all_sequences(context_seqs, token_seqs, ner_seqs, srl_seqs, question_seqs)
)
# Split the data into train and test sets
indices = list(range(len(context_padded)))
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)
# Helper to take a subset of the data by indices
def get_subset(data, indices):
return np.array([data[i] for i in indices])
# Train data
train_context = get_subset(context_padded, train_indices)
train_token = get_subset(token_padded, train_indices)
train_ner = get_subset(ner_padded, train_indices)
train_srl = get_subset(srl_padded, train_indices)
train_q_type = get_subset(q_type_categorical, train_indices)
train_question = get_subset(question_padded, train_indices)
# Test data
test_context = get_subset(context_padded, test_indices)
test_token = get_subset(token_padded, test_indices)
test_ner = get_subset(ner_padded, test_indices)
test_srl = get_subset(srl_padded, test_indices)
test_q_type = get_subset(q_type_categorical, test_indices)
test_question = get_subset(question_padded, test_indices)
# Hyperparameters
embedding_dim = 100
lstm_units = 128
ner_embedding_dim = 50
srl_embedding_dim = 50
dropout_rate = 0.3
# Function to build the question prediction model
def create_question_prediction_model():
# Input layers
context_input = Input(shape=(max_context_len,), name="context_input")
token_input = Input(shape=(max_token_len,), name="token_input")
ner_input = Input(shape=(max_token_len,), name="ner_input")
srl_input = Input(shape=(max_token_len,), name="srl_input")
q_type_input = Input(shape=(q_type_vocab_size,), name="q_type_input")
# Shared embedding layer for text
text_embedding = Embedding(vocab_size, embedding_dim, name="text_embedding")
# Embeddings for NER and SRL
ner_embedding = Embedding(ner_vocab_size, ner_embedding_dim, name="ner_embedding")(
ner_input
)
srl_embedding = Embedding(srl_vocab_size, srl_embedding_dim, name="srl_embedding")(
srl_input
)
# Apply embeddings
context_embed = text_embedding(context_input)
token_embed = text_embedding(token_input)
# Bidirectional LSTM for context and token-level features
context_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="context_lstm")
)(context_embed)
# Concat token features (tokens, NER, SRL)
token_features = Concatenate(name="token_features")(
[token_embed, ner_embedding, srl_embedding]
)
token_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="token_lstm")
)(token_features)
# Self-attention over the context (Keras Attention expects [query, value])
context_attention = tf.keras.layers.Attention(name="context_attention")(
[context_lstm, context_lstm]
)
# Pool attention outputs
context_att_pool = tf.keras.layers.GlobalMaxPooling1D(name="context_att_pool")(
context_attention
)
token_pool = tf.keras.layers.GlobalMaxPooling1D(name="token_pool")(token_lstm)
# Concat all features
all_features = Concatenate(name="all_features")(
[context_att_pool, token_pool, q_type_input]
)
# Dense layers with expanded capacity for sequence generation
x = Dense(512, activation="relu", name="dense_1")(all_features)
x = Dropout(dropout_rate)(x)
x = Dense(256, activation="relu", name="dense_2")(x)
x = Dropout(dropout_rate)(x)
# Output projection for the sequence decoder
decoder_dense = Dense(vocab_size, activation="softmax", name="decoder_dense")
# Many-to-many architecture for sequence generation
# Decoder LSTM
decoder_lstm = LSTM(lstm_units * 2, return_sequences=True, name="decoder_lstm")
# Project the pooled features to the decoder input size
decoder_input = Dense(lstm_units * 2, activation="relu", name="decoder_input")(x)
# Decoder sequence with teacher forcing
# Expand dimensionality to match expected sequence length
repeated_vector = tf.keras.layers.RepeatVector(max_question_len)(decoder_input)
# Process through decoder LSTM
decoder_outputs = decoder_lstm(repeated_vector)
# Apply dense layer to each timestep
question_output_seq = tf.keras.layers.TimeDistributed(decoder_dense)(
decoder_outputs
)
# Create model
model = Model(
inputs=[
context_input,
token_input,
ner_input,
srl_input,
q_type_input,
],
outputs=question_output_seq,
)
# Compile model with categorical crossentropy for sequence prediction
model.compile(
optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
return model
# Build the model
model = create_question_prediction_model()
model.summary()
# Callback to save the best model
checkpoint = ModelCheckpoint(
"question_prediction_model.h5",
monitor="val_accuracy",
save_best_only=True,
verbose=1,
)
early_stop = EarlyStopping(monitor="val_accuracy", patience=10, verbose=1)
# Reshaping question data for sequence-to-sequence training
# We need to reshape to (samples, max_question_len, 1) for sparse categorical crossentropy
train_question_target = np.expand_dims(train_question, -1)
test_question_target = np.expand_dims(test_question, -1)
# Training parameters
batch_size = 8
epochs = 50
# Train model
history = model.fit(
[train_context, train_token, train_ner, train_srl, train_q_type],
train_question_target,
batch_size=batch_size,
epochs=epochs,
validation_data=(
[test_context, test_token, test_ner, test_srl, test_q_type],
test_question_target,
),
callbacks=[checkpoint, early_stop],
)
# # Plot training history
# plt.figure(figsize=(12, 4))
# plt.subplot(1, 2, 1)
# plt.plot(history.history['accuracy'])
# plt.plot(history.history['val_accuracy'])
# plt.title('Model Accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Validation'], loc='upper left')
# plt.subplot(1, 2, 2)
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('Model Loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Validation'], loc='upper left')
# plt.tight_layout()
# plt.savefig('question_prediction_training_history.png')
# plt.show()
# Save the model and tokenizers
model.save("question_prediction_model_final.h5")
# Save the tokenizers
tokenizer_data = {
"word_tokenizer": tokenizer.to_json(),
"ner_tokenizer": ner_tokenizer.to_json(),
"srl_tokenizer": srl_tokenizer.to_json(),
"q_type_tokenizer": q_type_tokenizer.to_json(),
"max_context_len": max_context_len,
"max_question_len": max_question_len,
"max_token_len": max_token_len,
}
with open("question_prediction_tokenizers.json", "w") as f:
json.dump(tokenizer_data, f)
print("Model dan tokenizer untuk prediksi pertanyaan berhasil disimpan!")

View File

@ -0,0 +1,473 @@
import numpy as np
import json
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input,
LSTM,
Dense,
Embedding,
Bidirectional,
Concatenate,
Dropout,
)
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import re
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu
# Load data
with open("data_converted.json", "r") as f:
data = json.load(f)
# Preprocessing function
def preprocess_text(text):
"""Melakukan preprocessing teks dasar"""
text = text.lower()
text = re.sub(r"\s+", " ", text).strip()
return text
# Prepare data for the question prediction model
def prepare_question_prediction_data(data):
"""Prepare data for the question prediction model"""
contexts = []
tokens_list = []
ner_list = []
srl_list = []
questions = []
q_types = []
for item in data:
for qa in item["qas"]:
contexts.append(preprocess_text(item["context"]))
tokens_list.append(item["tokens"])
ner_list.append(item["ner"])
srl_list.append(item["srl"])
questions.append(preprocess_text(qa["question"]))
q_types.append(qa["type"])
# The answer is deliberately not used as an input
return contexts, tokens_list, ner_list, srl_list, questions, q_types
# Prepare the data
contexts, tokens_list, ner_list, srl_list, questions, q_types = (
prepare_question_prediction_data(data)
)
# Tokenizer for text (context, question)
max_vocab_size = 10000
tokenizer = Tokenizer(num_words=max_vocab_size, oov_token="<OOV>")
all_texts = contexts + questions + [" ".join(item) for item in tokens_list]
tokenizer.fit_on_texts(all_texts)
vocab_size = len(tokenizer.word_index) + 1
# Encoding for NER
ner_tokenizer = Tokenizer(oov_token="<OOV>")
ner_tokenizer.fit_on_texts([" ".join(ner) for ner in ner_list])
ner_vocab_size = len(ner_tokenizer.word_index) + 1
# Encoding for SRL
srl_tokenizer = Tokenizer(oov_token="<OOV>")
srl_tokenizer.fit_on_texts([" ".join(srl) for srl in srl_list])
srl_vocab_size = len(srl_tokenizer.word_index) + 1
# Encoding for question types
q_type_tokenizer = Tokenizer()
q_type_tokenizer.fit_on_texts(q_types)
q_type_vocab_size = len(q_type_tokenizer.word_index) + 1
# Convert tokens, NER, and SRL to sequences
def tokens_to_sequences(tokens, ner, srl):
"""Convert tokens, NER, and SRL to sequences"""
token_seqs = [tokenizer.texts_to_sequences([" ".join(t)])[0] for t in tokens]
ner_seqs = [ner_tokenizer.texts_to_sequences([" ".join(n)])[0] for n in ner]
srl_seqs = [srl_tokenizer.texts_to_sequences([" ".join(s)])[0] for s in srl]
return token_seqs, ner_seqs, srl_seqs
# Sequences
context_seqs = tokenizer.texts_to_sequences(contexts)
question_seqs = tokenizer.texts_to_sequences(questions)
token_seqs, ner_seqs, srl_seqs = tokens_to_sequences(tokens_list, ner_list, srl_list)
# Determine the maximum lengths for padding
max_context_len = max([len(seq) for seq in context_seqs])
max_question_len = max([len(seq) for seq in question_seqs])
max_token_len = max([len(seq) for seq in token_seqs])
# Pad sequences so that all inputs have the same length
def pad_all_sequences(context_seqs, token_seqs, ner_seqs, srl_seqs, question_seqs):
"""Pad all sequences"""
context_padded = pad_sequences(context_seqs, maxlen=max_context_len, padding="post")
token_padded = pad_sequences(token_seqs, maxlen=max_token_len, padding="post")
ner_padded = pad_sequences(ner_seqs, maxlen=max_token_len, padding="post")
srl_padded = pad_sequences(srl_seqs, maxlen=max_token_len, padding="post")
question_padded = pad_sequences(
question_seqs, maxlen=max_question_len, padding="post"
)
return (
context_padded,
token_padded,
ner_padded,
srl_padded,
question_padded,
)
# Encode the question types
q_type_indices = []
for q_type in q_types:
q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)
q_type_indices.append(q_type_idx)
# Convert to a numpy array
q_type_indices = np.array(q_type_indices)
# One-hot encode the question types
q_type_categorical = tf.keras.utils.to_categorical(
q_type_indices, num_classes=q_type_vocab_size
)
# Pad sequences
context_padded, token_padded, ner_padded, srl_padded, question_padded = (
pad_all_sequences(context_seqs, token_seqs, ner_seqs, srl_seqs, question_seqs)
)
# Split the data into train and test sets
indices = list(range(len(context_padded)))
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)
# Helper to take a subset of the data by indices
def get_subset(data, indices):
return np.array([data[i] for i in indices])
# Train data
train_context = get_subset(context_padded, train_indices)
train_token = get_subset(token_padded, train_indices)
train_ner = get_subset(ner_padded, train_indices)
train_srl = get_subset(srl_padded, train_indices)
train_q_type = get_subset(q_type_categorical, train_indices)
train_question = get_subset(question_padded, train_indices)
# Test data
test_context = get_subset(context_padded, test_indices)
test_token = get_subset(token_padded, test_indices)
test_ner = get_subset(ner_padded, test_indices)
test_srl = get_subset(srl_padded, test_indices)
test_q_type = get_subset(q_type_categorical, test_indices)
test_question = get_subset(question_padded, test_indices)
# Hyperparameters
embedding_dim = 100
lstm_units = 128
ner_embedding_dim = 50
srl_embedding_dim = 50
dropout_rate = 0.3
# Function to build the question prediction model
def create_question_prediction_model():
# Input layers
context_input = Input(shape=(max_context_len,), name="context_input")
token_input = Input(shape=(max_token_len,), name="token_input")
ner_input = Input(shape=(max_token_len,), name="ner_input")
srl_input = Input(shape=(max_token_len,), name="srl_input")
q_type_input = Input(shape=(q_type_vocab_size,), name="q_type_input")
# Shared embedding layer for text
text_embedding = Embedding(vocab_size, embedding_dim, name="text_embedding")
# Embeddings for NER and SRL
ner_embedding = Embedding(ner_vocab_size, ner_embedding_dim, name="ner_embedding")(
ner_input
)
srl_embedding = Embedding(srl_vocab_size, srl_embedding_dim, name="srl_embedding")(
srl_input
)
# Apply embeddings
context_embed = text_embedding(context_input)
token_embed = text_embedding(token_input)
# Bidirectional LSTM for context and token-level features
context_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="context_lstm")
)(context_embed)
# Concat token features (tokens, NER, SRL)
token_features = Concatenate(name="token_features")(
[token_embed, ner_embedding, srl_embedding]
)
token_lstm = Bidirectional(
LSTM(lstm_units, return_sequences=True, name="token_lstm")
)(token_features)
# Apply attention to context LSTM
context_attention = tf.keras.layers.Attention(name="context_attention")(
[context_lstm, context_lstm]
)
# Pool attention outputs
context_att_pool = tf.keras.layers.GlobalMaxPooling1D(name="context_att_pool")(
context_attention
)
token_pool = tf.keras.layers.GlobalMaxPooling1D(name="token_pool")(token_lstm)
# Concatenate all features (no answer feature)
all_features = Concatenate(name="all_features")(
[context_att_pool, token_pool, q_type_input]
)
# Dense layers with expanded capacity for sequence generation
x = Dense(512, activation="relu", name="dense_1")(all_features)
x = Dropout(dropout_rate)(x)
x = Dense(256, activation="relu", name="dense_2")(x)
x = Dropout(dropout_rate)(x)
# Output projection for the sequence decoder
decoder_dense = Dense(vocab_size, activation="softmax", name="decoder_dense")
# Many-to-many architecture for sequence generation
# Decoder LSTM
decoder_lstm = LSTM(lstm_units * 2, return_sequences=True, name="decoder_lstm")
# Project the pooled features to the decoder input size
decoder_input = Dense(lstm_units * 2, activation="relu", name="decoder_input")(x)
# Decoder sequence with teacher forcing
# Expand dimensionality to match expected sequence length
repeated_vector = tf.keras.layers.RepeatVector(max_question_len)(decoder_input)
# Process through decoder LSTM
decoder_outputs = decoder_lstm(repeated_vector)
# Apply dense layer to each timestep
question_output_seq = tf.keras.layers.TimeDistributed(decoder_dense)(
decoder_outputs
)
# Create model
model = Model(
inputs=[
context_input,
token_input,
ner_input,
srl_input,
q_type_input,
],
outputs=question_output_seq,
)
# Compile model with categorical crossentropy for sequence prediction
model.compile(
optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
return model
# Build the model
model = create_question_prediction_model()
model.summary()
# Callback to save the best model
checkpoint = ModelCheckpoint(
"question_prediction_model.h5",
monitor="val_accuracy",
save_best_only=True,
verbose=1,
)
early_stop = EarlyStopping(monitor="val_accuracy", patience=10, verbose=1)
# Reshaping question data for sequence-to-sequence training
# We need to reshape to (samples, max_question_len, 1) for sparse categorical crossentropy
train_question_target = np.expand_dims(train_question, -1)
test_question_target = np.expand_dims(test_question, -1)
# Training parameters
batch_size = 8
epochs = 50
# Train model
history = model.fit(
[train_context, train_token, train_ner, train_srl, train_q_type],
train_question_target,
batch_size=batch_size,
epochs=epochs,
validation_data=(
[test_context, test_token, test_ner, test_srl, test_q_type],
test_question_target,
),
callbacks=[checkpoint, early_stop],
)
# Plot training history
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history["accuracy"])
plt.plot(history.history["val_accuracy"])
plt.title("Model Accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.legend(["Train", "Validation"], loc="upper left")
plt.subplot(1, 2, 2)
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.title("Model Loss")
plt.ylabel("Loss")
plt.xlabel("Epoch")
plt.legend(["Train", "Validation"], loc="upper left")
plt.tight_layout()
plt.savefig("question_prediction_training_history.png")
plt.show()
# Save the model and tokenizers
model.save("question_prediction_model_final.h5")
# Save the tokenizers
tokenizer_data = {
"word_tokenizer": tokenizer.to_json(),
"ner_tokenizer": ner_tokenizer.to_json(),
"srl_tokenizer": srl_tokenizer.to_json(),
"q_type_tokenizer": q_type_tokenizer.to_json(),
"max_context_len": max_context_len,
"max_question_len": max_question_len,
"max_token_len": max_token_len,
}
with open("question_prediction_tokenizers.json", "w") as f:
json.dump(tokenizer_data, f)
print("Model dan tokenizer untuk prediksi pertanyaan berhasil disimpan!")
# Function to predict a question
def predict_question(context, tokens, ner, srl, q_type):
context = preprocess_text(context)
context_seq = tokenizer.texts_to_sequences([context])[0]
token_seq = tokenizer.texts_to_sequences([" ".join(tokens)])[0]
ner_seq = ner_tokenizer.texts_to_sequences([" ".join(ner)])[0]
srl_seq = srl_tokenizer.texts_to_sequences([" ".join(srl)])[0]
context_padded = pad_sequences(
[context_seq], maxlen=max_context_len, padding="post"
)
token_padded = pad_sequences([token_seq], maxlen=max_token_len, padding="post")
ner_padded = pad_sequences([ner_seq], maxlen=max_token_len, padding="post")
srl_padded = pad_sequences([srl_seq], maxlen=max_token_len, padding="post")
# Q-type one-hot encoding
q_type_idx = q_type_tokenizer.word_index.get(q_type, 0)
q_type_one_hot = tf.keras.utils.to_categorical(
[q_type_idx], num_classes=q_type_vocab_size
)
# Predict
pred = model.predict(
[context_padded, token_padded, ner_padded, srl_padded, q_type_one_hot],
verbose=1,
)
# Convert prediction to words
pred_seq = np.argmax(pred[0], axis=1)
# Convert indices to words
reverse_word_map = {v: k for k, v in tokenizer.word_index.items()}
pred_words = [reverse_word_map.get(i, "") for i in pred_seq if i != 0]
return " ".join(pred_words)
def evaluate_model_performance(test_data):
# Initialize ROUGE scorer
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
# Lists to store scores
bleu_scores = []
rouge1_scores = []
rouge2_scores = []
rougel_scores = []
# Iterate through test data
for i in range(len(test_data)):
# Get test sample
sample_context = contexts[test_data[i]]
sample_tokens = tokens_list[test_data[i]]
sample_ner = ner_list[test_data[i]]
sample_srl = srl_list[test_data[i]]
sample_q_type = q_types[test_data[i]]
actual_question = questions[test_data[i]]
# Predict question
pred_question = predict_question(
sample_context, sample_tokens, sample_ner, sample_srl, sample_q_type
)
# Tokenize for BLEU score
actual_tokens = actual_question.split()
pred_tokens = pred_question.split()
# Calculate BLEU score
# Using unigram, bigram, trigram, and 4-gram
print("kaliamt aktual", actual_tokens)
print("kaliamt prediksi", pred_tokens)
bleu_score = sentence_bleu([actual_tokens], pred_tokens)
bleu_scores.append(bleu_score)
try:
rouge_scores = scorer.score(actual_question, pred_question)
# Extract F1 scores
rouge1_scores.append(rouge_scores["rouge1"].fmeasure)
rouge2_scores.append(rouge_scores["rouge2"].fmeasure)
rougel_scores.append(rouge_scores["rougeL"].fmeasure)
except Exception as e:
print(f"Error calculating ROUGE score: {e}")
# Calculate average scores
results = {
"avg_bleu_score": np.mean(bleu_scores),
"avg_rouge1": np.mean(rouge1_scores),
"avg_rouge2": np.mean(rouge2_scores),
"avg_rougel": np.mean(rougel_scores),
}
return results
loaded_model = load_model("question_prediction_model_final.h5")
with open("question_prediction_tokenizers.json", "r") as f:
tokenizer_data = json.load(f)
# Take a few samples from the test data
sample_idx = random.randint(0, len(test_indices) - 1)
sample_context = contexts[test_indices[sample_idx]]
sample_tokens = tokens_list[test_indices[sample_idx]]
sample_ner = ner_list[test_indices[sample_idx]]
sample_srl = srl_list[test_indices[sample_idx]]
sample_q_type = q_types[test_indices[sample_idx]]
performance_metrics = evaluate_model_performance(test_indices)
print("\nModel Performance Metrics:")
print(f"Average BLEU Score: {performance_metrics['avg_bleu_score']:.4f}")
print(f"Average ROUGE-1 Score: {performance_metrics['avg_rouge1']:.4f}")
print(f"Average ROUGE-2 Score: {performance_metrics['avg_rouge2']:.4f}")
print(f"Average ROUGE-L Score: {performance_metrics['avg_rougel']:.4f}")

View File

@ -0,0 +1,210 @@
import numpy as np
import json
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import re
class QuestionPredictionModel:
def __init__(self, model_path, tokenizer_path):
"""
Initialize question prediction model with pre-trained model and tokenizers
"""
# Load model
self.model = load_model(model_path)
# Load tokenizers
with open(tokenizer_path, 'r') as f:
tokenizer_data = json.load(f)
# Reconstruct tokenizers
self.word_tokenizer = tokenizer_from_json(tokenizer_data['word_tokenizer'])
self.ner_tokenizer = tokenizer_from_json(tokenizer_data['ner_tokenizer'])
self.srl_tokenizer = tokenizer_from_json(tokenizer_data['srl_tokenizer'])
self.q_type_tokenizer = tokenizer_from_json(tokenizer_data['q_type_tokenizer'])
# Get max lengths (the training scripts above do not save max_answer_len,
# so fall back to max_token_len when the key is missing)
self.max_context_len = tokenizer_data['max_context_len']
self.max_answer_len = tokenizer_data.get('max_answer_len', tokenizer_data['max_token_len'])
self.max_question_len = tokenizer_data['max_question_len']
self.max_token_len = tokenizer_data['max_token_len']
# Get vocabulary sizes
self.vocab_size = len(self.word_tokenizer.word_index) + 1
self.q_type_vocab_size = len(self.q_type_tokenizer.word_index) + 1
def preprocess_text(self, text):
"""Basic text preprocessing"""
text = text.lower()
text = re.sub(r"\s+", " ", text).strip()
return text
def predict_question(self, context, answer, tokens, ner, srl, q_type):
"""
Predict a question based on given context, answer, tokens, NER, SRL, and question type
Args:
context (str): The context text
answer (str): The answer to generate a question for
tokens (list): List of tokens
ner (list): List of NER tags corresponding to tokens
srl (list): List of SRL tags corresponding to tokens
q_type (str): Question type ('isian', 'opsi', or 'true_false')
Returns:
str: The predicted question
"""
# Preprocess inputs
context = self.preprocess_text(context)
answer = self.preprocess_text(answer)
# Convert to sequences
context_seq = self.word_tokenizer.texts_to_sequences([context])[0]
answer_seq = self.word_tokenizer.texts_to_sequences([answer])[0]
tokens_seq = self.word_tokenizer.texts_to_sequences([" ".join(tokens)])[0]
ner_seq = self.ner_tokenizer.texts_to_sequences([" ".join(ner)])[0]
srl_seq = self.srl_tokenizer.texts_to_sequences([" ".join(srl)])[0]
# Pad sequences
context_padded = pad_sequences([context_seq], maxlen=self.max_context_len, padding="post")
answer_padded = pad_sequences([answer_seq], maxlen=self.max_answer_len, padding="post")
tokens_padded = pad_sequences([tokens_seq], maxlen=self.max_token_len, padding="post")
ner_padded = pad_sequences([ner_seq], maxlen=self.max_token_len, padding="post")
srl_padded = pad_sequences([srl_seq], maxlen=self.max_token_len, padding="post")
# One-hot encode question type
q_type_idx = self.q_type_tokenizer.word_index.get(q_type, 0)
q_type_categorical = tf.keras.utils.to_categorical(
[q_type_idx], num_classes=self.q_type_vocab_size
)
# Make prediction
predicted_seq = self.model.predict(
[context_padded, answer_padded, tokens_padded, ner_padded, srl_padded, q_type_categorical]
)
# Convert predictions to tokens (taking the highest probability token at each position)
predicted_indices = np.argmax(predicted_seq[0], axis=1)
# Create reversed word index for converting indices back to words
reverse_word_index = {v: k for k, v in self.word_tokenizer.word_index.items()}
# Convert indices to words
predicted_words = []
for idx in predicted_indices:
if idx != 0: # Skip padding tokens
predicted_words.append(reverse_word_index.get(idx, ''))
# Form the question
predicted_question = ' '.join(predicted_words)
# Add "___" to the end based on question type convention
if "___" not in predicted_question:
predicted_question += " ___"
return predicted_question
def batch_predict_questions(self, data):
"""
Predict questions for a batch of data
        Args:
            data (list): List of dictionaries with context, tokens, ner, srl,
                and optionally a "qas" list of answered questions for evaluation
        Returns:
            list: List of result dicts, each with the predicted question and,
                when a reference question exists, the ground truth
"""
results = []
for item in data:
context = item["context"]
tokens = item["tokens"]
ner = item["ner"]
srl = item["srl"]
# If there are Q&A pairs, use them for evaluation
if "qas" in item:
for qa in item["qas"]:
answer = qa["answer"]
q_type = qa["type"]
ground_truth = qa["question"]
predicted_question = self.predict_question(
context, answer, tokens, ner, srl, q_type
)
results.append({
"context": context,
"answer": answer,
"predicted_question": predicted_question,
"ground_truth": ground_truth,
"question_type": q_type
})
else:
# If no Q&A pairs, generate questions for all question types
for q_type in ["isian", "true_false", "opsi"]:
# For demo purposes, use a placeholder answer (would need actual answers in real use)
# In practice, you might extract potential answers from the context
placeholders = {
"isian": "placeholder",
"true_false": "true",
"opsi": "placeholder"
}
predicted_question = self.predict_question(
context, placeholders[q_type], tokens, ner, srl, q_type
)
results.append({
"context": context,
"predicted_question": predicted_question,
"question_type": q_type
})
return results
# Example usage
if __name__ == "__main__":
# Load test data
with open("data_converted.json", "r") as f:
test_data = json.load(f)
# Initialize model
question_predictor = QuestionPredictionModel(
model_path="question_prediction_model_final.h5",
tokenizer_path="question_prediction_tokenizers.json"
)
# Example single prediction
sample = test_data[0]
context = sample["context"]
tokens = sample["tokens"]
ner = sample["ner"]
srl = sample["srl"]
answer = sample["qas"][0]["answer"]
q_type = sample["qas"][0]["type"]
predicted_question = question_predictor.predict_question(
context, answer, tokens, ner, srl, q_type
)
print(f"Context: {context}")
print(f"Answer: {answer}")
print(f"Question Type: {q_type}")
print(f"Predicted Question: {predicted_question}")
print(f"Ground Truth: {sample['qas'][0]['question']}")
# Batch prediction
results = question_predictor.batch_predict_questions(test_data[:3])
print("\nBatch Results:")
for i, result in enumerate(results):
print(f"\nResult {i+1}:")
print(f"Context: {result['context']}")
print(f"Answer: {result.get('answer', 'N/A')}")
print(f"Question Type: {result['question_type']}")
print(f"Predicted Question: {result['predicted_question']}")
if 'ground_truth' in result:
print(f"Ground Truth: {result['ground_truth']}")

View File

@ -0,0 +1,188 @@
import numpy as np
import json
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import re
class QuestionPredictionModel:
def __init__(self, model_path, tokenizer_path):
"""
Initialize question prediction model with pre-trained model and tokenizers
"""
# Load model
self.model = load_model(model_path)
# Load tokenizers
with open(tokenizer_path, "r") as f:
tokenizer_data = json.load(f)
# Reconstruct tokenizers
self.word_tokenizer = tokenizer_from_json(tokenizer_data["word_tokenizer"])
self.ner_tokenizer = tokenizer_from_json(tokenizer_data["ner_tokenizer"])
self.srl_tokenizer = tokenizer_from_json(tokenizer_data["srl_tokenizer"])
self.q_type_tokenizer = tokenizer_from_json(tokenizer_data["q_type_tokenizer"])
# Get max lengths
self.max_context_len = tokenizer_data["max_context_len"]
self.max_question_len = tokenizer_data["max_question_len"]
self.max_token_len = tokenizer_data["max_token_len"]
# Get vocabulary sizes
self.vocab_size = len(self.word_tokenizer.word_index) + 1
self.q_type_vocab_size = len(self.q_type_tokenizer.word_index) + 1
def preprocess_text(self, text):
"""Basic text preprocessing"""
text = text.lower()
text = re.sub(r"\s+", " ", text).strip()
return text
def predict_question(self, context, tokens, ner, srl, q_type):
"""Prediksi pertanyaan berdasarkan konteks dan fitur lainnya"""
# Preprocess
context = self.preprocess_text(context)
# Convert to sequences
context_seq = self.word_tokenizer.texts_to_sequences([context])[0]
token_seq = self.word_tokenizer.texts_to_sequences([" ".join(tokens)])[0]
ner_seq = self.ner_tokenizer.texts_to_sequences([" ".join(ner)])[0]
srl_seq = self.srl_tokenizer.texts_to_sequences([" ".join(srl)])[0]
# Pad sequences
context_padded = pad_sequences(
[context_seq], maxlen=self.max_context_len, padding="post"
)
token_padded = pad_sequences(
[token_seq], maxlen=self.max_token_len, padding="post"
)
ner_padded = pad_sequences([ner_seq], maxlen=self.max_token_len, padding="post")
srl_padded = pad_sequences([srl_seq], maxlen=self.max_token_len, padding="post")
# Q-type one-hot encoding
q_type_idx = self.q_type_tokenizer.word_index.get(q_type, 0)
q_type_one_hot = tf.keras.utils.to_categorical(
[q_type_idx], num_classes=self.q_type_vocab_size
)
# Predict
pred = self.model.predict(
[context_padded, token_padded, ner_padded, srl_padded, q_type_one_hot]
)
# Convert prediction to words
pred_seq = np.argmax(pred[0], axis=1)
# Convert indices to words
reverse_word_map = {v: k for k, v in self.word_tokenizer.word_index.items()}
pred_words = [reverse_word_map.get(i, "") for i in pred_seq if i != 0]
return " ".join(pred_words)
def batch_predict_questions(self, data):
"""
Predict questions for a batch of data
        Args:
            data (list): List of dictionaries with context, tokens, ner, and srl,
                optionally carrying a "qas" list for evaluation
        Returns:
            list: List of result dicts with the predicted question and, when a
                reference exists, the ground-truth question
"""
results = []
for item in data:
context = item["context"]
tokens = item["tokens"]
ner = item["ner"]
srl = item["srl"]
# If there are Q&A pairs, use them for evaluation
if "qas" in item:
for qa in item["qas"]:
q_type = qa["type"]
ground_truth = qa["question"]
predicted_question = self.predict_question(
context, tokens, ner, srl, q_type
)
results.append(
{
"context": context,
"predicted_question": predicted_question,
"ground_truth": ground_truth,
"question_type": q_type,
}
)
            else:
                # If no Q&A pairs, generate one question per question type.
                # Unlike the answer-aware variant, this model takes no answer
                # input, so no placeholder answer is needed.
                for q_type in ["isian", "true_false", "opsi"]:
                    predicted_question = self.predict_question(
                        context, tokens, ner, srl, q_type
                    )
results.append(
{
"context": context,
"predicted_question": predicted_question,
"question_type": q_type,
}
)
return results
# Example usage
if __name__ == "__main__":
# Load test data
with open("data_converted.json", "r") as f:
test_data = json.load(f)
# Initialize model
question_predictor = QuestionPredictionModel(
model_path="question_prediction_model_final.h5",
tokenizer_path="question_prediction_tokenizers.json",
)
# Example single prediction
sample = test_data[0]
context = sample["context"]
tokens = sample["tokens"]
ner = sample["ner"]
srl = sample["srl"]
answer = sample["qas"][0]["answer"]
q_type = sample["qas"][0]["type"]
predicted_question = question_predictor.predict_question(
context, tokens, ner, srl, q_type
)
print(f"Context: {context}")
print(f"Answer: {answer}")
print(f"Question Type: {q_type}")
print(f"Predicted Question: {predicted_question}")
print(f"Ground Truth: {sample['qas'][0]['question']}")
# Batch prediction
# results = question_predictor.batch_predict_questions(test_data[:3])
# print("\nBatch Results:")
# for i, result in enumerate(results):
# print(f"\nResult {i+1}:")
# print(f"Context: {result['context']}")
# print(f"Answer: {result.get('answer', 'N/A')}")
# print(f"Question Type: {result['question_type']}")
# print(f"Predicted Question: {result['predicted_question']}")
# if "ground_truth" in result:
# print(f"Ground Truth: {result['ground_truth']}")
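
# --- Hedged sketch: scoring batch predictions against ground truth ---
# A minimal evaluation helper, not part of the original file. It assumes
# nltk is installed, and scores each result that carries a ground truth:
# sentence-level BLEU (smoothed, since questions are short) and a simple
# unigram ROUGE-1 F1 computed from token overlap, echoing the metrics the
# training script reports. `evaluate_batch` is an illustrative name.
from collections import Counter
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction


def evaluate_batch(results):
    smooth = SmoothingFunction().method1
    bleu_scores, rouge1_scores = [], []
    for r in results:
        if "ground_truth" not in r:
            continue  # generated without a reference question; nothing to score
        ref = r["ground_truth"].split()
        hyp = r["predicted_question"].split()
        bleu_scores.append(sentence_bleu([ref], hyp, smoothing_function=smooth))
        # Unigram overlap (clipped counts) for a simple ROUGE-1 F1
        overlap = sum((Counter(ref) & Counter(hyp)).values())
        precision = overlap / len(hyp) if hyp else 0.0
        recall = overlap / len(ref) if ref else 0.0
        rouge1_scores.append(
            2 * precision * recall / (precision + recall) if precision + recall else 0.0
        )
    n = max(len(bleu_scores), 1)
    return {"avg_bleu": sum(bleu_scores) / n, "avg_rouge1": sum(rouge1_scores) / n}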

File diff suppressed because one or more lines are too long

Binary file not shown (image added, 50 KiB)

Binary file not shown (image added, 88 KiB)