feat: implement question generation and answer generation
commit 748a5b868f
parent 9378e4d145
@@ -10,6 +10,8 @@ from app.repositories import (
     QuizMemoryRepository,
     AnswerMemoryRepository,
     ScoreMemoryRepository,
+    QuestionGenerationRepository,
+    AnswerGenerationRepository,
 )

 from app.services import (
@@ -48,6 +50,8 @@ class Container(containers.DeclarativeContainer):
     subject_repository = providers.Factory(SubjectRepository, mongo.provided.db)
     session_repository = providers.Factory(SessionRepository, mongo.provided.db)
     ner_srl_repository = providers.Factory(NERSRLRepository)
+    question_generation_repository = providers.Factory(QuestionGenerationRepository)
+    answer_generator_repository = providers.Factory(AnswerGenerationRepository)
     session_memory_repository = providers.Factory(SessionMemoryRepository, redis)
     quiz_memory_repository = providers.Factory(QuizMemoryRepository, redis)
     answer_memory_repository = providers.Factory(AnswerMemoryRepository, redis)
@@ -101,7 +105,10 @@ class Container(containers.DeclarativeContainer):
     )

     question_generation_service = providers.Factory(
-        QuestionGenerationService, ner_srl_repository
+        QuestionGenerationService,
+        ner_srl_repository,
+        question_generation_repository,
+        answer_generator_repository,
     )

     # controllers
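Not part of the commit: a minimal sketch of how the rewired factory resolves, assuming the dependency_injector API used above; the module path and the configuration the real container needs are glossed over.

    # Hypothetical usage; only Container and the provider names come from the diff.
    from app.core.container import Container  # assumed module path

    container = Container()
    # The factory now injects three repositories instead of one: ner_srl_repository,
    # question_generation_repository, and answer_generator_repository.
    service = container.question_generation_service()
    print(service.createQuizAutomate("Ibu kota Indonesia adalah Jakarta."))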
Binary file not shown.
File diff suppressed because one or more lines are too long
@@ -8,6 +8,8 @@ from .session_memory_repository import SessionMemoryRepository
 from .quiz_memory_repository import QuizMemoryRepository
 from .answer_memory_repository import AnswerMemoryRepository
 from .score_memory_repository import ScoreMemoryRepository
+from .question_generation_repository import QuestionGenerationRepository
+from .answer_generation_repository import AnswerGenerationRepository

 __all__ = [
     "UserRepository",
@@ -20,4 +22,6 @@ __all__ = [
     "QuizMemoryRepository",
     "AnswerMemoryRepository",
     "ScoreMemoryRepository",
+    "QuestionGenerationRepository",
+    "AnswerGenerationRepository",
 ]
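With these re-exports in place, both repositories resolve through the package root, which is exactly what the container diff above imports. A quick sanity check (a sketch; only the names come from the diff):

    from app.repositories import (
        QuestionGenerationRepository,
        AnswerGenerationRepository,
    )

    repo = QuestionGenerationRepository()  # loads the model and tokenizers in __init__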
@@ -8,8 +8,8 @@ import re


 class AnswerGenerationRepository:
-    MODEL_PATH = "app/lstm_model/question_generation/qa_lstm_model_final.h5"
-    TOKENIZER_PATH = "app/lstm_model/question_generation/qa_tokenizers.json"
+    MODEL_PATH = "app/lstm_model/question_generation/qa_lstm_model_final_v2.keras"
+    TOKENIZER_PATH = "app/lstm_model/question_generation/qa_tokenizers_v2.json"

     def __init__(self):
         with open(self.TOKENIZER_PATH, "r") as f:
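The bump from .h5 to _v2.keras is a format change, not just a rename: .keras is the native Keras serialization format, while .h5 is the legacy HDF5 one. A standalone sketch of the load pattern the new constants imply, assuming standard tensorflow.keras APIs and that the tokenizer JSON is a plain dict:

    import json

    from tensorflow.keras.models import load_model

    MODEL_PATH = "app/lstm_model/question_generation/qa_lstm_model_final_v2.keras"
    TOKENIZER_PATH = "app/lstm_model/question_generation/qa_tokenizers_v2.json"

    # load_model accepts both the legacy .h5 and the native .keras format.
    model = load_model(MODEL_PATH)
    with open(TOKENIZER_PATH, "r") as f:
        tokenizer_data = json.load(f)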
@@ -1,7 +1,7 @@
 import numpy as np
 import pickle
 from tensorflow.keras.models import load_model  # type: ignore
 from tensorflow.keras.preprocessing.sequence import pad_sequences  # type: ignore
 import re


@@ -40,8 +40,8 @@ class NERSRLRepository:

         return {
             "tokens": tokens,
-            "labels_ner": [self.idx2tag_ner[int(i)] for i in pred_ner],
-            "labels_srl": [self.idx2tag_srl[int(i)] for i in pred_srl],
+            "ner": [self.idx2tag_ner[int(i)] for i in pred_ner],
+            "srl": [self.idx2tag_srl[int(i)] for i in pred_srl],
         }

     def labeling_token(self, tokens: list[str]) -> dict:
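The key rename from "labels_ner"/"labels_srl" to "ner"/"srl" is what lets the service below forward result["ner"] and result["srl"] unchanged. A toy consumer of the new shape (direct construction assumed for illustration):

    repo = NERSRLRepository()  # hypothetical; the app resolves this via the container
    result = repo.predict_sentence("Jakarta adalah ibu kota Indonesia.")
    for token, ner, srl in zip(result["tokens"], result["ner"], result["srl"]):
        print(f"{token}\t{ner}\t{srl}")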
@@ -1,16 +1,16 @@
 import numpy as np
 import json
 import tensorflow as tf
 from tensorflow.keras.preprocessing.text import tokenizer_from_json  # type: ignore
 from tensorflow.keras.preprocessing.sequence import pad_sequences  # type: ignore
 from tensorflow.keras.models import load_model  # type: ignore
 import re


 class QuestionGenerationRepository:
     # Static paths for model and tokenizer
-    MODEL_PATH = "app/lstm_model/question_generation/question_prediction_model_final.h5"
-    TOKENIZER_PATH = "app/lstm_model/question_generation/question_prediction_tokenizers.json"
+    MODEL_PATH = "app/lstm_model/question_generation/new_model/question_prediction_model_final.h5"
+    TOKENIZER_PATH = "app/lstm_model/question_generation/new_model/question_prediction_tokenizers.json"

     def __init__(self):
         """
@@ -32,7 +32,6 @@ class QuestionGenerationRepository:

         # Get max lengths
         self.max_context_len = tokenizer_data["max_context_len"]
-        self.max_answer_len = tokenizer_data["max_answer_len"]
         self.max_question_len = tokenizer_data["max_question_len"]
         self.max_token_len = tokenizer_data["max_token_len"]

@@ -46,29 +45,14 @@ class QuestionGenerationRepository:
         text = re.sub(r"\s+", " ", text).strip()
         return text

-    def predict_question(self, context, answer, tokens, ner, srl, q_type):
-        """
-        Predict a question based on given context, answer, tokens, NER, SRL, and question type
-
-        Args:
-            context (str): The context text
-            answer (str): The answer to generate a question for
-            tokens (list): List of tokens
-            ner (list): List of NER tags corresponding to tokens
-            srl (list): List of SRL tags corresponding to tokens
-            q_type (str): Question type ('isian', 'opsi', or 'true_false')
-
-        Returns:
-            str: The predicted question
-        """
-        # Preprocess inputs
+    def predict_question(self, context, tokens, ner, srl, q_type):
+        """Predict a question from the context and the other features"""
+        # Preprocess
         context = self.preprocess_text(context)
-        answer = self.preprocess_text(answer)

         # Convert to sequences
         context_seq = self.word_tokenizer.texts_to_sequences([context])[0]
-        answer_seq = self.word_tokenizer.texts_to_sequences([answer])[0]
-        tokens_seq = self.word_tokenizer.texts_to_sequences([" ".join(tokens)])[0]
+        token_seq = self.word_tokenizer.texts_to_sequences([" ".join(tokens)])[0]
         ner_seq = self.ner_tokenizer.texts_to_sequences([" ".join(ner)])[0]
         srl_seq = self.srl_tokenizer.texts_to_sequences([" ".join(srl)])[0]

@@ -76,113 +60,28 @@ class QuestionGenerationRepository:
         context_padded = pad_sequences(
             [context_seq], maxlen=self.max_context_len, padding="post"
         )
-        answer_padded = pad_sequences(
-            [answer_seq], maxlen=self.max_answer_len, padding="post"
-        )
-        tokens_padded = pad_sequences(
-            [tokens_seq], maxlen=self.max_token_len, padding="post"
-        )
+        token_padded = pad_sequences(
+            [token_seq], maxlen=self.max_token_len, padding="post"
+        )
         ner_padded = pad_sequences([ner_seq], maxlen=self.max_token_len, padding="post")
         srl_padded = pad_sequences([srl_seq], maxlen=self.max_token_len, padding="post")

-        # One-hot encode question type
+        # Q-type one-hot encoding
         q_type_idx = self.q_type_tokenizer.word_index.get(q_type, 0)
-        q_type_categorical = tf.keras.utils.to_categorical(
+        q_type_one_hot = tf.keras.utils.to_categorical(
             [q_type_idx], num_classes=self.q_type_vocab_size
         )

-        # Make prediction
-        predicted_seq = self.model.predict(
-            [
-                context_padded,
-                answer_padded,
-                tokens_padded,
-                ner_padded,
-                srl_padded,
-                q_type_categorical,
-            ]
+        # Predict
+        pred = self.model.predict(
+            [context_padded, token_padded, ner_padded, srl_padded, q_type_one_hot]
         )

-        # Convert predictions to tokens (taking the highest probability token at each position)
-        predicted_indices = np.argmax(predicted_seq[0], axis=1)
+        # Convert prediction to words
+        pred_seq = np.argmax(pred[0], axis=1)

-        # Create reversed word index for converting indices back to words
-        reverse_word_index = {v: k for k, v in self.word_tokenizer.word_index.items()}
-
         # Convert indices to words
-        predicted_words = []
-        for idx in predicted_indices:
-            if idx != 0:  # Skip padding tokens
-                predicted_words.append(reverse_word_index.get(idx, ""))
-
-        # Form the question
-        predicted_question = " ".join(predicted_words)
-
-        # Add "___" to the end based on question type convention
-        if "___" not in predicted_question:
-            predicted_question += " ___"
-
-        return predicted_question
-
-    def batch_predict_questions(self, data):
-        """
-        Predict questions for a batch of data
-
-        Args:
-            data (list): List of dictionaries with context, tokens, ner, srl, and answers
-
-        Returns:
-            list: List of predicted questions
-        """
-        results = []
-
-        for item in data:
-            context = item["context"]
-            tokens = item["tokens"]
-            ner = item["ner"]
-            srl = item["srl"]
-
-            # If there are Q&A pairs, use them for evaluation
-            if "qas" in item:
-                for qa in item["qas"]:
-                    answer = qa["answer"]
-                    q_type = qa["type"]
-                    ground_truth = qa["question"]
-
-                    predicted_question = self.predict_question(
-                        context, answer, tokens, ner, srl, q_type
-                    )
-
-                    results.append(
-                        {
-                            "context": context,
-                            "answer": answer,
-                            "predicted_question": predicted_question,
-                            "ground_truth": ground_truth,
-                            "question_type": q_type,
-                        }
-                    )
-            else:
-                # If no Q&A pairs, generate questions for all question types
-                for q_type in ["isian", "true_false", "opsi"]:
-                    # For demo purposes, use a placeholder answer (would need actual answers in real use)
-                    # In practice, you might extract potential answers from the context
-                    placeholders = {
-                        "isian": "placeholder",
-                        "true_false": "true",
-                        "opsi": "placeholder",
-                    }
-
-                    predicted_question = self.predict_question(
-                        context, placeholders[q_type], tokens, ner, srl, q_type
-                    )
-
-                    results.append(
-                        {
-                            "context": context,
-                            "predicted_question": predicted_question,
-                            "question_type": q_type,
-                        }
-                    )
-
-        return results
+        reverse_word_map = {v: k for k, v in self.word_tokenizer.word_index.items()}
+        pred_words = [reverse_word_map.get(i, "") for i in pred_seq if i != 0]
+        return " ".join(pred_words)
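The rewritten tail of predict_question is a plain greedy decode: take the argmax at each time step, drop the padding index 0, and map the surviving indices back through a reversed word index. A self-contained toy run of exactly that step (vocabulary and scores are made up):

    import numpy as np

    word_index = {"siapa": 1, "nama": 2, "presiden": 3}  # toy tokenizer word_index
    # Fake model output with shape (batch=1, timesteps=4, vocab=4).
    pred = np.array([[[0.10, 0.80, 0.05, 0.05],    # argmax 1 -> "siapa"
                      [0.10, 0.10, 0.70, 0.10],    # argmax 2 -> "nama"
                      [0.20, 0.10, 0.10, 0.60],    # argmax 3 -> "presiden"
                      [0.90, 0.05, 0.03, 0.02]]])  # argmax 0 -> padding, dropped

    pred_seq = np.argmax(pred[0], axis=1)
    reverse_word_map = {v: k for k, v in word_index.items()}
    pred_words = [reverse_word_map.get(i, "") for i in pred_seq if i != 0]
    print(" ".join(pred_words))  # -> "siapa nama presiden"

Note that the commit also drops the old "append ___ if missing" convention, so fill-in-the-blank markers now have to come out of the model itself.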
@@ -1,25 +1,51 @@
-from app.repositories import NERSRLRepository
+from app.repositories import (
+    NERSRLRepository,
+    QuestionGenerationRepository,
+    AnswerGenerationRepository,
+)
 import re


 class QuestionGenerationService:

-    def __init__(self, ner_srl_repository: NERSRLRepository):
+    def __init__(
+        self,
+        ner_srl_repository: NERSRLRepository,
+        question_generate_repository: QuestionGenerationRepository,
+        answer_generate_repository: AnswerGenerationRepository,
+    ):
         self._ner_srl_repository = ner_srl_repository
+        self._question_generation_repository = question_generate_repository
+        self._answer_generation_repository = answer_generate_repository

     def createQuizAutomate(self, sentence: str):
         # Use a regex to split only on periods followed by a space or the end of
         # the sentence, and not ones that are part of a number (e.g. 19.00 is not split)
         split_pattern = r"\.(?=\s|$)(?!\d)"

-        # Split the sentence with the regex
+        # split sentence using regex
         sentences = [s.strip() for s in re.split(split_pattern, sentence) if s.strip()]

         results = []
         for s in sentences:
             result = self._ner_srl_repository.predict_sentence(s)
-            results.append(result)
+
+            question = self._question_generation_repository.predict_question(
+                context=s,
+                ner=result["ner"],
+                tokens=result["tokens"],
+                srl=result["srl"],
+                q_type=1,
+            )
+
+            answer = self._answer_generation_repository.predict_answer(
+                context=s,
+                question=question,
+                ner=result["ner"],
+                tokens=result["tokens"],
+                srl=result["srl"],
+                q_type=1,
+            )
+
+            results.append({"question": question, "answer": answer})

         return results
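The split pattern is worth a second look: the lookahead (?=\s|$) only matches a period followed by whitespace or the end of the text, so decimals such as 19.00 pass through intact, exactly as the comment promises. A quick standalone check:

    import re

    split_pattern = r"\.(?=\s|$)(?!\d)"
    text = "Kereta berangkat pukul 19.00 dari stasiun. Penumpang sudah menunggu."
    sentences = [s.strip() for s in re.split(split_pattern, text) if s.strip()]
    print(sentences)
    # -> ['Kereta berangkat pukul 19.00 dari stasiun', 'Penumpang sudah menunggu']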