import json

# Load the dataset. The file is expected to hold a JSON array of records;
# each record is read below through the keys "tokens", "ner", "srl",
# "question", "answer" and "type".
with open("dataset_qc.json", encoding="utf-8") as f:
    raw_data = json.load(f)

# Per-record token lists, each token lower-cased and then stripped of
# surrounding whitespace.
tokens = [[t.lower().strip() for t in item["tokens"]] for item in raw_data]

# Tag columns are taken over exactly as stored in the file.
# NOTE(review): presumably one NER / SRL label per token, aligned with
# `tokens` -- confirm against the dataset.
ner_tags = [item["ner"] for item in raw_data]
srl_tags = [item["srl"] for item in raw_data]

# Question and answer strings get the same normalisation as the tokens.
questions = [item["question"].lower().strip() for item in raw_data]
answers = [item["answer"].lower().strip() for item in raw_data]

# Per-record "type" value, kept unmodified.
types = [item["type"] for item in raw_data]