MIF_E31222544/tokenator.py

# tokenator.py
import os
# List of lecturer names
names = [
    "Wahyu Kurnia Dewanto",
    "Hendra Yufit Riskiawan",
    "Ika Widiastuti",
    "Dwi Putro Sarwo Setyohadi",
    "Taufiq Rizaldi",
    "Khafidurrohman Agustianto",
    "Husin",
    "Faisal Lutfi Afriansyah",
    "Pramuditha Shinta Dewi Puspitasari",
    "Ely Mulyadi",
    "Lukie Perdanasari",
    "Akas Bagus Setiawan",
    "Reza Putra Pradana",
    "Dr. Denny Trias Utomo",
    "Prawidya Destarianto",
    "Moh. Munih Dian Widianta",
    "Syamsul Arifin",
    "Didit Rahmat Hartadi",
    "Elly Antika",
    "Hermawan Arief Putranto",
    "Aji Seto Arifianto",
    "Ratih Ayuninghemi",
    "Bety Etikasari",
    "Trismayanti Dwi Puspitasari",
    "Ery Setiyawan Jullev Atmadji",
    "Mukhamad Angga Gumilang",
    "Choirul Huda",
    "Dia Bitari Mei Yuana",
    "Arvita Agus Kurniasari",
    "Fatimatuzzahra",
    "Khen Dedes",
    "Surateno",
    "Agus Hariyanto",
    "Hariyono Rakhmad",
    "Denny Wijanarko",
    "Bekti Maryuni Susanto",
    "Victor Phoa",
    "I Gede Wiryawan",
    "Shabrina Choirunnisa",
    "Ahmad Fahriyannur Rosyady",
    "Afis Asryullah Pratama",
    "Mochammad Rifki Ulil Albaab"
]
# Collect unique lowercase tokens, stripping the "dr." and "moh."
# title/abbreviation prefixes before splitting each name into words.
tokens = set()
for name in names:
    cleaned_name = name.lower().replace("dr.", "").replace("moh.", "")
    for word in cleaned_name.split():
        tokens.add(word)
# Write the sorted tokens to disk, one per line.
save_path = r"D:\lecturertask\cleaner_tokens.txt"
os.makedirs(os.path.dirname(save_path), exist_ok=True)
with open(save_path, "w", encoding="utf-8") as f:
    for token in sorted(tokens):
        f.write(token + "\n")
print(f"Name tokens saved to {save_path}")