221 lines
8.3 KiB
Python
221 lines
8.3 KiB
Python
import pandas as pd
|
|
import random
|
|
|
|
# Major data and recommendations (unchanged).
# Each key is a lowercase major name; its value holds two lists:
#   "keterampilan" - skills associated with the major
#   "minat"        - interest categories associated with the major
jurusan_data = {
    "teknik informatika": {
        "keterampilan": [
            "spreadsheet",
            "python",
            "analisis data",
            "visualisasi data",
            "pengujian perangkat lunak",
            "dokumentasi kasus uji",
            "manajemen waktu",
            "html",
            "css",
            "javascript",
            "java",
            "node.js",
            "sql",
            "devops",
            "aws",
            "azure",
            "react",
            "angular",
            "statistik",
            "machine learning",
            "cloud operation",
            "ruby",
            "database",
            "desain",
            "pemrograman",
            "keamanan digital",
            "troubleshooting",
            "kotlin",
            "swift",
            "objective-c",
            "r",
            "preprocessing data",
            "penyebaran model",
            "manajemen proyek",
            "pemahaman operasi bisnis",
            "unity",
            "unreal engine",
            "c#",
            "c++",
        ],
        "minat": [
            "investigative (analitis)",
            "conventional (terstruktur)",
            "artistic (kreatif)",
            "realistic (praktis, teknis)",
            "enterprising (memimpin)",
            "social (sosial)",
        ],
    },
    "sistem informasi": {
        "keterampilan": [
            "spreadsheet",
            "python",
            "analisis data",
            "visualisasi data",
            "database",
            "sql",
            "manajemen proyek",
            "pemahaman operasi bisnis",
            "administrasi jaringan",
            "troubleshooting",
            "dukungan teknis perangkat keras dan perangkat lunak",
        ],
        "minat": [
            "investigative (analitis)",
            "conventional (terstruktur)",
            "realistic (praktis, teknis)",
            "social (sosial)",
        ],
    },
    "matematika": {
        "keterampilan": [
            "spreadsheet",
            "python",
            "analisis data",
            "visualisasi data",
            "statistik",
            "machine learning",
            "r",
        ],
        "minat": ["investigative (analitis)"],
    },
    "teknik komputer": {
        "keterampilan": [
            "html",
            "css",
            "javascript",
            "python",
            "java",
            "node.js",
            "sql",
            "devops",
            "aws",
            "azure",
            "cloud operation",
            "ruby",
            "pemrograman",
            "keamanan digital",
            "troubleshooting",
            "kotlin",
            "swift",
            "objective-c",
            "administrasi jaringan",
        ],
        "minat": [
            "conventional (terstruktur)",
            "artistic (kreatif)",
            "realistic (praktis, teknis)",
            "enterprising (memimpin)",
        ],
    },
    "desain komunikasi visual": {
        "keterampilan": [
            "html",
            "css",
            "javascript",
            "react",
            "angular",
            "desain",
        ],
        "minat": ["artistic (kreatif)"],
    },
    "statistik": {
        "keterampilan": [
            "statistik",
            "machine learning",
            "python",
            "r",
        ],
        "minat": ["investigative (analitis)"],
    },
    "ilmu komputer": {
        "keterampilan": [
            "database",
            "sql",
        ],
        "minat": ["investigative (analitis)"],
    },
}
|
|
|
|
# Majors whose recommendation does not depend on the IPK.
_FIXED_RECOMMENDATIONS = {
    "matematika": ("Data Scientist", "Statistician", "Data Analyst"),
    "desain komunikasi visual": ("UI/UX Designer", "Graphic Designer", "Web Designer"),
    "teknik komputer": ("System Administrator", "Network Engineer", "DevOps Engineer"),
    "statistik": ("Data Analyst", "Statistician", "Research Analyst"),
}

# Used for every major without a dedicated rule.
_FALLBACK_RECOMMENDATIONS = ("Database Administrator", "Data Analyst", "System Analyst")

# Majors whose recommendation is tiered by IPK.
_IPK_TIERED_MAJORS = ("teknik informatika", "sistem informasi")


def get_recommendation(ipk, jurusan):
    """Return the recommended job titles for a student.

    Args:
        ipk: Grade point average (IPK). Only consulted for the
            "teknik informatika" and "sistem informasi" majors.
        jurusan: Name of the major. Matching ignores letter case, leading and
            trailing whitespace, and repeated inner whitespace.

    Returns:
        A new list of three job-title strings. Any major without a dedicated
        rule (for example "ilmu komputer", or a missing value) receives the
        fallback list.
    """
    # Values read from a CSV may carry stray spaces, and a missing cell shows
    # up as a float NaN. Going through str() and split()/join() keeps such
    # rows from crashing or from silently missing their rule.
    jurusan_key = " ".join(str(jurusan).split()).lower()

    if jurusan_key in _IPK_TIERED_MAJORS:
        if ipk >= 3.7:
            return ["Data Scientist", "Machine Learning Engineer", "Software Engineer"]
        if ipk >= 3.5:
            return ["Data Analyst", "Web Developer", "Software Engineer"]
        # Everything below 3.5 lands here; a NaN IPK does too, because it
        # fails both comparisons above.
        return ["Junior Developer", "IT Support", "System Administrator"]

    # Hand back a fresh list so callers may mutate it without affecting the
    # shared lookup table.
    return list(_FIXED_RECOMMENDATIONS.get(jurusan_key, _FALLBACK_RECOMMENDATIONS))
|
|
|
|
def check_hit(target, recommendations):
    """Tell whether the target job matches any of the recommendations.

    A match is a case-insensitive substring relation in either direction
    (the target contains a recommendation, or a recommendation contains the
    target), after trimming surrounding whitespace.

    Args:
        target: The job title to look for. A blank or non-string value
            (for example a NaN from a missing CSV cell) never matches.
        recommendations: Iterable of recommended job titles. Blank or
            non-string entries are ignored.

    Returns:
        True if at least one recommendation matches, otherwise False.
    """
    if not isinstance(target, str):
        return False

    target_key = target.strip().lower()
    # An empty string is a substring of everything, so without this guard a
    # missing target would be counted as a hit.
    if not target_key:
        return False

    for rec in recommendations:
        if not isinstance(rec, str):
            continue
        rec_key = rec.strip().lower()
        # The same empty-substring trap applies to the recommendation side.
        if rec_key and (target_key in rec_key or rec_key in target_key):
            return True
    return False
|
|
|
|
# Load CSV data
try:
    df = pd.read_csv('student.csv')
except FileNotFoundError:
    print("File student.csv tidak ditemukan!")
    # Stop with a non-zero status so callers can detect the failure. The bare
    # exit() helper is injected by the site module (not always available) and,
    # called without arguments, reports success.
    raise SystemExit(1)
else:
    # Only the read itself is guarded; the success message runs afterwards.
    print(f"Data berhasil dimuat: {len(df)} records")
|
|
|
|
# Process data
uji_coba = []
total = len(df)
hit = 0

# One layout shared by the table header and every data row.
_ROW_FORMAT = "{:<3} {:<5} {:<5} {:<20} {:<20} {:<25} {:<35} {:<18} {:<4}"


def _cell_text(value):
    """Return a CSV cell as text; a missing cell (NaN/None) becomes ""."""
    return "" if pd.isna(value) else str(value)


def _shorten(text, width):
    """Fit text into a column of `width` characters, ending in "..." when cut."""
    if len(text) <= width:
        return text
    return text[:width - 3] + "..."


print("=" * 120)
print("PENGUJIAN HIT RATE SISTEM IPKMATCHER - MENGGUNAKAN DATASET CSV")
print("=" * 120)
print()

print(_ROW_FORMAT.format(
    "No", "NIM", "IPK", "Jurusan", "Keterampilan", "Minat", "Rekomendasi Sistem", "Target", "Hit"))
print("-" * 120)

# Number rows by position rather than by index label, so the "No" column is
# 1..n even if the DataFrame index is not the default 0..n-1.
for row_number, (_, row) in enumerate(df.iterrows(), start=1):
    nim = row['NIM']
    ipk = row['IPK']
    # Text columns go through _cell_text so an empty cell (read as NaN, a
    # float) cannot break .lower(), len() or slicing further down.
    jurusan = _cell_text(row['Jurusan'])
    target = _cell_text(row['dream job'])
    keterampilan = _cell_text(row['keterampilan'])
    minat = _cell_text(row['minat'])

    # Generate recommendations
    rekomendasi = get_recommendation(ipk, jurusan)

    # Check hit. A row without a dream job cannot be a hit; checking here
    # avoids handing a blank target to the substring matcher.
    is_hit = bool(target.strip()) and check_hit(target, rekomendasi)
    if is_hit:
        hit += 1

    # Store for analysis
    uji_coba.append({
        "nim": nim,
        "ipk": ipk,
        "jurusan": jurusan,
        "keterampilan": keterampilan,
        "minat": minat,
        "rekomendasi": rekomendasi,
        "target": target,
        "hit": is_hit
    })

    # Format for display: each value is cut to its column width.
    rekomendasi_text = ", ".join(rekomendasi)
    print(_ROW_FORMAT.format(
        row_number,
        nim,
        ipk,
        _shorten(jurusan, 20),
        _shorten(keterampilan, 20),
        _shorten(minat, 25),
        _shorten(rekomendasi_text, 35),
        _shorten(target, 18),
        "✓" if is_hit else "✗"))
|
|
|
|
# Calculate hit rate
# A CSV with a header but no rows gives total == 0; report a rate of 0
# instead of failing with ZeroDivisionError.
hit_rate = hit / total if total else 0.0
print("-" * 120)
print("HASIL PENGUJIAN:")
print(f"Total Hit: {hit} dari {total} kasus")
print(f"Hit Rate: {hit_rate:.3f} ({hit_rate * 100:.1f}%)")
print()
|
|
|
|
# Analysis per major
print("ANALISIS PER JURUSAN:")
print("-" * 50)

# For every major label exactly as stored in the test cases, count how many
# cases were seen ("total") and how many of them were hits ("hit").
jurusan_stats = {}
for record in uji_coba:
    counters = jurusan_stats.setdefault(record["jurusan"], {"total": 0, "hit": 0})
    counters["total"] += 1
    counters["hit"] += record["hit"]  # a True hit adds 1, a False one adds 0

# One line per major, in first-seen order: hits/total and the percentage.
for major, counters in jurusan_stats.items():
    rate = counters["hit"] / counters["total"]
    print(f"{major:<25}: {counters['hit']}/{counters['total']} ({rate*100:.1f}%)")
|
|
|
|
# Analysis per IPK range
print("\nANALISIS PER RENTANG IPK:")
print("-" * 50)
ipk_ranges = {
    "3.8-4.0": {"min": 3.8, "max": 4.0, "total": 0, "hit": 0},
    "3.5-3.79": {"min": 3.5, "max": 3.79, "total": 0, "hit": 0},
    "3.0-3.49": {"min": 3.0, "max": 3.49, "total": 0, "hit": 0},
    "<3.0": {"min": 0, "max": 2.99, "total": 0, "hit": 0}
}

# The "max" values leave small gaps between neighbouring ranges (for example
# 3.79 < IPK < 3.8), so an IPK such as 3.795 would match no range at all.
# Bucketing therefore uses only the lower bounds, checked from the highest
# range downwards: each range runs from its own "min" up to the next "min".
ranges_high_to_low = sorted(
    ipk_ranges.values(), key=lambda info: info["min"], reverse=True)
scale_min = min(info["min"] for info in ipk_ranges.values())
scale_max = max(info["max"] for info in ipk_ranges.values())

for case in uji_coba:
    ipk = case["ipk"]
    # Values outside the overall scale stay uncounted, as before. A NaN IPK
    # fails this comparison and is skipped as well.
    if not (scale_min <= ipk <= scale_max):
        continue
    for range_info in ranges_high_to_low:
        if ipk >= range_info["min"]:
            range_info["total"] += 1
            range_info["hit"] += case["hit"]
            break

# Ranges without any case are left out of the report.
for range_name, stats in ipk_ranges.items():
    if stats["total"] > 0:
        rate = stats["hit"] / stats["total"]
        print(f"IPK {range_name:<10}: {stats['hit']}/{stats['total']} ({rate*100:.1f}%)")

print("=" * 120)
|
|
|
|
# Detailed results (disabled; uncomment to print every test case)
# print("\nDETAIL LENGKAP HASIL PENGUJIAN:")
# print("=" * 80)
# for i, case in enumerate(uji_coba, 1):
#     print(f"\nTest Case {i}:")
#     print(f" NIM: {case['nim']}")
#     print(f" IPK: {case['ipk']}")
#     print(f" Jurusan: {case['jurusan']}")
#     print(f" Keterampilan: {case['keterampilan']}")
#     print(f" Minat: {case['minat']}")
#     print(f" Rekomendasi: {', '.join(case['rekomendasi'])}")
#     print(f" Target (Dream Job): {case['target']}")
#     print(f" Status: {'✓ HIT' if case['hit'] else '✗ MISS'}")
#     print(f" {'-'*40}")
|
|
|
|
# Summary of missed predictions
print("\nANALISIS PREDIKSI YANG MELESET:")
print("=" * 80)

# Every test case whose target did not match any recommendation.
missed_cases = [entry for entry in uji_coba if not entry['hit']]
print(f"Total kasus yang meleset: {len(missed_cases)}")

if missed_cases:
    print("\nContoh kasus yang meleset:")
    # Only the first 10 misses are listed as examples.
    for number, entry in enumerate(missed_cases[:10], start=1):
        missed_nim = entry['nim']
        wanted = entry['target']
        offered = entry['rekomendasi']
        print(f"{number}. NIM {missed_nim}: Target '{wanted}' vs Rekomendasi {offered}")