# TIF_NGANJUK_E41210983/hit_rate_119.py
#
# 221 lines
# 8.3 KiB
# Python

import pandas as pd
import random
# Major data and recommendation reference (kept unchanged).
# Maps each lowercase major name to the skills ("keterampilan") and the
# interest types ("minat") listed for that major.
jurusan_data = {
    "teknik informatika": {
        "keterampilan": [
            "spreadsheet",
            "python",
            "analisis data",
            "visualisasi data",
            "pengujian perangkat lunak",
            "dokumentasi kasus uji",
            "manajemen waktu",
            "html",
            "css",
            "javascript",
            "java",
            "node.js",
            "sql",
            "devops",
            "aws",
            "azure",
            "react",
            "angular",
            "statistik",
            "machine learning",
            "cloud operation",
            "ruby",
            "database",
            "desain",
            "pemrograman",
            "keamanan digital",
            "troubleshooting",
            "kotlin",
            "swift",
            "objective-c",
            "r",
            "preprocessing data",
            "penyebaran model",
            "manajemen proyek",
            "pemahaman operasi bisnis",
            "unity",
            "unreal engine",
            "c#",
            "c++",
        ],
        "minat": [
            "investigative (analitis)",
            "conventional (terstruktur)",
            "artistic (kreatif)",
            "realistic (praktis, teknis)",
            "enterprising (memimpin)",
            "social (sosial)",
        ],
    },
    "sistem informasi": {
        "keterampilan": [
            "spreadsheet",
            "python",
            "analisis data",
            "visualisasi data",
            "database",
            "sql",
            "manajemen proyek",
            "pemahaman operasi bisnis",
            "administrasi jaringan",
            "troubleshooting",
            "dukungan teknis perangkat keras dan perangkat lunak",
        ],
        "minat": [
            "investigative (analitis)",
            "conventional (terstruktur)",
            "realistic (praktis, teknis)",
            "social (sosial)",
        ],
    },
    "matematika": {
        "keterampilan": [
            "spreadsheet",
            "python",
            "analisis data",
            "visualisasi data",
            "statistik",
            "machine learning",
            "r",
        ],
        "minat": ["investigative (analitis)"],
    },
    "teknik komputer": {
        "keterampilan": [
            "html",
            "css",
            "javascript",
            "python",
            "java",
            "node.js",
            "sql",
            "devops",
            "aws",
            "azure",
            "cloud operation",
            "ruby",
            "pemrograman",
            "keamanan digital",
            "troubleshooting",
            "kotlin",
            "swift",
            "objective-c",
            "administrasi jaringan",
        ],
        "minat": [
            "conventional (terstruktur)",
            "artistic (kreatif)",
            "realistic (praktis, teknis)",
            "enterprising (memimpin)",
        ],
    },
    "desain komunikasi visual": {
        "keterampilan": [
            "html",
            "css",
            "javascript",
            "react",
            "angular",
            "desain",
        ],
        "minat": ["artistic (kreatif)"],
    },
    "statistik": {
        "keterampilan": ["statistik", "machine learning", "python", "r"],
        "minat": ["investigative (analitis)"],
    },
    "ilmu komputer": {
        "keterampilan": ["database", "sql"],
        "minat": ["investigative (analitis)"],
    },
}
def get_recommendation(ipk, jurusan):
    """Return the three recommended job titles for a student.

    Args:
        ipk: The student's GPA. Only consulted for the "teknik informatika"
            and "sistem informasi" majors, where it selects one of three
            tiers (>= 3.7, >= 3.5, otherwise).
        jurusan: The student's major. Matching ignores letter case and
            leading/trailing whitespace, so values such as
            " Teknik Informatika " are recognised.

    Returns:
        A new list of three job-title strings. Majors without a dedicated
        entry get the generic database/analyst list.
    """
    # Trim as well as lower-case: a stray space around the major would
    # otherwise silently fall through to the generic default list.
    jurusan_key = jurusan.strip().lower()

    # These two majors are the only ones whose recommendation depends on IPK.
    if jurusan_key in ("teknik informatika", "sistem informasi"):
        if ipk >= 3.7:
            return ["Data Scientist", "Machine Learning Engineer", "Software Engineer"]
        if ipk >= 3.5:
            return ["Data Analyst", "Web Developer", "Software Engineer"]
        return ["Junior Developer", "IT Support", "System Administrator"]

    # Fixed recommendations for the remaining known majors.
    fixed_recommendations = {
        "matematika": ("Data Scientist", "Statistician", "Data Analyst"),
        "desain komunikasi visual": ("UI/UX Designer", "Graphic Designer", "Web Designer"),
        "teknik komputer": ("System Administrator", "Network Engineer", "DevOps Engineer"),
        "statistik": ("Data Analyst", "Statistician", "Research Analyst"),
    }
    default_recommendations = ("Database Administrator", "Data Analyst", "System Analyst")

    # Convert to a list so every caller receives its own mutable copy.
    return list(fixed_recommendations.get(jurusan_key, default_recommendations))
def check_hit(target, recommendations):
    """Report whether the target job matches any recommended job.

    Two titles match when, after trimming whitespace and lower-casing, one
    is a substring of the other (e.g. "Senior Software Engineer" matches
    "Software Engineer").

    Args:
        target: The job title the student is aiming for. A non-string value
            (for example the NaN pandas yields for an empty cell) or a
            blank string is treated as "no target" and never matches.
        recommendations: Iterable of recommended job-title strings.

    Returns:
        True if at least one recommendation matches the target, else False.
    """
    if not isinstance(target, str):
        return False

    wanted = target.strip().lower()
    # An empty string is a substring of every string, so without this guard
    # a blank target would be counted as a hit against any recommendation.
    if not wanted:
        return False

    candidates = (rec.strip().lower() for rec in recommendations)
    # Blank recommendations are skipped for the same substring reason.
    return any(
        bool(candidate) and (wanted in candidate or candidate in wanted)
        for candidate in candidates
    )
# Column layout shared by the header line and every row of the results table.
ROW_FORMAT = "{:<3} {:<5} {:<5} {:<20} {:<20} {:<25} {:<35} {:<18} {:<4}"


def _shorten(value, width):
    """Return str(value) limited to `width` characters, ending in "..." when cut."""
    # str() keeps the table printable when a cell is not a string
    # (e.g. the NaN pandas produces for an empty CSV cell).
    text = str(value)
    if len(text) > width:
        return text[:width - 3] + "..."
    return text


# Load CSV data
try:
    df = pd.read_csv('student.csv')
except FileNotFoundError:
    print("File student.csv tidak ditemukan!")
    # Signal failure with a non-zero status; the bare exit() helper comes from
    # the `site` module and would have reported success (status 0).
    raise SystemExit(1) from None
print(f"Data berhasil dimuat: {len(df)} records")

# Process data
uji_coba = []
total = len(df)
hit = 0
print("=" * 120)
print("PENGUJIAN HIT RATE SISTEM IPKMATCHER - MENGGUNAKAN DATASET CSV")
print("=" * 120)
print()
print(ROW_FORMAT.format(
    "No", "NIM", "IPK", "Jurusan", "Keterampilan", "Minat", "Rekomendasi Sistem", "Target", "Hit"))
print("-" * 120)

# Number rows with enumerate so the "No" column does not depend on the
# DataFrame index being a 0-based integer range.
for row_number, (_, row) in enumerate(df.iterrows(), start=1):
    nim = row['NIM']
    ipk = row['IPK']
    jurusan = row['Jurusan']
    target = row['dream job']
    keterampilan = row['keterampilan']
    minat = row['minat']

    # Generate recommendations and check whether the dream job is among them
    rekomendasi = get_recommendation(ipk, jurusan)
    is_hit = check_hit(target, rekomendasi)
    if is_hit:
        hit += 1

    # Store for the per-major / per-IPK / missed-case analysis below
    uji_coba.append({
        "nim": nim,
        "ipk": ipk,
        "jurusan": jurusan,
        "keterampilan": keterampilan,
        "minat": minat,
        "rekomendasi": rekomendasi,
        "target": target,
        "hit": is_hit
    })

    # Each value is cut to its column width so the table stays aligned.
    print(ROW_FORMAT.format(
        row_number,
        nim,
        ipk,
        _shorten(jurusan, 20),
        _shorten(keterampilan, 20),
        _shorten(minat, 25),
        _shorten(", ".join(rekomendasi), 35),
        _shorten(target, 18),
        # The markers were empty strings on both branches, leaving the
        # "Hit" column blank for every row.
        "✓" if is_hit else "✗"))

# Calculate hit rate; an empty dataset yields 0.0 instead of dividing by zero
hit_rate = hit / total if total else 0.0
print("-" * 120)
print("HASIL PENGUJIAN:")
print(f"Total Hit: {hit} dari {total} kasus")
print(f"Hit Rate: {hit_rate:.3f} ({hit_rate * 100:.1f}%)")
print()

# Analysis per major (grouped by the major exactly as written in the CSV)
print("ANALISIS PER JURUSAN:")
print("-" * 50)
jurusan_stats = {}
for case in uji_coba:
    stats = jurusan_stats.setdefault(case["jurusan"], {"total": 0, "hit": 0})
    stats["total"] += 1
    stats["hit"] += case["hit"]
for jurusan, stats in jurusan_stats.items():
    rate = stats["hit"] / stats["total"]
    print(f"{jurusan:<25}: {stats['hit']}/{stats['total']} ({rate*100:.1f}%)")

# Analysis per IPK range
print("\nANALISIS PER RENTANG IPK:")
print("-" * 50)
# Buckets are listed from highest to lowest and matched on their inclusive
# lower bound only, so they are contiguous: values such as 3.495 or 3.795 can
# no longer fall into a gap between two closed two-decimal intervals.
ipk_ranges = {
    "3.8-4.0": {"min": 3.8, "total": 0, "hit": 0},
    "3.5-3.79": {"min": 3.5, "total": 0, "hit": 0},
    "3.0-3.49": {"min": 3.0, "total": 0, "hit": 0},
    "<3.0": {"min": 0, "total": 0, "hit": 0}
}
for case in uji_coba:
    ipk = case["ipk"]
    # Values outside the 0-4.0 scale (including NaN, for which every
    # comparison is False) belong to no bucket.
    if not 0 <= ipk <= 4.0:
        continue
    for range_info in ipk_ranges.values():
        if ipk >= range_info["min"]:
            range_info["total"] += 1
            range_info["hit"] += case["hit"]
            break
for range_name, stats in ipk_ranges.items():
    if stats["total"] > 0:
        rate = stats["hit"] / stats["total"]
        print(f"IPK {range_name:<10}: {stats['hit']}/{stats['total']} ({rate*100:.1f}%)")
print("=" * 120)

# Optional detailed per-case dump (disabled); uncomment to enable.
# print("\nDETAIL LENGKAP HASIL PENGUJIAN:")
# print("=" * 80)
# for i, case in enumerate(uji_coba, 1):
#     print(f"\nTest Case {i}:")
#     print(f" NIM: {case['nim']}")
#     print(f" IPK: {case['ipk']}")
#     print(f" Jurusan: {case['jurusan']}")
#     print(f" Keterampilan: {case['keterampilan']}")
#     print(f" Minat: {case['minat']}")
#     print(f" Rekomendasi: {', '.join(case['rekomendasi'])}")
#     print(f" Target (Dream Job): {case['target']}")
#     print(f" Status: {'✓ HIT' if case['hit'] else '✗ MISS'}")
#     print(f" {'-'*40}")

# Summary of missed predictions
print("\nANALISIS PREDIKSI YANG MELESET:")
print("=" * 80)
missed_cases = [case for case in uji_coba if not case['hit']]
print(f"Total kasus yang meleset: {len(missed_cases)}")
if missed_cases:
    print("\nContoh kasus yang meleset:")
    # Show only the first 10 missed cases
    for i, case in enumerate(missed_cases[:10], 1):
        print(f"{i}. NIM {case['nim']}: Target '{case['target']}' vs Rekomendasi {case['rekomendasi']}")