class JobRecommendationSystem:
    """Recommend jobs by majority vote among the most similar student profiles.

    A profile consists of the columns ``IPK``, ``keterampilan`` (skill),
    ``minat`` (interest) and ``Jurusan`` (major).  Profiles are converted to a
    numeric feature matrix (scaled IPK followed by one-hot encoded categories)
    and compared with cosine similarity against the profiles given to ``fit``.
    """

    def __init__(self):
        # One encoder per categorical column.  handle_unknown='ignore' lets
        # transform() accept categories that were not present during fit.
        self.skill_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        self.interest_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        self.major_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        self.job_encoder = LabelEncoder()
        self.ipk_scaler = MinMaxScaler()
        self.is_fitted = False

    def preprocess_data(self, df):
        """Return a cleaned copy of ``df``; the input frame is not modified.

        Column names are stripped of surrounding whitespace.  The text columns
        ``keterampilan``, ``minat`` and ``Jurusan`` are stripped and
        lower-cased; ``dream job`` is only stripped, so the capitalisation of
        the label is preserved.
        """
        data = df.copy()
        data.columns = data.columns.str.strip()

        for column in ('keterampilan', 'minat', 'Jurusan'):
            data[column] = data[column].str.strip().str.lower()
        data['dream job'] = data['dream job'].str.strip()

        return data

    def create_features(self, data, fit=False):
        """Build the feature matrix for ``data``.

        Args:
            data: Frame with the columns ``IPK``, ``keterampilan``, ``minat``
                and ``Jurusan``.
            fit: When true the scaler/encoders are (re)fitted on ``data``
                before transforming; otherwise the already fitted
                transformers are applied.

        Returns:
            2-D array whose columns are, in order: scaled IPK, skill one-hot,
            interest one-hot, major one-hot.
        """
        # The tuple order defines the column order of the resulting matrix.
        transformers = (
            ('IPK', self.ipk_scaler),
            ('keterampilan', self.skill_encoder),
            ('minat', self.interest_encoder),
            ('Jurusan', self.major_encoder),
        )

        parts = []
        for column, transformer in transformers:
            # The transformers expect a 2-D (n_samples, 1) input.
            values = data[column].values.reshape(-1, 1)
            if fit:
                parts.append(transformer.fit_transform(values))
            else:
                parts.append(transformer.transform(values))

        return np.hstack(parts)

    def fit(self, X_train, y_train):
        """Fit the transformers on ``X_train`` and remember the training labels.

        ``y_train`` may be a pandas Series or any other array-like of job names.
        """
        self.train_features = self.create_features(X_train, fit=True)
        self.train_jobs = self.job_encoder.fit_transform(y_train)
        # np.asarray accepts a Series as well as a plain list of labels.
        self.train_job_names = np.asarray(y_train)
        self.is_fitted = True

    def _similarities(self, X_test, k):
        """Validate state and ``k``, then return the test-vs-train similarity matrix."""
        if not self.is_fitted:
            raise ValueError("Model belum di-fit!")
        if k < 1:
            # k == 0 would leave nothing to vote on and a negative k would
            # silently drop neighbours through negative slicing.
            raise ValueError(f"k must be at least 1, got {k}")

        test_features = self.create_features(X_test, fit=False)
        return cosine_similarity(test_features, self.train_features)

    def _ranked_neighbor_jobs(self, sim_scores, k):
        """Return ``(job, votes)`` pairs for the ``k`` most similar training rows.

        Pairs are ordered by descending vote count; jobs with equal votes keep
        the order in which they were met while walking the neighbours from
        most to least similar.
        """
        from collections import Counter

        top_k_indices = np.argsort(sim_scores)[::-1][:k]
        votes = Counter(self.train_job_names[idx] for idx in top_k_indices)
        return votes.most_common()

    def predict(self, X_test, k=3):
        """Predict one job per row of ``X_test``.

        Args:
            X_test: Frame with the same feature columns used in ``fit``.
            k: Number of nearest neighbours that vote (must be >= 1).

        Returns:
            List with the most frequent job among the ``k`` neighbours of each
            test row.

        Raises:
            ValueError: If the model has not been fitted or ``k`` is below 1.
        """
        similarities = self._similarities(X_test, k)
        return [
            self._ranked_neighbor_jobs(sim_scores, k)[0][0]
            for sim_scores in similarities
        ]

    def predict_top_k(self, X_test, k=3, top_jobs=3):
        """Predict up to ``top_jobs`` jobs per row of ``X_test``.

        Args:
            X_test: Frame with the same feature columns used in ``fit``.
            k: Number of nearest neighbours that vote (must be >= 1).
            top_jobs: Maximum number of jobs returned per row.

        Returns:
            List of lists; each inner list holds the jobs of the ``k``
            neighbours ordered by descending vote count, truncated to
            ``top_jobs`` entries.

        Raises:
            ValueError: If the model has not been fitted or ``k`` is below 1.
        """
        similarities = self._similarities(X_test, k)
        return [
            [job for job, _ in self._ranked_neighbor_jobs(sim_scores, k)[:top_jobs]]
            for sim_scores in similarities
        ]
def analyze_results(fold_results):
    """Print a summary of a cross-validation run and return aggregate metrics.

    Args:
        fold_results: List with one dict per fold.  Each dict must provide the
            keys ``fold``, ``exact_accuracy``, ``partial_accuracy``,
            ``top3_accuracy``, ``predictions`` and ``actual`` (the last two
            being equally ordered lists of job names).

    Returns:
        dict with the mean and standard deviation (``np.mean`` / ``np.std``)
        of every accuracy metric under ``mean_<metric>`` / ``std_<metric>``,
        plus the unmodified input list under ``fold_results``.
    """
    # Local import so the function does not depend on the file-level imports.
    from collections import Counter

    labelled_metrics = (
        ('exact_accuracy', 'Exact Accuracy'),
        ('partial_accuracy', 'Partial Accuracy'),
        ('top3_accuracy', 'Top-3 Accuracy'),
    )

    # Compute every statistic once; it is used for printing and for the result.
    stats = {}
    for metric, _ in labelled_metrics:
        values = [result[metric] for result in fold_results]
        stats[metric] = (np.mean(values), np.std(values))

    print("=" * 80)
    # The fold count is derived from the input instead of being fixed at 5.
    print(f"RINGKASAN HASIL {len(fold_results)}-FOLD CROSS-VALIDATION")
    print("=" * 80)

    print("\nRata-rata Metrics:")
    for metric, label in labelled_metrics:
        mean, std = stats[metric]
        print(f"{label}: {mean:.3f} ± {std:.3f}")

    print("\nDetail per Fold:")
    print(f"{'Fold':<6} {'Exact':<8} {'Partial':<8} {'Top-3':<8}")
    print("-" * 35)
    for result in fold_results:
        print(f"{result['fold']:<6} {result['exact_accuracy']:.3f} {result['partial_accuracy']:.3f} {result['top3_accuracy']:.3f}")

    print("\nAnalisis Kesalahan Prediksi:")
    all_predictions = [pred for result in fold_results for pred in result['predictions']]
    all_actual = [actual for result in fold_results for actual in result['actual']]

    # Tally every "actual -> predicted" confusion, compared case-insensitively.
    error_counts = Counter(
        f"'{actual}' → '{pred}'"
        for pred, actual in zip(all_predictions, all_actual)
        if pred.lower() != actual.lower()
    )

    print("\nKesalahan Prediksi Terbanyak:")
    for error, count in error_counts.most_common(10):
        print(f" {error}: {count} kali")

    summary = {}
    for metric, (mean, std) in stats.items():
        summary[f'mean_{metric}'] = mean
        summary[f'std_{metric}'] = std
    summary['fold_results'] = fold_results
    return summary
Accuracy: {summary['mean_top3_accuracy']:.1%} ± {summary['std_top3_accuracy']:.1%}") + print() + print("Metode ini menggunakan kesamaan profil mahasiswa (IPK, jurusan, keterampilan, minat)") + print("untuk merekomendasikan pekerjaan berdasarkan tetangga terdekat.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/hit_rate_119.py b/hit_rate_119.py new file mode 100644 index 0000000..6f2808d --- /dev/null +++ b/hit_rate_119.py @@ -0,0 +1,221 @@ +import pandas as pd +import random + +# Data jurusan dan rekomendasi (tetap sama) +jurusan_data = { + "teknik informatika": { + "keterampilan": [ + "spreadsheet", "python", "analisis data", "visualisasi data", + "pengujian perangkat lunak", "dokumentasi kasus uji", "manajemen waktu", + "html", "css", "javascript", "java", "node.js", "sql", "devops", "aws", "azure", + "react", "angular", "statistik", "machine learning", "cloud operation", "ruby", + "database", "desain", "pemrograman", "keamanan digital", "troubleshooting", + "kotlin", "swift", "objective-c", "r", "preprocessing data", "penyebaran model", + "manajemen proyek", "pemahaman operasi bisnis", "unity", "unreal engine", "c#", "c++" + ], + "minat": [ + "investigative (analitis)", "conventional (terstruktur)", "artistic (kreatif)", + "realistic (praktis, teknis)", "enterprising (memimpin)", "social (sosial)" + ] + }, + "sistem informasi": { + "keterampilan": [ + "spreadsheet", "python", "analisis data", "visualisasi data", + "database", "sql", "manajemen proyek", "pemahaman operasi bisnis", + "administrasi jaringan", "troubleshooting", "dukungan teknis perangkat keras dan perangkat lunak" + ], + "minat": ["investigative (analitis)", "conventional (terstruktur)", "realistic (praktis, teknis)", "social (sosial)"] + }, + "matematika": { + "keterampilan": ["spreadsheet", "python", "analisis data", "visualisasi data", "statistik", "machine learning", "r"], + "minat": ["investigative (analitis)"] + }, + "teknik komputer": { + "keterampilan": [ + 
def get_recommendation(ipk, jurusan):
    """Return the three recommended jobs for a student's IPK and major.

    Args:
        ipk: Grade point average; only used for the majors
            "teknik informatika" and "sistem informasi".
        jurusan: Name of the major.  Matching ignores case and surrounding
            whitespace.  Any value that is not a known major (including a
            non-string such as a missing CSV cell) gets the default list.

    Returns:
        A new list with three job titles.
    """
    # str() keeps a missing (NaN) cell from crashing; strip() makes padded
    # CSV values such as "Teknik Informatika " match their major.
    jurusan_key = str(jurusan).strip().lower()

    if jurusan_key in ("teknik informatika", "sistem informasi"):
        if ipk >= 3.7:
            return ["Data Scientist", "Machine Learning Engineer", "Software Engineer"]
        if ipk >= 3.5:
            return ["Data Analyst", "Web Developer", "Software Engineer"]
        return ["Junior Developer", "IT Support", "System Administrator"]

    fixed_by_major = {
        "matematika": ["Data Scientist", "Statistician", "Data Analyst"],
        "desain komunikasi visual": ["UI/UX Designer", "Graphic Designer", "Web Designer"],
        "teknik komputer": ["System Administrator", "Network Engineer", "DevOps Engineer"],
        "statistik": ["Data Analyst", "Statistician", "Research Analyst"],
    }
    default = ["Database Administrator", "Data Analyst", "System Analyst"]
    return fixed_by_major.get(jurusan_key, default)


def check_hit(target, recommendations):
    """Return True when the target job matches any recommendation.

    Two titles match when, ignoring case and surrounding whitespace, one of
    them contains the other.  An empty or non-string target (for example a
    missing CSV cell) never matches: the empty string is a substring of every
    string and would otherwise be reported as a hit.
    """
    if not isinstance(target, str):
        return False
    target_key = target.strip().lower()
    if not target_key:
        return False

    for rec in recommendations:
        rec_key = rec.strip().lower()
        # Skip empty recommendations for the same substring reason as above.
        if rec_key and (target_key in rec_key or rec_key in target_key):
            return True
    return False
{hit_rate:.3f} ({hit_rate * 100:.1f}%)") +print() + +# Analysis per major +print("ANALISIS PER JURUSAN:") +print("-" * 50) +jurusan_stats = {} +for case in uji_coba: + jurusan = case["jurusan"] + if jurusan not in jurusan_stats: + jurusan_stats[jurusan] = {"total": 0, "hit": 0} + + jurusan_stats[jurusan]["total"] += 1 + jurusan_stats[jurusan]["hit"] += case["hit"] + +for jurusan, stats in jurusan_stats.items(): + rate = stats["hit"] / stats["total"] + print(f"{jurusan:<25}: {stats['hit']}/{stats['total']} ({rate*100:.1f}%)") + +# Analysis per IPK range +print("\nANALISIS PER RENTANG IPK:") +print("-" * 50) +ipk_ranges = { + "3.8-4.0": {"min": 3.8, "max": 4.0, "total": 0, "hit": 0}, + "3.5-3.79": {"min": 3.5, "max": 3.79, "total": 0, "hit": 0}, + "3.0-3.49": {"min": 3.0, "max": 3.49, "total": 0, "hit": 0}, + "<3.0": {"min": 0, "max": 2.99, "total": 0, "hit": 0} +} + +for case in uji_coba: + ipk = case["ipk"] + for range_name, range_info in ipk_ranges.items(): + if range_info["min"] <= ipk <= range_info["max"]: + range_info["total"] += 1 + range_info["hit"] += case["hit"] + break + +for range_name, stats in ipk_ranges.items(): + if stats["total"] > 0: + rate = stats["hit"] / stats["total"] + print(f"IPK {range_name:<10}: {stats['hit']}/{stats['total']} ({rate*100:.1f}%)") + +print("=" * 120) + +# Detailed results +# print("\nDETAIL LENGKAP HASIL PENGUJIAN:") +# print("=" * 80) +# for i, case in enumerate(uji_coba, 1): +# print(f"\nTest Case {i}:") +# print(f" NIM: {case['nim']}") +# print(f" IPK: {case['ipk']}") +# print(f" Jurusan: {case['jurusan']}") +# print(f" Keterampilan: {case['keterampilan']}") +# print(f" Minat: {case['minat']}") +# print(f" Rekomendasi: {', '.join(case['rekomendasi'])}") +# print(f" Target (Dream Job): {case['target']}") +# print(f" Status: {'✓ HIT' if case['hit'] else '✗ MISS'}") +# print(f" {'-'*40}") + +# Summary of missed predictions +print("\nANALISIS PREDIKSI YANG MELESET:") +print("=" * 80) +missed_cases = [case for case in uji_coba 
if not case['hit']] +print(f"Total kasus yang meleset: {len(missed_cases)}") + +if missed_cases: + print("\nContoh kasus yang meleset:") + for i, case in enumerate(missed_cases[:10], 1): # Show first 10 missed cases + print(f"{i}. NIM {case['nim']}: Target '{case['target']}' vs Rekomendasi {case['rekomendasi']}") \ No newline at end of file diff --git a/hit_rate_20.py b/hit_rate_20.py new file mode 100644 index 0000000..e9ae9fc --- /dev/null +++ b/hit_rate_20.py @@ -0,0 +1,188 @@ +import random +jurusan_data = { + "teknik informatika": { + "keterampilan": [ + "spreadsheet", "python", "analisis data", "visualisasi data", + "pengujian perangkat lunak", "dokumentasi kasus uji", "manajemen waktu", + "html", "css", "javascript", "java", "node.js", "sql", "devops", "aws", "azure", + "react", "angular", "statistik", "machine learning", "cloud operation", "ruby", + "database", "desain", "pemrograman", "keamanan digital", "troubleshooting", + "kotlin", "swift", "objective-c", "r", "preprocessing data", "penyebaran model", + "manajemen proyek", "pemahaman operasi bisnis", "unity", "unreal engine", "c#", "c++" + ], + "minat": [ + "investigative (analitis)", "conventional (terstruktur)", "artistic (kreatif)", + "realistic (praktis, teknis)", "enterprising (memimpin)", "social (sosial)" + ] + }, + "sistem informasi": { + "keterampilan": [ + "spreadsheet", "python", "analisis data", "visualisasi data", + "database", "sql", "manajemen proyek", "pemahaman operasi bisnis", + "administrasi jaringan", "troubleshooting", "dukungan teknis perangkat keras dan perangkat lunak" + ], + "minat": ["investigative (analitis)", "conventional (terstruktur)", "realistic (praktis, teknis)", "social (sosial)"] + }, + "matematika": { + "keterampilan": ["spreadsheet", "python", "analisis data", "visualisasi data", "statistik", "machine learning", "r"], + "minat": ["investigative (analitis)"] + }, + "teknik komputer": { + "keterampilan": [ + "html", "css", "javascript", "python", "java", "node.js", 
def get_random_skills_interests(jurusan):
    """Pick one random skill and one random interest for the given major.

    The major is looked up in the module-level ``jurusan_data`` table.  The
    skill is drawn before the interest, so the result is reproducible for a
    fixed ``random`` seed.  A major that is not in the table yields the fixed
    default pair ``("python", "investigative (analitis)")``.
    """
    profile = jurusan_data.get(jurusan)
    if profile is None:
        # Unknown major: fall back to the default pair.
        return "python", "investigative (analitis)"

    chosen_skill = random.choice(profile["keterampilan"])
    chosen_interest = random.choice(profile["minat"])
    return chosen_skill, chosen_interest
"Andi", "ipk": 3.25, "jurusan": "teknik komputer", "target": "System Administrator"}, + {"nama": "Nina", "ipk": 3.65, "jurusan": "statistik", "target": "Data Analyst"}, + {"nama": "Hadi", "ipk": 3.45, "jurusan": "ilmu komputer", "target": "Database Administrator"}, + {"nama": "Tina", "ipk": 3.88, "jurusan": "teknik informatika", "target": "Software Engineer"}, + {"nama": "Bayu", "ipk": 3.55, "jurusan": "sistem informasi", "target": "Business Analyst"}, + {"nama": "Sinta", "ipk": 3.72, "jurusan": "desain komunikasi visual", "target": "UI/UX Designer"}, + {"nama": "Dika", "ipk": 3.33, "jurusan": "teknik komputer", "target": "Network Engineer"}, + {"nama": "Lusi", "ipk": 3.67, "jurusan": "matematika", "target": "Statistician"}, + {"nama": "Yoga", "ipk": 3.81, "jurusan": "teknik informatika", "target": "DevOps Engineer"}, + {"nama": "Eka", "ipk": 3.59, "jurusan": "sistem informasi", "target": "IT Support"} +] + +for case_data in test_cases_data: + keterampilan, minat = get_random_skills_interests(case_data["jurusan"]) + + if case_data["jurusan"] in ["teknik informatika", "sistem informasi"]: + if case_data["ipk"] >= 3.7: + rekomendasi = ["Data Scientist", "Machine Learning Engineer", "Software Engineer"] + elif case_data["ipk"] >= 3.5: + rekomendasi = ["Data Analyst", "Web Developer", "Software Engineer"] + else: + rekomendasi = ["Junior Developer", "IT Support", "System Administrator"] + elif case_data["jurusan"] == "matematika": + rekomendasi = ["Data Scientist", "Statistician", "Data Analyst"] + elif case_data["jurusan"] == "desain komunikasi visual": + rekomendasi = ["UI/UX Designer", "Graphic Designer", "Web Designer"] + elif case_data["jurusan"] == "teknik komputer": + rekomendasi = ["System Administrator", "Network Engineer", "DevOps Engineer"] + elif case_data["jurusan"] == "statistik": + rekomendasi = ["Data Analyst", "Statistician", "Research Analyst"] + else: + rekomendasi = ["Database Administrator", "Data Analyst", "System Analyst"] + + uji_coba.append({ + 
"nama": case_data["nama"], + "ipk": case_data["ipk"], + "jurusan": case_data["jurusan"], + "keterampilan": keterampilan, + "minat": minat, + "rekomendasi": rekomendasi, + "target": case_data["target"] + }) + +total = len(uji_coba) +hit = 0 + +print("=" * 120) +print("PENGUJIAN HIT RATE SISTEM IPKMATCHER") +print("=" * 120) +print() + +print("{:<3} {:<8} {:<5} {:<20} {:<20} {:<25} {:<35} {:<18} {:<4}".format( + "No", "Nama", "IPK", "Jurusan", "Keterampilan", "Minat", "Rekomendasi Sistem", "Target", "Hit")) +print("-" * 120) + +for i, case in enumerate(uji_coba, 1): + nama = case["nama"] + ipk = case["ipk"] + jurusan = case["jurusan"] + keterampilan = case["keterampilan"] + minat = case["minat"] + rekom = case["rekomendasi"] + target = case["target"] + + is_hit = 1 if any(target.lower() in rec.lower() or rec.lower() in target.lower() for rec in rekom) else 0 + hit += is_hit + + jurusan_short = (jurusan[:17] + "...") if len(jurusan) > 20 else jurusan + keterampilan_short = (keterampilan[:17] + "...") if len(keterampilan) > 20 else keterampilan + minat_short = (minat[:22] + "...") if len(minat) > 25 else minat + rekom_short = (", ".join(rekom)[:32] + "...") if len(", ".join(rekom)) > 35 else ", ".join(rekom) + target_short = (target[:15] + "...") if len(target) > 18 else target + + print("{:<3} {:<8} {:<5} {:<20} {:<20} {:<25} {:<35} {:<18} {:<4}".format( + i, nama, ipk, jurusan_short, keterampilan_short, minat_short, rekom_short, target_short, "✓" if is_hit else "✗")) + +hit_rate = hit / total +print("-" * 120) +print(f"HASIL PENGUJIAN:") +print(f"Total Hit: {hit} dari {total} kasus") +print(f"Hit Rate: {hit_rate:.3f} ({hit_rate * 100:.1f}%)") +print() + +print("ANALISIS PER JURUSAN:") +print("-" * 50) +jurusan_stats = {} +for case in uji_coba: + jurusan = case["jurusan"] + if jurusan not in jurusan_stats: + jurusan_stats[jurusan] = {"total": 0, "hit": 0} + + jurusan_stats[jurusan]["total"] += 1 + is_hit = 1 if any(case["target"].lower() in rec.lower() or rec.lower() 
in case["target"].lower() for rec in case["rekomendasi"]) else 0 + jurusan_stats[jurusan]["hit"] += is_hit + +for jurusan, stats in jurusan_stats.items(): + rate = stats["hit"] / stats["total"] + print(f"{jurusan.title():<25}: {stats['hit']}/{stats['total']} ({rate*100:.1f}%)") + +print("=" * 120) + +print("\nDETAIL LENGKAP HASIL PENGUJIAN:") +print("=" * 80) +for i, case in enumerate(uji_coba, 1): + print(f"\nTest Case {i}:") + print(f" Nama: {case['nama']}") + print(f" IPK: {case['ipk']}") + print(f" Jurusan: {case['jurusan'].title()}") + print(f" Keterampilan: {case['keterampilan'].title()}") + print(f" Minat: {case['minat'].title()}") + print(f" Rekomendasi: {', '.join(case['rekomendasi'])}") + print(f" Target: {case['target']}") + is_hit = 1 if any(case["target"].lower() in rec.lower() or rec.lower() in case["target"].lower() for rec in case["rekomendasi"]) else 0 + print(f" Status: {'✓ HIT' if is_hit else '✗ MISS'}") + print(f" {'-'*40}") \ No newline at end of file diff --git a/job_profiles.csv b/job_profiles.csv new file mode 100644 index 0000000..1514563 --- /dev/null +++ b/job_profiles.csv @@ -0,0 +1,21 @@ +dream job,IPK,keterampilan_Administrasi Jaringan,keterampilan_Algoritma Machine Learning,keterampilan_Analisis Data,keterampilan_Azure,keterampilan_C#,keterampilan_C++,keterampilan_CSS,keterampilan_Cloud Operation,keterampilan_Database,keterampilan_Desain,keterampilan_Dokumentasi Kasus Uji,keterampilan_Dukungan Teknis Perangkat Keras dan Perangkat Lunak,keterampilan_HTML,keterampilan_Java,keterampilan_JavaScript,keterampilan_Javascript,keterampilan_Keamanan Digital,keterampilan_Kotlin,keterampilan_Kotlin ,keterampilan_Machine Learning,keterampilan_Manajemen Penyimpanan Database,keterampilan_Manajemen Proyek,keterampilan_Manajemen Waktu,keterampilan_Node.js,keterampilan_Pemahaman Operasi Bisnis,keterampilan_Pemrograman,keterampilan_Pengujian Perangkat Lunak,keterampilan_Preprocessing 
Data,keterampilan_Python,keterampilan_React,keterampilan_Ruby,keterampilan_SQL,keterampilan_Spreadsheet,keterampilan_Statistik,keterampilan_Swift,keterampilan_Troubleshooting,keterampilan_Unity,keterampilan_Unreal Engine,keterampilan_Visualisasi Data,minat_Artistic (Kreatif),minat_Conventional (Terstruktur),minat_Enterprising (Memimpin),minat_Investigative (Analitis),"minat_Realistic (Praktis, Teknis)",minat_Social (Sosial) +Backend Developer,0.5707079106380618,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6666666666666666,0.0,0.16666666666666666,0.16666666666666666,0.0 +Business Intelligence Developer,0.7181107600199566,0.0,0.0,0.16666666666666666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,1.0,0.0,0.0 +Cloud Engineer,0.6885913853317809,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.8333333333333334,0.0 +Cybersecurity Analyst,0.7101557736016407,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.16666666666666666,0.3333333333333333,0.3333333333333333,0.16666666666666666,0.0 +Data Analyst,0.735953054097075,0.0,0.0,0.42857142857142855,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.14285714285714285,0.0,0.0,0.0,0.14285714285714285,0.0,0.0,0.0,0.0,0.0,0.2857142857142857,0.0,0.0,0.0,1.0,0.0,0.0 +Data 
Engineer,0.7873219136315758,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6666666666666666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.5,0.3333333333333333,0.0 +Data Scientist,0.7581905870613669,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +Database Administrator,0.5515383336105103,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0 +DevOps Engineer,0.7336049670159097,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0 +Frontend Developer,0.6585730916347909,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 +Game Developer,0.637091856533067,0.0,0.0,0.0,0.0,0.3333333333333333,0.3333333333333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.16666666666666666,0.0,1.0,0.0,0.0,0.0,0.0,0.0 +IT Project Manager,0.7553356616220409,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.6666666666666666 +IT Support Specialist,0.7002605465934916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 
+Machine Learning Engineer,0.7346859582016739,0.0,0.3333333333333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +Mobile App Developer,0.6177726038028714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.0,0.3333333333333333,0.16666666666666666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 +Network Administrator,0.745689894118299,0.6666666666666666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333333333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0 +Quality Assurance,0.6567104606685512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0 +Software Engineer,0.7200510006097897,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0 +UI/UX Designer,0.6349021564388267,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 +Web Developer,0.6734297910083704,0.0,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.0,0.0,0.0,0.16666666666666666,0.0,0.0,0.6666666666666666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 diff --git a/jobs.xlsx b/jobs.xlsx new file mode 100644 index 0000000..bb465c4 Binary files /dev/null and b/jobs.xlsx differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..67359ca Binary files /dev/null and b/requirements.txt differ diff --git a/server.py 
def load_data(job_file):
    """Load the job catalogue from an Excel file and normalise its text columns.

    Args:
        job_file: Path (or file-like object) passed to ``pandas.read_excel``.

    Returns:
        A DataFrame holding exactly the columns ``id``, ``job``, ``min_ipk``,
        ``job_desc``, ``jurusan``, ``keterampilan`` and ``minat`` (in that
        order), with incomplete rows removed and a fresh ``0..n-1`` index.
        ``jurusan`` keeps only its first comma-separated entry, lower-cased
        and without a trailing ``.``; ``keterampilan`` becomes a list of
        lower-cased skill names; ``minat`` is stripped and lower-cased.

    Raises:
        ValueError: If a required column is missing from the sheet.
        Exception: Any other loading/cleaning error is logged and re-raised.
    """
    expected_job_columns = ['id', 'job', 'min_ipk', 'job_desc', 'jurusan', 'keterampilan', 'minat']
    try:
        jobs = pd.read_excel(job_file)

        # Debug aid: show which columns were found versus expected.
        logger.info(f"Kolom yang ditemukan di {job_file}: {list(jobs.columns)}")
        logger.info(f"Kolom yang diharapkan: {expected_job_columns}")

        # All required columns must exist; their order in the sheet is free.
        missing_columns = [col for col in expected_job_columns if col not in jobs.columns]
        if missing_columns:
            logger.error(f"Kolom yang hilang: {missing_columns}")
            raise ValueError(f"File {job_file} harus memiliki kolom: {expected_job_columns}. Kolom yang hilang: {missing_columns}")

        # Keep only the expected columns (in the expected order) and drop
        # incomplete rows. Renumber afterwards: dropna() leaves gaps in the
        # index, and row labels must equal row positions for any NumPy matrix
        # that is built from, and later indexed alongside, this frame.
        jobs = jobs[expected_job_columns].dropna().reset_index(drop=True)

        jobs['jurusan'] = jobs['jurusan'].str.split(',').str[0].str.strip().str.lower().str.rstrip('.')
        # Skip empty tokens produced by trailing or doubled commas so that ''
        # never becomes a skill name.
        jobs['keterampilan'] = jobs['keterampilan'].str.split(',').apply(
            lambda parts: [skill.strip().lower() for skill in parts if skill.strip()]
        )
        jobs['minat'] = jobs['minat'].str.strip().str.lower()

        logger.info(f"Data jobs berhasil dimuat dan dibersihkan. Shape: {jobs.shape}")
        logger.info(f"Sample data:\n{jobs.head()}")

        return jobs
    except Exception as e:
        logger.error(f"Error memuat data: {e}")
        raise
def preprocess_data(jobs, ipk, jurusan, keterampilan, minat):
    """Encode the job catalogue and one user profile as comparable vectors.

    Every vector is laid out as: one-hot major, multi-hot skills, one-hot
    interest, min-max scaled GPA. The vocabularies are built from the job data
    plus the user's own (lower-cased) values, so the user vector always has
    the same width as a job row.

    Args:
        jobs: DataFrame with ``jurusan``, ``keterampilan`` (iterable of skill
            names per row), ``minat`` and ``min_ipk`` columns.
        ipk: The user's GPA; scaled with the scaler fitted on ``min_ipk``.
        jurusan: The user's major.
        keterampilan: The user's (single) skill.
        minat: The user's interest.

    Returns:
        ``(job_vectors, user_vector, all_jurusan, all_keterampilan,
        all_minat)`` where the last three are the sorted vocabularies.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """

    def build_vocabulary(job_values, user_value):
        # Sorted so that column positions are deterministic.
        return sorted(set(job_values).union([user_value.lower()]))

    def to_one_hot(value, all_values):
        wanted = value.lower()
        return np.array([int(wanted == candidate) for candidate in all_values])

    try:
        # Collect every distinct value, including the user's own.
        major_vocab = build_vocabulary(jobs['jurusan'], jurusan)
        skill_vocab = build_vocabulary(
            (skill for skill_list in jobs['keterampilan'] for skill in skill_list),
            keterampilan,
        )
        interest_vocab = build_vocabulary(jobs['minat'], minat)

        for label, vocab in (
            ('jurusan', major_vocab),
            ('keterampilan', skill_vocab),
            ('minat', interest_vocab),
        ):
            logger.info(f"Total {label} unik: {len(vocab)}")

        # One-hot rows for the single-valued job attributes.
        job_major_rows = np.array([to_one_hot(entry, major_vocab) for entry in jobs['jurusan']])
        job_interest_rows = np.array([to_one_hot(entry, interest_vocab) for entry in jobs['minat']])

        # Multi-hot rows for the skills: a job may list several.
        skill_column = {name: position for position, name in enumerate(skill_vocab)}
        job_skill_rows = np.zeros((len(jobs), len(skill_vocab)))
        for row, skill_list in enumerate(jobs['keterampilan']):
            for skill in skill_list:
                column = skill_column.get(skill)
                if column is not None:
                    job_skill_rows[row, column] = 1

        # Scale GPA with a scaler fitted on the jobs' minimum GPA values.
        scaler = MinMaxScaler()
        min_gpa_column = jobs['min_ipk'].values.reshape(-1, 1)
        job_gpa = scaler.fit_transform(min_gpa_column).flatten()
        user_gpa = scaler.transform(np.array([[ipk]])).flatten()

        # Debug: shapes before stacking.
        for label, block in (
            ('job_jurusan_vectors', job_major_rows),
            ('job_keterampilan_vectors', job_skill_rows),
            ('job_minat_vectors', job_interest_rows),
            ('job_gpa', job_gpa),
        ):
            logger.info(f"Dimensi {label}: {block.shape}")

        # GPA goes back to a column so it stacks next to the 2-D blocks.
        job_vectors = np.hstack([
            job_major_rows,
            job_skill_rows,
            job_interest_rows,
            job_gpa.reshape(-1, 1),
        ])

        user_parts = [
            ('user_jurusan_vector', to_one_hot(jurusan, major_vocab)),
            ('user_keterampilan_vector', to_one_hot(keterampilan, skill_vocab)),
            ('user_minat_vector', to_one_hot(minat, interest_vocab)),
            ('user_gpa', user_gpa),
        ]
        for label, part in user_parts:
            logger.info(f"Dimensi {label}: {part.shape}")

        user_vector = np.hstack([part for _, part in user_parts])

        logger.info(f"Dimensi final job_vectors: {job_vectors.shape}")
        logger.info(f"Dimensi final user_vector: {user_vector.shape}")

        return job_vectors, user_vector, major_vocab, skill_vocab, interest_vocab

    except Exception as e:
        logger.error(f"Error dalam preprocess_data: {e}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise
def recommend_jobs(job_vectors, user_vector, jobs, ipk, top_n=3):
    """Rank the jobs whose minimum GPA the user meets by cosine similarity.

    Args:
        job_vectors: 2-D array with one feature row per row of ``jobs``, in
            the same order.
        user_vector: 1-D feature vector with the same width as a job row.
        jobs: DataFrame of jobs; must contain a ``min_ipk`` column.
        ipk: The user's GPA; jobs with ``min_ipk`` above it are excluded.
        top_n: Maximum number of recommendations to return.

    Returns:
        A copy of the eligible rows, best match first, with an added
        ``similarity_score`` column. When no job is eligible the empty frame
        is returned as-is (without that column).

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        # Select by a positional boolean mask rather than by index labels:
        # job_vectors is a plain NumPy array, so label-based selection picks
        # the wrong rows (or raises IndexError) whenever the DataFrame index
        # is not exactly 0..n-1, e.g. after rows were dropped.
        eligible_mask = np.asarray(jobs['min_ipk'] <= ipk, dtype=bool)
        eligible_jobs = jobs[eligible_mask].copy()
        if eligible_jobs.empty:
            logger.info(f"Tidak ada pekerjaan yang memenuhi syarat untuk IPK {ipk}")
            return eligible_jobs

        eligible_job_vectors = np.asarray(job_vectors)[eligible_mask]

        # cosine_similarity expects 2-D inputs, so make the user a 1-row matrix.
        user_vector_reshaped = user_vector.reshape(1, -1)

        logger.info(f"Dimensi eligible_job_vectors: {eligible_job_vectors.shape}")
        logger.info(f"Dimensi user_vector_reshaped: {user_vector_reshaped.shape}")

        similarity_scores = cosine_similarity(user_vector_reshaped, eligible_job_vectors)[0]
        # Descending order of similarity, truncated to the requested count.
        top_indices = np.argsort(similarity_scores)[::-1][:min(top_n, len(eligible_jobs))]

        # top_indices are positions within eligible_jobs, hence iloc.
        recommendations = eligible_jobs.iloc[top_indices].copy()
        recommendations['similarity_score'] = similarity_scores[top_indices]

        logger.info(f"Rekomendasi untuk IPK {ipk}: {len(recommendations)} pekerjaan ditemukan")
        return recommendations

    except Exception as e:
        logger.error(f"Error dalam recommend_jobs: {e}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise
@app.route('/')
def index():
    """Serve the input form."""
    return render_template('index.html')


@app.route('/recommend', methods=['POST'])
def recommend():
    """Validate the submitted form and render the job recommendations.

    Reads ``nama``, ``ipk``, ``jurusan``, ``keterampilan`` and ``minat`` from
    the POSTed form. Validation failures and any exception re-render
    ``index.html`` with an ``error`` message; on success ``result.html`` is
    rendered with the user's inputs and the recommended jobs.
    """
    try:
        form = request.form

        # Debug: log everything received from the form.
        logger.info("=== DEBUG: Data yang diterima dari form ===")
        for field_name, raw_value in form.items():
            logger.info(f"{field_name}: '{raw_value}' (type: {type(raw_value)}, len: {len(raw_value)})")

        nama = form['nama'].strip()
        ipk_input = form['ipk'].strip()
        # Blank selections fall back to a default value.
        jurusan = form['jurusan'].strip().lower() or 'teknik informatika'
        keterampilan = form['keterampilan'].strip().lower() or 'python'
        minat = form['minat'].strip().lower() or 'investigative (analitis)'

        logger.info("=== DEBUG: Data setelah processing ===")
        for label, shown in (
            ('nama', nama),
            ('ipk_input', ipk_input),
            ('jurusan', jurusan),
            ('keterampilan', keterampilan),
            ('minat', minat),
        ):
            logger.info(f"{label}: '{shown}'")

        # The GPA field is mandatory.
        if not ipk_input:
            logger.error("IPK input kosong!")
            return render_template('index.html', error="IPK tidak boleh kosong!")

        # Accept a decimal comma by turning it into a decimal point.
        ipk_input = ipk_input.replace(',', '.')
        logger.info(f"IPK setelah replace koma: '{ipk_input}'")

        try:
            ipk = float(ipk_input)
        except ValueError as e:
            logger.error(f"Gagal konversi IPK '{ipk_input}' ke float: {e}")
            return render_template('index.html', error=f"IPK harus berupa angka desimal yang valid (contoh: 3.50)! Input: '{ipk_input}'")
        logger.info(f"IPK berhasil dikonversi: {ipk} (type: {type(ipk)})")

        # Range check; written as a negated chained comparison so that NaN is
        # rejected as well.
        if not (0.0 <= ipk <= 4.0):
            logger.warning(f"IPK {ipk} tidak valid, harus antara 0.0 dan 4.0")
            return render_template('index.html', error=f"IPK harus antara 0.0 dan 4.0! IPK Anda: {ipk}")

        # Dropdown validation.
        # NOTE(review): the defaults applied above make all three values
        # non-empty, so this branch looks unreachable — confirm the intent.
        if not all((jurusan, keterampilan, minat)):
            logger.warning("Salah satu dropdown tidak dipilih")
            logger.warning(f"jurusan: '{jurusan}', keterampilan: '{keterampilan}', minat: '{minat}'")
            return render_template('index.html', error="Harap pilih jurusan, keterampilan, dan minat!")

        logger.info("Semua validasi berhasil, memproses rekomendasi...")
        logger.info("Loading jobs.xlsx")
        jobs = load_data('jobs.xlsx')
        logger.info("Preprocessing data")
        vectors = preprocess_data(jobs, ipk, jurusan, keterampilan, minat)
        job_vectors, user_vector = vectors[0], vectors[1]
        logger.info("Generating recommendations")
        recommendations = recommend_jobs(job_vectors, user_vector, jobs, ipk)

        logger.info(f"Recommendations generated: {len(recommendations)} jobs")
        return render_template(
            'result.html',
            user_name=nama,
            user_ipk=ipk,
            user_jurusan=jurusan,
            user_keterampilan=keterampilan,
            user_minat=minat,
            recommendations=recommendations.to_dict(orient='records'),
        )

    except ValueError as e:
        logger.error(f"ValueError: {e}")
        return render_template('index.html', error=f"Terjadi kesalahan pemrosesan data: {str(e)}")
    except FileNotFoundError as e:
        logger.error(f"FileNotFoundError: {e}")
        return render_template('index.html', error="File jobs.xlsx tidak ditemukan!")
    except jinja2.exceptions.TemplateNotFound as e:
        logger.error(f"TemplateNotFound: {e}")
        return render_template('index.html', error=f"Template {e} tidak ditemukan!")
    except jinja2.exceptions.TemplateSyntaxError as e:
        logger.error(f"TemplateSyntaxError: {e}")
        return render_template('index.html', error=f"Kesalahan sintaks di template: {e}")
    except Exception as e:
        import traceback
        logger.error(f"Unexpected error: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return render_template('index.html', error=f"Terjadi kesalahan: {e}")


if __name__ == '__main__':
    # NOTE(review): debug=True enables Flask's interactive debugger — confirm
    # this entry point is only used for local development.
    app.run(debug=True)
Informatika,Quality Assurance,Dokumentasi Kasus Uji,Conventional (Terstruktur) +21,3.76,Teknik Informatika,Data Analyst,Spreadsheet,Investigative (Analitis) +22,3.87,Teknik Informatika,Web Developer,HTML,Artistic (Kreatif) +23,3.69,Teknik Informatika,Software Engineer,Pemrograman,Conventional (Terstruktur) +24,3.69,Teknik Informatika,Mobile App Developer,Kotlin,Artistic (Kreatif) +25,3.61,Teknik Informatika,Frontend Developer,JavaScript,Artistic (Kreatif) +26,3.89,Teknik Informatika,Data Scientist,Python,Investigative (Analitis) +27,3.08,Teknik Informatika,DevOps Engineer,Cloud Operation,"Realistic (Praktis, Teknis)" +28,3.46,Teknik Informatika,UI/UX Designer,Desain,Artistic (Kreatif) +29,3.5,Teknik Informatika,Cybersecurity Analyst,Keamanan Digital,Enterprising (Memimpin) +30,3.21,Teknik Informatika,Backend Developer,Java,Conventional (Terstruktur) +31,3.36,Teknik Informatika,Cloud Engineer,Manajemen Penyimpanan Database,"Realistic (Praktis, Teknis)" +32,3.48,Teknik Informatika,IT Support Specialist,Dukungan Teknis Perangkat Keras dan Perangkat Lunak,Social (Sosial) +33,3.78,Teknik Informatika,Machine Learning Engineer,Preprocessing Data,Investigative (Analitis) +34,3.64,Teknik Informatika,Database Administrator,SQL,"Realistic (Praktis, Teknis)" +35,3.78,Teknik Informatika,Business Intelligence Developer,Pemahaman Operasi Bisnis,Investigative (Analitis) +36,3.2,Teknik Informatika,Game Developer,Unreal Engine,Artistic (Kreatif) +37,3.54,Teknik Informatika,Data Engineer,Database,Investigative (Analitis) +38,3.79,Teknik Informatika,IT Project Manager,Manajemen Proyek,Enterprising (Memimpin) +39,3.53,Teknik Informatika,Network Administrator,Administrasi Jaringan,"Realistic (Praktis, Teknis)" +40,3.78,Teknik Informatika,Quality Assurance,Manajemen Waktu,Conventional (Terstruktur) +41,3.71,Teknik Informatika,Web Developer,CSS,Artistic (Kreatif) +42,3.66,Teknik Informatika,Data Analyst,Analisis Data,Investigative (Analitis) +43,3.58,Teknik Informatika,Software 
Engineer,Pemrograman,Artistic (Kreatif) +44,3.1,Teknik Informatika,Frontend Developer,JavaScript,Artistic (Kreatif) +45,2.93,Teknik Informatika,Mobile App Developer,Swift,Artistic (Kreatif) +46,3.06,Teknik Informatika,Backend Developer,Azure,Conventional (Terstruktur) +47,3.83,Teknik Informatika,DevOps Engineer,Ruby,"Realistic (Praktis, Teknis)" +48,3.63,Teknik Informatika,UI/UX Designer,Desain,Artistic (Kreatif) +49,3.63,Teknik Informatika,Cybersecurity Analyst,Troubleshooting,Investigative (Analitis) +50,3.48,Teknik Informatika,Data Scientist,Statistik,Investigative (Analitis) +51,3.86,Teknik Informatika,Cloud Engineer,Manajemen Penyimpanan Database,"Realistic (Praktis, Teknis)" +52,3.62,Teknik Informatika,Network Administrator,Troubleshooting,"Realistic (Praktis, Teknis)" +53,3.58,Teknik Informatika,Machine Learning Engineer,Machine Learning,Investigative (Analitis) +54,3.5,Teknik Informatika,IT Project Manager,Manajemen Proyek,Social (Sosial) +55,3.13,Teknik Informatika,Business Intelligence Developer,Visualisasi Data,Investigative (Analitis) +56,3.45,Teknik Informatika,Game Developer,C++,Artistic (Kreatif) +57,3.81,Teknik Informatika,Data Engineer,SQL,Conventional (Terstruktur) +58,3.23,Teknik Informatika,IT Support Specialist,Dukungan Teknis Perangkat Keras dan Perangkat Lunak,Social (Sosial) +59,2.97,Teknik Informatika,Database Administrator,Database,"Realistic (Praktis, Teknis)" +60,3.19,Teknik Informatika,Quality Assurance,Pengujian Perangkat Lunak,Conventional (Terstruktur) +61,3.24,Teknik Informatika,Data Analyst,Analisis Data,Investigative (Analitis) +62,3.3,Teknik Informatika,Web Developer,Javascript,Artistic (Kreatif) +63,3.51,Teknik Informatika,Software Engineer,Pemrograman,Artistic (Kreatif) +64,3.77,Teknik Informatika,Data Scientist,Machine Learning,Investigative (Analitis) +65,3.78,Teknik Informatika,Frontend Developer,HTML,Artistic (Kreatif) +66,3.71,Teknik Informatika,Backend Developer,Java,Conventional (Terstruktur) +67,3.74,Teknik 
Informatika,DevOps Engineer,Python,"Realistic (Praktis, Teknis)" +68,3.92,Teknik Informatika,UI/UX Designer,Desain,Artistic (Kreatif) +69,3.95,Teknik Informatika,Cybersecurity Analyst,Troubleshooting,Investigative (Analitis) +70,3.93,Teknik Informatika,Mobile App Developer,Java,Artistic (Kreatif) +71,3.85,Teknik Informatika,Cloud Engineer,Manajemen Penyimpanan Database,"Realistic (Praktis, Teknis)" +72,3.56,Teknik Informatika,Network Administrator,Administrasi Jaringan,"Realistic (Praktis, Teknis)" +73,3.63,Teknik Informatika,Machine Learning Engineer,Python,Investigative (Analitis) +74,3.9,Teknik Informatika,IT Project Manager,Manajemen Proyek,Social (Sosial) +75,3.81,Teknik Informatika,Business Intelligence Developer,Visualisasi Data,Investigative (Analitis) +76,3.55,Teknik Informatika,Game Developer,Unity,Artistic (Kreatif) +77,3.89,Teknik Informatika,Data Engineer,Database,"Realistic (Praktis, Teknis)" +78,3.75,Teknik Informatika,IT Support Specialist,Dukungan Teknis Perangkat Keras dan Perangkat Lunak,Social (Sosial) +79,3.53,Teknik Informatika,Database Administrator,SQL,"Realistic (Praktis, Teknis)" +80,3.8,Teknik Informatika,Quality Assurance,Manajemen Waktu,Conventional (Terstruktur) +81,3.9,Teknik Informatika,Data Analyst,Visualisasi Data,Investigative (Analitis) +82,3.68,Teknik Informatika,Web Developer,Javascript,Artistic (Kreatif) +83,3.71,Teknik Informatika,Software Engineer,Pemrograman,Artistic (Kreatif) +84,3.76,Teknik Informatika,Data Scientist,Machine Learning,Investigative (Analitis) +85,3.56,Teknik Informatika,Frontend Developer,React,Artistic (Kreatif) +86,3.58,Teknik Informatika,Backend Developer,Python,Investigative (Analitis) +87,3.93,Teknik Informatika,DevOps Engineer,Cloud Operation,"Realistic (Praktis, Teknis)" +88,3.44,Teknik Informatika,UI/UX Designer,Desain,Artistic (Kreatif) +89,3.92,Teknik Informatika,Cybersecurity Analyst,Keamanan Digital,"Realistic (Praktis, Teknis)" +90,3.45,Teknik Informatika,Mobile App Developer,Kotlin,Artistic 
(Kreatif) +91,3.62,Teknik Informatika,Cloud Engineer,Manajemen Penyimpanan Database,Conventional (Terstruktur) +92,3.95,Teknik Informatika,Network Administrator,Troubleshooting,"Realistic (Praktis, Teknis)" +93,3.76,Teknik Informatika,Machine Learning Engineer,Python,Investigative (Analitis) +94,3.76,Teknik Informatika,IT Project Manager,Manajemen Proyek,Social (Sosial) +95,3.76,Teknik Informatika,Business Intelligence Developer,Visualisasi Data,Investigative (Analitis) +96,3.92,Teknik Informatika,Game Developer,C#,Artistic (Kreatif) +97,3.65,Teknik Informatika,Data Engineer,Database,"Realistic (Praktis, Teknis)" +98,3.86,Teknik Informatika,IT Support Specialist,Dukungan Teknis Perangkat Keras dan Perangkat Lunak,Social (Sosial) +99,3.81,Teknik Informatika,Database Administrator,SQL,"Realistic (Praktis, Teknis)" +100,3.89,Teknik Informatika,Quality Assurance,Manajemen Waktu,Conventional (Terstruktur) +101,3.54,Teknik Informatika,Data Analyst,Python,Investigative (Analitis) +102,3.5,Teknik Informatika,Web Developer,Javascript,Artistic (Kreatif) +103,3.77,Teknik Informatika,Software Engineer,Pemrograman,Conventional (Terstruktur) +104,3.81,Teknik Informatika,Data Scientist,Statistik,Investigative (Analitis) +105,3.79,Teknik Informatika,Frontend Developer,JavaScript,Artistic (Kreatif) +106,3.6,Teknik Informatika,Backend Developer,Node.js,"Realistic (Praktis, Teknis)" +107,3.69,Teknik Informatika,DevOps Engineer,Ruby,"Realistic (Praktis, Teknis)" +108,3.19,Teknik Informatika,UI/UX Designer,Desain,Artistic (Kreatif) +109,3.29,Teknik Informatika,Cybersecurity Analyst,Keamanan Digital,Conventional (Terstruktur) +110,3.58,Teknik Informatika,Mobile App Developer,Kotlin ,Artistic (Kreatif) +111,3.47,Teknik Informatika,Cloud Engineer,Manajemen Penyimpanan Database,"Realistic (Praktis, Teknis)" +112,3.88,Teknik Informatika,Network Administrator,Administrasi Jaringan,"Realistic (Praktis, Teknis)" +113,3.76,Teknik Informatika,Machine Learning Engineer,Machine 
Learning,Investigative (Analitis) +114,3.57,Teknik Informatika,IT Project Manager,Manajemen Proyek,Enterprising (Memimpin) +115,3.7,Teknik Informatika,Business Intelligence Developer,Pemahaman Operasi Bisnis,Investigative (Analitis) +116,3.78,Teknik Informatika,Game Developer,C++,Artistic (Kreatif) +117,3.82,Teknik Informatika,Data Engineer,SQL,Investigative (Analitis) +118,3.72,Teknik Informatika,IT Support Specialist,Dukungan Teknis Perangkat Keras dan Perangkat Lunak,Social (Sosial) +119,3.78,Teknik Informatika,Data Analyst,Analisis Data,Investigative (Analitis) diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..4b8df72 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,330 @@ + + + + + + IPKMatcher + + + +
+

IPKMatcher

+

Mulai tentukan karier Anda.

+ +
+ + + + + + + + + + + + + + + + +
+ + {% if error %} +
{{ error }}
+ {% endif %} +
+ + + + \ No newline at end of file diff --git a/templates/result.html b/templates/result.html new file mode 100644 index 0000000..1636a8c --- /dev/null +++ b/templates/result.html @@ -0,0 +1,159 @@ + + + + + + Hasil Rekomendasi - IPKMatcher + + + +
+

IPKMatcher

+ + + +
+

Rekomendasi Pekerjaan

+ + {% if recommendations %} + {% for job in recommendations %} +
+
{{ job.job }}
+
{{ job.job_desc }}
+
+ IPK Minimum: {{ job.min_ipk }} | + Jurusan: {{ job.jurusan|title }} | + Keterampilan: {{ job.keterampilan|join(', ')|title }} | + Minat: {{ job.minat|title }} +
+
+ Kesesuaian: {{ "%.1f"|format(job.similarity_score * 100) }}% +
+
+ {% endfor %} + {% else %} +
+

Maaf, tidak ada pekerjaan yang sesuai dengan profil Anda.

+

Cobalah meningkatkan IPK atau mengembangkan keterampilan lain.

+
+ {% endif %} +
+ + Kembali ke Beranda +
+ + \ No newline at end of file