"""Flask application that recommends jobs to a user.

Job postings are read from an Excel sheet, encoded together with the user's
profile (major, skill, interest and GPA) as feature vectors, and ranked by
cosine similarity against the user's vector.
"""
import logging
import traceback

import jinja2
import numpy as np
import pandas as pd
from flask import Flask, render_template, request, redirect, url_for
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)


def load_data(job_file):
    """Load and clean the job postings sheet.

    Args:
        job_file: Path (or file-like object) handed to ``pandas.read_excel``.

    Returns:
        A DataFrame restricted to the expected columns, with rows containing
        missing values removed and a fresh 0..n-1 index. ``jurusan`` holds the
        first comma-separated entry, lower-cased with any trailing '.' removed;
        ``keterampilan`` holds a list of lower-cased skill names; ``minat`` is
        stripped and lower-cased.

    Raises:
        ValueError: If any expected column is missing from the sheet.
        Exception: Any error raised while reading the file is logged and
            re-raised unchanged.
    """
    try:
        jobs = pd.read_excel(job_file)
        expected_job_columns = ['id', 'job', 'min_ipk', 'job_desc', 'jurusan', 'keterampilan', 'minat']

        # Debug: show which columns were found versus expected.
        logger.info(f"Kolom yang ditemukan di {job_file}: {list(jobs.columns)}")
        logger.info(f"Kolom yang diharapkan: {expected_job_columns}")

        # All required columns must be present (column order does not matter).
        missing_columns = [col for col in expected_job_columns if col not in jobs.columns]
        if missing_columns:
            logger.error(f"Kolom yang hilang: {missing_columns}")
            raise ValueError(
                f"File {job_file} harus memiliki kolom: {expected_job_columns}. "
                f"Kolom yang hilang: {missing_columns}"
            )

        # Reorder to the expected layout, discard extra columns and drop
        # incomplete rows. The index is reset so that row labels equal row
        # positions; downstream code pairs rows with a positional NumPy matrix.
        jobs = jobs[expected_job_columns].dropna().reset_index(drop=True)

        jobs['jurusan'] = (
            jobs['jurusan'].str.split(',').str[0].str.strip().str.lower().str.rstrip('.')
        )
        # Skip empty tokens (e.g. from a trailing comma) so that '' never
        # becomes a skill of its own.
        jobs['keterampilan'] = jobs['keterampilan'].str.split(',').apply(
            lambda parts: [skill.strip().lower() for skill in parts if skill.strip()]
        )
        jobs['minat'] = jobs['minat'].str.strip().str.lower()

        logger.info(f"Data jobs berhasil dimuat dan dibersihkan. Shape: {jobs.shape}")
        logger.info(f"Sample data:\n{jobs.head()}")
        return jobs
    except Exception as e:
        logger.error(f"Error memuat data: {e}")
        raise


def preprocess_data(jobs, ipk, jurusan, keterampilan, minat):
    """Encode the jobs and the user profile as comparable feature vectors.

    Each vector is the concatenation of a one-hot major, a multi-hot skill
    set, a one-hot interest and a min-max scaled GPA. The scaler is fitted on
    the jobs' ``min_ipk`` column and then applied to the user's GPA.

    Args:
        jobs: DataFrame as returned by ``load_data``.
        ipk: The user's GPA as a number.
        jurusan: The user's major.
        keterampilan: The user's skill (treated as a single skill name).
        minat: The user's interest.

    Returns:
        A tuple ``(job_vectors, user_vector, all_jurusan, all_keterampilan,
        all_minat)`` where ``job_vectors`` is a 2-D array with one row per job,
        ``user_vector`` is a 1-D array of the same width, and the three lists
        are the sorted vocabularies used for the encoding.

    Raises:
        Exception: Any error is logged with its traceback and re-raised.
    """
    try:
        # Vocabularies: every value seen in the jobs plus the user's own value.
        all_jurusan = sorted(set(jobs['jurusan']).union([jurusan.lower()]))
        all_keterampilan = sorted(
            {skill for skills in jobs['keterampilan'] for skill in skills}.union([keterampilan.lower()])
        )
        all_minat = sorted(set(jobs['minat']).union([minat.lower()]))

        logger.info(f"Total jurusan unik: {len(all_jurusan)}")
        logger.info(f"Total keterampilan unik: {len(all_keterampilan)}")
        logger.info(f"Total minat unik: {len(all_minat)}")

        def to_one_hot(value, all_values):
            """Return a 0/1 vector marking where ``value`` occurs in ``all_values``."""
            return np.array([1 if value.lower() == val else 0 for val in all_values])

        # One-hot vectors for the jobs.
        job_jurusan_vectors = np.array(
            [to_one_hot(job_jurusan, all_jurusan) for job_jurusan in jobs['jurusan']]
        )
        job_minat_vectors = np.array(
            [to_one_hot(job_minat, all_minat) for job_minat in jobs['minat']]
        )

        # Multi-hot skill vectors for the jobs. A dict gives constant-time
        # column lookup; every job skill is in the vocabulary by construction.
        skill_column = {skill: col for col, skill in enumerate(all_keterampilan)}
        job_keterampilan_vectors = np.zeros((len(jobs), len(all_keterampilan)))
        for row, skills in enumerate(jobs['keterampilan']):
            for skill in skills:
                job_keterampilan_vectors[row, skill_column[skill]] = 1

        # Scale the GPA column and the user's GPA with the same scaler.
        scaler = MinMaxScaler()
        job_gpa = scaler.fit_transform(jobs['min_ipk'].values.reshape(-1, 1)).flatten()
        user_gpa = scaler.transform(np.array([[ipk]])).flatten()

        # Debug: dimensions before stacking.
        logger.info(f"Dimensi job_jurusan_vectors: {job_jurusan_vectors.shape}")
        logger.info(f"Dimensi job_keterampilan_vectors: {job_keterampilan_vectors.shape}")
        logger.info(f"Dimensi job_minat_vectors: {job_minat_vectors.shape}")
        logger.info(f"Dimensi job_gpa: {job_gpa.shape}")

        # Combine all job feature groups column-wise.
        job_vectors = np.hstack([
            job_jurusan_vectors,
            job_keterampilan_vectors,
            job_minat_vectors,
            job_gpa.reshape(-1, 1),  # column vector so it stacks with the 2-D groups
        ])

        # Build the user's feature groups with the same vocabularies.
        user_jurusan_vector = to_one_hot(jurusan, all_jurusan)
        user_keterampilan_vector = to_one_hot(keterampilan, all_keterampilan)
        user_minat_vector = to_one_hot(minat, all_minat)

        # Debug: dimensions of the user's feature groups.
        logger.info(f"Dimensi user_jurusan_vector: {user_jurusan_vector.shape}")
        logger.info(f"Dimensi user_keterampilan_vector: {user_keterampilan_vector.shape}")
        logger.info(f"Dimensi user_minat_vector: {user_minat_vector.shape}")
        logger.info(f"Dimensi user_gpa: {user_gpa.shape}")

        # Combine the user's feature groups in the same order as the jobs'.
        user_vector = np.hstack([
            user_jurusan_vector,
            user_keterampilan_vector,
            user_minat_vector,
            user_gpa,
        ])

        logger.info(f"Dimensi final job_vectors: {job_vectors.shape}")
        logger.info(f"Dimensi final user_vector: {user_vector.shape}")

        return job_vectors, user_vector, all_jurusan, all_keterampilan, all_minat
    except Exception as e:
        logger.error(f"Error dalam preprocess_data: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise


def recommend_jobs(job_vectors, user_vector, jobs, ipk, top_n=3):
    """Return the jobs most similar to the user among those they qualify for.

    Args:
        job_vectors: 2-D array whose i-th row encodes the i-th row of ``jobs``.
        user_vector: 1-D array encoding the user, same width as ``job_vectors``.
        jobs: DataFrame of jobs containing a ``min_ipk`` column.
        ipk: The user's GPA; only jobs with ``min_ipk <= ipk`` are considered.
        top_n: Maximum number of recommendations to return.

    Returns:
        A DataFrame with at most ``top_n`` eligible jobs, ordered by descending
        cosine similarity, with an added ``similarity_score`` column. When no
        job is eligible, the empty filtered DataFrame is returned (without the
        ``similarity_score`` column).

    Raises:
        Exception: Any error is logged with its traceback and re-raised.
    """
    try:
        # Select rows by position with a boolean mask. ``job_vectors`` is a
        # plain array, so indexing it with DataFrame index labels is only
        # correct when the labels happen to be 0..n-1.
        eligible_mask = (jobs['min_ipk'] <= ipk).to_numpy()
        eligible_jobs = jobs[eligible_mask].copy()
        if eligible_jobs.empty:
            logger.info(f"Tidak ada pekerjaan yang memenuhi syarat untuk IPK {ipk}")
            return eligible_jobs

        eligible_job_vectors = job_vectors[eligible_mask]

        # cosine_similarity expects 2-D inputs.
        user_vector_reshaped = user_vector.reshape(1, -1)

        logger.info(f"Dimensi eligible_job_vectors: {eligible_job_vectors.shape}")
        logger.info(f"Dimensi user_vector_reshaped: {user_vector_reshaped.shape}")

        similarity_scores = cosine_similarity(user_vector_reshaped, eligible_job_vectors)
        top_indices = np.argsort(similarity_scores[0])[::-1][:min(top_n, len(eligible_jobs))]

        recommendations = eligible_jobs.iloc[top_indices].copy()
        recommendations['similarity_score'] = similarity_scores[0][top_indices]

        logger.info(f"Rekomendasi untuk IPK {ipk}: {len(recommendations)} pekerjaan ditemukan")
        return recommendations
    except Exception as e:
        logger.error(f"Error dalam recommend_jobs: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise


@app.route('/')
def index():
    """Render the input form."""
    return render_template('index.html')


@app.route('/recommend', methods=['POST'])
def recommend():
    """Validate the submitted form and render the job recommendations.

    Reads ``nama``, ``ipk``, ``jurusan``, ``keterampilan`` and ``minat`` from
    the POSTed form. On any validation or processing failure the input form is
    re-rendered with an ``error`` message; otherwise ``result.html`` is
    rendered with the user's data and the recommended jobs.
    """
    try:
        # Debug: log every form field that was received.
        logger.info("=== DEBUG: Data yang diterima dari form ===")
        for key, value in request.form.items():
            logger.info(f"{key}: '{value}' (type: {type(value)}, len: {len(value)})")

        # ``.get`` with a default lets a missing field be handled like an empty
        # one by the validation below instead of raising on the lookup.
        nama = request.form.get('nama', '').strip()
        ipk_input = request.form.get('ipk', '').strip()
        # Empty selections fall back to defaults, so these three values are
        # never empty and need no further presence check.
        jurusan = request.form.get('jurusan', '').strip().lower() or 'teknik informatika'
        keterampilan = request.form.get('keterampilan', '').strip().lower() or 'python'
        minat = request.form.get('minat', '').strip().lower() or 'investigative (analitis)'

        logger.info("=== DEBUG: Data setelah processing ===")
        logger.info(f"nama: '{nama}'")
        logger.info(f"ipk_input: '{ipk_input}'")
        logger.info(f"jurusan: '{jurusan}'")
        logger.info(f"keterampilan: '{keterampilan}'")
        logger.info(f"minat: '{minat}'")

        # The GPA is required.
        if not ipk_input:
            logger.error("IPK input kosong!")
            return render_template('index.html', error="IPK tidak boleh kosong!")

        # Accept a decimal comma by converting it to a decimal point.
        ipk_input = ipk_input.replace(',', '.')
        logger.info(f"IPK setelah replace koma: '{ipk_input}'")

        try:
            ipk = float(ipk_input)
            logger.info(f"IPK berhasil dikonversi: {ipk} (type: {type(ipk)})")
        except ValueError as e:
            logger.error(f"Gagal konversi IPK '{ipk_input}' ke float: {e}")
            return render_template(
                'index.html',
                error=f"IPK harus berupa angka desimal yang valid (contoh: 3.50)! Input: '{ipk_input}'",
            )

        # Range check; this also rejects NaN and infinities, which compare
        # false against the bounds.
        if not (0.0 <= ipk <= 4.0):
            logger.warning(f"IPK {ipk} tidak valid, harus antara 0.0 dan 4.0")
            return render_template(
                'index.html',
                error=f"IPK harus antara 0.0 dan 4.0! IPK Anda: {ipk}",
            )

        logger.info("Semua validasi berhasil, memproses rekomendasi...")

        logger.info("Loading jobs.xlsx")
        jobs = load_data('jobs.xlsx')

        logger.info("Preprocessing data")
        job_vectors, user_vector, _, _, _ = preprocess_data(jobs, ipk, jurusan, keterampilan, minat)

        logger.info("Generating recommendations")
        recommendations = recommend_jobs(job_vectors, user_vector, jobs, ipk)

        logger.info(f"Recommendations generated: {len(recommendations)} jobs")
        return render_template(
            'result.html',
            user_name=nama,
            user_ipk=ipk,
            user_jurusan=jurusan,
            user_keterampilan=keterampilan,
            user_minat=minat,
            recommendations=recommendations.to_dict(orient='records'),
        )
    except ValueError as e:
        logger.error(f"ValueError: {e}")
        return render_template('index.html', error=f"Terjadi kesalahan pemrosesan data: {str(e)}")
    except FileNotFoundError as e:
        logger.error(f"FileNotFoundError: {e}")
        return render_template('index.html', error="File jobs.xlsx tidak ditemukan!")
    except jinja2.exceptions.TemplateNotFound as e:
        logger.error(f"TemplateNotFound: {e}")
        return render_template('index.html', error=f"Template {e} tidak ditemukan!")
    except jinja2.exceptions.TemplateSyntaxError as e:
        logger.error(f"TemplateSyntaxError: {e}")
        return render_template('index.html', error=f"Kesalahan sintaks di template: {e}")
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return render_template('index.html', error=f"Terjadi kesalahan: {e}")


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger, which
    # allows arbitrary code execution; confirm this is never used in a
    # deployment reachable by untrusted clients.
    app.run(debug=True)