240 lines
11 KiB
Python
240 lines
11 KiB
Python
from flask import Flask, render_template, request, redirect, url_for
|
|
import pandas as pd
|
|
import numpy as np
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
from sklearn.preprocessing import MinMaxScaler
|
|
import logging
|
|
import jinja2
|
|
|
|
# Configure the root logger once at import time: INFO and above, each record
# prefixed with a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

# Flask application object; the route decorators below register on it.
app = Flask(__name__)
|
|
|
|
def load_data(job_file):
    """Load the job catalogue from an Excel file and normalise it for matching.

    Args:
        job_file: Path (or file-like object) passed to ``pandas.read_excel``.

    Returns:
        pandas.DataFrame restricted to the expected columns, in this order:
        ``id, job, min_ipk, job_desc, jurusan, keterampilan, minat``.
        Rows with any missing value (or an unparsable ``min_ipk``) are
        dropped and the index is renumbered 0..n-1. Text columns are
        normalised as follows:

        * ``jurusan``: first comma-separated entry, stripped, lower-cased,
          trailing ``.`` removed.
        * ``keterampilan``: list of stripped, lower-cased skills; blank
          entries (e.g. from a trailing comma) are discarded.
        * ``minat``: stripped and lower-cased.

    Raises:
        ValueError: If one or more expected columns are missing.
        Exception: Any error raised while reading or cleaning the file is
            logged and re-raised unchanged.
    """
    expected_job_columns = ['id', 'job', 'min_ipk', 'job_desc', 'jurusan', 'keterampilan', 'minat']

    try:
        jobs = pd.read_excel(job_file)

        logger.info(f"Kolom yang ditemukan di {job_file}: {list(jobs.columns)}")
        logger.info(f"Kolom yang diharapkan: {expected_job_columns}")

        # All required columns must be present; their order in the file is irrelevant.
        missing_columns = [col for col in expected_job_columns if col not in jobs.columns]
        if missing_columns:
            logger.error(f"Kolom yang hilang: {missing_columns}")
            raise ValueError(f"File {job_file} harus memiliki kolom: {expected_job_columns}. Kolom yang hilang: {missing_columns}")

        # Keep only the expected columns, in the expected order. The explicit
        # copy makes the column assignments below write to an independent frame.
        jobs = jobs[expected_job_columns].copy()

        # min_ipk is compared and scaled numerically downstream. Cells stored
        # as text (possibly with a decimal comma such as "3,25") are converted;
        # anything unparsable becomes NaN and is removed by dropna() below.
        min_ipk = jobs['min_ipk']
        if not pd.api.types.is_numeric_dtype(min_ipk):
            min_ipk = min_ipk.astype(str).str.replace(',', '.', regex=False)
        jobs['min_ipk'] = pd.to_numeric(min_ipk, errors='coerce')

        # Renumber the rows after dropping incomplete ones so that index
        # labels and row positions stay identical for downstream consumers.
        rows_before = len(jobs)
        jobs = jobs.dropna().reset_index(drop=True)
        dropped = rows_before - len(jobs)
        if dropped:
            logger.warning(f"{dropped} baris dibuang karena ada nilai kosong atau min_ipk tidak valid")

        # astype(str) protects the .str accessor against non-text cells.
        jobs['jurusan'] = (
            jobs['jurusan'].astype(str)
            .str.split(',').str[0]
            .str.strip().str.lower().str.rstrip('.')
        )
        jobs['keterampilan'] = (
            jobs['keterampilan'].astype(str)
            .str.split(',')
            .apply(lambda parts: [part.strip().lower() for part in parts if part.strip()])
        )
        jobs['minat'] = jobs['minat'].astype(str).str.strip().str.lower()

        logger.info(f"Data jobs berhasil dimuat dan dibersihkan. Shape: {jobs.shape}")
        logger.info(f"Sample data:\n{jobs.head()}")

        return jobs
    except Exception as e:
        logger.error(f"Error memuat data: {e}")
        raise
|
|
|
|
def preprocess_data(jobs, ipk, jurusan, keterampilan, minat):
    """Encode every job and the user profile as comparable feature vectors.

    Each vector is the concatenation of four parts, in this order:
    one-hot ``jurusan`` | multi-hot ``keterampilan`` | one-hot ``minat`` |
    min-max scaled GPA. The vocabularies are the sorted union of the values
    found in ``jobs`` and the user's own values, so the user vector always
    has the same width as the job vectors.

    Args:
        jobs: DataFrame with columns ``jurusan`` (str), ``keterampilan``
            (list of str), ``minat`` (str) and ``min_ipk`` (numeric).
        ipk: The user's GPA.
        jurusan: The user's major.
        keterampilan: The user's skill, or several skills separated by
            commas; each one is matched individually.
        minat: The user's interest.

    Returns:
        Tuple ``(job_vectors, user_vector, all_jurusan, all_keterampilan,
        all_minat)`` where ``job_vectors`` has one row per job,
        ``user_vector`` is 1-D, and the last three items are the sorted
        vocabularies used for the encoding.

    Raises:
        ValueError: If ``jobs`` has no rows.
        Exception: Any other failure is logged with its traceback and
            re-raised unchanged.
    """
    try:
        if len(jobs) == 0:
            raise ValueError("Data pekerjaan kosong; tidak ada pekerjaan yang dapat diproses")

        # Normalise the user's values the same way load_data normalises the
        # job columns so that equal values compare equal.
        user_jurusan = jurusan.strip().lower().rstrip('.')
        user_minat = minat.strip().lower()
        user_skills = {part.strip().lower() for part in keterampilan.split(',') if part.strip()}

        # Vocabularies: every distinct job value plus the user's own values.
        all_jurusan = sorted(set(jobs['jurusan']) | {user_jurusan})
        all_keterampilan = sorted(
            {skill for job_skills in jobs['keterampilan'] for skill in job_skills} | user_skills
        )
        all_minat = sorted(set(jobs['minat']) | {user_minat})

        logger.info(f"Total jurusan unik: {len(all_jurusan)}")
        logger.info(f"Total keterampilan unik: {len(all_keterampilan)}")
        logger.info(f"Total minat unik: {len(all_minat)}")

        def position_map(vocabulary):
            # value -> column position, so lookups are O(1) instead of list.index().
            return {value: position for position, value in enumerate(vocabulary)}

        def one_hot_rows(values, vocabulary):
            # One row per value with a single 1 in the column of its
            # lower-cased form; the row stays all-zero if it is not in the vocabulary.
            positions = position_map(vocabulary)
            matrix = np.zeros((len(values), len(vocabulary)))
            for row, value in enumerate(values):
                column = positions.get(value.lower())
                if column is not None:
                    matrix[row, column] = 1
            return matrix

        def multi_hot_rows(skill_lists, vocabulary):
            # One row per skill list with a 1 in the column of every listed skill.
            positions = position_map(vocabulary)
            matrix = np.zeros((len(skill_lists), len(vocabulary)))
            for row, skills in enumerate(skill_lists):
                for skill in skills:
                    column = positions.get(skill)
                    if column is not None:
                        matrix[row, column] = 1
            return matrix

        # Job-side encodings.
        job_jurusan_vectors = one_hot_rows(list(jobs['jurusan']), all_jurusan)
        job_minat_vectors = one_hot_rows(list(jobs['minat']), all_minat)
        job_keterampilan_vectors = multi_hot_rows(list(jobs['keterampilan']), all_keterampilan)

        # The scaler is fitted on the jobs' minimum GPA only, so the user's
        # scaled value can fall outside [0, 1] when their GPA lies outside
        # the range seen in the catalogue.
        scaler = MinMaxScaler()
        job_gpa = scaler.fit_transform(jobs['min_ipk'].values.reshape(-1, 1))
        user_gpa = scaler.transform(np.array([[ipk]])).flatten()

        logger.info(f"Dimensi job_jurusan_vectors: {job_jurusan_vectors.shape}")
        logger.info(f"Dimensi job_keterampilan_vectors: {job_keterampilan_vectors.shape}")
        logger.info(f"Dimensi job_minat_vectors: {job_minat_vectors.shape}")
        logger.info(f"Dimensi job_gpa: {job_gpa.shape}")

        job_vectors = np.hstack([
            job_jurusan_vectors,
            job_keterampilan_vectors,
            job_minat_vectors,
            job_gpa,
        ])

        # User-side encodings, built with the same helpers and vocabularies.
        user_jurusan_vector = one_hot_rows([user_jurusan], all_jurusan)[0]
        user_keterampilan_vector = multi_hot_rows([user_skills], all_keterampilan)[0]
        user_minat_vector = one_hot_rows([user_minat], all_minat)[0]

        logger.info(f"Dimensi user_jurusan_vector: {user_jurusan_vector.shape}")
        logger.info(f"Dimensi user_keterampilan_vector: {user_keterampilan_vector.shape}")
        logger.info(f"Dimensi user_minat_vector: {user_minat_vector.shape}")
        logger.info(f"Dimensi user_gpa: {user_gpa.shape}")

        user_vector = np.hstack([
            user_jurusan_vector,
            user_keterampilan_vector,
            user_minat_vector,
            user_gpa,
        ])

        logger.info(f"Dimensi final job_vectors: {job_vectors.shape}")
        logger.info(f"Dimensi final user_vector: {user_vector.shape}")

        return job_vectors, user_vector, all_jurusan, all_keterampilan, all_minat

    except Exception as e:
        # logger.exception records the message at ERROR level together with
        # the active traceback.
        logger.exception(f"Error dalam preprocess_data: {e}")
        raise
|
|
|
|
def recommend_jobs(job_vectors, user_vector, jobs, ipk, top_n=3):
    """Rank the jobs the user qualifies for by cosine similarity.

    Args:
        job_vectors: 2-D array with one feature row per row of ``jobs``,
            in the same order.
        user_vector: 1-D feature vector of the user, same width as a row
            of ``job_vectors``.
        jobs: DataFrame of jobs; must contain a numeric ``min_ipk`` column.
        ipk: The user's GPA; only jobs with ``min_ipk <= ipk`` are eligible.
        top_n: Maximum number of recommendations to return. Values below
            zero are treated as zero.

    Returns:
        DataFrame holding at most ``top_n`` eligible jobs, ordered from most
        to least similar, with an extra ``similarity_score`` column. The
        frame is empty (but still has that column) when no job is eligible.

    Raises:
        ValueError: If ``job_vectors`` and ``jobs`` have different row counts.
        Exception: Any other failure is logged with its traceback and
            re-raised unchanged.
    """
    try:
        job_vectors = np.asarray(job_vectors)
        if len(job_vectors) != len(jobs):
            raise ValueError(
                f"Jumlah vektor ({len(job_vectors)}) tidak sama dengan jumlah pekerjaan ({len(jobs)})"
            )

        # Select by position with a boolean mask. Index labels of ``jobs``
        # are not guaranteed to be 0..n-1 (e.g. after rows were dropped), so
        # they must not be used to index the NumPy array.
        eligible_mask = (jobs['min_ipk'] <= ipk).to_numpy()
        eligible_jobs = jobs.loc[eligible_mask].copy()

        if eligible_jobs.empty:
            logger.info(f"Tidak ada pekerjaan yang memenuhi syarat untuk IPK {ipk}")
            # Keep the column set identical to the non-empty result.
            eligible_jobs['similarity_score'] = np.array([], dtype=float)
            return eligible_jobs

        eligible_job_vectors = job_vectors[eligible_mask]

        # cosine_similarity expects 2-D inputs: (1, n_features) vs (n_jobs, n_features).
        user_vector_reshaped = np.asarray(user_vector).reshape(1, -1)

        logger.info(f"Dimensi eligible_job_vectors: {eligible_job_vectors.shape}")
        logger.info(f"Dimensi user_vector_reshaped: {user_vector_reshaped.shape}")

        scores = cosine_similarity(user_vector_reshaped, eligible_job_vectors)[0]

        # Clamp so a negative top_n cannot turn into an "all but the last k" slice.
        limit = max(0, min(top_n, len(eligible_jobs)))
        top_indices = np.argsort(scores)[::-1][:limit]

        recommendations = eligible_jobs.iloc[top_indices].copy()
        recommendations['similarity_score'] = scores[top_indices]

        logger.info(f"Rekomendasi untuk IPK {ipk}: {len(recommendations)} pekerjaan ditemukan")
        return recommendations

    except Exception as e:
        # logger.exception records the message at ERROR level together with
        # the active traceback.
        logger.exception(f"Error dalam recommend_jobs: {e}")
        raise
|
|
|
|
@app.route('/')
def index():
    """Render and return the ``index.html`` template for the root URL."""
    landing_page = render_template('index.html')
    return landing_page
|
|
|
|
@app.route('/recommend', methods=['POST'])
def recommend():
    """Handle the submitted form and render the job recommendations.

    Reads ``nama``, ``ipk``, ``jurusan``, ``keterampilan`` and ``minat`` from
    the POSTed form, validates the GPA (non-empty, numeric, decimal comma
    accepted, within 0.0-4.0), then runs ``load_data`` ->
    ``preprocess_data`` -> ``recommend_jobs`` on ``jobs.xlsx``.

    Returns:
        The rendered ``result.html`` on success. On any validation or
        processing failure, ``index.html`` rendered with an ``error``
        message instead.
    """
    try:
        form = request.form

        logger.info("=== DEBUG: Data yang diterima dari form ===")
        for key, value in form.items():
            logger.info(f"{key}: '{value}' (type: {type(value)}, len: {len(value)})")

        # .get() instead of [] so that a missing field is treated like an
        # empty one rather than raising a 400 BadRequestKeyError.
        nama = form.get('nama', '').strip()
        ipk_input = form.get('ipk', '').strip()
        # Empty selections fall back to these defaults. As a result the three
        # values are never empty here, so no separate "please choose" check
        # is needed after this point.
        jurusan = form.get('jurusan', '').strip().lower() or 'teknik informatika'
        keterampilan = form.get('keterampilan', '').strip().lower() or 'python'
        minat = form.get('minat', '').strip().lower() or 'investigative (analitis)'

        logger.info("=== DEBUG: Data setelah processing ===")
        logger.info(f"nama: '{nama}'")
        logger.info(f"ipk_input: '{ipk_input}'")
        logger.info(f"jurusan: '{jurusan}'")
        logger.info(f"keterampilan: '{keterampilan}'")
        logger.info(f"minat: '{minat}'")

        if not ipk_input:
            logger.error("IPK input kosong!")
            return render_template('index.html', error="IPK tidak boleh kosong!")

        # Accept a decimal comma ("3,50") as well as a decimal point.
        ipk_input = ipk_input.replace(',', '.')
        logger.info(f"IPK setelah replace koma: '{ipk_input}'")

        try:
            ipk = float(ipk_input)
        except ValueError as e:
            logger.error(f"Gagal konversi IPK '{ipk_input}' ke float: {e}")
            return render_template('index.html', error=f"IPK harus berupa angka desimal yang valid (contoh: 3.50)! Input: '{ipk_input}'")
        logger.info(f"IPK berhasil dikonversi: {ipk} (type: {type(ipk)})")

        # The chained comparison is False for NaN as well, so "nan" is
        # rejected here together with out-of-range numbers.
        if not (0.0 <= ipk <= 4.0):
            logger.warning(f"IPK {ipk} tidak valid, harus antara 0.0 dan 4.0")
            return render_template('index.html', error=f"IPK harus antara 0.0 dan 4.0! IPK Anda: {ipk}")

        logger.info("Semua validasi berhasil, memproses rekomendasi...")
        logger.info("Loading jobs.xlsx")
        jobs = load_data('jobs.xlsx')
        logger.info("Preprocessing data")
        job_vectors, user_vector, _, _, _ = preprocess_data(jobs, ipk, jurusan, keterampilan, minat)
        logger.info("Generating recommendations")
        recommendations = recommend_jobs(job_vectors, user_vector, jobs, ipk)

        logger.info(f"Recommendations generated: {len(recommendations)} jobs")
        return render_template(
            'result.html',
            user_name=nama,
            user_ipk=ipk,
            user_jurusan=jurusan,
            user_keterampilan=keterampilan,
            user_minat=minat,
            recommendations=recommendations.to_dict(orient='records'),
        )

    except ValueError as e:
        logger.error(f"ValueError: {e}")
        return render_template('index.html', error=f"Terjadi kesalahan pemrosesan data: {str(e)}")
    except FileNotFoundError as e:
        logger.error(f"FileNotFoundError: {e}")
        return render_template('index.html', error="File jobs.xlsx tidak ditemukan!")
    except jinja2.exceptions.TemplateNotFound as e:
        logger.error(f"TemplateNotFound: {e}")
        return render_template('index.html', error=f"Template {e} tidak ditemukan!")
    except jinja2.exceptions.TemplateSyntaxError as e:
        logger.error(f"TemplateSyntaxError: {e}")
        return render_template('index.html', error=f"Kesalahan sintaks di template: {e}")
    except Exception as e:
        # Last-resort boundary for the request: log the message with its
        # traceback and show the user a generic error page.
        logger.exception(f"Unexpected error: {e}")
        return render_template('index.html', error=f"Terjadi kesalahan: {e}")
|
|
|
|
if __name__ == '__main__':
    import os

    # Debug mode enables the interactive debugger, which must never be
    # reachable on a publicly exposed server. It stays on by default for
    # local development (same behaviour as before) but can be switched off
    # without editing the code, e.g. FLASK_DEBUG=0 python app.py
    debug_enabled = os.environ.get('FLASK_DEBUG', '1').strip().lower() not in ('0', 'false', 'no', 'off')
    app.run(debug=debug_enabled)