TIF_NGANJUK_E41210983/server.py

240 lines
11 KiB
Python

from flask import Flask, render_template, request, redirect, url_for
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import logging
import jinja2
# Configure logging once at import time: INFO level, with a timestamp and
# the level name in front of every message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

# The Flask application object that the route decorators register against.
app = Flask(__name__)
def load_data(job_file):
    """Load the job catalogue from an Excel workbook and normalise it.

    Args:
        job_file: Location of the workbook, passed straight to
            ``pandas.read_excel``.

    Returns:
        A DataFrame holding exactly the columns ``id``, ``job``, ``min_ipk``,
        ``job_desc``, ``jurusan``, ``keterampilan`` and ``minat``, with rows
        containing any missing value removed and the index renumbered
        0..n-1.  ``jurusan`` is reduced to its first comma-separated entry
        (trimmed, lower-cased, trailing dots removed), ``keterampilan``
        becomes a list of trimmed lower-case skill names, and ``minat`` is
        trimmed and lower-cased.

    Raises:
        ValueError: If one or more of the required columns is missing.
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        jobs = pd.read_excel(job_file)
        expected_job_columns = ['id', 'job', 'min_ipk', 'job_desc', 'jurusan', 'keterampilan', 'minat']

        # Debug aid: show which columns were found versus which are required.
        logger.info(f"Kolom yang ditemukan di {job_file}: {list(jobs.columns)}")
        logger.info(f"Kolom yang diharapkan: {expected_job_columns}")

        # Every required column must be present; order does not matter.
        missing_columns = [col for col in expected_job_columns if col not in jobs.columns]
        if missing_columns:
            logger.error(f"Kolom yang hilang: {missing_columns}")
            raise ValueError(f"File {job_file} harus memiliki kolom: {expected_job_columns}. Kolom yang hilang: {missing_columns}")

        # Keep only the required columns (in the expected order) and drop
        # incomplete rows.  The index is then renumbered so that row labels
        # equal row positions: recommend_jobs() uses the frame's index to
        # select rows of a NumPy matrix, which only works without gaps.
        jobs = jobs[expected_job_columns].dropna().reset_index(drop=True)

        jobs['jurusan'] = jobs['jurusan'].str.split(',').str[0].str.strip().str.lower().str.rstrip('.')
        # Skip empty tokens (e.g. from a trailing comma) so they do not end
        # up as a bogus "" skill.
        jobs['keterampilan'] = jobs['keterampilan'].str.split(',').apply(
            lambda parts: [skill.strip().lower() for skill in parts if skill.strip()]
        )
        jobs['minat'] = jobs['minat'].str.strip().str.lower()

        logger.info(f"Data jobs berhasil dimuat dan dibersihkan. Shape: {jobs.shape}")
        logger.info(f"Sample data:\n{jobs.head()}")
        return jobs
    except Exception as e:
        logger.error(f"Error memuat data: {e}")
        raise
def preprocess_data(jobs, ipk, jurusan, keterampilan, minat):
    """Encode the jobs and the user's profile as comparable feature vectors.

    Each vector is the concatenation of a one-hot ``jurusan`` block, a
    multi-hot ``keterampilan`` block, a one-hot ``minat`` block and one
    min-max scaled GPA value.  The scaler is fitted on the jobs' ``min_ipk``
    column only and then applied to the user's ``ipk``.

    Args:
        jobs: DataFrame with ``jurusan`` (str), ``keterampilan`` (list of
            str), ``minat`` (str) and ``min_ipk`` (numeric) columns.
        ipk: The user's GPA as a number.
        jurusan: The user's major.
        keterampilan: The user's skill.  Several skills may be given
            separated by commas; a single skill behaves as before.
        minat: The user's interest.

    Returns:
        A tuple ``(job_vectors, user_vector, all_jurusan, all_keterampilan,
        all_minat)`` where ``job_vectors`` is a 2-D array with one row per
        job, ``user_vector`` is a 1-D array of the same width, and the three
        lists are the sorted vocabularies used for the encodings.

    Raises:
        ValueError: If ``jobs`` has no rows.
        Exception: Any other error is logged with its traceback and
            re-raised unchanged.
    """
    try:
        if len(jobs) == 0:
            # Fail with a clear message instead of an opaque scaler error.
            raise ValueError("Data pekerjaan kosong: tidak ada baris yang bisa diproses")

        # Split the user's skills the same way job skills are split, so a
        # comma-separated input can match individual job skills.
        user_skills = {
            part.strip().lower() for part in keterampilan.split(',') if part.strip()
        }

        # Vocabularies: every value seen in the jobs plus the user's values.
        all_jurusan = sorted(set(jobs['jurusan']) | {jurusan.lower()})
        all_keterampilan = sorted(
            {skill for skills in jobs['keterampilan'] for skill in skills} | user_skills
        )
        all_minat = sorted(set(jobs['minat']) | {minat.lower()})
        logger.info(f"Total jurusan unik: {len(all_jurusan)}")
        logger.info(f"Total keterampilan unik: {len(all_keterampilan)}")
        logger.info(f"Total minat unik: {len(all_minat)}")

        def to_one_hot(value, all_values):
            """Return a 0/1 array marking where ``value`` occurs in ``all_values``."""
            target = value.lower()
            return np.array([1 if target == val else 0 for val in all_values])

        # One-hot blocks for the jobs.
        job_jurusan_vectors = np.array([to_one_hot(job_major, all_jurusan) for job_major in jobs['jurusan']])
        job_minat_vectors = np.array([to_one_hot(job_interest, all_minat) for job_interest in jobs['minat']])

        # Multi-hot skill block.  A dict gives constant-time column lookup;
        # every job skill is in the vocabulary by construction.
        skill_column = {skill: col for col, skill in enumerate(all_keterampilan)}
        job_keterampilan_vectors = np.zeros((len(jobs), len(all_keterampilan)))
        for row, skills in enumerate(jobs['keterampilan']):
            for skill in skills:
                job_keterampilan_vectors[row, skill_column[skill]] = 1

        # GPA normalisation: fit on the jobs, then transform the user value.
        scaler = MinMaxScaler()
        job_gpa = scaler.fit_transform(jobs['min_ipk'].values.reshape(-1, 1)).flatten()
        user_gpa = scaler.transform(np.array([[ipk]])).flatten()

        # Debug: block shapes before stacking.
        logger.info(f"Dimensi job_jurusan_vectors: {job_jurusan_vectors.shape}")
        logger.info(f"Dimensi job_keterampilan_vectors: {job_keterampilan_vectors.shape}")
        logger.info(f"Dimensi job_minat_vectors: {job_minat_vectors.shape}")
        logger.info(f"Dimensi job_gpa: {job_gpa.shape}")

        # Stack the blocks side by side; GPA becomes the last column.
        job_vectors = np.hstack([
            job_jurusan_vectors,
            job_keterampilan_vectors,
            job_minat_vectors,
            job_gpa.reshape(-1, 1),
        ])

        # User blocks, built against the same vocabularies.
        user_jurusan_vector = to_one_hot(jurusan, all_jurusan)
        user_keterampilan_vector = np.array(
            [1 if skill in user_skills else 0 for skill in all_keterampilan]
        )
        user_minat_vector = to_one_hot(minat, all_minat)

        # Debug: user block shapes.
        logger.info(f"Dimensi user_jurusan_vector: {user_jurusan_vector.shape}")
        logger.info(f"Dimensi user_keterampilan_vector: {user_keterampilan_vector.shape}")
        logger.info(f"Dimensi user_minat_vector: {user_minat_vector.shape}")
        logger.info(f"Dimensi user_gpa: {user_gpa.shape}")

        user_vector = np.hstack([
            user_jurusan_vector,
            user_keterampilan_vector,
            user_minat_vector,
            user_gpa,
        ])
        logger.info(f"Dimensi final job_vectors: {job_vectors.shape}")
        logger.info(f"Dimensi final user_vector: {user_vector.shape}")
        return job_vectors, user_vector, all_jurusan, all_keterampilan, all_minat
    except Exception as e:
        logger.error(f"Error dalam preprocess_data: {e}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise
def recommend_jobs(job_vectors, user_vector, jobs, ipk, top_n=3):
    """Return the jobs most similar to the user among those they qualify for.

    A job is eligible when its ``min_ipk`` is less than or equal to ``ipk``.
    Eligible jobs are ranked by cosine similarity between their feature
    vector and ``user_vector``.

    Args:
        job_vectors: 2-D array whose i-th row is the feature vector of the
            i-th row (by position) of ``jobs``.
        user_vector: 1-D feature vector with the same width as
            ``job_vectors``.
        jobs: DataFrame of jobs with a ``min_ipk`` column.
        ipk: The user's GPA.
        top_n: Maximum number of recommendations; values below zero are
            treated as zero.

    Returns:
        A copy of the selected rows of ``jobs``, best match first, with an
        added ``similarity_score`` column.  When no job is eligible the
        empty selection is returned as-is (without that column).

    Raises:
        Exception: Any error is logged with its traceback and re-raised
            unchanged.
    """
    try:
        # Select by POSITION with a boolean mask.  Using the DataFrame's
        # index labels to index the NumPy matrix would pick the wrong rows
        # (or fail) whenever the index is not exactly 0..n-1.
        eligible_mask = (jobs['min_ipk'] <= ipk).to_numpy()
        eligible_jobs = jobs[eligible_mask].copy()
        if eligible_jobs.empty:
            logger.info(f"Tidak ada pekerjaan yang memenuhi syarat untuk IPK {ipk}")
            return eligible_jobs
        eligible_job_vectors = job_vectors[eligible_mask]

        # cosine_similarity expects 2-D inputs, so make the user a 1-row matrix.
        user_vector_reshaped = user_vector.reshape(1, -1)
        logger.info(f"Dimensi eligible_job_vectors: {eligible_job_vectors.shape}")
        logger.info(f"Dimensi user_vector_reshaped: {user_vector_reshaped.shape}")
        similarity_scores = cosine_similarity(user_vector_reshaped, eligible_job_vectors)

        # Highest score first; never ask for more rows than exist, and do
        # not let a negative top_n turn into a "drop the last row" slice.
        limit = min(max(top_n, 0), len(eligible_jobs))
        top_indices = np.argsort(similarity_scores[0])[::-1][:limit]

        recommendations = eligible_jobs.iloc[top_indices].copy()
        recommendations['similarity_score'] = similarity_scores[0][top_indices]
        logger.info(f"Rekomendasi untuk IPK {ipk}: {len(recommendations)} pekerjaan ditemukan")
        return recommendations
    except Exception as e:
        logger.error(f"Error dalam recommend_jobs: {e}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    landing_page = render_template('index.html')
    return landing_page
def _parse_ipk(ipk_input):
    """Turn the raw IPK form text into a validated float.

    Accepts a comma as the decimal separator.  Returns ``(ipk, None)`` on
    success, or ``(None, message)`` where ``message`` is the user-facing
    error when the text is empty, not a number, or outside 0.0-4.0.
    """
    if not ipk_input:
        logger.error("IPK input kosong!")
        return None, "IPK tidak boleh kosong!"

    # Accept "3,5" as well as "3.5".
    ipk_input = ipk_input.replace(',', '.')
    logger.info(f"IPK setelah replace koma: '{ipk_input}'")

    try:
        ipk = float(ipk_input)
    except ValueError as e:
        logger.error(f"Gagal konversi IPK '{ipk_input}' ke float: {e}")
        return None, f"IPK harus berupa angka desimal yang valid (contoh: 3.50)! Input: '{ipk_input}'"
    logger.info(f"IPK berhasil dikonversi: {ipk} (type: {type(ipk)})")

    if not (0.0 <= ipk <= 4.0):
        logger.warning(f"IPK {ipk} tidak valid, harus antara 0.0 dan 4.0")
        return None, f"IPK harus antara 0.0 dan 4.0! IPK Anda: {ipk}"
    return ipk, None


@app.route('/recommend', methods=['POST'])
def recommend():
    """Handle the submitted form and render the job recommendations.

    Reads ``nama``, ``ipk``, ``jurusan``, ``keterampilan`` and ``minat``
    from the POSTed form.  A missing or blank ``jurusan``, ``keterampilan``
    or ``minat`` falls back to a built-in default; a missing, blank,
    non-numeric or out-of-range ``ipk`` re-renders ``index.html`` with an
    error message.  Otherwise ``jobs.xlsx`` is loaded, vectorised and
    ranked, and ``result.html`` is rendered with the user's inputs and the
    recommendations as a list of dicts.

    Every exception is caught, logged, and reported to the user by
    re-rendering ``index.html`` with an ``error`` value.
    """
    try:
        # Debug: log everything received from the form.
        logger.info("=== DEBUG: Data yang diterima dari form ===")
        for key, value in request.form.items():
            logger.info(f"{key}: '{value}' (type: {type(value)}, len: {len(value)})")

        # .get(..., '') treats an absent field like an empty one instead of
        # raising a 400 that would only surface as a generic error.
        form = request.form
        nama = form.get('nama', '').strip()
        ipk_input = form.get('ipk', '').strip()
        jurusan = form.get('jurusan', '').strip().lower() or 'teknik informatika'
        keterampilan = form.get('keterampilan', '').strip().lower() or 'python'
        minat = form.get('minat', '').strip().lower() or 'investigative (analitis)'

        logger.info(f"=== DEBUG: Data setelah processing ===")
        logger.info(f"nama: '{nama}'")
        logger.info(f"ipk_input: '{ipk_input}'")
        logger.info(f"jurusan: '{jurusan}'")
        logger.info(f"keterampilan: '{keterampilan}'")
        logger.info(f"minat: '{minat}'")

        ipk, ipk_error = _parse_ipk(ipk_input)
        if ipk_error is not None:
            return render_template('index.html', error=ipk_error)

        # No emptiness check for jurusan/keterampilan/minat is needed here:
        # the defaults applied above guarantee all three are non-empty.
        logger.info("Semua validasi berhasil, memproses rekomendasi...")

        logger.info("Loading jobs.xlsx")
        jobs = load_data('jobs.xlsx')
        logger.info("Preprocessing data")
        job_vectors, user_vector, _, _, _ = preprocess_data(jobs, ipk, jurusan, keterampilan, minat)
        logger.info("Generating recommendations")
        recommendations = recommend_jobs(job_vectors, user_vector, jobs, ipk)
        logger.info(f"Recommendations generated: {len(recommendations)} jobs")

        return render_template(
            'result.html',
            user_name=nama,
            user_ipk=ipk,
            user_jurusan=jurusan,
            user_keterampilan=keterampilan,
            user_minat=minat,
            recommendations=recommendations.to_dict(orient='records'),
        )
    except ValueError as e:
        logger.error(f"ValueError: {e}")
        return render_template('index.html', error=f"Terjadi kesalahan pemrosesan data: {str(e)}")
    except FileNotFoundError as e:
        logger.error(f"FileNotFoundError: {e}")
        return render_template('index.html', error="File jobs.xlsx tidak ditemukan!")
    except jinja2.exceptions.TemplateNotFound as e:
        logger.error(f"TemplateNotFound: {e}")
        return render_template('index.html', error=f"Template {e} tidak ditemukan!")
    except jinja2.exceptions.TemplateSyntaxError as e:
        logger.error(f"TemplateSyntaxError: {e}")
        return render_template('index.html', error=f"Kesalahan sintaks di template: {e}")
    except Exception as e:
        # Last-resort boundary: log the full traceback and show a generic error.
        logger.error(f"Unexpected error: {e}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        return render_template('index.html', error=f"Terjadi kesalahan: {e}")
# Start Flask's built-in server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode; confirm this
    # entry point is only used for local development.
    app.run(debug=True)