StreamlitWhitKMean/admin_dashboard.py

180 lines
7.7 KiB
Python

import streamlit as st
import pandas as pd
import time
import os
import pickle
from utils.db import save_file_metadata, save_dataset, engine
from utils.preprocessing import preprocess, preprocess_initial
from utils.train_kmeans import train_and_save_kmeans, train_kmeans_with_selected_files
from sqlalchemy import text
import utils.auth as auth
# Directories (relative to the working directory) for trained models and data.
MODELS_DIR = "models"
DATA_DIR = "data"
os.makedirs(DATA_DIR, exist_ok=True)

# Load the persisted KMeans model once at import time, if one exists.
# NOTE(security): pickle.load can execute arbitrary code, so this file must
# only ever be produced by the application itself, never by user uploads.
kmeans_model_path = os.path.join(MODELS_DIR, "kmeans_model.pkl")
if os.path.exists(kmeans_model_path):
    # Use a context manager so the file handle is closed deterministically.
    with open(kmeans_model_path, "rb") as _model_file:
        kmeans_model = pickle.load(_model_file)
else:
    kmeans_model = None
def _load_kmeans_model():
    """Return the KMeans model currently stored on disk, or ``None`` if absent.

    The file is re-read on every call instead of relying on the copy loaded at
    import time: imported modules stay cached between Streamlit reruns, so a
    model retrained from this page would otherwise never be picked up.
    """
    if not os.path.exists(kmeans_model_path):
        return None
    # NOTE(security): pickle.load can execute arbitrary code; the model file
    # must only be written by this application.
    with open(kmeans_model_path, "rb") as model_file:
        return pickle.load(model_file)


def _render_upload_section():
    """Render the CSV upload flow: preview, preprocessing, clustering, save."""
    st.subheader("📤 Upload Data Baru")
    uploaded_file = st.file_uploader("Upload File CSV", type=["csv"])
    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file, header=None)
    st.write("Data Awal:")
    st.dataframe(df.head())

    initial_df = preprocess_initial(df)
    st.write("Setelah Preprocessing Awal (struktur & rekap):")
    st.dataframe(initial_df.head())

    preprocessed_df = preprocess(initial_df)
    result_df = preprocessed_df.copy()

    model = _load_kmeans_model()
    if model is not None:
        result_df["cluster"] = model.predict(
            result_df[["RataRataHarga", "RataRataHargaTertinggiDiPasar"]]
        )
    else:
        st.warning("Model belum tersedia. Silakan reset model terlebih dahulu.")

    # Clustering preview for the admin (no cluster_label is stored).
    preview_df = result_df.copy()

    st.write("Hasil Preprocessing (winsorization):")
    st.dataframe(preprocessed_df.head())

    st.write("Perbandingan Perubahan Kolom:")
    # Keep the original column order so the report is stable between reruns.
    shared_columns = [
        col for col in initial_df.columns if col in preprocessed_df.columns
    ]
    diff_report = []
    for col in shared_columns:
        if not initial_df[col].equals(preprocessed_df[col]):
            changed = (initial_df[col] != preprocessed_df[col]).sum()
            diff_report.append((col, changed))
    if diff_report:
        diff_df = pd.DataFrame(diff_report, columns=["Kolom", "Jumlah Data Berubah"])
        st.dataframe(diff_df)
    else:
        st.info("Tidak ada perubahan terdeteksi pada kolom yang sama.")

    st.write("Hasil Clustering Preview:")
    st.dataframe(preview_df)

    if st.button("Konfirmasi dan Simpan ke Database"):
        file_id = save_file_metadata(uploaded_file.name)
        save_dataset(file_id, result_df)  # result_df without cluster_label
        with st.spinner("Melatih ulang model KMeans..."):
            train_and_save_kmeans()
        st.success(f"File berhasil disimpan ke database dengan ID: {file_id} dan model telah diperbarui!")


def _render_model_management():
    """Render the model reset and the manual retrain-on-selected-files tools."""
    st.subheader("⚙️ Manajemen Model")

    with st.expander("Reset Model"):
        st.warning("Ini akan menghapus model lama dan melatih ulang berdasarkan seluruh data yang ada.")
        if st.checkbox("Saya yakin ingin mereset model"):
            if st.button("Reset Model (Latih ulang dari seluruh data)"):
                with st.spinner("Sedang melatih ulang model dari awal..."):
                    train_and_save_kmeans()
                st.success("✅ Model berhasil di-reset dan dilatih ulang dari seluruh data yang ada.")
                # Give the admin time to read the message before the rerun.
                time.sleep(3)
                st.rerun()

    with st.expander("🔁 Latih Ulang Manual (Pilih File)"):
        file_options = pd.read_sql("SELECT id, file_name FROM uploaded_files ORDER BY uploaded_at DESC", engine)
        if not file_options.empty:
            # Build the id -> name lookup once instead of filtering the frame
            # for every option (assumes ids are unique — TODO confirm).
            names_by_id = dict(zip(file_options["id"], file_options["file_name"]))
            selected_ids = st.multiselect(
                "Pilih file yang akan digunakan untuk pelatihan:",
                options=file_options["id"],
                format_func=lambda x: f"ID {x} - {names_by_id[x]}",
            )
            if st.button("Latih Model dengan File Terpilih"):
                if selected_ids:
                    with st.spinner("Melatih model dengan file terpilih..."):
                        train_kmeans_with_selected_files(selected_ids)
                    st.success("✅ Model berhasil dilatih ulang dengan file yang dipilih.")
                    time.sleep(3)
                    st.rerun()
                else:
                    st.warning("Silakan pilih minimal satu file terlebih dahulu.")


def _render_label_settings():
    """Render the selectors mapping cluster ids to visualization labels."""
    st.subheader("🏷️ Pengaturan Label Cluster untuk Visualisasi")
    st.markdown("Label ini hanya digunakan untuk visualisasi, tidak memengaruhi data asli.")

    label_options = ["Tinggi", "Sedang", "Rendah"]
    assigned_labels = {}
    used_labels = set()
    for cluster_id in range(3):
        current_label = st.session_state.cluster_labels.get(cluster_id)
        available_labels = [
            lbl for lbl in label_options
            if lbl not in used_labels or lbl == current_label
        ]
        # Fall back to the first option when session state has no (or an
        # unknown) label for this cluster, instead of raising ValueError.
        if current_label in available_labels:
            default_index = available_labels.index(current_label)
        else:
            default_index = 0
        selected = st.selectbox(
            f"Label untuk Cluster {cluster_id}",
            options=available_labels,
            index=default_index,
            key=f"viz_label_{cluster_id}",
        )
        assigned_labels[cluster_id] = selected
        used_labels.add(selected)

    if st.button("Terapkan Label Visualisasi"):
        st.session_state.cluster_labels = assigned_labels
        st.session_state.label_applied = True
        st.success("✅ Label visualisasi cluster berhasil diperbarui.")


def _render_file_history():
    """Render the list of uploaded files with preview, delete and rename."""
    st.markdown("---")
    st.header("📁 Daftar File yang Sudah Diupload")
    files = pd.read_sql("SELECT * FROM uploaded_files ORDER BY uploaded_at DESC", engine)

    for _, row in files.iterrows():
        file_id = int(row["id"])
        # str() keeps the slice working whether the driver returns the
        # timestamp as text or as a datetime object.
        uploaded_at = str(row["uploaded_at"])[:19]
        with st.expander(f"{row['file_name']} ({uploaded_at})"):
            df_preview = pd.read_sql_table(f"dataset_{file_id}", con=engine)
            preview_option = st.radio(
                "Tampilkan:",
                ["5 Data Pertama", "Seluruh Data"],
                horizontal=True,
                key=f"preview_option_{file_id}",
            )
            if preview_option == "5 Data Pertama":
                st.dataframe(df_preview.head())
            else:
                st.dataframe(df_preview)

            # Three columns are kept for layout; the third is unused here.
            col1, col2, _ = st.columns(3)
            with col1:
                if st.checkbox(f"Konfirmasi hapus file ID {file_id}"):
                    if st.button("Hapus", key=f"hapus_{file_id}"):
                        with engine.begin() as conn:
                            conn.execute(
                                text("DELETE FROM uploaded_files WHERE id = :id"),
                                {"id": file_id},
                            )
                            # Also drop the per-file data table so it is not
                            # left orphaned; the name is built from an int,
                            # so it cannot carry injected SQL.
                            conn.execute(text(f"DROP TABLE IF EXISTS dataset_{file_id}"))
                        st.success("✅ File berhasil dihapus.")
                        st.rerun()
            with col2:
                new_name = st.text_input("Ganti Nama File", value=row["file_name"], key=f"rename_{file_id}")
                if st.button("Simpan Nama Baru", key=f"simpan_{file_id}"):
                    with engine.begin() as conn:
                        conn.execute(
                            text("UPDATE uploaded_files SET file_name = :name WHERE id = :id"),
                            {"name": new_name, "id": file_id},
                        )
                    st.rerun()


def render():
    """Render the admin dashboard page.

    Initializes the session-state keys this page relies on, stops with a
    warning when the session is not authenticated, and otherwise draws the
    upload, model-management, label-setting and file-history sections in
    that order.
    """
    # Initialize the session state this page needs.
    if "cluster_labels" not in st.session_state:
        st.session_state.cluster_labels = {0: "Tinggi", 1: "Sedang", 2: "Rendah"}
    if "label_applied" not in st.session_state:
        st.session_state.label_applied = False

    if not st.session_state.get("authenticated"):
        st.warning("Silakan login terlebih dahulu.")
        return

    # presumably renders the logout control — verify against utils.auth
    auth.logout()
    st.title("Admin Panel - Upload Data")

    _render_upload_section()
    _render_model_management()
    _render_label_settings()
    _render_file_history()