This commit is contained in:
commit
669d069b02
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,180 @@
|
|||
import streamlit as st
|
||||
import pandas as pd
|
||||
import time
|
||||
import os
|
||||
import pickle
|
||||
from utils.db import save_file_metadata, save_dataset, engine
|
||||
from utils.preprocessing import preprocess, preprocess_initial
|
||||
from utils.train_kmeans import train_and_save_kmeans, train_kmeans_with_selected_files
|
||||
from sqlalchemy import text
|
||||
import utils.auth as auth
|
||||
|
||||
|
||||
|
||||
MODELS_DIR = "models"
DATA_DIR = "data"

# The data directory must exist before anything writes into it.
os.makedirs(DATA_DIR, exist_ok=True)

# Load the KMeans model once at import time; ``None`` means no model has been
# trained yet.
# NOTE(review): pickle executes arbitrary code on load - only safe while the
# model file is produced exclusively by this application.
kmeans_model_path = os.path.join(MODELS_DIR, "kmeans_model.pkl")
if os.path.exists(kmeans_model_path):
    # Use a context manager so the file handle is closed deterministically.
    with open(kmeans_model_path, "rb") as model_file:
        kmeans_model = pickle.load(model_file)
else:
    kmeans_model = None
|
||||
|
||||
|
||||
|
||||
def _count_changed(before, after):
    """Count positions where two aligned Series differ (NaN vs NaN is unchanged)."""
    both_missing = before.isna() & after.isna()
    return int(((before != after) & ~both_missing).sum())


def _render_upload_section():
    """Upload a CSV, preview preprocessing/clustering, and optionally persist it."""
    st.subheader("📤 Upload Data Baru")
    uploaded_file = st.file_uploader("Upload File CSV", type=["csv"])
    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file, header=None)
    st.write("Data Awal:")
    st.dataframe(df.head())

    initial_df = preprocess_initial(df)
    st.write("Setelah Preprocessing Awal (struktur & rekap):")
    st.dataframe(initial_df.head())

    preprocessed_df = preprocess(initial_df)
    result_df = preprocessed_df.copy()

    # NOTE(review): kmeans_model is loaded at module import; it is presumably
    # not refreshed after retraining until the process restarts - confirm.
    if kmeans_model is not None:
        result_df["cluster"] = kmeans_model.predict(
            result_df[["RataRataHarga", "RataRataHargaTertinggiDiPasar"]]
        )
    else:
        st.warning("Model belum tersedia. Silakan reset model terlebih dahulu.")

    # Clustering preview for the admin (no cluster_label is stored).
    preview_df = result_df.copy()

    st.write("Hasil Preprocessing (winsorization):")
    st.dataframe(preprocessed_df.head())

    st.write("Perbandingan Perubahan Kolom:")
    # Keep the original column order so the report is stable between reruns.
    cols_common = [c for c in initial_df.columns if c in preprocessed_df.columns]
    diff_report = [
        (col, _count_changed(initial_df[col], preprocessed_df[col]))
        for col in cols_common
        if not initial_df[col].equals(preprocessed_df[col])
    ]
    if diff_report:
        st.dataframe(pd.DataFrame(diff_report, columns=["Kolom", "Jumlah Data Berubah"]))
    else:
        st.info("Tidak ada perubahan terdeteksi pada kolom yang sama.")

    st.write("Hasil Clustering Preview:")
    st.dataframe(preview_df)

    if st.button("Konfirmasi dan Simpan ke Database"):
        file_id = save_file_metadata(uploaded_file.name)
        save_dataset(file_id, result_df)  # result_df without cluster_label
        try:
            with st.spinner("Melatih ulang model KMeans..."):
                train_and_save_kmeans()
        except ValueError as exc:
            # The data is already stored; report the training failure instead
            # of showing a raw traceback.
            st.error(f"File disimpan dengan ID: {file_id}, tetapi pelatihan model gagal: {exc}")
        else:
            st.success(f"File berhasil disimpan ke database dengan ID: {file_id} dan model telah diperbarui!")


def _retrain_and_rerun(spinner_text, success_text, train, *args):
    """Run a training callable under a spinner, then rerun the app on success."""
    try:
        with st.spinner(spinner_text):
            train(*args)
    except ValueError as exc:
        # The training helpers raise ValueError when there is nothing to train on.
        st.error(str(exc))
        return
    st.success(success_text)
    time.sleep(3)  # leave the success message visible before the rerun
    st.rerun()


def _render_model_management():
    """Render the 'reset model' and 'manual retrain with selected files' controls."""
    st.subheader("⚙️ Manajemen Model")
    with st.expander("Reset Model"):
        st.warning("Ini akan menghapus model lama dan melatih ulang berdasarkan seluruh data yang ada.")
        if st.checkbox("Saya yakin ingin mereset model"):
            if st.button("Reset Model (Latih ulang dari seluruh data)"):
                _retrain_and_rerun(
                    "Sedang melatih ulang model dari awal...",
                    "✅ Model berhasil di-reset dan dilatih ulang dari seluruh data yang ada.",
                    train_and_save_kmeans,
                )

    with st.expander("🔁 Latih Ulang Manual (Pilih File)"):
        file_options = pd.read_sql(
            "SELECT id, file_name FROM uploaded_files ORDER BY uploaded_at DESC", engine
        )
        if file_options.empty:
            return
        # Build the id -> name lookup once instead of filtering per option.
        names_by_id = dict(zip(file_options["id"], file_options["file_name"]))
        selected_ids = st.multiselect(
            "Pilih file yang akan digunakan untuk pelatihan:",
            options=file_options["id"],
            format_func=lambda x: f"ID {x} - {names_by_id[x]}",
        )
        if st.button("Latih Model dengan File Terpilih"):
            if selected_ids:
                _retrain_and_rerun(
                    "Melatih model dengan file terpilih...",
                    "✅ Model berhasil dilatih ulang dengan file yang dipilih.",
                    train_kmeans_with_selected_files,
                    selected_ids,
                )
            else:
                st.warning("Silakan pilih minimal satu file terlebih dahulu.")


def _render_label_settings():
    """Let the admin map cluster ids to display labels (visualization only)."""
    st.subheader("🏷️ Pengaturan Label Cluster untuk Visualisasi")
    st.markdown("Label ini hanya digunakan untuk visualisasi, tidak memengaruhi data asli.")
    label_options = ["Tinggi", "Sedang", "Rendah"]
    assigned_labels = {}
    used_labels = set()
    for cluster_id in range(3):
        current = st.session_state.cluster_labels.get(cluster_id)
        available_labels = [
            lbl for lbl in label_options if lbl not in used_labels or lbl == current
        ]
        # Fall back to the first option if the stored label is not offered.
        default_index = available_labels.index(current) if current in available_labels else 0
        selected = st.selectbox(
            f"Label untuk Cluster {cluster_id}",
            options=available_labels,
            index=default_index,
            key=f"viz_label_{cluster_id}",
        )
        assigned_labels[cluster_id] = selected
        used_labels.add(selected)

    if st.button("Terapkan Label Visualisasi"):
        st.session_state.cluster_labels = assigned_labels
        st.session_state.label_applied = True
        st.success("✅ Label visualisasi cluster berhasil diperbarui.")


def _render_file_history():
    """List uploaded files with preview, delete and rename controls."""
    st.markdown("---")
    st.header("📁 Daftar File yang Sudah Diupload")

    files = pd.read_sql("SELECT * FROM uploaded_files ORDER BY uploaded_at DESC", engine)
    if files.empty:
        return

    for _, row in files.iterrows():
        # Plain int: safe to bind as a SQL parameter and to embed in a table name.
        file_id = int(row["id"])
        with st.expander(f"{row['file_name']} ({row['uploaded_at'][:19]})"):
            try:
                df_preview = pd.read_sql_table(f"dataset_{file_id}", con=engine)
            except ValueError:
                # pandas raises ValueError when the table does not exist; keep
                # the delete/rename controls usable for such rows.
                df_preview = None
                st.warning(f"Tabel dataset_{file_id} tidak ditemukan.")

            if df_preview is not None:
                preview_option = st.radio(
                    "Tampilkan:",
                    ["5 Data Pertama", "Seluruh Data"],
                    horizontal=True,
                    key=f"preview_option_{file_id}",
                )
                if preview_option == "5 Data Pertama":
                    st.dataframe(df_preview.head())
                else:
                    st.dataframe(df_preview)

            col1, col2, _ = st.columns(3)
            with col1:
                if st.checkbox(f"Konfirmasi hapus file ID {file_id}"):
                    if st.button("Hapus", key=f"hapus_{file_id}"):
                        with engine.begin() as conn:
                            conn.execute(
                                text("DELETE FROM uploaded_files WHERE id = :id"),
                                {"id": file_id},
                            )
                            # Also drop the data table; otherwise the deleted
                            # file keeps feeding "retrain on all data".
                            conn.execute(text(f'DROP TABLE IF EXISTS "dataset_{file_id}"'))
                        st.success("✅ File berhasil dihapus.")
                        st.rerun()
            with col2:
                new_name = st.text_input(
                    "Ganti Nama File", value=row["file_name"], key=f"rename_{file_id}"
                )
                if st.button("Simpan Nama Baru", key=f"simpan_{file_id}"):
                    with engine.begin() as conn:
                        conn.execute(
                            text("UPDATE uploaded_files SET file_name = :name WHERE id = :id"),
                            {"name": new_name, "id": file_id},
                        )
                    st.rerun()


def render():
    """Render the admin panel: upload, model management, labels, file history.

    Shows a warning and returns early when the session is not authenticated.
    """
    # Initialise the session state keys this page relies on.
    if "cluster_labels" not in st.session_state:
        st.session_state.cluster_labels = {0: "Tinggi", 1: "Sedang", 2: "Rendah"}
    if "label_applied" not in st.session_state:
        st.session_state.label_applied = False

    if not st.session_state.get("authenticated"):
        st.warning("Silakan login terlebih dahulu.")
        return

    auth.logout()

    st.title("Admin Panel - Upload Data")
    _render_upload_section()
    _render_model_management()
    _render_label_settings()
    _render_file_history()
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"username": "admin", "password": "$2b$12$iHuWFWeYpb4FBA0W9htyO.qLxZzaBydN.7vrXtGJypFzEi49GJFZi", "security_question": "Nama hewan peliharaan pertama Anda?", "security_answer": "nalakucinglucu"}
|
|
@ -0,0 +1,64 @@
|
|||
import streamlit as st
|
||||
import pandas as pd
|
||||
# from sqlalchemy import create_engine
|
||||
from utils.db import init_db, engine
|
||||
from utils.preprocessing import preprocess
|
||||
from utils.visualizations import show_scorecard, show_clustermap, show_top_bottom_locations, show_price_trend, show_data_table
|
||||
import pickle
|
||||
|
||||
# Load the trained KMeans model if one exists. A missing file must not raise
# here: this module is imported by the app entry point, so an import-time
# crash would also block the admin page that trains the first model.
# NOTE(review): pickle executes arbitrary code on load - only safe while the
# model file is produced exclusively by this application.
try:
    with open("models/kmeans_model.pkl", "rb") as model_file:
        model = pickle.load(model_file)
except FileNotFoundError:
    model = None

init_db()
|
||||
|
||||
def load_dataset(file_id):
    """Return the stored dataset table for ``file_id`` as a DataFrame."""
    table_name = f"dataset_{file_id}"
    return pd.read_sql_table(table_name, con=engine)
|
||||
|
||||
|
||||
def render():
    """Render the public dashboard for one uploaded dataset.

    The user picks a dataset in the sidebar; the data is preprocessed,
    clustered with the loaded model, filtered by cluster and location, and
    passed to the visualization helpers.
    """
    st.markdown(
        """
        <h1 style='text-align: center;
        margin-bottom: 50px;
        padding-bottom: 50px;
        margin-top: 0px;'>
        Visualisasi Harga Beras Medium Jawa Timur
        </h1>
        """,
        unsafe_allow_html=True
    )

    # Without a trained model there is nothing to cluster with.
    if model is None:
        st.warning("Model belum tersedia. Silakan hubungi admin untuk melatih model.")
        return

    files = pd.read_sql("SELECT * FROM uploaded_files ORDER BY uploaded_at DESC", engine)
    if files.empty:
        st.warning("Belum ada data yang diunggah admin.")
        return

    file_dict = {
        f"{row['file_name']} ({row['uploaded_at'][:19]})": row['id']
        for _, row in files.iterrows()
    }
    selected_label = st.sidebar.selectbox("Pilih Dataset:", list(file_dict.keys()))
    selected_file_id = file_dict.get(selected_label)

    if selected_file_id is None:
        st.warning("Silakan pilih dataset terlebih dahulu.")
        return

    df = load_dataset(selected_file_id)
    df_clean = preprocess(df)
    df_clean["cluster"] = model.predict(
        df_clean[["RataRataHarga", "RataRataHargaTertinggiDiPasar"]]
    )

    # Sidebar cluster filter.
    cluster_options = sorted(df_clean["cluster"].unique())
    selected_cluster = st.sidebar.multiselect(
        "Pilih Cluster:", options=cluster_options, default=cluster_options
    )

    # Sidebar location filter (only when the column exists).
    has_lokasi = "Lokasi" in df_clean.columns
    lokasi_options = sorted(df_clean["Lokasi"].unique()) if has_lokasi else []
    selected_lokasi = st.sidebar.multiselect(
        "Pilih Lokasi:", options=lokasi_options, default=lokasi_options
    )

    mask = df_clean["cluster"].isin(selected_cluster)
    if has_lokasi:
        # Apply the location filter only when the column is present, matching
        # the guard used to build the options above.
        mask &= df_clean["Lokasi"].isin(selected_lokasi)
    filtered_df = df_clean[mask]

    show_scorecard(filtered_df)
    show_clustermap(filtered_df)
    show_top_bottom_locations(filtered_df)
    show_price_trend()
    show_data_table(filtered_df)
|
Binary file not shown.
|
@ -0,0 +1,19 @@
|
|||
import streamlit as st
|
||||
import admin_dashboard
|
||||
import dashboard_user
|
||||
from utils.auth import login
|
||||
|
||||
st.set_page_config(page_title="Visualisasi Clustering", layout="wide")

# Sidebar page switcher.
PAGE_OPTIONS = ["User Dashboard", "Admin Dashboard"]
page = st.sidebar.selectbox("Pilih Halaman", PAGE_OPTIONS)

if page == "Admin Dashboard":
    # The admin page is only rendered for an authenticated session;
    # otherwise the login form is shown instead.
    if st.session_state.get("authenticated"):
        admin_dashboard.render()
    else:
        login()
elif page == "User Dashboard":
    dashboard_user.render()
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,128 @@
|
|||
import streamlit as st
|
||||
import bcrypt
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
CONFIG_PATH = "config.json"


def load_config():
    """Return the admin configuration read from ``CONFIG_PATH``.

    When the file does not exist, a default dict with username ``admin``
    and empty password/security fields is returned.
    """
    if os.path.exists(CONFIG_PATH):
        with open(CONFIG_PATH, "r") as config_file:
            return json.load(config_file)
    return {"username": "admin", "password": "", "security_question": "", "security_answer": ""}
|
||||
|
||||
# Simpan konfigurasi
|
||||
def save_config(config):
    """Write ``config`` to ``CONFIG_PATH`` as JSON indented by four spaces."""
    with open(CONFIG_PATH, "w") as config_file:
        json.dump(config, config_file, indent=4)
|
||||
|
||||
# Validasi kekuatan password
|
||||
def is_strong_password(password):
    """Return True when ``password`` meets the strength policy.

    The policy requires at least 8 characters and at least one lowercase
    letter, one uppercase letter, one digit, and one character that is
    neither a word character nor whitespace.
    """
    if len(password) < 8:
        return False
    required_patterns = (r"[a-z]", r"[A-Z]", r"\d", r"[^\w\s]")
    return all(re.search(pattern, password) for pattern in required_patterns)
|
||||
|
||||
# Login form
|
||||
def login():
    """Render the admin login form, including the password-reset flow.

    Returns:
        True when the session is already authenticated, otherwise False
        (after rendering the login and, if requested, reset widgets).
    """
    config = load_config()

    for flag in ("authenticated", "show_reset", "login_failed"):
        if flag not in st.session_state:
            st.session_state[flag] = False

    if st.session_state.authenticated:
        return True

    st.subheader("🔐 Login Admin")

    username = st.text_input("Username")
    password = st.text_input("Password", type="password")

    if st.button("Login"):
        stored_hash = config.get("password") or ""
        try:
            # bcrypt raises ValueError on an empty or malformed hash (e.g. when
            # the config file is missing); treat that as a failed login.
            password_ok = bool(stored_hash) and bcrypt.checkpw(
                password.encode(), stored_hash.encode()
            )
        except ValueError:
            password_ok = False

        if username == config.get("username") and password_ok:
            st.session_state.authenticated = True
            st.session_state.login_failed = False
            st.rerun()
        else:
            st.session_state.login_failed = True
            st.error("❌ Username atau password salah.")

    # Only offered after a failed login attempt.
    if st.session_state.login_failed and not st.session_state.show_reset:
        if st.button("Lupa Password"):
            st.session_state.show_reset = True

    if st.session_state.show_reset:
        st.info("Jawab pertanyaan rahasia untuk reset password.")
        question = config.get("security_question")
        answer = st.text_input("Pertanyaan:", placeholder=question)
        new_pass = st.text_input("Password Baru", type="password")
        if st.button("Ganti Password"):
            expected = (config.get("security_answer") or "").strip().lower()
            # An unset security answer must never match, otherwise an empty
            # input would allow anyone to reset the password.
            if not expected or answer.strip().lower() != expected:
                st.error("❌ Jawaban salah.")
            elif not is_strong_password(new_pass):
                st.warning("Password harus minimal 8 karakter, mengandung huruf besar, huruf kecil, angka, dan simbol.")
            else:
                config["password"] = bcrypt.hashpw(new_pass.encode(), bcrypt.gensalt()).decode()
                save_config(config)
                st.success("✅ Password berhasil diganti!")
                st.session_state.show_reset = False
                st.session_state.login_failed = False

    return False
|
||||
|
||||
|
||||
# Form reset password via pertanyaan rahasia
|
||||
def show_password_reset_form():
    """Render a password-reset form gated by the configured security question.

    On a correct answer and a sufficiently strong new password, the bcrypt
    hash is stored via ``save_config`` and the reset flags in the session
    state are cleared.
    """
    config = load_config()
    st.subheader("🔐 Reset Password")

    pertanyaan = config.get("security_question", "Pertanyaan tidak tersedia")
    jawaban_input = st.text_input(pertanyaan)
    new_pass = st.text_input("Password Baru", type="password")

    if not st.button("Ganti Password"):
        return

    expected = (config.get("security_answer") or "").strip().lower()
    # An unset security answer must never match, otherwise an empty input
    # would allow anyone to reset the password.
    if not expected or jawaban_input.strip().lower() != expected:
        st.error("Jawaban pertanyaan salah.")
        return

    if not is_strong_password(new_pass):
        st.warning("Password harus minimal 8 karakter, mengandung huruf besar, huruf kecil, angka, dan simbol.")
        return

    config["password"] = bcrypt.hashpw(new_pass.encode(), bcrypt.gensalt()).decode()
    save_config(config)
    st.success("Password berhasil diubah! Silakan login kembali.")
    # Reset all flags.
    st.session_state.show_reset = False
    st.session_state.show_reset_button = False
|
||||
|
||||
# Logout
|
||||
def logout():
    """Render a sidebar logout button with a confirmation step.

    Confirming clears the whole session state and reruns the app; cancelling
    dismisses the confirmation dialog.
    """
    with st.sidebar:
        if st.button("🚪 Logout"):
            st.session_state.confirm_logout = True

    # Show the confirmation dialog once logout was requested.
    if st.session_state.get("confirm_logout", False):
        with st.sidebar:
            st.warning("Apakah Anda yakin ingin logout?")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("✅ Ya, Logout"):
                    st.session_state.clear()
                    st.rerun()
            with col2:
                if st.button("❌ Batal"):
                    st.session_state.confirm_logout = False
                    # The dialog was already drawn in this run; rerun so it
                    # disappears immediately.
                    st.rerun()
|
|
@ -0,0 +1,28 @@
|
|||
from sqlalchemy import create_engine, text
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
|
||||
# Shared SQLAlchemy engine for the SQLite database file at data/files.db.
engine = create_engine("sqlite:///data/files.db")
|
||||
|
||||
def init_db():
    """Create the ``uploaded_files`` metadata table if it does not exist."""
    # engine.begin() commits on success; a bare connect() would rely on the
    # driver's implicit DDL autocommit.
    with engine.begin() as conn:
        conn.execute(text("""
            CREATE TABLE IF NOT EXISTS uploaded_files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_name TEXT,
                uploaded_at TEXT
            )
        """))
|
||||
|
||||
def save_file_metadata(file_name):
    """Insert a metadata row for an uploaded file and return its new id.

    The upload time is stored as the ISO-8601 string of ``datetime.now()``.
    """
    params = {"f": file_name, "t": datetime.now().isoformat()}
    insert_stmt = text("""
        INSERT INTO uploaded_files (file_name, uploaded_at)
        VALUES (:f, :t)
    """)
    with engine.begin() as conn:
        conn.execute(insert_stmt, params)
        # Same connection, so this is the row inserted just above.
        new_row = conn.execute(text("SELECT last_insert_rowid()")).fetchone()
    return new_row[0]
|
||||
|
||||
def save_dataset(file_id, df):
    """Store ``df`` in table ``dataset_<file_id>``, replacing any existing one."""
    table_name = f"dataset_{file_id}"
    df.to_sql(table_name, con=engine, index=False, if_exists="replace")
|
|
@ -0,0 +1,116 @@
|
|||
import pandas as pd
|
||||
import re
|
||||
|
||||
# Latitude/longitude per region name, used to place regions on the map.
_KOORDINAT_WILAYAH = {
    'Kota Surabaya': (-7.2575, 112.7521),
    'Kota Malang': (-7.9819, 112.6265),
    'Kota Kediri': (-7.8166, 112.0114),
    'Kabupaten Sidoarjo': (-7.4467, 112.7181),
    'Kabupaten Gresik': (-7.1607, 112.6530),
    'Kota Mojokerto': (-7.4722, 112.4336),
    'Kota Pasuruan': (-7.6451, 112.9086),
    'Kabupaten Lamongan': (-7.1120, 112.4148),
    'Kabupaten Jombang': (-7.5469, 112.2334),
    'Kabupaten Tuban': (-6.8971, 112.0505),
    'Kabupaten Bangkalan': (-7.0335, 112.7467),
    'Kabupaten Bojonegoro': (-7.1500, 111.8833),
    'Kabupaten Mojokerto': (-7.4700, 112.4330),
    'Kabupaten Pasuruan': (-7.6895, 112.6855),
    'Kabupaten Probolinggo': (-7.7764, 113.2196),
    'Kota Probolinggo': (-7.7543, 113.2150),
    'Kabupaten Banyuwangi': (-8.2186, 114.3690),
    'Kabupaten Blitar': (-8.0941, 112.3096),
    'Kabupaten Bondowoso': (-7.9135, 113.8213),
    'Kabupaten Jember': (-8.1725, 113.7005),
    'Kabupaten Lumajang': (-8.1350, 113.2249),
    'Kabupaten Madiun': (-7.6295, 111.5230),
    'Kabupaten Magetan': (-7.6524, 111.3355),
    'Kabupaten Nganjuk': (-7.6055, 111.9031),
    'Kabupaten Ngawi': (-7.4059, 111.4468),
    'Kabupaten Pamekasan': (-7.1566, 113.4780),
    'Kabupaten Ponorogo': (-7.8659, 111.4691),
    'Kabupaten Sampang': (-7.1917, 113.2490),
    'Kabupaten Sumenep': (-6.9245, 113.9066),
    'Kabupaten Situbondo': (-7.7069, 114.0099),
    'Kabupaten Trenggalek': (-8.0593, 111.7084),
    'Kabupaten Tulungagung': (-8.0657, 111.9010),
    'Kota Batu': (-7.8671, 112.5239),
    'Kota Blitar': (-8.0987, 112.1680),
    'Kota Madiun': (-7.6309, 111.5230),
    'Kabupaten Kediri': (-7.8480, 112.0113),
    'Kabupaten Malang': (-8.1065, 112.6660),
    'Kabupaten Pacitan': (-8.1948, 111.1056),
}


def preprocess_initial(df):
    """Turn a raw headerless price sheet into a per-region summary table.

    Rows before the first row whose first cell looks like a number ("1",
    "1.1", ...) are discarded. Rows whose number has no dot are regions;
    rows with a dot are markets belonging to the most recent region above.

    Args:
        df: Raw frame; first column is the row number, second the location
            name, remaining columns are price values.

    Returns:
        DataFrame with columns ``No``, ``Lokasi``, ``JumlahPasar``,
        ``RataRataHarga``, ``RataRataHargaTertinggiDiPasar``,
        ``PasarDenganRataRataHargaTertinggi``, ``Latitude``, ``Longitude``.

    Raises:
        ValueError: If no row with a numeric first cell is found.
    """
    # Find the first data row (numbered 1, 1.1, ...). A non-capturing group
    # avoids pandas' "pattern has match groups" warning.
    first_col = df.iloc[:, 0].astype(str)
    is_data_row = first_col.str.contains(r'^\d+(?:\.\d+)?$', na=False)
    if not is_data_row.any():
        raise ValueError("Tidak ditemukan baris data bernomor pada file yang diunggah.")
    # Positional offset, so a non-default index cannot mis-slice the frame.
    start_pos = int(is_data_row.to_numpy().argmax())

    data = df.iloc[start_pos:].reset_index(drop=True)
    data.columns = ['No', 'Lokasi'] + [f'Hari_{i}' for i in range(1, len(data.columns) - 1)]

    # Clean the identifier columns (strip zero-width / non-breaking spaces).
    data['No'] = data['No'].astype(str).str.strip()
    data['Lokasi'] = (
        data['Lokasi'].astype(str)
        .apply(lambda x: re.sub(r'[\u200b\xa0]+', ' ', x))
        .str.strip()
    )

    # Convert prices to numbers; "." is a thousands separator in text cells.
    for col in data.columns[2:]:
        column = data[col]
        if not pd.api.types.is_numeric_dtype(column):
            # Already-numeric (or all-NaN) columns have no .str accessor.
            column = column.str.replace('.', '', regex=False)
        data[col] = pd.to_numeric(column, errors='coerce')

    # Region rows have no dot in their number; propagate the name downwards.
    data['IsWilayah'] = ~data['No'].str.contains(r'\.', na=False)
    data['Wilayah'] = data['Lokasi'].where(data['IsWilayah']).ffill()

    # Market rows and their average price across all day columns.
    pasar_df = data[~data['IsWilayah']].copy()
    harga_cols = [col for col in data.columns if col.startswith("Hari_")]
    pasar_df['RataRataHargaPasar'] = pasar_df[harga_cols].mean(axis=1)

    # Per-region summary.
    rekap_df = pasar_df.groupby('Wilayah').agg(
        JumlahPasar=('Lokasi', 'count'),
        RataRataHarga=('RataRataHargaPasar', 'mean'),
        RataRataHargaTertinggiDiPasar=('RataRataHargaPasar', 'max')
    ).reset_index()

    # Name of the market with the highest average price in each region.
    idx_max = pasar_df.groupby('Wilayah')['RataRataHargaPasar'].idxmax()
    tertinggi_df = pasar_df.loc[idx_max, ['Wilayah', 'Lokasi']].rename(
        columns={'Lokasi': 'PasarDenganRataRataHargaTertinggi'}
    )

    final_df = rekap_df.merge(tertinggi_df, on='Wilayah')
    final_df.insert(0, 'No', range(1, len(final_df) + 1))
    final_df.rename(columns={'Wilayah': 'Lokasi'}, inplace=True)

    # Attach coordinates; unknown regions get missing values.
    final_df['Latitude'] = final_df['Lokasi'].map(
        lambda x: _KOORDINAT_WILAYAH.get(x, (None, None))[0]
    )
    final_df['Longitude'] = final_df['Lokasi'].map(
        lambda x: _KOORDINAT_WILAYAH.get(x, (None, None))[1]
    )

    return final_df
|
||||
|
||||
|
||||
def winsorize_series(series):
    """Clip ``series`` to the Tukey fences ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``."""
    q1, q3 = series.quantile(0.25), series.quantile(0.75)
    whisker = 1.5 * (q3 - q1)
    return series.clip(lower=q1 - whisker, upper=q3 + whisker)
|
||||
|
||||
|
||||
def preprocess(df):
    """Return a copy of ``df`` with both price feature columns winsorized."""
    cleaned = df.copy()
    for feature in ('RataRataHarga', 'RataRataHargaTertinggiDiPasar'):
        cleaned[feature] = winsorize_series(cleaned[feature])
    return cleaned
|
|
@ -0,0 +1,55 @@
|
|||
import os
|
||||
import pickle
|
||||
import pandas as pd
|
||||
from sklearn.cluster import KMeans
|
||||
from utils.db import engine
|
||||
from utils.preprocessing import preprocess
|
||||
from sqlalchemy import text
|
||||
|
||||
MODELS_DIR = "models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Columns the KMeans model is trained on.
_FEATURE_COLUMNS = ['RataRataHarga', 'RataRataHargaTertinggiDiPasar']


def _fit_and_save(table_names, empty_message):
    """Fit a 3-cluster KMeans on the given dataset tables and pickle it.

    Each table is preprocessed on its own before the rows are concatenated.

    Raises:
        ValueError: With ``empty_message`` when ``table_names`` is empty.
    """
    frames = [preprocess(pd.read_sql_table(name, con=engine)) for name in table_names]
    if not frames:
        raise ValueError(empty_message)

    all_data = pd.concat(frames, ignore_index=True)
    # KMeans rejects NaN input; drop rows with missing feature values.
    features = all_data[_FEATURE_COLUMNS].dropna()

    model = KMeans(n_clusters=3, random_state=42)
    model.fit(features)

    with open(os.path.join(MODELS_DIR, "kmeans_model.pkl"), "wb") as f:
        pickle.dump(model, f)


def train_and_save_kmeans():
    """Retrain the model on every ``dataset_*`` table and save it.

    Raises:
        ValueError: If the database contains no dataset tables.
    """
    # NOTE(review): this also picks up dataset tables whose metadata row was
    # deleted, if any such orphan tables exist - confirm that is intended.
    with engine.connect() as conn:
        rows = conn.exec_driver_sql("""
            SELECT name FROM sqlite_master
            WHERE type='table' AND name LIKE 'dataset_%'
        """).fetchall()

    _fit_and_save([name for (name,) in rows], "Tidak ada dataset untuk pelatihan.")


def train_kmeans_with_selected_files(file_ids):
    """Retrain the model on the dataset tables of ``file_ids`` and save it.

    Raises:
        ValueError: If ``file_ids`` is empty.
    """
    _fit_and_save(
        [f"dataset_{file_id}" for file_id in file_ids],
        "Tidak ada dataset terpilih untuk pelatihan.",
    )
|
|
@ -0,0 +1,218 @@
|
|||
import streamlit as st
|
||||
import plotly.express as px
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import pandas as pd
|
||||
from sqlalchemy import text
|
||||
from utils.db import engine
|
||||
from sklearn.metrics import silhouette_score
|
||||
|
||||
def show_scorecard(df):
    """Render four summary metrics for the (possibly filtered) region table.

    Shows the number of locations, the total market count, the mean price and
    the highest per-market average with its market and region. Placeholders
    are shown when no price data is available.
    """
    col1, col2, col3, col4 = st.columns(4)

    # Number of locations = number of rows.
    col1.metric("Jumlah Lokasi", len(df))

    # Total number of markets across all rows.
    total_pasar = int(df["JumlahPasar"].sum())
    col2.metric("Total Jumlah Pasar", total_pasar)

    # Mean price; NaN (e.g. empty selection) would otherwise render "Rp nan".
    mean_price = df['RataRataHarga'].mean()
    if pd.isna(mean_price):
        col3.metric("Rata-rata Harga Jawa Timur", "Data tidak tersedia")
    else:
        col3.metric("Rata-rata Harga Jawa Timur", f"Rp {round(mean_price, 2):,.2f}")

    # Highest market price; idxmax needs at least one non-missing value.
    highest = df['RataRataHargaTertinggiDiPasar']
    if df.empty or highest.isna().all():
        col4.metric("Harga Tertinggi di Pasar", "Data tidak tersedia", "")
        return

    max_row = df.loc[highest.idxmax()]
    nama_pasar = max_row['PasarDenganRataRataHargaTertinggi']
    nama_lokasi = max_row['Lokasi']
    harga_tertinggi = max_row['RataRataHargaTertinggiDiPasar']
    col4.metric(
        label="Harga Tertinggi di Pasar",
        value=f"Rp {harga_tertinggi:,.2f}",
        delta=f"{nama_pasar} - {nama_lokasi}"
    )
|
||||
|
||||
|
||||
def show_clustermap(df):
    """Plot regions on a map, coloured and sized by their cluster label.

    Cluster ids are translated with ``st.session_state["cluster_labels"]``
    (falling back to a default mapping). Missing columns or plotting errors
    are reported in the UI instead of being raised.
    """
    st.subheader("🗺️ Wilayah Dengan Cluster Tingkat Harga di Jawa Timur")

    required_columns = {"cluster", "Latitude", "Longitude", "Lokasi", "RataRataHarga"}
    missing_cols = required_columns - set(df.columns)
    if missing_cols:
        st.error(f"❌ Kolom yang hilang: {missing_cols}")
        st.info(f"✅ Kolom yang tersedia: {list(df.columns)}")
        return

    try:
        plot_df = df.copy()
        plot_df['cluster'] = plot_df['cluster'].astype(int)
        numeric_cols = ['Latitude', 'Longitude', 'RataRataHarga']
        for name in numeric_cols:
            plot_df[name] = pd.to_numeric(plot_df[name], errors='coerce')
        # Rows without coordinates or price cannot be plotted.
        plot_df = plot_df.dropna(subset=numeric_cols)

        if len(plot_df) == 0:
            st.error("❌ Tidak ada data valid untuk ditampilkan")
            return

        cluster_labels = st.session_state.get(
            "cluster_labels", {0: "Tinggi", 1: "Sedang", 2: "Rendah"}
        )
        color_map = {"Tinggi": "#4CAF50", "Rendah": "#F44336", "Sedang": "#FFC107"}
        size_map = {"Tinggi": 20, "Sedang": 10, "Rendah": 5}

        plot_df['cluster_label'] = plot_df['cluster'].map(cluster_labels)
        # Labels without a configured size fall back to 8.
        plot_df['size'] = plot_df['cluster_label'].map(size_map).fillna(8)

        fig = px.scatter_mapbox(
            plot_df,
            lat="Latitude",
            lon="Longitude",
            color="cluster_label",
            size="size",
            hover_name="Lokasi",
            hover_data={"RataRataHarga": True, "Latitude": False, "Longitude": False},
            zoom=7,
            height=550,
            color_discrete_map=color_map,
            size_max=30
        )
        fig.update_layout(
            mapbox_style="open-street-map",
            mapbox_center={"lat": -7.5, "lon": 112.5},
            margin={"r": 0, "t": 0, "l": 0, "b": 0}
        )
        st.plotly_chart(fig, use_container_width=True)

    except Exception as e:
        # Best effort: show diagnostics in the page rather than crashing it.
        st.error(f"❌ Error saat membuat peta: {str(e)}")
        st.write("Debug info:")
        st.write(f"Data shape: {df.shape}")
        st.write(f"Columns: {list(df.columns)}")
        if len(df) > 0:
            st.write("Sample data:")
            st.dataframe(df.head(3))
|
||||
|
||||
def show_top_bottom_locations(df):
    """Show two horizontal bar charts: the 5 highest and 5 lowest priced locations."""
    st.subheader("📊 5 Lokasi dengan Harga Tertinggi dan Terendah")

    if not {"Lokasi", "RataRataHarga"}.issubset(df.columns):
        st.error("Data tidak mengandung kolom 'Lokasi' atau 'RataRataHarga'")
        return

    per_location = df.groupby("Lokasi")["RataRataHarga"].mean().reset_index()
    highest = per_location.sort_values(by="RataRataHarga", ascending=False).head(5)
    lowest = per_location.sort_values(by="RataRataHarga", ascending=True).head(5)

    def draw_chart(heading_html, frame):
        # Both charts share the same layout; only heading and data differ.
        st.markdown(heading_html, unsafe_allow_html=True)
        fig = px.bar(
            frame.sort_values("RataRataHarga"),
            x="RataRataHarga",
            y="Lokasi",
            orientation="h",
            text="RataRataHarga",
            color_discrete_sequence=["#73D2F6"]
        )
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', width=0.4)
        fig.update_layout(xaxis_title=None, yaxis_title=None, margin=dict(l=10, r=10, t=30, b=10))
        st.plotly_chart(fig, use_container_width=True)

    left, right = st.columns([1, 1])
    with left:
        draw_chart("<h4 style='text-align:center;'>5 Lokasi Tertinggi</h4>", highest)
    with right:
        draw_chart("<h4 style='text-align:center;'>5 Lokasi Terendah</h4>", lowest)
|
||||
|
||||
def show_price_trend():
    """Plot the mean ``RataRataHarga`` of every stored dataset over upload time.

    Dataset tables without a matching ``uploaded_files`` row are skipped;
    tables that cannot be read produce a warning in the UI.
    """
    st.subheader("📈 Linechart Rata-Rata Harga Seluruh Kab/Kota")
    tables = pd.read_sql("""
        SELECT name FROM sqlite_master
        WHERE type='table' AND name LIKE 'dataset_%'
    """, engine)

    # Bound parameter instead of interpolating the id into the SQL string.
    uploaded_at_query = text("SELECT uploaded_at FROM uploaded_files WHERE id = :file_id")

    trend_data = []
    for table_name in tables["name"]:
        try:
            file_id = int(table_name.replace("dataset_", ""))
            uploaded_at_df = pd.read_sql(
                uploaded_at_query, engine, params={"file_id": file_id}
            )
            if uploaded_at_df.empty:
                continue  # Skip when there is no upload-time information.

            df = pd.read_sql_table(table_name, engine)
            trend_data.append({
                "uploaded_at": uploaded_at_df.iloc[0, 0],
                "RataRataHarga": df["RataRataHarga"].mean()
            })
        except Exception as e:
            # Best effort: one unreadable table must not hide the others.
            st.warning(f"Gagal membaca data dari {table_name}: {e}")

    if not trend_data:
        st.info("Belum ada data yang bisa ditampilkan untuk tren harga.")
        return

    df_trend = pd.DataFrame(trend_data)
    df_trend['uploaded_at'] = pd.to_datetime(df_trend['uploaded_at'])
    df_trend = df_trend.sort_values("uploaded_at")

    fig = px.line(df_trend, x="uploaded_at", y="RataRataHarga", markers=True)
    fig.update_layout(
        xaxis_title="Waktu Upload",
        yaxis_title="Rata-rata Harga",
        height=450
    )
    st.plotly_chart(fig, use_container_width=True)
|
||||
|
||||
def show_data_table(df):
    """Display ``df`` as a table with a fixed set of column positions hidden."""
    st.subheader("🗒️ Data Tabel")

    if df.empty:
        st.info("Data kosong atau belum dipilih.")
        return

    # Drop a leading index-like column (e.g. "Unnamed: 0") if present.
    leading = df.columns[0]
    if leading.lower() in ["unnamed: 0", "index", "0"]:
        df = df.drop(columns=leading)

    # Hide the 1st, 7th, 8th and 9th columns (0-based positions), ignoring
    # positions beyond the number of columns actually present.
    column_count = len(df.columns)
    hidden_names = [df.columns[pos] for pos in (0, 6, 7, 8) if pos < column_count]
    visible_names = [name for name in df.columns if name not in hidden_names]

    st.dataframe(df[visible_names])
|
Loading…
Reference in New Issue