This commit is contained in:
agungbima009 2025-06-28 15:28:38 +07:00
commit 669d069b02
21 changed files with 809 additions and 0 deletions

Binary file not shown.

Binary file not shown.

180
admin_dashboard.py Normal file
View File

@ -0,0 +1,180 @@
import streamlit as st
import pandas as pd
import time
import os
import pickle
from utils.db import save_file_metadata, save_dataset, engine
from utils.preprocessing import preprocess, preprocess_initial
from utils.train_kmeans import train_and_save_kmeans, train_kmeans_with_selected_files
from sqlalchemy import text
import utils.auth as auth
MODELS_DIR = "models"
DATA_DIR = "data"
# Make sure the data directory exists as soon as this module is imported.
os.makedirs(DATA_DIR, exist_ok=True)

# Load the persisted KMeans model once at import; it stays None until a model file exists.
kmeans_model_path = os.path.join(MODELS_DIR, "kmeans_model.pkl")
if os.path.exists(kmeans_model_path):
    # NOTE(security): unpickling executes arbitrary code; only load model files
    # that this application wrote itself.
    # The context manager closes the handle that `pickle.load(open(...))` leaked.
    with open(kmeans_model_path, "rb") as model_file:
        kmeans_model = pickle.load(model_file)
else:
    kmeans_model = None
def _load_current_kmeans_model():
    """Return the KMeans model currently stored on disk, or ``None`` if absent.

    The module-level ``kmeans_model`` is read only once, when this module is
    imported, so it does not reflect a model that was retrained afterwards.
    Reading the file on demand keeps the upload preview in sync.
    """
    if not os.path.exists(kmeans_model_path):
        return None
    # NOTE(security): unpickling executes arbitrary code; this file must only
    # ever be written by the application's own training functions.
    with open(kmeans_model_path, "rb") as model_file:
        return pickle.load(model_file)


def _render_upload_section():
    """Render the CSV upload flow: preview, preprocessing report and save button."""
    st.subheader("📤 Upload Data Baru")
    uploaded_file = st.file_uploader("Upload File CSV", type=["csv"])
    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file, header=None)
    st.write("Data Awal:")
    st.dataframe(df.head())

    initial_df = preprocess_initial(df)
    st.write("Setelah Preprocessing Awal (struktur & rekap):")
    st.dataframe(initial_df.head())

    preprocessed_df = preprocess(initial_df)
    result_df = preprocessed_df.copy()
    current_model = _load_current_kmeans_model()
    if current_model is not None:
        result_df["cluster"] = current_model.predict(
            result_df[["RataRataHarga", "RataRataHargaTertinggiDiPasar"]]
        )
    else:
        st.warning("Model belum tersedia. Silakan reset model terlebih dahulu.")

    # Clustering preview for the admin; no cluster_label column is stored.
    preview_df = result_df.copy()
    st.write("Hasil Preprocessing (winsorization):")
    st.dataframe(preprocessed_df.head())

    # Report, per shared column, how many values preprocessing changed.
    st.write("Perbandingan Perubahan Kolom:")
    cols_common = list(set(initial_df.columns) & set(preprocessed_df.columns))
    diff_report = []
    for col in cols_common:
        if not initial_df[col].equals(preprocessed_df[col]):
            changed = (initial_df[col] != preprocessed_df[col]).sum()
            diff_report.append((col, changed))
    if diff_report:
        diff_df = pd.DataFrame(diff_report, columns=["Kolom", "Jumlah Data Berubah"])
        st.dataframe(diff_df)
    else:
        st.info("Tidak ada perubahan terdeteksi pada kolom yang sama.")

    st.write("Hasil Clustering Preview:")
    st.dataframe(preview_df)

    if st.button("Konfirmasi dan Simpan ke Database"):
        file_id = save_file_metadata(uploaded_file.name)
        save_dataset(file_id, result_df)
        with st.spinner("Melatih ulang model KMeans..."):
            train_and_save_kmeans()
        st.success(f"File berhasil disimpan ke database dengan ID: {file_id} dan model telah diperbarui!")


def _render_model_management():
    """Render the model reset and the manual retrain-from-selected-files controls."""
    st.subheader("⚙️ Manajemen Model")
    with st.expander("Reset Model"):
        st.warning("Ini akan menghapus model lama dan melatih ulang berdasarkan seluruh data yang ada.")
        if st.checkbox("Saya yakin ingin mereset model"):
            if st.button("Reset Model (Latih ulang dari seluruh data)"):
                with st.spinner("Sedang melatih ulang model dari awal..."):
                    train_and_save_kmeans()
                st.success("✅ Model berhasil di-reset dan dilatih ulang dari seluruh data yang ada.")
                # Leave the success message visible briefly before rerunning the page.
                time.sleep(3)
                st.rerun()

    with st.expander("🔁 Latih Ulang Manual (Pilih File)"):
        file_options = pd.read_sql("SELECT id, file_name FROM uploaded_files ORDER BY uploaded_at DESC", engine)
        if not file_options.empty:
            selected_ids = st.multiselect(
                "Pilih file yang akan digunakan untuk pelatihan:",
                options=file_options['id'],
                format_func=lambda x: f"ID {x} - {file_options[file_options['id']==x]['file_name'].values[0]}",
            )
            if st.button("Latih Model dengan File Terpilih"):
                if selected_ids:
                    with st.spinner("Melatih model dengan file terpilih..."):
                        train_kmeans_with_selected_files(selected_ids)
                    st.success("✅ Model berhasil dilatih ulang dengan file yang dipilih.")
                    time.sleep(3)
                    st.rerun()
                else:
                    st.warning("Silakan pilih minimal satu file terlebih dahulu.")


def _render_label_settings():
    """Render the selectors that map cluster ids 0-2 to display labels."""
    st.subheader("🏷️ Pengaturan Label Cluster untuk Visualisasi")
    st.markdown("Label ini hanya digunakan untuk visualisasi, tidak memengaruhi data asli.")
    label_options = ["Tinggi", "Sedang", "Rendah"]
    assigned_labels = {}
    used_labels = set()
    for cluster_id in range(3):
        current_label = st.session_state.cluster_labels.get(cluster_id)
        # Offer labels not taken by an earlier cluster, plus this cluster's stored label.
        available_labels = [
            lbl for lbl in label_options
            if lbl not in used_labels or lbl == current_label
        ]
        # Fall back to the first option if the stored label is not a known label,
        # instead of letting list.index raise ValueError.
        default_index = (
            available_labels.index(current_label)
            if current_label in available_labels
            else 0
        )
        selected = st.selectbox(
            f"Label untuk Cluster {cluster_id}",
            options=available_labels,
            index=default_index,
            key=f"viz_label_{cluster_id}",
        )
        assigned_labels[cluster_id] = selected
        used_labels.add(selected)

    if st.button("Terapkan Label Visualisasi"):
        st.session_state.cluster_labels = assigned_labels
        st.session_state.label_applied = True
        st.success("✅ Label visualisasi cluster berhasil diperbarui.")


def _render_file_history():
    """Render one expander per uploaded file with preview, delete and rename controls."""
    st.markdown("---")
    st.header("📁 Daftar File yang Sudah Diupload")
    files = pd.read_sql("SELECT * FROM uploaded_files ORDER BY uploaded_at DESC", engine)
    if files.empty:
        return

    for _, row in files.iterrows():
        with st.expander(f"{row['file_name']} ({row['uploaded_at'][:19]})"):
            df_preview = pd.read_sql_table(f"dataset_{row['id']}", con=engine)
            preview_option = st.radio(
                "Tampilkan:",
                ["5 Data Pertama", "Seluruh Data"],
                horizontal=True,
                key=f"preview_option_{row['id']}",
            )
            if preview_option == "5 Data Pertama":
                st.dataframe(df_preview.head())
            else:
                st.dataframe(df_preview)

            # Third column is intentionally left empty to keep the original layout.
            col1, col2, _ = st.columns(3)
            with col1:
                if st.checkbox(f"Konfirmasi hapus file ID {row['id']}"):
                    if st.button("Hapus", key=f"hapus_{row['id']}"):
                        file_id = int(row['id'])
                        with engine.begin() as conn:
                            conn.execute(text("DELETE FROM uploaded_files WHERE id = :id"), {"id": file_id})
                            # Also drop the file's dataset table; otherwise it is orphaned and
                            # train_and_save_kmeans keeps training on the deleted data.
                            # Identifiers cannot be bound parameters; file_id is an int.
                            conn.execute(text(f'DROP TABLE IF EXISTS "dataset_{file_id}"'))
                        st.success("✅ File berhasil dihapus.")
                        st.rerun()
            with col2:
                new_name = st.text_input("Ganti Nama File", value=row['file_name'], key=f"rename_{row['id']}")
                if st.button("Simpan Nama Baru", key=f"simpan_{row['id']}"):
                    with engine.begin() as conn:
                        conn.execute(
                            text("UPDATE uploaded_files SET file_name = :name WHERE id = :id"),
                            {"name": new_name, "id": row['id']},
                        )
                    st.rerun()


def render():
    """Render the admin page: upload, model management, label settings, file history.

    Initialises the ``cluster_labels`` and ``label_applied`` session keys when
    missing, and returns early with a warning if the session is not
    authenticated.
    """
    # Initialise the session state this page relies on.
    if "cluster_labels" not in st.session_state:
        st.session_state.cluster_labels = {0: "Tinggi", 1: "Sedang", 2: "Rendah"}
    if "label_applied" not in st.session_state:
        st.session_state.label_applied = False

    if not st.session_state.get("authenticated"):
        st.warning("Silakan login terlebih dahulu.")
        return

    auth.logout()
    st.title("Admin Panel - Upload Data")

    _render_upload_section()
    _render_model_management()
    _render_label_settings()
    _render_file_history()

1
config.json Normal file
View File

@ -0,0 +1 @@
{"username": "admin", "password": "$2b$12$iHuWFWeYpb4FBA0W9htyO.qLxZzaBydN.7vrXtGJypFzEi49GJFZi", "security_question": "Nama hewan peliharaan pertama Anda?", "security_answer": "nalakucinglucu"}

64
dashboard_user.py Normal file
View File

@ -0,0 +1,64 @@
import streamlit as st
import pandas as pd
# from sqlalchemy import create_engine
from utils.db import init_db, engine
from utils.preprocessing import preprocess
from utils.visualizations import show_scorecard, show_clustermap, show_top_bottom_locations, show_price_trend, show_data_table
import pickle
# Load the persisted KMeans model at import time. A missing model file must not
# raise here: main.py imports this module unconditionally, so an import error
# would also make the admin page (where a model can be trained) unreachable.
try:
    # NOTE(security): unpickling executes arbitrary code; only load model files
    # written by this application.
    with open("models/kmeans_model.pkl", "rb") as _model_file:
        model = pickle.load(_model_file)
except FileNotFoundError:
    model = None
# Create the uploaded_files table if needed before any page queries it.
init_db()
def load_dataset(file_id):
    """Read the ``dataset_<file_id>`` table into a DataFrame."""
    table_name = f"dataset_{file_id}"
    dataset = pd.read_sql_table(table_name, con=engine)
    return dataset
_PAGE_TITLE_HTML = """
<h1 style='text-align: center;
margin-bottom: 50px;
padding-bottom: 50px;
margin-top: 0px;'>
Visualisasi Harga Beras Medium Jawa Timur
</h1>
"""


def _load_latest_model():
    """Return the KMeans model currently on disk, or ``None`` if the file is missing.

    Reading the file per render (rather than using the model loaded at import
    time) picks up a model that the admin page retrained after this module
    was imported.
    """
    try:
        # NOTE(security): unpickling executes arbitrary code; only load model
        # files written by this application.
        with open("models/kmeans_model.pkl", "rb") as model_file:
            return pickle.load(model_file)
    except FileNotFoundError:
        return None


def render():
    """Render the public dashboard for one selected dataset.

    Lets the user pick an uploaded dataset plus cluster and location filters in
    the sidebar, predicts a cluster per row with the stored KMeans model, and
    passes the filtered frame to the visualization helpers.
    """
    st.markdown(_PAGE_TITLE_HTML, unsafe_allow_html=True)

    files = pd.read_sql("SELECT * FROM uploaded_files ORDER BY uploaded_at DESC", engine)
    if files.empty:
        st.warning("Belum ada data yang diunggah admin.")
        return

    # Sidebar label -> file id; the label shows the timestamp down to seconds.
    file_dict = {f"{row['file_name']} ({row['uploaded_at'][:19]})": row['id'] for _, row in files.iterrows()}
    selected_label = st.sidebar.selectbox("Pilih Dataset:", list(file_dict.keys()))
    selected_file_id = file_dict.get(selected_label)
    if selected_file_id is None:
        st.warning("Silakan pilih dataset terlebih dahulu.")
        return

    current_model = _load_latest_model()
    if current_model is None:
        st.warning("Model clustering belum tersedia. Silakan hubungi admin untuk melatih model.")
        return

    df = load_dataset(selected_file_id)
    df_clean = preprocess(df)
    df_clean["cluster"] = current_model.predict(df_clean[["RataRataHarga", "RataRataHargaTertinggiDiPasar"]])

    # Sidebar cluster filter.
    cluster_options = sorted(df_clean["cluster"].unique())
    selected_cluster = st.sidebar.multiselect("Pilih Cluster:", options=cluster_options, default=cluster_options)

    # Sidebar location filter; the options list is empty when the column is absent.
    has_lokasi = "Lokasi" in df_clean.columns
    lokasi_options = sorted(df_clean["Lokasi"].unique()) if has_lokasi else []
    selected_lokasi = st.sidebar.multiselect("Pilih Lokasi:", options=lokasi_options, default=lokasi_options)

    row_mask = df_clean["cluster"].isin(selected_cluster)
    if has_lokasi:
        # Only filter by location when the column exists; indexing it
        # unconditionally raised KeyError for datasets without "Lokasi".
        row_mask &= df_clean["Lokasi"].isin(selected_lokasi)
    filtered_df = df_clean[row_mask]

    show_scorecard(filtered_df)
    show_clustermap(filtered_df)
    show_top_bottom_locations(filtered_df)
    show_price_trend()
    show_data_table(filtered_df)

BIN
data/files.db Normal file

Binary file not shown.

19
main.py Normal file
View File

@ -0,0 +1,19 @@
import streamlit as st
import admin_dashboard
import dashboard_user
from utils.auth import login
# Page-wide settings are issued before any other Streamlit command in this script.
st.set_page_config(page_title="Visualisasi Clustering", layout="wide")

# Sidebar page switcher.
_page_names = ["User Dashboard", "Admin Dashboard"]
page = st.sidebar.selectbox("Pilih Halaman", _page_names)

if page == "User Dashboard":
    dashboard_user.render()
elif page == "Admin Dashboard":
    # The admin page is rendered only for an authenticated session;
    # otherwise the login form is shown in its place.
    is_authenticated = st.session_state.get("authenticated")
    if not is_authenticated:
        login()
    else:
        admin_dashboard.render()

BIN
models/kmeans_model.pkl Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

128
utils/auth.py Normal file
View File

@ -0,0 +1,128 @@
import streamlit as st
import bcrypt
import json
import os
import re
CONFIG_PATH = "config.json"


def load_config():
    """Return the admin configuration stored at ``CONFIG_PATH``.

    When the file does not exist, a default configuration is returned with
    username ``"admin"`` and empty password, security question and answer.
    """
    if os.path.exists(CONFIG_PATH):
        with open(CONFIG_PATH, "r") as config_file:
            return json.load(config_file)
    return {
        "username": "admin",
        "password": "",
        "security_question": "",
        "security_answer": "",
    }
def save_config(config):
    """Write ``config`` to ``CONFIG_PATH`` as JSON indented by four spaces."""
    with open(CONFIG_PATH, "w") as config_file:
        json.dump(config, config_file, indent=4)
def is_strong_password(password):
    """Return True when ``password`` satisfies the strength policy.

    The policy requires at least 8 characters and at least one lowercase
    letter, one uppercase letter, one digit and one symbol. A symbol is any
    character that is neither a word character nor whitespace, or an
    underscore.
    """
    if len(password) < 8:
        return False
    required_patterns = (
        r"[a-z]",
        r"[A-Z]",
        r"\d",
        # `\w` includes "_", so `[^\w\s]` alone never accepts an underscore as
        # the symbol; list it explicitly.
        r"[^\w\s]|_",
    )
    return all(re.search(pattern, password) for pattern in required_patterns)
def _password_matches(password, stored_hash):
    """Return True when ``password`` matches the bcrypt ``stored_hash``.

    An empty or malformed hash counts as a mismatch. ``load_config`` returns an
    empty password when config.json is missing, and ``bcrypt.checkpw`` raises
    ``ValueError`` for such input.
    """
    if not stored_hash:
        return False
    try:
        return bcrypt.checkpw(password.encode(), stored_hash.encode())
    except ValueError:
        return False


def login():
    """Render the admin login form, including the security-question reset.

    Returns:
        True when the session is already authenticated, otherwise False. A
        successful login sets the session flag and reruns the script.
    """
    config = load_config()
    for flag in ("authenticated", "show_reset", "login_failed"):
        if flag not in st.session_state:
            st.session_state[flag] = False

    if st.session_state.authenticated:
        return True

    st.subheader("🔐 Login Admin")
    username = st.text_input("Username")
    password = st.text_input("Password", type="password")
    if st.button("Login"):
        if username == config.get("username") and _password_matches(password, config.get("password")):
            st.session_state.authenticated = True
            st.session_state.login_failed = False
            st.rerun()
        else:
            st.session_state.login_failed = True
            st.error("❌ Username atau password salah.")

    # The reset entry point is offered only after a failed login.
    if st.session_state.login_failed and not st.session_state.show_reset:
        if st.button("Lupa Password"):
            st.session_state.show_reset = True

    if st.session_state.show_reset:
        st.info("Jawab pertanyaan rahasia untuk reset password.")
        question = config.get("security_question")
        answer = st.text_input("Pertanyaan:", placeholder=question)
        new_pass = st.text_input("Password Baru", type="password")
        if st.button("Ganti Password"):
            expected_answer = (config.get("security_answer") or "").strip().lower()
            # An unset security answer must never match; otherwise an empty
            # input would let anyone reset the password.
            if not expected_answer or answer.strip().lower() != expected_answer:
                st.error("❌ Jawaban salah.")
            elif not is_strong_password(new_pass):
                # Same policy and message as show_password_reset_form.
                st.warning("Password harus minimal 8 karakter, mengandung huruf besar, huruf kecil, angka, dan simbol.")
            else:
                config["password"] = bcrypt.hashpw(new_pass.encode(), bcrypt.gensalt()).decode()
                save_config(config)
                st.success("✅ Password berhasil diganti!")
                st.session_state.show_reset = False
                st.session_state.login_failed = False
    return False
def show_password_reset_form():
    """Render a standalone password-reset form based on the security question.

    On a correct answer and a password accepted by ``is_strong_password``, the
    new bcrypt hash is written with ``save_config`` and the reset flags in the
    session state are cleared.
    """
    config = load_config()
    st.subheader("🔐 Reset Password")
    # `or` also covers a question stored as an empty string, which would
    # otherwise produce a text input with an empty label.
    pertanyaan = config.get("security_question") or "Pertanyaan tidak tersedia"
    jawaban_input = st.text_input(pertanyaan)
    new_pass = st.text_input("Password Baru", type="password")
    if not st.button("Ganti Password"):
        return

    expected_answer = (config.get("security_answer") or "").strip().lower()
    # An unset security answer must never match; otherwise an empty input
    # would let anyone reset the password.
    if not expected_answer or jawaban_input.strip().lower() != expected_answer:
        st.error("Jawaban pertanyaan salah.")
        return

    if not is_strong_password(new_pass):
        st.warning("Password harus minimal 8 karakter, mengandung huruf besar, huruf kecil, angka, dan simbol.")
        return

    config["password"] = bcrypt.hashpw(new_pass.encode(), bcrypt.gensalt()).decode()
    save_config(config)
    st.success("Password berhasil diubah! Silakan login kembali.")
    # Clear every reset-related flag.
    st.session_state.show_reset = False
    st.session_state.show_reset_button = False
def logout():
    """Render the sidebar logout button with a confirmation step.

    Confirming clears the whole session state and reruns the script;
    cancelling clears the confirmation flag and reruns so the dialog
    disappears immediately.
    """
    with st.sidebar:
        if st.button("🚪 Logout"):
            st.session_state.confirm_logout = True

        # Show the confirmation dialog once logout has been requested.
        if st.session_state.get("confirm_logout", False):
            st.warning("Apakah Anda yakin ingin logout?")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("✅ Ya, Logout"):
                    st.session_state.clear()
                    st.rerun()
            with col2:
                if st.button("❌ Batal"):
                    st.session_state.confirm_logout = False
                    # Without a rerun, the dialog already drawn in this run
                    # stays on screen until the next interaction.
                    st.rerun()

28
utils/db.py Normal file
View File

@ -0,0 +1,28 @@
from sqlalchemy import create_engine, text
from datetime import datetime
import pandas as pd
# Shared SQLAlchemy engine for the app's SQLite database file.
# NOTE(review): the URL uses a relative path, so the file location presumably
# depends on the process working directory — confirm how the app is launched.
engine = create_engine("sqlite:///data/files.db")
def init_db():
    """Create the ``uploaded_files`` table if it does not exist yet.

    Runs inside ``engine.begin()`` so the DDL is committed when the block
    exits, matching how the other write paths in this app open transactions.
    A plain ``engine.connect()`` block does not commit on its own under
    SQLAlchemy 2.0-style connections.
    """
    with engine.begin() as conn:
        conn.execute(text("""
            CREATE TABLE IF NOT EXISTS uploaded_files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_name TEXT,
                uploaded_at TEXT
            )
        """))
def save_file_metadata(file_name):
    """Insert a row for ``file_name`` into ``uploaded_files`` and return its id.

    The upload time is stored as the ISO-8601 text of ``datetime.now()``.
    """
    uploaded_at = datetime.now()
    insert_stmt = text("""
        INSERT INTO uploaded_files (file_name, uploaded_at)
        VALUES (:f, :t)
    """)
    with engine.begin() as conn:
        conn.execute(insert_stmt, {"f": file_name, "t": uploaded_at.isoformat()})
        # Ask SQLite for the row id generated on this same connection.
        id_row = conn.execute(text("SELECT last_insert_rowid()")).fetchone()
        return id_row[0]
def save_dataset(file_id, df):
    """Store ``df`` as table ``dataset_<file_id>``, replacing any existing table."""
    table_name = f"dataset_{file_id}"
    df.to_sql(table_name, con=engine, index=False, if_exists="replace")

116
utils/preprocessing.py Normal file
View File

@ -0,0 +1,116 @@
import pandas as pd
import re
# (latitude, longitude) per region name, attached to the per-region summary.
# Kept at module level so the table is built once rather than on every call.
_WILAYAH_COORDINATES = {
    'Kota Surabaya': (-7.2575, 112.7521),
    'Kota Malang': (-7.9819, 112.6265),
    'Kota Kediri': (-7.8166, 112.0114),
    'Kabupaten Sidoarjo': (-7.4467, 112.7181),
    'Kabupaten Gresik': (-7.1607, 112.6530),
    'Kota Mojokerto': (-7.4722, 112.4336),
    'Kota Pasuruan': (-7.6451, 112.9086),
    'Kabupaten Lamongan': (-7.1120, 112.4148),
    'Kabupaten Jombang': (-7.5469, 112.2334),
    'Kabupaten Tuban': (-6.8971, 112.0505),
    'Kabupaten Bangkalan': (-7.0335, 112.7467),
    'Kabupaten Bojonegoro': (-7.1500, 111.8833),
    'Kabupaten Mojokerto': (-7.4700, 112.4330),
    'Kabupaten Pasuruan': (-7.6895, 112.6855),
    'Kabupaten Probolinggo': (-7.7764, 113.2196),
    'Kota Probolinggo': (-7.7543, 113.2150),
    'Kabupaten Banyuwangi': (-8.2186, 114.3690),
    'Kabupaten Blitar': (-8.0941, 112.3096),
    'Kabupaten Bondowoso': (-7.9135, 113.8213),
    'Kabupaten Jember': (-8.1725, 113.7005),
    'Kabupaten Lumajang': (-8.1350, 113.2249),
    'Kabupaten Madiun': (-7.6295, 111.5230),
    'Kabupaten Magetan': (-7.6524, 111.3355),
    'Kabupaten Nganjuk': (-7.6055, 111.9031),
    'Kabupaten Ngawi': (-7.4059, 111.4468),
    'Kabupaten Pamekasan': (-7.1566, 113.4780),
    'Kabupaten Ponorogo': (-7.8659, 111.4691),
    'Kabupaten Sampang': (-7.1917, 113.2490),
    'Kabupaten Sumenep': (-6.9245, 113.9066),
    'Kabupaten Situbondo': (-7.7069, 114.0099),
    'Kabupaten Trenggalek': (-8.0593, 111.7084),
    'Kabupaten Tulungagung': (-8.0657, 111.9010),
    'Kota Batu': (-7.8671, 112.5239),
    'Kota Blitar': (-8.0987, 112.1680),
    'Kota Madiun': (-7.6309, 111.5230),
    'Kabupaten Kediri': (-7.8480, 112.0113),
    'Kabupaten Malang': (-8.1065, 112.6660),
    'Kabupaten Pacitan': (-8.1948, 111.1056),
}


def preprocess_initial(df):
    """Summarise a raw, header-less price sheet into one row per region.

    Rows before the first numbered row are discarded. A row whose ``No`` has no
    dot is treated as a region header; rows with a dot are markets belonging to
    the closest region header above them. Daily prices are parsed by removing
    "." from the text before numeric conversion.

    Args:
        df: Frame read with ``header=None``; column 0 holds the numbering,
            column 1 the name, and the remaining columns the daily prices as
            text.

    Returns:
        DataFrame with columns ``No``, ``Lokasi``, ``JumlahPasar``,
        ``RataRataHarga``, ``RataRataHargaTertinggiDiPasar``,
        ``PasarDenganRataRataHargaTertinggi``, ``Latitude`` and ``Longitude``.

    Raises:
        IndexError: if no row has a plain number in the first column.
    """
    # Data starts at the first row whose first cell is a number like "1" or "1.1".
    # The group is non-capturing so str.contains does not warn about match groups.
    is_numbered = df[0].astype(str).str.contains(r'^\d+(?:\.\d+)?$', na=False)
    if not is_numbered.any():
        raise IndexError("Tidak ditemukan baris data bernomor pada kolom pertama.")
    # Slice by row position, not index label, so a frame with a non-default
    # index is still cut at the right row.
    start_pos = int(is_numbered.to_numpy(dtype=bool).argmax())

    data = df.iloc[start_pos:].reset_index(drop=True)
    data.columns = ['No', 'Lokasi'] + [f'Hari_{i}' for i in range(1, len(data.columns) - 1)]

    # Clean the identifier columns (zero-width / non-breaking spaces become plain spaces).
    data['No'] = data['No'].astype(str).str.strip()
    data['Lokasi'] = data['Lokasi'].astype(str).apply(lambda x: re.sub(r'[\u200b\xa0]+', ' ', x)).str.strip()

    # Convert the price columns to numbers; unparseable cells become NaN.
    for col in data.columns[2:]:
        data[col] = pd.to_numeric(data[col].str.replace('.', '', regex=False), errors='coerce')

    # Region header rows have no dot in their number; propagate their name downwards.
    data['IsWilayah'] = ~data['No'].str.contains(r'\.', na=False)
    data['Wilayah'] = data['Lokasi'].where(data['IsWilayah']).ffill()

    # Per-market average over all day columns.
    pasar_df = data[~data['IsWilayah']].copy()
    harga_cols = [col for col in data.columns if col.startswith("Hari_")]
    pasar_df['RataRataHargaPasar'] = pasar_df[harga_cols].mean(axis=1)

    # Per-region summary.
    rekap_df = pasar_df.groupby('Wilayah').agg(
        JumlahPasar=('Lokasi', 'count'),
        RataRataHarga=('RataRataHargaPasar', 'mean'),
        RataRataHargaTertinggiDiPasar=('RataRataHargaPasar', 'max')
    ).reset_index()

    # Name of the market with the highest average in each region.
    idx_max = pasar_df.groupby('Wilayah')['RataRataHargaPasar'].idxmax()
    tertinggi_df = pasar_df.loc[idx_max, ['Wilayah', 'Lokasi']].rename(
        columns={'Lokasi': 'PasarDenganRataRataHargaTertinggi'}
    )

    final_df = rekap_df.merge(tertinggi_df, on='Wilayah')
    final_df.insert(0, 'No', range(1, len(final_df) + 1))
    final_df.rename(columns={'Wilayah': 'Lokasi'}, inplace=True)

    # Attach coordinates; regions missing from the table get None.
    final_df['Latitude'] = final_df['Lokasi'].map(lambda x: _WILAYAH_COORDINATES.get(x, (None, None))[0])
    final_df['Longitude'] = final_df['Lokasi'].map(lambda x: _WILAYAH_COORDINATES.get(x, (None, None))[1])
    return final_df
def winsorize_series(series):
    """Clip ``series`` to the range ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``.

    ``Q1`` and ``Q3`` are the 0.25 and 0.75 quantiles of ``series`` and
    ``IQR`` is their difference.
    """
    first_quartile = series.quantile(0.25)
    third_quartile = series.quantile(0.75)
    whisker = 1.5 * (third_quartile - first_quartile)
    return series.clip(
        lower=first_quartile - whisker,
        upper=third_quartile + whisker,
    )
def preprocess(df):
    """Return a copy of ``df`` with the two price columns winsorized.

    ``RataRataHarga`` and ``RataRataHargaTertinggiDiPasar`` are each passed
    through ``winsorize_series``; the input frame is not modified.
    """
    price_columns = ('RataRataHarga', 'RataRataHargaTertinggiDiPasar')
    winsorized = df.copy()
    for column_name in price_columns:
        clipped = winsorize_series(winsorized[column_name])
        winsorized[column_name] = clipped
    return winsorized

55
utils/train_kmeans.py Normal file
View File

@ -0,0 +1,55 @@
import os
import pickle
import pandas as pd
from sklearn.cluster import KMeans
from utils.db import engine
from utils.preprocessing import preprocess
from sqlalchemy import text
# Directory the trained KMeans pickle is written to; created at import time if missing.
MODELS_DIR = "models"
os.makedirs(MODELS_DIR, exist_ok=True)
def train_and_save_kmeans():
    """Retrain the KMeans model on every registered dataset and save it.

    Only files that still have a row in ``uploaded_files`` and an existing
    ``dataset_<id>`` table are used, so a table left behind by a deleted file
    no longer influences the model. Training itself is delegated to
    ``train_kmeans_with_selected_files`` so both entry points share one
    implementation.

    Raises:
        ValueError: if there is no dataset to train on.
    """
    with engine.connect() as conn:
        rows = conn.execute(text("""
            SELECT id FROM uploaded_files
            WHERE 'dataset_' || id IN (
                SELECT name FROM sqlite_master WHERE type = 'table'
            )
            ORDER BY id
        """)).fetchall()

    file_ids = [file_id for (file_id,) in rows]
    if not file_ids:
        raise ValueError("Tidak ada dataset untuk pelatihan.")
    train_kmeans_with_selected_files(file_ids)
def train_kmeans_with_selected_files(file_ids):
    """Train a 3-cluster KMeans model on the selected datasets and save it.

    Each ``dataset_<id>`` table is read and passed through ``preprocess``. The
    model is fitted on ``RataRataHarga`` and ``RataRataHargaTertinggiDiPasar``
    and pickled to ``kmeans_model.pkl`` under ``MODELS_DIR``.

    Args:
        file_ids: Iterable of ids whose ``dataset_<id>`` tables are used.

    Raises:
        ValueError: if ``file_ids`` is empty or fewer than three complete rows
            are available for clustering.
    """
    frames = [
        preprocess(pd.read_sql_table(f"dataset_{file_id}", con=engine))
        for file_id in file_ids
    ]
    if not frames:
        raise ValueError("Tidak ada dataset terpilih untuk pelatihan.")

    all_data = pd.concat(frames, ignore_index=True)
    # KMeans rejects NaN, so drop incomplete rows instead of failing inside fit().
    features = all_data[['RataRataHarga', 'RataRataHargaTertinggiDiPasar']].dropna()
    if len(features) < 3:
        raise ValueError("Data tidak cukup untuk membentuk 3 cluster.")

    # n_init is set explicitly so results do not depend on the library's
    # version-dependent default.
    model = KMeans(n_clusters=3, random_state=42, n_init=10)
    model.fit(features)

    # Write to a temporary file and swap it in, so a reader never loads a
    # partially written pickle.
    model_path = os.path.join(MODELS_DIR, "kmeans_model.pkl")
    tmp_path = model_path + ".tmp"
    with open(tmp_path, "wb") as f:
        pickle.dump(model, f)
    os.replace(tmp_path, model_path)

218
utils/visualizations.py Normal file
View File

@ -0,0 +1,218 @@
import streamlit as st
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sqlalchemy import text
from utils.db import engine
from sklearn.metrics import silhouette_score
def show_scorecard(df):
    """Render four summary metrics for ``df`` in a single row.

    Shows the number of rows, the sum of ``JumlahPasar``, the mean of
    ``RataRataHarga`` and the row with the highest
    ``RataRataHargaTertinggiDiPasar``. Metrics that cannot be computed (empty
    frame or only missing values) show "Data tidak tersedia".
    """
    col1, col2, col3, col4 = st.columns(4)

    # Number of locations = number of rows.
    col1.metric("Jumlah Lokasi", len(df))

    # Total number of markets across all rows.
    total_pasar = int(df["JumlahPasar"].sum())
    col2.metric("Total Jumlah Pasar", total_pasar)

    # Average price; the mean is NaN for an empty frame, which used to render as "Rp nan".
    rata_rata_harga = df['RataRataHarga'].mean()
    if pd.isna(rata_rata_harga):
        col3.metric("Rata-rata Harga Jawa Timur", "Data tidak tersedia")
    else:
        col3.metric("Rata-rata Harga Jawa Timur", f"Rp {round(rata_rata_harga, 2):,.2f}")

    # Highest market price; missing values are dropped so idxmax always has a candidate.
    harga_tertinggi_valid = df['RataRataHargaTertinggiDiPasar'].dropna()
    if harga_tertinggi_valid.empty:
        col4.metric("Harga Tertinggi di Pasar", "Data tidak tersedia", "")
        return

    max_row = df.loc[harga_tertinggi_valid.idxmax()]
    nama_pasar = max_row['PasarDenganRataRataHargaTertinggi']
    nama_lokasi = max_row['Lokasi']
    harga_tertinggi = max_row['RataRataHargaTertinggiDiPasar']
    col4.metric(
        label="Harga Tertinggi di Pasar",
        value=f"Rp {harga_tertinggi:,.2f}",
        delta=f"{nama_pasar} - {nama_lokasi}"
    )
def show_clustermap(df):
    """Render a map with one marker per row, coloured and sized by cluster label.

    Cluster ids are translated to labels through
    ``st.session_state["cluster_labels"]``, with a built-in default mapping.
    Rows whose latitude, longitude or average price cannot be parsed as
    numbers are dropped. Any exception while building the map is shown to the
    user together with debug information about ``df``.
    """
    st.subheader("🗺️ Wilayah Dengan Cluster Tingkat Harga di Jawa Timur")

    required_columns = {"cluster", "Latitude", "Longitude", "Lokasi", "RataRataHarga"}
    missing_cols = required_columns - set(df.columns)
    if missing_cols:
        st.error(f"❌ Kolom yang hilang: {missing_cols}")
        st.info(f"✅ Kolom yang tersedia: {list(df.columns)}")
        return

    try:
        df_map = df.copy()
        df_map['cluster'] = df_map['cluster'].astype(int)
        numeric_columns = ['Latitude', 'Longitude', 'RataRataHarga']
        for column_name in numeric_columns:
            df_map[column_name] = pd.to_numeric(df_map[column_name], errors='coerce')
        df_map = df_map.dropna(subset=numeric_columns)

        if not len(df_map):
            st.error("❌ Tidak ada data valid untuk ditampilkan")
            return

        # Label mapping chosen on the admin page, or the default when none is stored.
        cluster_labels = st.session_state.get(
            "cluster_labels", {0: "Tinggi", 1: "Sedang", 2: "Rendah"}
        )
        color_map = {"Tinggi": "#4CAF50", "Rendah": "#F44336", "Sedang": "#FFC107"}
        size_map = {"Tinggi": 20, "Sedang": 10, "Rendah": 5}

        df_map['cluster_label'] = df_map['cluster'].map(cluster_labels)
        # Labels without a configured size fall back to 8.
        df_map['size'] = df_map['cluster_label'].map(size_map).fillna(8)

        fig = px.scatter_mapbox(
            df_map,
            lat="Latitude",
            lon="Longitude",
            color="cluster_label",
            size="size",
            hover_name="Lokasi",
            hover_data={"RataRataHarga": True, "Latitude": False, "Longitude": False},
            zoom=7,
            height=550,
            color_discrete_map=color_map,
            size_max=30
        )
        fig.update_layout(
            mapbox_style="open-street-map",
            mapbox_center={"lat": -7.5, "lon": 112.5},
            margin={"r": 0, "t": 0, "l": 0, "b": 0}
        )
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        # Best-effort rendering: report the failure and dump debug information.
        st.error(f"❌ Error saat membuat peta: {str(e)}")
        st.write("Debug info:")
        st.write(f"Data shape: {df.shape}")
        st.write(f"Columns: {list(df.columns)}")
        if len(df) > 0:
            st.write("Sample data:")
            st.dataframe(df.head(3))
def show_top_bottom_locations(df):
    """Render two horizontal bar charts: the 5 highest and 5 lowest locations.

    Locations are ranked by the mean of ``RataRataHarga`` per ``Lokasi``. An
    error is shown instead when either column is missing.
    """
    st.subheader("📊 5 Lokasi dengan Harga Tertinggi dan Terendah")

    if "Lokasi" not in df.columns or "RataRataHarga" not in df.columns:
        st.error("Data tidak mengandung kolom 'Lokasi' atau 'RataRataHarga'")
        return

    per_location = df.groupby("Lokasi")["RataRataHarga"].mean().reset_index()
    highest = per_location.sort_values(by="RataRataHarga", ascending=False).head(5)
    lowest = per_location.sort_values(by="RataRataHarga", ascending=True).head(5)

    left, right = st.columns([1, 1])
    panels = (
        (left, "<h4 style='text-align:center;'>5 Lokasi Tertinggi</h4>", highest),
        (right, "<h4 style='text-align:center;'>5 Lokasi Terendah</h4>", lowest),
    )
    # Both panels share the same chart configuration; only heading and data differ.
    for container, heading_html, subset in panels:
        with container:
            st.markdown(heading_html, unsafe_allow_html=True)
            fig = px.bar(
                subset.sort_values("RataRataHarga"),
                x="RataRataHarga",
                y="Lokasi",
                orientation="h",
                text="RataRataHarga",
                color_discrete_sequence=["#73D2F6"]
            )
            fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', width=0.4)
            fig.update_layout(xaxis_title=None, yaxis_title=None, margin=dict(l=10, r=10, t=30, b=10))
            st.plotly_chart(fig, use_container_width=True)
def show_price_trend():
    """Render a line chart of the mean ``RataRataHarga`` per uploaded dataset.

    Every ``dataset_*`` table contributes one point, placed at the upload time
    recorded in ``uploaded_files``. Tables without a matching row are skipped,
    and a table that cannot be read produces a warning instead of aborting.
    """
    st.subheader("📈 Linechart Rata-Rata Harga Seluruh Kab/Kota")

    tables = pd.read_sql("""
        SELECT name FROM sqlite_master
        WHERE type='table' AND name LIKE 'dataset_%'
    """, engine)

    trend_data = []
    for table_name in tables["name"]:
        try:
            dataset = pd.read_sql_table(table_name, engine)
            file_id = int(table_name.replace("dataset_", ""))
            upload_rows = pd.read_sql(
                f"SELECT uploaded_at FROM uploaded_files WHERE id={file_id}", engine
            )
            if upload_rows.empty:
                # No upload-time record for this table: skip it.
                continue
            point = {
                "uploaded_at": upload_rows.iloc[0, 0],
                "RataRataHarga": dataset["RataRataHarga"].mean(),
            }
            trend_data.append(point)
        except Exception as e:
            st.warning(f"Gagal membaca data dari {table_name}: {e}")

    if not trend_data:
        st.info("Belum ada data yang bisa ditampilkan untuk tren harga.")
        return

    df_trend = pd.DataFrame(trend_data).sort_values("uploaded_at")
    df_trend['uploaded_at'] = pd.to_datetime(df_trend['uploaded_at'])

    fig = px.line(df_trend, x="uploaded_at", y="RataRataHarga", markers=True)
    fig.update_layout(
        xaxis_title="Waktu Upload",
        yaxis_title="Rata-rata Harga",
        height=450
    )
    st.plotly_chart(fig, use_container_width=True)
def show_data_table(df):
    """Render ``df`` as a table with a fixed set of column positions hidden.

    A leading index-like column (``Unnamed: 0``, ``index`` or ``0``) is dropped
    first; then the columns at positions 0, 6, 7 and 8 are hidden when present.
    """
    st.subheader("🗒️ Data Tabel")

    if df.empty:
        st.info("Data kosong atau belum dipilih.")
        return

    # Drop a leftover index column such as "Unnamed: 0" before counting positions.
    leading_column = df.columns[0]
    if leading_column.lower() in ["unnamed: 0", "index", "0"]:
        df = df.drop(columns=leading_column)

    # 0-based positions to hide (1st, 7th, 8th and 9th column).
    # NOTE(review): presumably No, Latitude, Longitude and cluster for frames
    # coming from the dashboard — confirm against the stored column order.
    hidden_positions = (0, 6, 7, 8)
    column_count = len(df.columns)
    hidden_names = {
        df.columns[position]
        for position in hidden_positions
        if position < column_count
    }

    visible_columns = [name for name in df.columns if name not in hidden_names]
    st.dataframe(df[visible_columns])