first commit
This commit is contained in:
commit
77e1092ceb
|
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"name": "Python 3",
|
||||
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
||||
"image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm",
|
||||
"customizations": {
|
||||
"codespaces": {
|
||||
"openFiles": [
|
||||
"README.md",
|
||||
"app.py"
|
||||
]
|
||||
},
|
||||
"vscode": {
|
||||
"settings": {},
|
||||
"extensions": [
|
||||
"ms-python.python",
|
||||
"ms-python.vscode-pylance"
|
||||
]
|
||||
}
|
||||
},
|
||||
"updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
|
||||
"postAttachCommand": {
|
||||
"server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
|
||||
},
|
||||
"portsAttributes": {
|
||||
"8501": {
|
||||
"label": "Application",
|
||||
"onAutoForward": "openPreview"
|
||||
}
|
||||
},
|
||||
"forwardPorts": [
|
||||
8501
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1 @@
|
|||
3.10
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
# Analisis Sentimen Kebijakan Anggaran Pendidikan 🎓
|
||||
|
||||
Aplikasi ini adalah dashboard interaktif untuk menganalisis sentimen masyarakat di Twitter mengenai isu pemotongan dan efisiensi anggaran pendidikan (Dana BOS, PIP, KIP Kuliah).
|
||||
|
||||
## 🚀 Fitur Utama
|
||||
* **Sentiment Analysis:** Menggunakan model Deep Learning **Bi-LSTM**.
|
||||
* **Topic Modeling:** Ekstraksi topik pembicaraan menggunakan **LDA**.
|
||||
* **Interactive Dashboard:** Visualisasi data real-time dengan **Streamlit** & **Plotly**.
|
||||
|
||||
## 🛠️ Tech Stack
|
||||
* Python 3.10
|
||||
* TensorFlow / Keras
|
||||
* Streamlit
|
||||
* Pandas & NumPy
|
||||
* Sastrawi (Preprocessing)
|
||||
|
||||
## 📦 Cara Menjalankan (Local)
|
||||
1. Clone repository ini.
|
||||
2. Install library: `pip install -r requirements.txt`.
|
||||
3. Jalankan: `streamlit run app.py`.
|
||||
Binary file not shown.
|
|
@ -0,0 +1,88 @@
|
|||
import streamlit as st
|
||||
from streamlit_option_menu import option_menu
|
||||
|
||||
# --- IMPORT MODUL LOKAL ---
|
||||
from utils import load_resources
|
||||
from views.beranda import render_beranda
|
||||
from views.visualisasi import render_visualisasi
|
||||
from views.proses_data import render_proses_data
|
||||
from views.analisis_teks import render_analisis_teks
|
||||
from views.analisis_csv import render_analisis_csv
|
||||
|
||||
# ==============================================================================
|
||||
# 1. SETUP KONFIGURASI HALAMAN
|
||||
# ==============================================================================
|
||||
# Page-level settings: browser-tab title and icon, wide layout, and a sidebar
# that starts expanded.
PAGE_SETTINGS = {
    "page_title": "Dashboard Analisis Sentimen Isu Efisiensi Anggaran Sektor Pendidikan",
    "page_icon": "🎓",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**PAGE_SETTINGS)

# Load the LSTM model and its tokenizer; both are handed to the text and CSV
# analysis pages further down.
model, tokenizer = load_resources()
|
||||
|
||||
# ==============================================================================
|
||||
# 2. SIDEBAR NAVIGATION (MENU KIRI)
|
||||
# ==============================================================================
|
||||
# Build the left-hand sidebar: header image, title block, navigation menu and
# footer. The menu's return value is stored in `selected` for the routing step.
# NOTE(review): assumes every statement below belongs inside the sidebar
# context and that the HTML snippets tolerate this indentation - confirm.
with st.sidebar:
    # Three columns with a wider middle one, so the header image sits centred.
    col1, col2, col3 = st.columns([1, 1.5, 1])
    with col2:
        st.image("images/data_analytics.png", use_column_width=True)

    # Application title and subtitle, rendered as raw HTML for centred styling.
    st.markdown(
        """
        <h2 style='text-align: center; margin-top: 10px; margin-bottom: 5px; font-weight: 800; font-size: 26px; line-height: 1.2;'>
            Sistem Analisis Sentimen
        </h2>
        <p style='text-align: center; color: gray; font-size: 14px;'>
            Kebijakan Efisiensi Anggaran Pendidikan
        </p>
        """,
        unsafe_allow_html=True
    )

    st.markdown("---")

    # Main navigation menu; `options` and `icons` are matched by position.
    selected = option_menu(
        menu_title="Menu Utama",
        options=["Beranda", "Visualisasi", "Proses Data", "Analisis Teks", "Analisis File CSV"],
        icons=["house", "bar-chart", "gear", "chat-text", "file-earmark-spreadsheet"],
        menu_icon="cast",
        default_index=0,
        styles={
            "nav-link-selected": {"background-color": "#007BFF"}  # Blue highlight for the active entry
        }
    )

    st.markdown("---")

    # Extra vertical space before the footer logo.
    st.markdown("<br>", unsafe_allow_html=True)

    # Footer logo, centred with narrow spacer columns on both sides.
    bot_col1, bot_col2, bot_col3 = st.columns([1, 5, 1])
    with bot_col2:
        st.image("images/logo_jti.png", use_column_width=True)

    # Copyright / author footer.
    st.markdown(
        """
        <div style='text-align: center; color: gray; font-size: 13px; margin-top: 25px; margin-bottom: 40px;'>
            © 2026 - Skripsi<br>
            <b>Renaldi Endrawan</b>
        </div>
        """,
        unsafe_allow_html=True
    )
|
||||
|
||||
# ==============================================================================
|
||||
# 3. ROUTING HALAMAN (MENAMPILKAN KONTEN)
|
||||
# ==============================================================================
|
||||
# Map each sidebar menu label to the callable that renders its page. The two
# analysis pages additionally receive the loaded model and tokenizer.
page_renderers = {
    "Beranda": render_beranda,
    "Visualisasi": render_visualisasi,
    "Proses Data": render_proses_data,
    "Analisis Teks": lambda: render_analisis_teks(model, tokenizer),
    "Analisis File CSV": lambda: render_analisis_csv(model, tokenizer),
}

# Render the selected page; an unknown label renders nothing.
render_page = page_renderers.get(selected)
if render_page is not None:
    render_page()
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,30 @@
|
|||
[
|
||||
{
|
||||
"Waktu": "2026-01-31 16:07:05",
|
||||
"Teks Asli": "Sangat kecewa dana BOS telat cair berbulan-bulan, gaji guru honorer jadi tertahan. Parah banget kinerjanya!",
|
||||
"Teks Bersih": "sangat kecewa dana bos telat cair berbulanbulan gaji guru honorer jadi tertahan parah banget kinerjanya",
|
||||
"Label": "Negatif",
|
||||
"Keyakinan (%)": 57.3
|
||||
},
|
||||
{
|
||||
"Waktu": "2026-01-31 16:07:32",
|
||||
"Teks Asli": "Mendikbudristek hari ini resmi membahas kebijakan efisiensi anggaran pendidikan tahun 2025. Skema pencairan dana BOS dan tunjangan guru akan mengalami sedikit perubahan mekanisme.",
|
||||
"Teks Bersih": "mendikbudristek hari ini resmi membahas kebijakan efisiensi anggaran pendidikan tahun skema pencairan dana bos dan tunjangan guru akan mengalami sedikit perubahan mekanisme",
|
||||
"Label": "Netral",
|
||||
"Keyakinan (%)": 66.16
|
||||
},
|
||||
{
|
||||
"Waktu": "2026-01-31 16:09:13",
|
||||
"Teks Asli": "Sangat setuju dengan langkah efisiensi anggaran BOS ini. Daripada bocor dikorupsi oknum sekolah, lebih baik diawasi ketat dan disalurkan tepat sasaran. Pendidikan Indonesia pasti lebih maju!",
|
||||
"Teks Bersih": "sangat setuju dengan langkah efisiensi anggaran bos ini daripada bocor dikorupsi oknum sekolah lebih baik diawasi ketat dan disalurkan tepat sasaran pendidikan indonesia pasti lebih maju",
|
||||
"Label": "Positif",
|
||||
"Keyakinan (%)": 99.93
|
||||
},
|
||||
{
|
||||
"Waktu": "2026-04-06 08:17:31",
|
||||
"Teks Asli": "Sangat kecewa dengan kebijakan efisiensi ini. Anggaran KIP Kuliah dipotong drastis, banyak mahasiswa dari keluarga tidak mampu terancam putus kuliah. Pemerintah sama sekali tidak pro rakyat kecil! \ud83d\ude21",
|
||||
"Teks Bersih": "sangat kecewa dengan kebijakan efisiensi ini anggaran kip kuliah dipotong drastis banyak mahasiswa dari keluarga tidak mampu terancam putus kuliah pemerintah sama sekali tidak pro rakyat kecil",
|
||||
"Label": "Negatif",
|
||||
"Keyakinan (%)": 91.82
|
||||
}
|
||||
]
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 29 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 56 KiB |
|
|
@ -0,0 +1,6 @@
|
|||
Skenario,Porsi_Data,Akurasi
|
||||
P1,20%,78.33333333333333
|
||||
P2,40%,80.83333333333333
|
||||
P3,60%,80.55555555555556
|
||||
P4,80%,82.08333333333333
|
||||
P5,100%,84.33333333333334
|
||||
|
|
|
@ -0,0 +1,301 @@
|
|||
y_true,y_pred
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
2,2
|
||||
2,2
|
||||
1,0
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
1,1
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
1,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
2,2
|
||||
1,1
|
||||
1,1
|
||||
2,2
|
||||
2,2
|
||||
0,0
|
||||
2,2
|
||||
1,1
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
2,2
|
||||
1,1
|
||||
2,2
|
||||
1,0
|
||||
1,1
|
||||
0,1
|
||||
1,0
|
||||
0,0
|
||||
1,1
|
||||
1,0
|
||||
1,2
|
||||
2,2
|
||||
1,0
|
||||
1,1
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
1,0
|
||||
2,1
|
||||
1,1
|
||||
1,1
|
||||
0,0
|
||||
1,1
|
||||
0,0
|
||||
1,1
|
||||
0,0
|
||||
1,1
|
||||
0,0
|
||||
0,0
|
||||
1,1
|
||||
2,2
|
||||
2,2
|
||||
1,0
|
||||
1,1
|
||||
0,0
|
||||
1,0
|
||||
0,0
|
||||
0,2
|
||||
1,1
|
||||
0,0
|
||||
1,1
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
1,0
|
||||
2,2
|
||||
2,2
|
||||
0,1
|
||||
0,0
|
||||
1,1
|
||||
2,2
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
1,0
|
||||
1,1
|
||||
1,1
|
||||
0,0
|
||||
1,1
|
||||
1,0
|
||||
0,0
|
||||
0,0
|
||||
0,1
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
1,1
|
||||
0,0
|
||||
2,1
|
||||
2,2
|
||||
0,0
|
||||
1,1
|
||||
1,1
|
||||
0,0
|
||||
1,1
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
1,0
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
1,1
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
2,0
|
||||
1,1
|
||||
2,2
|
||||
2,2
|
||||
0,0
|
||||
1,0
|
||||
1,1
|
||||
1,1
|
||||
0,1
|
||||
0,1
|
||||
1,1
|
||||
0,0
|
||||
2,2
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
1,1
|
||||
2,2
|
||||
0,0
|
||||
1,1
|
||||
1,1
|
||||
0,1
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
1,1
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
1,2
|
||||
1,1
|
||||
0,0
|
||||
1,0
|
||||
1,1
|
||||
1,1
|
||||
2,2
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
0,2
|
||||
0,0
|
||||
2,0
|
||||
2,2
|
||||
0,0
|
||||
1,1
|
||||
2,2
|
||||
0,0
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
1,0
|
||||
0,1
|
||||
1,0
|
||||
0,1
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
2,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
2,1
|
||||
2,2
|
||||
0,0
|
||||
2,2
|
||||
2,2
|
||||
1,1
|
||||
1,0
|
||||
1,1
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
1,0
|
||||
2,2
|
||||
2,2
|
||||
2,2
|
||||
1,1
|
||||
0,0
|
||||
1,1
|
||||
2,2
|
||||
2,2
|
||||
2,2
|
||||
0,0
|
||||
1,1
|
||||
2,2
|
||||
1,1
|
||||
0,0
|
||||
2,2
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
2,2
|
||||
1,0
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
0,0
|
||||
2,0
|
||||
0,0
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
0,0
|
||||
1,0
|
||||
2,2
|
||||
2,1
|
||||
0,1
|
||||
2,2
|
||||
2,2
|
||||
1,1
|
||||
2,2
|
||||
0,0
|
||||
2,2
|
||||
1,1
|
||||
1,1
|
||||
2,2
|
||||
2,2
|
||||
0,0
|
||||
2,2
|
||||
1,1
|
||||
1,0
|
||||
0,0
|
||||
1,1
|
||||
0,0
|
||||
1,1
|
||||
0,0
|
||||
0,1
|
||||
0,0
|
||||
1,1
|
||||
1,1
|
||||
1,1
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
2,2
|
||||
0,1
|
||||
2,1
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
1,1
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
1,0
|
||||
0,0
|
||||
2,2
|
||||
0,0
|
||||
0,0
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
Sentimen,Topik Ke,Kata Kunci
|
||||
Negatif,1,"pendidikan, anggaran, efisiensi, yang, di, tidak, orang"
|
||||
Negatif,2,"pendidikan, anggaran, tidak, efisiensi, ini, dana, dan"
|
||||
Negatif,3,"yang, tidak, efisiensi, pendidikan, anggaran, itu, ini"
|
||||
Netral,1,"dan, efisiensi, anggaran, guru, ya, kementerian, ini"
|
||||
Netral,2,"anggaran, efisiensi, kuliah, kip, pendidikan, tidak, dan"
|
||||
Netral,3,"pendidikan, efisiensi, anggaran, pip, dan, tidak, cair"
|
||||
Positif,1,"untuk, ini, anggaran, pendidikan, tidak, di, bagus"
|
||||
Positif,2,"pendidikan, untuk, anggaran, indonesia, benar, efisiensi, yang"
|
||||
Positif,3,"ini, efisiensi, anggaran, dan, pemerintah, tidak, kip"
|
||||
|
Binary file not shown.
|
|
@ -0,0 +1,10 @@
|
|||
Num_Topics,Coherence_Score
|
||||
2,0.31239850490929644
|
||||
3,0.2878041079491884
|
||||
4,0.3032298786129941
|
||||
5,0.30100283745683337
|
||||
6,0.3880164824328067
|
||||
7,0.3987580925505836
|
||||
8,0.3785593889346621
|
||||
9,0.38313774362229563
|
||||
10,0.46718575350823743
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
accuracy,loss,val_accuracy,val_loss,learning_rate,Epoch,Skenario
|
||||
0.38141027092933655,1.09345543384552,0.6166666746139526,1.080513834953308,0.0010000000474974513,1,P1
|
||||
0.7211538553237915,1.0516897439956665,0.7166666388511658,1.018355131149292,0.0010000000474974513,2,P1
|
||||
0.7596153616905212,0.8945255875587463,0.6833333373069763,0.7797202467918396,0.0010000000474974513,3,P1
|
||||
0.7371794581413269,0.5743359923362732,0.7166666388511658,0.7589436173439026,0.0010000000474974513,4,P1
|
||||
0.7692307829856873,0.4817313253879547,0.7333333492279053,0.7645898461341858,0.0010000000474974513,5,P1
|
||||
0.8653846383094788,0.387156218290329,0.7666666507720947,0.7059115767478943,0.0010000000474974513,6,P1
|
||||
0.9487179517745972,0.26129141449928284,0.7833333611488342,0.699984610080719,0.0010000000474974513,7,P1
|
||||
0.9839743375778198,0.1384553611278534,0.800000011920929,0.7633869051933289,0.0010000000474974513,8,P1
|
||||
0.9935897588729858,0.07908293604850769,0.8166666626930237,0.8693917393684387,0.0010000000474974513,9,P1
|
||||
0.9935897588729858,0.04564093425869942,0.800000011920929,0.9836347103118896,0.0010000000474974513,10,P1
|
||||
1.0,0.025794249027967453,0.7833333611488342,1.0316051244735718,0.0005000000237487257,11,P1
|
||||
0.9967948794364929,0.028194565325975418,0.7833333611488342,1.0789639949798584,0.0005000000237487257,12,P1
|
||||
0.9967948794364929,0.021308064460754395,0.7833333611488342,1.1356014013290405,0.0005000000237487257,13,P1
|
||||
1.0,0.016758248209953308,0.7833333611488342,1.1703376770019531,0.0002500000118743628,14,P1
|
||||
1.0,0.016384366899728775,0.7833333611488342,1.210790753364563,0.0002500000118743628,15,P1
|
||||
0.5560897588729858,1.069062352180481,0.6583333611488342,0.9929993748664856,0.0010000000474974513,1,P2
|
||||
0.6666666865348816,0.7357844114303589,0.6916666626930237,0.6130390167236328,0.0010000000474974513,2,P2
|
||||
0.8397436141967773,0.48598557710647583,0.7666666507720947,0.5232926607131958,0.0010000000474974513,3,P2
|
||||
0.9375,0.31453579664230347,0.7833333611488342,0.5067169070243835,0.0010000000474974513,4,P2
|
||||
0.9695512652397156,0.16148658096790314,0.8083333373069763,0.45324838161468506,0.0010000000474974513,5,P2
|
||||
0.9855769276618958,0.0931703969836235,0.8166666626930237,0.5186069011688232,0.0010000000474974513,6,P2
|
||||
0.9903846383094788,0.07859157770872116,0.8083333373069763,0.7238136529922485,0.0010000000474974513,7,P2
|
||||
0.9967948794364929,0.030187053605914116,0.7916666865348816,0.8180117011070251,0.0010000000474974513,8,P2
|
||||
0.9983974099159241,0.020236998796463013,0.8166666626930237,0.7661744952201843,0.0005000000237487257,9,P2
|
||||
0.9967948794364929,0.015788376331329346,0.824999988079071,0.7735798358917236,0.0005000000237487257,10,P2
|
||||
0.9967948794364929,0.012403394095599651,0.824999988079071,0.80744469165802,0.0005000000237487257,11,P2
|
||||
0.9983974099159241,0.010184520855545998,0.824999988079071,0.8309624195098877,0.0002500000118743628,12,P2
|
||||
0.9967948794364929,0.012382127344608307,0.8083333373069763,0.925710141658783,0.0002500000118743628,13,P2
|
||||
0.5260915756225586,1.0014312267303467,0.6222222447395325,0.7828471660614014,0.0010000000474974513,1,P3
|
||||
0.6656017303466797,0.609158456325531,0.6944444179534912,0.6250120401382446,0.0010000000474974513,2,P3
|
||||
0.7507987022399902,0.49529990553855896,0.7388888597488403,0.554004430770874,0.0010000000474974513,3,P3
|
||||
0.9105431437492371,0.3310180902481079,0.8055555820465088,0.46935632824897766,0.0010000000474974513,4,P3
|
||||
0.9712460041046143,0.1286899894475937,0.7722222208976746,0.7690128684043884,0.0010000000474974513,5,P3
|
||||
0.98296058177948,0.05941445380449295,0.7777777910232544,0.8493516445159912,0.0010000000474974513,6,P3
|
||||
0.9968051314353943,0.029140625149011612,0.7833333611488342,1.0082753896713257,0.0010000000474974513,7,P3
|
||||
0.9978700876235962,0.01636233739554882,0.7833333611488342,1.0569945573806763,0.0005000000237487257,8,P3
|
||||
0.9968051314353943,0.01626773364841938,0.7611111402511597,1.1638100147247314,0.0005000000237487257,9,P3
|
||||
0.9957401752471924,0.015717728063464165,0.7833333611488342,1.028610110282898,0.0005000000237487257,10,P3
|
||||
0.9989350438117981,0.009835228323936462,0.8055555820465088,1.0765795707702637,0.0002500000118743628,11,P3
|
||||
0.9978700876235962,0.009616348892450333,0.7833333611488342,1.1071890592575073,0.0002500000118743628,12,P3
|
||||
0.5899280309677124,0.9616552591323853,0.6833333373069763,0.62132328748703,0.0010000000474974513,1,P4
|
||||
0.7082334160804749,0.5806796550750732,0.7916666865348816,0.534578263759613,0.0010000000474974513,2,P4
|
||||
0.8832933902740479,0.369579553604126,0.8083333373069763,0.4666915237903595,0.0010000000474974513,3,P4
|
||||
0.9600319862365723,0.1569712609052658,0.8208333253860474,0.464290052652359,0.0010000000474974513,4,P4
|
||||
0.9848121404647827,0.07800274342298508,0.6833333373069763,0.8866328001022339,0.0010000000474974513,5,P4
|
||||
0.9928057789802551,0.041098933666944504,0.7958333492279053,0.804090142250061,0.0010000000474974513,6,P4
|
||||
0.9920064210891724,0.03647830709815025,0.8041666746139526,0.6013585329055786,0.0010000000474974513,7,P4
|
||||
0.996802568435669,0.016663840040564537,0.8208333253860474,0.7018534541130066,0.0005000000237487257,8,P4
|
||||
0.9992006421089172,0.010782836936414242,0.8166666626930237,0.7502598166465759,0.0005000000237487257,9,P4
|
||||
0.9992006421089172,0.0074098482728004456,0.8125,0.7980797290802002,0.0005000000237487257,10,P4
|
||||
1.0,0.0051342034712433815,0.8166666626930237,0.8163005113601685,0.0002500000118743628,11,P4
|
||||
1.0,0.005056099500507116,0.8166666626930237,0.8405582308769226,0.0002500000118743628,12,P4
|
||||
0.575815737247467,0.9292432069778442,0.7333333492279053,0.7067171931266785,0.0010000000474974513,1,P5
|
||||
0.8202175498008728,0.4824707508087158,0.8433333039283752,0.4428572952747345,0.0010000000474974513,2,P5
|
||||
0.944337785243988,0.18990492820739746,0.800000011920929,0.5153856873512268,0.0010000000474974513,3,P5
|
||||
0.9782469868659973,0.08659510314464569,0.8299999833106995,0.5515473484992981,0.0010000000474974513,4,P5
|
||||
0.9865642786026001,0.05353087931871414,0.8633333444595337,0.5090053081512451,0.0010000000474974513,5,P5
|
||||
0.9948816299438477,0.022251253947615623,0.8500000238418579,0.5737677216529846,0.0005000000237487257,6,P5
|
||||
0.9955214262008667,0.020851323381066322,0.8433333039283752,0.6206527948379517,0.0005000000237487257,7,P5
|
||||
0.9948816299438477,0.017847692593932152,0.8100000023841858,0.9818077683448792,0.0005000000237487257,8,P5
|
||||
0.9948816299438477,0.01693383976817131,0.8433333039283752,0.6270163059234619,0.0002500000118743628,9,P5
|
||||
0.9968010187149048,0.013330518268048763,0.8399999737739563,0.6716769933700562,0.0002500000118743628,10,P5
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
,precision,recall,f1-score,support
|
||||
negatif,0.8125,0.9,0.8540145985401459,130.0
|
||||
netral,0.8160919540229885,0.7395833333333334,0.7759562841530054,96.0
|
||||
positif,0.9420289855072463,0.8783783783783784,0.9090909090909091,74.0
|
||||
accuracy,0.8433333333333334,0.8433333333333334,0.8433333333333334,300.0
|
||||
macro avg,0.8568736465100782,0.8393205705705706,0.8463539305946868,300.0
|
||||
weighted avg,0.8455999083791437,0.8433333333333334,0.8426214278721159,300.0
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
Word,Frequency
|
||||
pendidikan,519
|
||||
anggaran,501
|
||||
efisiensi,439
|
||||
yang,400
|
||||
tidak,361
|
||||
di,248
|
||||
dan,225
|
||||
dana,205
|
||||
ini,187
|
||||
ada,167
|
||||
itu,133
|
||||
saja,131
|
||||
tapi,116
|
||||
sudah,110
|
||||
untuk,107
|
||||
dipotong,106
|
||||
juga,97
|
||||
sekolah,97
|
||||
pemotongan,96
|
||||
kalau,96
|
||||
kena,96
|
||||
kuliah,94
|
||||
karena,93
|
||||
banget,93
|
||||
buat,92
|
||||
dari,91
|
||||
bisa,91
|
||||
ya,90
|
||||
jadi,88
|
||||
guru,84
|
||||
ke,78
|
||||
dengan,76
|
||||
mau,76
|
||||
pemerintah,74
|
||||
kip,72
|
||||
sama,71
|
||||
mahasiswa,69
|
||||
bos,67
|
||||
indonesia,67
|
||||
pip,64
|
||||
banyak,62
|
||||
saya,61
|
||||
sih,60
|
||||
orang,58
|
||||
kampus,56
|
||||
enggak,56
|
||||
malah,54
|
||||
sampai,53
|
||||
kesehatan,52
|
||||
apa,52
|
||||
sekarang,51
|
||||
nya,51
|
||||
masih,50
|
||||
pada,49
|
||||
makan,49
|
||||
tuh,49
|
||||
lagi,49
|
||||
rakyat,46
|
||||
negara,46
|
||||
akan,45
|
||||
bikin,45
|
||||
beasiswa,45
|
||||
potong,44
|
||||
gua,44
|
||||
dapat,44
|
||||
aku,44
|
||||
bukan,43
|
||||
memang,43
|
||||
anak,43
|
||||
bagaimana,42
|
||||
tolak,42
|
||||
kayak,40
|
||||
gratis,40
|
||||
dikurangi,39
|
||||
mana,38
|
||||
terus,37
|
||||
demo,36
|
||||
padahal,36
|
||||
amp,35
|
||||
kebijakan,35
|
||||
semua,34
|
||||
lebih,34
|
||||
dipangkas,34
|
||||
kok,34
|
||||
jangan,34
|
||||
mereka,34
|
||||
tahu,34
|
||||
gue,33
|
||||
dia,32
|
||||
benar,32
|
||||
tahun,31
|
||||
kan,31
|
||||
naik,31
|
||||
hal,30
|
||||
gaji,30
|
||||
makin,30
|
||||
cuma,30
|
||||
uu,30
|
||||
lu,29
|
||||
belum,29
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
Word,Frequency
|
||||
anggaran,299
|
||||
efisiensi,269
|
||||
pendidikan,218
|
||||
dan,172
|
||||
tidak,151
|
||||
yang,125
|
||||
di,114
|
||||
pip,94
|
||||
ini,89
|
||||
kuliah,88
|
||||
kip,87
|
||||
dana,78
|
||||
cair,66
|
||||
beasiswa,65
|
||||
mahasiswa,61
|
||||
untuk,60
|
||||
ada,60
|
||||
indonesia,53
|
||||
pemotongan,53
|
||||
ke,49
|
||||
itu,47
|
||||
kena,46
|
||||
dari,46
|
||||
ya,42
|
||||
guru,41
|
||||
sudah,40
|
||||
bantuan,37
|
||||
tapi,34
|
||||
pada,34
|
||||
bos,34
|
||||
program,34
|
||||
akan,34
|
||||
sekolah,32
|
||||
pintar,30
|
||||
aku,29
|
||||
dengan,28
|
||||
sri,28
|
||||
mulyani,28
|
||||
tetap,28
|
||||
cek,28
|
||||
bisa,28
|
||||
juga,26
|
||||
cara,26
|
||||
tinggi,25
|
||||
pemerintah,25
|
||||
tahun,25
|
||||
kebijakan,24
|
||||
dampak,24
|
||||
tunjangan,24
|
||||
sama,24
|
||||
dapat,23
|
||||
pastikan,23
|
||||
sampai,23
|
||||
penerima,22
|
||||
jadi,22
|
||||
bahwa,21
|
||||
orang,21
|
||||
dalam,21
|
||||
simak,20
|
||||
atau,20
|
||||
kapan,19
|
||||
menegaskan,19
|
||||
gaji,19
|
||||
kesehatan,18
|
||||
rp,18
|
||||
februari,18
|
||||
kartu,17
|
||||
soal,17
|
||||
prabowo,17
|
||||
hingga,17
|
||||
lagi,17
|
||||
masih,17
|
||||
karena,17
|
||||
kalau,17
|
||||
kita,17
|
||||
baru,16
|
||||
pemangkasan,16
|
||||
daerah,16
|
||||
oleh,16
|
||||
apa,16
|
||||
menteri,15
|
||||
seperti,15
|
||||
memastikan,15
|
||||
aksi,15
|
||||
terdampak,15
|
||||
belum,15
|
||||
kampus,15
|
||||
presiden,15
|
||||
terbaru,15
|
||||
kali,15
|
||||
buat,14
|
||||
terkait,14
|
||||
sektor,14
|
||||
mendiktisaintek,14
|
||||
semua,14
|
||||
sih,14
|
||||
kan,13
|
||||
informasi,13
|
||||
kak,13
|
||||
ukt,13
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
Word,Frequency
|
||||
anggaran,305
|
||||
pendidikan,279
|
||||
efisiensi,268
|
||||
ini,249
|
||||
yang,219
|
||||
untuk,216
|
||||
tidak,157
|
||||
pemerintah,120
|
||||
dan,120
|
||||
kebijakan,115
|
||||
lebih,96
|
||||
dana,92
|
||||
sangat,90
|
||||
indonesia,80
|
||||
benar,80
|
||||
di,80
|
||||
jadi,79
|
||||
ada,76
|
||||
masa,72
|
||||
maju,72
|
||||
tepat,69
|
||||
kip,67
|
||||
matang,66
|
||||
kuliah,64
|
||||
bisa,63
|
||||
dengan,62
|
||||
beasiswa,56
|
||||
langkah,54
|
||||
proyek,53
|
||||
kualitas,50
|
||||
bukan,47
|
||||
terus,46
|
||||
program,44
|
||||
akan,44
|
||||
sudah,41
|
||||
sekolah,41
|
||||
nyata,40
|
||||
sasaran,40
|
||||
bagus,39
|
||||
mendukung,39
|
||||
saya,39
|
||||
membutuhkan,38
|
||||
guru,38
|
||||
honorer,38
|
||||
depan,38
|
||||
sekali,37
|
||||
memperbaiki,37
|
||||
kerja,37
|
||||
penghematan,36
|
||||
dialihkan,36
|
||||
fasilitas,36
|
||||
adalah,36
|
||||
demi,36
|
||||
tertinggal,36
|
||||
daerah,36
|
||||
lagi,35
|
||||
makin,35
|
||||
solusi,34
|
||||
pelajar,34
|
||||
terbaik,34
|
||||
kesejahteraan,34
|
||||
digunakan,34
|
||||
dari,33
|
||||
pemotongan,33
|
||||
memikirkan,33
|
||||
transisi,33
|
||||
dihemat,33
|
||||
optimis,33
|
||||
jelas,32
|
||||
pemborosan,32
|
||||
pemangkasan,31
|
||||
membuat,31
|
||||
terima,31
|
||||
kasih,31
|
||||
apresiasi,31
|
||||
efisien,31
|
||||
perlu,31
|
||||
tingginya,31
|
||||
harus,31
|
||||
setinggi,31
|
||||
transparan,31
|
||||
tanpa,30
|
||||
tetap,29
|
||||
penting,29
|
||||
mantap,28
|
||||
tapi,28
|
||||
ke,28
|
||||
bos,27
|
||||
guna,27
|
||||
kebocoran,27
|
||||
tersalurkan,26
|
||||
berani,26
|
||||
krusial,26
|
||||
pada,26
|
||||
bebas,26
|
||||
korupsi,26
|
||||
kini,26
|
||||
mengefisiensikan,26
|
||||
setuju,26
|
||||
mahal,26
|
||||
|
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1 @@
|
|||
graphviz
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
streamlit==1.28.0
|
||||
tensorflow==2.12.0
|
||||
pandas==1.5.3
|
||||
numpy==1.23.5
|
||||
plotly==5.15.0
|
||||
matplotlib==3.7.1
|
||||
seaborn==0.12.2
|
||||
wordcloud==1.9.2
|
||||
Sastrawi==1.0.1
|
||||
streamlit-option-menu==0.3.6
|
||||
graphviz==0.20.1
|
||||
h5py==3.8.0
|
||||
scikit-learn==1.2.2
|
||||
gensim==4.3.1
|
||||
|
|
@ -0,0 +1,150 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import json
|
||||
import h5py
|
||||
import re
|
||||
import pickle
|
||||
import os
|
||||
import streamlit as st
|
||||
import tensorflow as tf
|
||||
|
||||
try:
|
||||
from tensorflow.keras.utils import pad_sequences
|
||||
except ImportError:
|
||||
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
||||
|
||||
# ==============================================================================
|
||||
# 1. KONFIGURASI GLOBAL
|
||||
# ==============================================================================
|
||||
MAX_SEQUENCE_LENGTH = 100
|
||||
MODEL_PATH = 'model/Model_Sentiment_LSTM.h5'
|
||||
TOKENIZER_JSON_PATH = 'model/tokenizer_sentiment.json'
|
||||
TOKENIZER_PICKLE_PATH = 'model/tokenizer_sentiment.pickle'
|
||||
|
||||
# ==============================================================================
|
||||
# 2. PATCHING MODEL
|
||||
# ==============================================================================
|
||||
def recursive_fix_config(config):
    """Patch a decoded model config so it can be read across TF versions.

    The structure is walked recursively. For every dict encountered:

    * a ``batch_shape`` entry is renamed to ``batch_input_shape``;
    * a ``dtype`` entry that is a dict, or whose string form contains
      ``'Policy'``, is replaced by the plain string ``'float32'``.

    Dicts are patched in place and returned; lists are rebuilt as new lists;
    any other value is returned unchanged.
    """
    if isinstance(config, dict):
        if 'batch_shape' in config:
            config['batch_input_shape'] = config.pop('batch_shape')

        # A missing key yields None, which matches neither test below.
        dtype_value = config.get('dtype')
        if isinstance(dtype_value, dict) or 'Policy' in str(dtype_value):
            config['dtype'] = 'float32'

        # Recurse into every value; the key set does not change, so the
        # same dict object is kept and updated.
        config.update(
            {key: recursive_fix_config(value) for key, value in config.items()}
        )
        return config

    if isinstance(config, list):
        return [recursive_fix_config(item) for item in config]

    return config
|
||||
|
||||
# ==============================================================================
|
||||
# 3. LOAD RESOURCES (MODEL & TOKENIZER)
|
||||
# ==============================================================================
|
||||
@st.cache_resource
def load_resources():
    """Load the sentiment model and its tokenizer.

    Model: read from ``MODEL_PATH`` with the regular Keras loader. If that
    raises, the architecture JSON is taken from the H5 file's
    ``model_config`` attribute, patched with ``recursive_fix_config``,
    rebuilt with ``model_from_json``, and the weights are loaded from the
    same file.

    Tokenizer: read from ``TOKENIZER_JSON_PATH`` when that file exists,
    otherwise from ``TOKENIZER_PICKLE_PATH``.

    Returns:
        ``(model, tokenizer)`` on success. On any failure the problem is
        shown with ``st.error`` and ``(None, None)`` is returned.

    NOTE(review): the function is wrapped in ``st.cache_resource``, so a
    ``(None, None)`` failure result is presumably cached as well - confirm
    whether the cache has to be cleared after fixing a missing file.
    """
    # --- A. LOAD MODEL ---
    if not os.path.exists(MODEL_PATH):
        st.error(f"❌ File model tidak ditemukan di: {MODEL_PATH}")
        return None, None

    try:
        model = tf.keras.models.load_model(MODEL_PATH, compile=False)
    except Exception:
        # Fallback: rebuild the architecture from the patched config stored
        # in the H5 file, then load the weights separately.
        try:
            with h5py.File(MODEL_PATH, mode='r') as f:
                model_config_str = f.attrs.get('model_config')
                if isinstance(model_config_str, bytes):
                    model_config_str = model_config_str.decode('utf-8')

            # Fail with a readable message instead of the TypeError that
            # json.loads(None) would raise.
            if model_config_str is None:
                raise ValueError("atribut 'model_config' tidak ditemukan di file H5")

            model_config_dict = json.loads(model_config_str)
            fixed_config = recursive_fix_config(model_config_dict)

            model = tf.keras.models.model_from_json(json.dumps(fixed_config))
            model.load_weights(MODEL_PATH)
        except Exception as e:
            st.error(f"❌ Gagal memuat model: {e}")
            return None, None

    # --- B. LOAD TOKENIZER ---
    try:
        if os.path.exists(TOKENIZER_JSON_PATH):
            with open(TOKENIZER_JSON_PATH, 'r', encoding='utf-8') as f:
                content = f.read()

            # The file may hold the tokenizer config as a JSON object or as
            # a JSON-encoded string; tokenizer_from_json is given a JSON
            # string in both cases.
            try:
                parsed_json = json.loads(content)
            except ValueError:
                # Not parseable here: pass the raw text through unchanged.
                input_tokenizer = content
            else:
                if isinstance(parsed_json, str):
                    input_tokenizer = parsed_json
                else:
                    input_tokenizer = json.dumps(parsed_json)

            tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(input_tokenizer)
        elif os.path.exists(TOKENIZER_PICKLE_PATH):
            # NOTE(security): pickle.load can execute arbitrary code from the
            # file; only load tokenizer pickles that come from a trusted source.
            with open(TOKENIZER_PICKLE_PATH, 'rb') as handle:
                tokenizer = pickle.load(handle)
        else:
            st.error("❌ File Tokenizer tidak ditemukan.")
            return None, None
    except Exception as e:
        st.error(f"❌ Gagal memuat tokenizer: {e}")
        return None, None

    return model, tokenizer
|
||||
|
||||
# ==============================================================================
|
||||
# 4. PREPROCESSING TEKS
|
||||
# ==============================================================================
|
||||
# Slang / abbreviation -> standard word, applied token by token in clean_text.
slang_dict = {
    'bgt': 'banget',
    'yg': 'yang',
    'gak': 'tidak',
    'ga': 'tidak',
    'kalo': 'kalau',
    'kl': 'kalau',
    'dr': 'dari',
    'krn': 'karena',
    'jd': 'jadi',
    'sdh': 'sudah',
    'aja': 'saja',
    'dgn': 'dengan',
    'tdk': 'tidak',
    'tp': 'tapi',
    'sy': 'saya',
    'utk': 'untuk',
    'd': 'di',
    'blm': 'belum',
    'jgn': 'jangan',
    'gw': 'saya',
    'lo': 'kamu',
    'sm': 'sama',
    'tau': 'tahu',
    'kpn': 'kapan',
    'bs': 'bisa',
    'lbh': 'lebih',
    'kmrn': 'kemarin',
    'nggak': 'tidak',
    'enggak': 'tidak',
    'gk': 'tidak',
    'kaga': 'tidak',
    'tak': 'tidak',
    'g': 'tidak',
    'bener': 'benar',
    'bnr': 'benar',
    'msh': 'masih',
    'udah': 'sudah',
    'sprt': 'seperti',
    'opr': 'operasional',
    'tlg': 'tolong',
    'bkn': 'bukan',
    'aq': 'aku',
    'km': 'kamu',
    'dlm': 'dalam',
}

# Ordered (pattern, replacement) pairs applied to the lower-cased text.
_TEXT_SCRUBBERS = (
    (re.compile(r'http\S+|www\S+|https\S+', flags=re.MULTILINE), ''),  # URLs
    (re.compile(r'@\w+'), ''),        # @mentions
    (re.compile(r'#\w+'), ''),        # #hashtags
    (re.compile(r'\d+'), ''),         # digits
    (re.compile(r'[^\w\s]'), ' '),    # punctuation / symbols -> space
    (re.compile(r'\s+'), ' '),        # collapse whitespace runs
)


def clean_text(text):
    """Normalise raw text before it is tokenised.

    The text is lower-cased; URLs, @mentions, #hashtags and digits are
    removed; every remaining character that is neither a word character nor
    whitespace becomes a space; whitespace is collapsed and trimmed; finally
    each token found in ``slang_dict`` is replaced by its standard form.

    Returns an empty string when ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        return ""

    cleaned = text.lower()
    for pattern, replacement in _TEXT_SCRUBBERS:
        cleaned = pattern.sub(replacement, cleaned)

    tokens = cleaned.strip().split()
    return " ".join(slang_dict.get(token, token) for token in tokens)
|
||||
|
||||
# ==============================================================================
|
||||
# 5. PREDIKSI
|
||||
# ==============================================================================
|
||||
def predict_sentiment(text, model, tokenizer):
    """Classify one text as Negatif, Netral or Positif.

    Args:
        text: Raw input text.
        model: Object exposing ``predict``; row 0 of its output is used.
        tokenizer: Object exposing ``texts_to_sequences``.

    Returns:
        A 4-tuple ``(label, confidence, prediction, cleaned_text)``:
        the label at the arg-max index, that entry multiplied by 100, the
        full prediction row, and the text after ``clean_text``.
        When ``text``, ``model`` or ``tokenizer`` is falsy the result is
        ``("Error", 0.0, [0, 0, 0], text)`` with the text untouched.
    """
    if not (text and model and tokenizer):
        return "Error", 0.0, [0, 0, 0], text

    cleaned_text = clean_text(text)

    # Tokenise, then pad/truncate at the end to the fixed model input length.
    sequences = tokenizer.texts_to_sequences([cleaned_text])
    model_input = pad_sequences(
        sequences,
        maxlen=MAX_SEQUENCE_LENGTH,
        padding='post',
        truncating='post',
    )

    probabilities = model.predict(model_input, verbose=0)[0]

    # Output index -> label name.
    class_names = ['Negatif', 'Netral', 'Positif']
    best_index = np.argmax(probabilities)

    return (
        class_names[best_index],
        probabilities[best_index] * 100,
        probabilities,
        cleaned_text,
    )
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,207 @@
|
|||
import streamlit as st
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import altair as alt
|
||||
from wordcloud import WordCloud
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
from utils import predict_sentiment
|
||||
|
||||
def render_analisis_csv(model, tokenizer):
    """Render the batch (CSV upload) sentiment analysis page.

    The page lets the user upload a CSV file, validates that it has a
    ``Teks Tweet`` column, runs ``predict_sentiment`` on every row, stores the
    result in ``st.session_state['batch_results']`` and shows it in three
    tabs (table + download, statistics charts, word cloud).

    Args:
        model: Model object forwarded unchanged to ``predict_sentiment``.
        tokenizer: Tokenizer object forwarded unchanged to ``predict_sentiment``.
    """
    st.title("📂 Analisis File CSV (Batch)")
    st.markdown("Unggah file data (CSV) yang berisi ribuan komentar, dan biarkan AI menganalisis sentimennya secara massal.")

    st.info("💡 **Panduan Upload:** Pastikan file CSV Anda memiliki kolom bernama **Teks Tweet** yang berisi teks/opini. Jika namanya berbeda, mohon ubah terlebih dahulu di Excel.")

    # 1. Session-state defaults
    if 'batch_results' not in st.session_state:
        st.session_state['batch_results'] = None
    if 'original_text_col' not in st.session_state:
        st.session_state['original_text_col'] = None

    # ==========================================================================
    # 2. File upload area
    # ==========================================================================
    uploaded_file = st.file_uploader("Upload File CSV di sini:")

    # Removing the file from the uploader also discards the previous results.
    if uploaded_file is None:
        st.session_state['batch_results'] = None
        st.session_state['original_text_col'] = None

    if uploaded_file is not None:
        # Extension check is done here (the uploader itself accepts any file).
        if not uploaded_file.name.lower().endswith('.csv'):
            st.error("❌ **Error:** Format file tidak didukung! Sistem hanya dapat memproses file berekstensi **.csv**.")
            return  # stop rendering the rest of the page

        try:
            df_upload = pd.read_csv(uploaded_file)

            # Validation 1: the file must contain at least one row.
            if df_upload.empty:
                st.error("❌ File CSV yang Anda unggah kosong (0 baris). Silakan periksa kembali file Anda.")
                return

            # Validation 2: strict, case-sensitive required column.
            KOLOM_WAJIB = "Teks Tweet"

            if KOLOM_WAJIB not in df_upload.columns:
                st.error(f"❌ **Error Format:** File CSV Anda tidak memiliki kolom bernama **'{KOLOM_WAJIB}'**.")
                st.warning(f"Perbaiki file Anda: Buka di Excel, ubah nama kolom yang berisi teks opini menjadi '{KOLOM_WAJIB}', simpan kembali sebagai CSV, lalu unggah ulang.")
                return

            st.markdown("---")
            st.subheader("⚙️ Konfigurasi Analisis")

            text_col = KOLOM_WAJIB
            st.success(f"✅ Kolom target **'{text_col}'** ditemukan! Total Data: **{len(df_upload)} baris**.")

            if st.button("🚀 Mulai Proses Analisis", type="primary", use_container_width=True):
                with st.spinner('🤖 AI sedang memproses... Mohon tunggu.'):
                    # Replace NaN so every cell can be treated as text.
                    df_upload[text_col] = df_upload[text_col].fillna("")

                    results_label, results_clean = [], []
                    my_bar = st.progress(0, text="Memproses data...")
                    total_data = len(df_upload)
                    error_count = 0

                    # Count by position, not by index label, so the progress
                    # fraction is always within (0, 1].
                    for nomor, nilai in enumerate(df_upload[text_col], start=1):
                        teks = str(nilai)

                        if not teks.strip():
                            # Empty rows skip the model and are labelled neutral.
                            results_label.append("Netral")
                            results_clean.append("")
                        else:
                            try:
                                lbl, _, _, cln = predict_sentiment(teks, model, tokenizer)
                                results_label.append(lbl)
                                results_clean.append(cln)
                            except Exception:
                                # Deliberate best effort: one bad row must not
                                # abort the whole batch; it is counted and
                                # reported after the loop.
                                results_label.append("Error")
                                results_clean.append("GAGAL DIPROSES")
                                error_count += 1

                        persen = nomor / total_data
                        my_bar.progress(persen, text=f"Selesai: {nomor} dari {total_data} data ({int(persen*100)}%)")

                    # Attach results and keep them across reruns.
                    df_upload['Teks_Bersih'] = results_clean
                    df_upload['Prediksi_Sentimen'] = results_label

                    st.session_state['batch_results'] = df_upload
                    st.session_state['original_text_col'] = text_col

                    if error_count > 0:
                        st.warning(f"⚠️ Analisis selesai, namun ada **{error_count} baris yang gagal diproses** (ditandai dengan label 'Error').")
                    else:
                        st.success("✅ Semua data berhasil dianalisis tanpa masalah!")

        except pd.errors.EmptyDataError:
            st.error("❌ **Error:** File CSV kosong atau format rusak.")
        except pd.errors.ParserError:
            st.error("❌ **Error Parsing:** Susunan koma (delimiter) pada file CSV berantakan. Harap simpan ulang file Excel ke format CSV.")
        except Exception as e:
            # Page-level boundary: report anything unexpected to the user.
            st.error(f"❌ **Kesalahan Sistem:** Terjadi masalah yang tidak terduga: `{e}`")

    # ==========================================================================
    # 3. Results area
    # ==========================================================================
    if st.session_state['batch_results'] is not None:
        st.markdown("---")
        df_final = st.session_state['batch_results'].copy()
        df_final.index = range(1, len(df_final) + 1)  # 1-based row numbers for display
        kolom_asli = st.session_state['original_text_col']

        df_final['Prediksi_Sentimen'] = df_final['Prediksi_Sentimen'].astype(str).str.strip().str.title()

        tab1, tab2, tab3 = st.tabs(["📋 Tabel Hasil", "📊 Statistik & Grafik", "☁️ WordCloud"])

        # --- Tab 1: result table + download ---
        with tab1:
            st.subheader("📋 Pratinjau Data Hasil Analisis")
            st.dataframe(df_final, use_container_width=True)

            st.write("")
            csv = df_final.to_csv(index=False).encode('utf-8')
            st.download_button("📥 Download Hasil Lengkap (CSV)", data=csv, file_name="Hasil_Analisis_Batch.csv", mime="text/csv")

        # --- Tab 2: statistics & charts ---
        with tab2:
            st.subheader("📊 Statistik Sentimen Data Baru")
            count_res = df_final['Prediksi_Sentimen'].value_counts().reset_index()
            count_res.columns = ['Sentimen', 'Jumlah']

            warna_map = pd.DataFrame({
                'Sentimen': ['Positif', 'Netral', 'Negatif'],
                'Warna': ['#00CC96', '#808495', '#FF4B4B']
            })
            # Inner merge: labels outside the three sentiments (e.g. 'Error')
            # are left out of the bar chart.
            chart_data = count_res.merge(warna_map, on='Sentimen')

            col_stat1, col_stat2 = st.columns(2)

            with col_stat1:
                st.caption("Distribusi Jumlah")
                c = alt.Chart(chart_data).mark_bar().encode(
                    x=alt.X('Sentimen', sort=['Negatif', 'Netral', 'Positif']),
                    y='Jumlah',
                    color=alt.Color('Sentimen', scale=alt.Scale(domain=['Positif', 'Netral', 'Negatif'], range=['#00CC96', '#808495', '#FF4B4B']), legend=None),
                    tooltip=['Sentimen', 'Jumlah']
                ).properties(height=350)
                st.altair_chart(c, use_container_width=True)

            with col_stat2:
                st.caption("Proporsi Persentase")
                fig_pie = px.pie(count_res, names='Sentimen', values='Jumlah', hole=0.4,
                                 color='Sentimen', color_discrete_map={'Negatif':'#FF4B4B', 'Netral':'#808495', 'Positif':'#00CC96'})
                st.plotly_chart(fig_pie, use_container_width=True)

        # --- Tab 3: word cloud ---
        with tab3:
            st.subheader("☁️ WordCloud: Representasi Visual Teks")

            pilihan_wc = [
                "1. Data Mentah",
                "2. Data Bersih (Preprocessed)",
                "3. Sentimen NEGATIF",
                "4. Sentimen NETRAL",
                "5. Sentimen POSITIF"
            ]
            sent_choice = st.selectbox("Pilih Kategori Teks (Langsung Berubah):", pilihan_wc)

            filter_sentimen = df_final['Prediksi_Sentimen'].str.lower()
            text_wc = ""
            tema_warna = 'viridis'

            if "Mentah" in sent_choice:
                text_wc = " ".join(df_final[kolom_asli].astype(str))
                tema_warna = "cividis"
            elif "Bersih" in sent_choice:
                text_wc = " ".join(df_final['Teks_Bersih'].astype(str))
                tema_warna = "viridis"
            elif "NEGATIF" in sent_choice:
                text_wc = " ".join(df_final[filter_sentimen == 'negatif']['Teks_Bersih'].astype(str))
                tema_warna = "Reds"
            elif "NETRAL" in sent_choice:
                text_wc = " ".join(df_final[filter_sentimen == 'netral']['Teks_Bersih'].astype(str))
                tema_warna = "Greys"
            elif "POSITIF" in sent_choice:
                text_wc = " ".join(df_final[filter_sentimen == 'positif']['Teks_Bersih'].astype(str))
                tema_warna = "Greens"

            if not text_wc.strip():
                st.warning("⚠️ Tidak ada data untuk kategori ini di file Anda.")
            else:
                with st.spinner("Menggambar WordCloud..."):
                    try:
                        wc = WordCloud(width=800, height=400, background_color='white', colormap=tema_warna, max_words=100).generate(text_wc)
                    except ValueError:
                        # NOTE(review): WordCloud presumably raises ValueError
                        # when no usable word remains after its own filtering;
                        # treat that like "no data" instead of crashing the page.
                        st.warning("⚠️ Tidak ada data untuk kategori ini di file Anda.")
                    else:
                        wc_array = np.array(wc.to_image())

                        fig_wc, ax = plt.subplots(figsize=(10, 5))
                        ax.imshow(wc_array, interpolation='bilinear')
                        ax.axis("off")
                        st.pyplot(fig_wc)
                        # pyplot keeps every figure alive until it is closed;
                        # release it so reruns do not accumulate figures.
                        plt.close(fig_wc)
|
||||
|
|
@ -0,0 +1,239 @@
|
|||
import streamlit as st
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import json
|
||||
import os
|
||||
|
||||
from utils import predict_sentiment
|
||||
|
||||
HISTORY_FILE = 'data/riwayat_analisis.json'


def load_history():
    """Load the persisted analysis history.

    Returns:
        The list stored in ``HISTORY_FILE``. An empty list is returned when
        the file does not exist, cannot be decoded as UTF-8 JSON, or does not
        contain a JSON array (callers append to and iterate over the result).
    """
    try:
        with open(HISTORY_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        return []
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses: a corrupt file is treated as "no history".
        return []
    return data if isinstance(data, list) else []


def save_history(data):
    """Persist the analysis history to ``HISTORY_FILE`` as indented JSON.

    Args:
        data: JSON-serializable history (a list of dict entries).

    Raises:
        TypeError: If ``data`` contains a value json cannot serialize. The
            existing file is left untouched in that case.
    """
    # Serialize before opening the file: opening with 'w' truncates it, so a
    # serialization error must surface while the old content is still intact.
    payload = json.dumps(data, indent=4)

    directory = os.path.dirname(HISTORY_FILE)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(HISTORY_FILE, 'w', encoding='utf-8') as f:
        f.write(payload)
|
||||
|
||||
# ==============================================================================
|
||||
# Fungsi Clear sekarang menghapus Teks DAN Hasil Prediksi
|
||||
# ==============================================================================
|
||||
def clear_input():
    """Button callback: empty the text box and drop the last prediction.

    Writes ``""`` to the ``input_teks_analisis`` session key (the key bound to
    the text area) and ``None`` to ``latest_result``.
    """
    resets = (
        ('input_teks_analisis', ""),
        ('latest_result', None),
    )
    for key, blank in resets:
        st.session_state[key] = blank
|
||||
|
||||
# ==============================================================================
|
||||
# RENDER HALAMAN UTAMA
|
||||
# ==============================================================================
|
||||
def render_analisis_teks(model, tokenizer):
    """Render the single-text sentiment analysis page.

    The page has three parts: a text area with "analyze" / "clear" buttons,
    the latest prediction (label, confidence and per-class probabilities),
    and a history table backed by ``load_history`` / ``save_history`` with
    search, selection, deletion (behind a confirmation step) and CSV download.

    Args:
        model: Model object forwarded unchanged to ``predict_sentiment``.
        tokenizer: Tokenizer object forwarded unchanged to ``predict_sentiment``.
    """
    st.title("💬 Analisis Sentimen (Single Text)")
    st.markdown("Ketikkan kalimat opini terkait kebijakan efisiensi anggaran pendidikan, dan biarkan AI memprediksi sentimennya secara *real-time*.")

    # 1. Session-state defaults
    if 'history_analisis' not in st.session_state:
        st.session_state['history_analisis'] = load_history()
    if 'latest_result' not in st.session_state:
        st.session_state['latest_result'] = None
    if 'show_confirm' not in st.session_state:
        st.session_state['show_confirm'] = False
    if 'rows_to_delete' not in st.session_state:
        st.session_state['rows_to_delete'] = []
    if 'input_teks_analisis' not in st.session_state:
        st.session_state['input_teks_analisis'] = ""

    # ==========================================================================
    # 2. Text input & buttons
    # ==========================================================================
    input_text = st.text_area(
        "Masukkan Teks Opini di sini:",
        height=150,
        placeholder="Contoh: Sangat kecewa anggaran KIP Kuliah dipotong...",
        key='input_teks_analisis'
    )

    # If the user erased the text by hand, the old result is dropped as well.
    if not input_text.strip():
        st.session_state['latest_result'] = None

    col_btn1, col_btn2, col_spacer = st.columns([2, 2, 6])

    with col_btn1:
        btn_analisis = st.button("🔍 Analisis Sekarang", type="primary", use_container_width=True)

    with col_btn2:
        st.button("🧹 Bersihkan Teks", on_click=clear_input, use_container_width=True)

    if btn_analisis:
        if input_text.strip():
            with st.spinner('🤖 Model LSTM sedang memproses teks...'):
                label, confidence, probs, clean_txt = predict_sentiment(input_text, model, tokenizer)

                # Convert to builtin floats before storing: the history entry
                # is written with json, which rejects NumPy scalar types.
                confidence = float(confidence)
                probabilitas_bersih = [float(p) for p in probs]

                st.session_state['latest_result'] = {
                    "label": label,
                    "confidence": confidence,
                    "probs": probabilitas_bersih,
                    "clean_txt": clean_txt
                }

                waktu_sekarang = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                new_entry = {
                    "Waktu": waktu_sekarang,
                    "Teks Asli": input_text,
                    "Teks Bersih": clean_txt,
                    "Label": label,
                    "Keyakinan (%)": round(confidence, 2)
                }
                st.session_state['history_analisis'].append(new_entry)
                save_history(st.session_state['history_analisis'])
        else:
            st.warning("⚠️ Mohon masukkan teks terlebih dahulu.")

    # ==========================================================================
    # 3. Prediction result
    # ==========================================================================
    if st.session_state['latest_result']:
        res = st.session_state['latest_result']
        st.markdown("---")
        col_res1, col_res2 = st.columns([1, 2])

        with col_res1:
            st.subheader("🎯 Hasil Prediksi")
            if res['label'] == "Positif":
                st.success("**🟢 SENTIMEN POSITIF**")
            elif res['label'] == "Negatif":
                st.error("**🔴 SENTIMEN NEGATIF**")
            else:
                st.warning("**⚪ SENTIMEN NETRAL**")
            st.metric("Tingkat Keyakinan (Confidence)", f"{res['confidence']:.2f}%")

        with col_res2:
            st.subheader("📊 Distribusi Probabilitas")
            st.caption("Detail perhitungan matematis model (Total 100%)")

            # probs is indexed as [Negatif, Netral, Positif].
            st.write(f"🔴 **Negatif:** {res['probs'][0]*100:.1f}%")
            st.progress(res['probs'][0])

            st.write(f"⚪ **Netral:** {res['probs'][1]*100:.1f}%")
            st.progress(res['probs'][1])

            st.write(f"🟢 **Positif:** {res['probs'][2]*100:.1f}%")
            st.progress(res['probs'][2])

        st.markdown("#### 🔍 Teks Hasil Preprocessing (Cleaning & Normalisasi)")
        st.info(f"{res['clean_txt']}")

    st.markdown("---")

    # ==========================================================================
    # 4. History
    # ==========================================================================
    st.subheader("📚 Riwayat Analisis")

    if st.session_state['history_analisis']:
        # Newest entries first.
        df_history = pd.DataFrame(st.session_state['history_analisis'])
        df_display = df_history.iloc[::-1].reset_index(drop=True)

        if 'Pilih' not in df_display.columns:
            df_display.insert(0, "Pilih", False)

        # Search box & select-all
        c_search, c_all = st.columns([3, 1])
        with c_search:
            q = st.text_input("Cari:", placeholder="Filter riwayat...", label_visibility="collapsed")
        with c_all:
            if st.checkbox("Pilih Semua"):
                df_display['Pilih'] = True

        if q:
            # regex=False: the query is plain user text; characters such as
            # "(" or "[" must not be interpreted as a regular expression.
            df_display = df_display[df_display['Teks Asli'].str.contains(q, case=False, na=False, regex=False)]

        # Editable table (only the checkbox column is editable)
        with st.container():
            edited_df = st.data_editor(
                df_display,
                column_config={
                    "Pilih": st.column_config.CheckboxColumn("Hapus?", width="small", default=False),
                    "Waktu": st.column_config.TextColumn("Waktu", disabled=True),
                    "Teks Asli": st.column_config.TextColumn("Teks Tweet", disabled=True),
                    "Label": st.column_config.TextColumn("Prediksi", disabled=True),
                    "Keyakinan (%)": st.column_config.NumberColumn("Score", format="%.2f%%", disabled=True)
                },
                hide_index=True,
                use_container_width=True,
                key="history_editor"
            )

        selected_rows = edited_df[edited_df['Pilih'] == True]
        count = len(selected_rows)

        # Reserved slot so the confirmation renders above the action buttons.
        popup_placeholder = st.empty()

        # --- Row 1: delete buttons ---
        col_del_1, col_del_2 = st.columns(2)

        with col_del_1:
            if st.button(f"🗑️ Hapus ({count}) Item", type="primary", disabled=count==0, use_container_width=True):
                # Entries are identified by their 'Waktu' timestamp.
                st.session_state['rows_to_delete'] = selected_rows['Waktu'].tolist()
                st.session_state['show_confirm'] = True

        with col_del_2:
            if st.button("🚨 Hapus Semua", type="secondary", use_container_width=True):
                st.session_state['rows_to_delete'] = "ALL"
                st.session_state['show_confirm'] = True

        # --- Row 2: download button ---
        st.write("")

        csv_data = df_display.drop(columns=['Pilih']).to_csv(index=False).encode('utf-8')
        st.download_button(
            label="📥 Download CSV (Backup Data Riwayat)",
            data=csv_data,
            file_name="Riwayat_Analisis.csv",
            mime="text/csv",
            use_container_width=True
        )

        # --- Confirmation step ---
        if st.session_state.get('show_confirm', False):
            with popup_placeholder.container():
                st.markdown("---")
                msg = "SEMUA DATA" if st.session_state['rows_to_delete'] == "ALL" else f"{len(st.session_state['rows_to_delete'])} DATA TERPILIH"

                with st.chat_message("assistant", avatar="⚠️"):
                    st.write(f"**KONFIRMASI:** Apakah Anda yakin ingin menghapus **{msg}**?")

                    col_yes, col_no = st.columns([1, 4])
                    with col_yes:
                        if st.button("✅ Ya, Hapus"):
                            if st.session_state['rows_to_delete'] == "ALL":
                                st.session_state['history_analisis'] = []
                            else:
                                targets = st.session_state['rows_to_delete']
                                st.session_state['history_analisis'] = [
                                    item for item in st.session_state['history_analisis']
                                    if item['Waktu'] not in targets
                                ]

                            save_history(st.session_state['history_analisis'])
                            st.session_state['show_confirm'] = False
                            st.session_state['rows_to_delete'] = []
                            st.success("Berhasil dihapus!")
                            st.rerun()

                    with col_no:
                        if st.button("❌ Batal"):
                            st.session_state['show_confirm'] = False
                            st.session_state['rows_to_delete'] = []
                            st.rerun()
                st.markdown("---")

    else:
        st.info("📝 Belum ada riwayat analisis.")
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
import streamlit as st
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
def render_beranda():
    """Render the landing page: model summary, methodology and feature list.

    The displayed accuracy is read from ``model/Tabel_Performa_LSTM.csv``
    (row ``accuracy``, column ``f1-score``) and shown as a percentage. When
    that file or cell cannot be read the page falls back to ``0.0``.
    """
    st.title("🎓 Selamat Datang di Sistem Analisis Sentimen")
    st.markdown("### Kebijakan Efisiensi Anggaran Pendidikan (2025)")
    st.markdown("Sistem ini dikembangkan untuk menganalisis opini masyarakat di media sosial X (Twitter) terkait isu pemotongan atau efisiensi anggaran di sektor pendidikan, seperti Dana BOS, PIP, KIP Kuliah, dan Tunjangan Guru.")

    st.markdown("---")

    # ==========================================================================
    # Data retrieval
    # ==========================================================================
    # Test accuracy. Only the failures these two lines can produce are caught:
    # missing/unreadable file (OSError), empty or malformed CSV (ValueError
    # subclasses), missing row/column (KeyError), non-numeric cell (TypeError).
    try:
        df_perf = pd.read_csv('model/Tabel_Performa_LSTM.csv', index_col=0)
        akurasi_testing = round(df_perf.loc['accuracy', 'f1-score'] * 100, 2)
    except (OSError, ValueError, KeyError, TypeError):
        akurasi_testing = 0.0

    # --- 1. Model metric summary ---
    st.subheader("📊 Ringkasan Model Machine Learning")
    m1, m2, m3, m4 = st.columns(4)

    m1.metric("Arsitektur", "LSTM", "Deep Learning")
    m2.metric("Akurasi Model", f"{akurasi_testing}%", "Data Testing P5")
    m3.metric("Pembagian Data", "80 : 20", "Latih : Uji")
    m4.metric("Metode Ekstraksi", "LDA", "Topic Modeling")

    st.markdown("---")

    # --- 2. Research methodology ---
    st.subheader("🛠️ Metodologi & Arsitektur Sistem")
    col_metode1, col_metode2 = st.columns([1, 1])

    with col_metode1:
        st.markdown("""
**Tahapan Pemrosesan:**
1. **Crawling Data:** Pengambilan data via Tweet Harvest (Feb-Mar 2025).
2. **Preprocessing:** Case folding, Cleaning, Tokenizing dan Normalisasi Slang. *(Tanpa Stopword & Stemming agar urutan konteks kalimat tetap utuh)*.
3. **Word Embedding:** Standard Keras Embedding (Dimensi 128) dengan fitur *Masking*.
4. **Deep Learning:** Model **Long Short-Term Memory (LSTM)** biasa untuk klasifikasi sentimen (Negatif, Netral, Positif).
5. **Topic Modeling:** Latent Dirichlet Allocation (LDA) untuk mengetahui topik dominan.
""")

    with col_metode2:
        st.info("**Mengapa menggunakan LSTM?** \n\nPenggunaan algoritma LSTM yang dipadukan dengan *Keras Embedding* terbukti lebih ringan dari segi komputasi namun tetap optimal dalam menangkap pola konteks kalimat secara sekuensial (berurutan). Fitur *Masking* memastikan padding kalimat tidak merusak makna sentimen.")

        st.success(f"**Hasil Pelatihan Model:** \nMelalui 5 tahapan percobaan (skenario 20% hingga 100% data latih), Akurasi Testing pada skenario P5 (100% data) mencapai **{akurasi_testing}%**. Ini menunjukkan model mampu memprediksi data baru dengan sangat baik.")

    st.markdown("---")

    # --- 3. System features ---
    st.subheader("✨ Fitur Utama Sistem")
    f1, f2, f3 = st.columns(3)
    with f1:
        st.success("**1. Dashboard Visualisasi**\n\nMenampilkan tren waktu, WordCloud, dan distribusi sentimen masyarakat secara interaktif.")
    with f2:
        st.warning("**2. Analisis Teks Langsung**\n\nPengguna dapat mengetikkan kalimat opini baru dan model akan memprediksi sentimennya secara *real-time*.")
    with f3:
        st.info("**3. Analisis File CSV**\n\nMengunggah data komentar/tweet dalam jumlah banyak sekaligus untuk dianalisis massal.")
|
||||
|
|
@ -0,0 +1,445 @@
|
|||
import streamlit as st
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import hashlib
|
||||
from sklearn.model_selection import train_test_split
|
||||
import graphviz
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
from sklearn.metrics import confusion_matrix
|
||||
|
||||
@st.cache_data
def load_data(file_path):
    """Read a CSV file into a DataFrame, falling back to an empty frame.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``, or an empty ``DataFrame`` when the
        file is missing/unreadable (``OSError``) or cannot be parsed or
        decoded (``ValueError`` and its pandas subclasses).
    """
    try:
        return pd.read_csv(file_path)
    except (OSError, ValueError):
        # NOTE(review): the function is wrapped in st.cache_data, so this
        # empty fallback is presumably cached for the same path as well;
        # confirm whether that is intended.
        return pd.DataFrame()
|
||||
|
||||
def render_proses_data():
|
||||
st.title("⚙️ Tahapan Proses Data & Modeling")
|
||||
st.markdown("Berikut adalah dokumentasi teknis alur pengolahan data dari mentah hingga evaluasi model, disertai penjelasan metodologi.")
|
||||
|
||||
# LOAD DATA
|
||||
df_mentah = load_data('data/Data_Lengkap_Tokenisasi.csv')
|
||||
|
||||
# ==============================================================================
|
||||
# NAVIGASI
|
||||
# ==============================================================================
|
||||
opsi_tahapan = [
|
||||
"1. Crawling Data",
|
||||
"2. Preprocessing",
|
||||
"3. Persiapan Data Latih",
|
||||
"4. Arsitektur Model",
|
||||
"5. Evaluasi Model",
|
||||
"6. Topic Modeling (LDA)"
|
||||
]
|
||||
|
||||
pilihan = st.radio("Pilih Tahapan Proses:", options=opsi_tahapan, horizontal=True, label_visibility="collapsed")
|
||||
|
||||
st.markdown("---")
|
||||
|
||||
# ==============================================================================
|
||||
# KONTEN TAHAPAN
|
||||
# ==============================================================================
|
||||
|
||||
# --- 1. CRAWLING DATA ---
|
||||
if pilihan == "1. Crawling Data":
|
||||
st.header("1. Pengumpulan Data (Crawling)")
|
||||
st.info("Tools: **Tweet-Harvest (Node.js)** API Scraper")
|
||||
|
||||
st.success(f"✅ Total Data Terkumpul: **{len(df_mentah):,} Data** (Setelah Deduplikasi)")
|
||||
st.warning("⚠️ **Catatan Imbalance:** Distribusi sentimen awal tidak seimbang, ditangani dengan ROS (Random Over Sampling) pada tahap Training.")
|
||||
|
||||
st.markdown("### 📋 Kriteria Pengambilan Data")
|
||||
st.markdown("""
|
||||
- **Platform**: X (Twitter)
|
||||
- **Periode**: 01 Februari 2025 - 31 Maret 2025
|
||||
- **Filter Sistem**: Hanya Bahasa Indonesia (`lang:id`) & Mengabaikan Retweet (`-is:retweet`).
|
||||
|
||||
**Kata Kunci (Search Queries):**
|
||||
|
||||
**1. Core Keywords (Isu Utama):**
|
||||
* `"efisiensi anggaran pendidikan" lang:id -is:retweet`
|
||||
* `"pemotongan anggaran pendidikan" lang:id -is:retweet`
|
||||
* `"anggaran pendidikan dikurangi" lang:id -is:retweet`
|
||||
|
||||
**2. Program Spesifik:**
|
||||
* `("dana BOS" OR "bantuan operasional sekolah") ("dipotong" OR "dikurangi" OR "efisiensi" OR "kurang") lang:id -is:retweet`
|
||||
* `("PIP" OR "program indonesia pintar") ("dipotong" OR "dikurangi" OR "efisiensi" OR "cair") lang:id -is:retweet`
|
||||
* `("KIP Kuliah" OR "kartu indonesia pintar") ("dipotong" OR "dikurangi" OR "efisiensi" OR "sulit") lang:id -is:retweet`
|
||||
* `("tunjangan guru" OR "sertifikasi guru") ("dipotong" OR "dikurangi" OR "efisiensi" OR "telat") lang:id -is:retweet`
|
||||
|
||||
**3. Kombinasi Isu Umum:**
|
||||
* `(anggaran OR dana) (pendidikan OR sekolah OR kampus OR guru) (efisiensi OR potong OR dikurangi OR berkurang) lang:id -is:retweet`
|
||||
|
||||
- **Proses Lanjutan**: Deduplikasi (Hapus ID & Teks yang berulang).
|
||||
""")
|
||||
|
||||
st.markdown("### 🔍 Preview Data Mentah")
|
||||
if not df_mentah.empty:
|
||||
search_mentah = st.text_input("Cari kata dalam Tweet (Mentah):", placeholder="Contoh: dana bos", key="cari_mentah")
|
||||
|
||||
if search_mentah:
|
||||
df_tampil = df_mentah[df_mentah['Teks Tweet'].str.contains(search_mentah, case=False, na=False)].copy()
|
||||
else:
|
||||
df_tampil = df_mentah.copy()
|
||||
|
||||
df_tampil = df_tampil[['created_at', 'username', 'Teks Tweet']].rename(columns={'created_at': 'Created At', 'username': 'Username'})
|
||||
df_tampil.index = range(1, len(df_tampil) + 1)
|
||||
|
||||
st.dataframe(df_tampil, use_container_width=True, height=250)
|
||||
|
||||
# --- 2. PREPROCESSING ---
|
||||
elif pilihan == "2. Preprocessing":
|
||||
st.header("2. Preprocessing Teks")
|
||||
|
||||
st.markdown("""
|
||||
**Tujuan:** Mengubah data teks tidak terstruktur menjadi format bersih yang siap diproses mesin.
|
||||
|
||||
Pada penelitian ini, kami memutuskan untuk **TIDAK MELAKUKAN Stemming & Stopword Removal**.
|
||||
* **Alasan:** Model Deep Learning (seperti LSTM) membutuhkan konteks kalimat utuh untuk memahami nuansa sentimen (contoh: kata *"tidak"* sangat penting untuk membalikkan makna *"suka"* menjadi *"tidak suka"*). Menghapus *stopword* justru dapat merusak tata bahasa yang akan dibaca oleh model secara sekuensial.
|
||||
""")
|
||||
|
||||
with st.expander("ℹ️ Rincian 5 Langkah Preprocessing", expanded=True):
|
||||
st.markdown("""
|
||||
1. **Case Folding:** Menyeragamkan huruf menjadi kecil (*lowercase*).
|
||||
2. **Cleaning:** Menghapus elemen non-teks (URL, Mention `@`, Hashtag `#`, Angka, Tanda Baca).
|
||||
3. **Tokenizing:** Memecah kalimat menjadi potongan kata per kata.
|
||||
4. **Normalisasi Slang:** Mengubah kata tidak baku (*bgt, gk, sy*) menjadi baku (*banget, tidak, saya*) menggunakan kamus *lexicon*.
|
||||
5. **Detokenizing:** Menggabungkan kata kembali menjadi kalimat utuh.
|
||||
""")
|
||||
|
||||
st.subheader("🔍 Komparasi Sebelum vs Sesudah")
|
||||
if not df_mentah.empty:
|
||||
search_pre = st.text_input("Cari kata (Hasil Akhir):", placeholder="Contoh: guru", key="cari_pre")
|
||||
|
||||
cols = ['Teks Tweet', 'Tweet_CaseFolded', 'Tweet_Cleaned', 'Tweet_Tokenized', 'Tweet_Normalized', 'Tweet_Final']
|
||||
|
||||
cols_exist = [c for c in cols if c in df_mentah.columns]
|
||||
df_tampil_pre = df_mentah[cols_exist].copy()
|
||||
|
||||
if search_pre:
|
||||
df_tampil_pre = df_tampil_pre[df_tampil_pre['Tweet_Final'].str.contains(search_pre, case=False, na=False)]
|
||||
|
||||
df_tampil_pre.index = range(1, len(df_tampil_pre) + 1)
|
||||
st.dataframe(df_tampil_pre, use_container_width=True, height=400)
|
||||
else:
|
||||
st.warning("Data preprocessing belum tersedia.")
|
||||
|
||||
# --- 3. PERSIAPAN DATA LATIH ---
|
||||
elif pilihan == "3. Persiapan Data Latih":
|
||||
st.header("3. Transformasi & Splitting Data")
|
||||
|
||||
st.markdown("""
|
||||
Agar teks dapat diproses oleh Neural Network, data harus diubah menjadi bentuk numerik (vektor).
|
||||
Selain itu, dilakukan penyeimbangan data agar model tidak bias.
|
||||
""")
|
||||
|
||||
st.subheader("A. Tokenization & Padding")
|
||||
st.write("Setiap kata unik dalam dataset diberi ID angka. Karena panjang tweet berbeda-beda, kita lakukan **Padding (Post)** agar semua input memiliki panjang seragam (**100 kata**). Angka 0 di akhir akan diabaikan oleh fitur *Masking* pada model.")
|
||||
|
||||
if not df_mentah.empty and 'Label' in df_mentah.columns:
|
||||
df_token = df_mentah.dropna(subset=['Label']).copy()
|
||||
|
||||
# Helper simulasi token
|
||||
def get_word_id(word):
    """Return a deterministic stand-in token id for ``word``.

    The word is hashed with MD5, the hex digest is read as one large
    integer, and that integer is folded into the range 1..3000.  The value
    0 is never produced (the result is always offset by one).
    """
    digest_hex = hashlib.md5(word.encode()).hexdigest()
    digest_as_int = int(digest_hex, 16)
    return digest_as_int % 3000 + 1
|
||||
|
||||
df_token['Detail Token'] = df_token['Tweet_Final'].apply(lambda t: ", ".join([f"{w}:{get_word_id(w)}" for w in str(t).split()[:10]]))
|
||||
df_token['Padding Sequence (100)'] = df_token['Tweet_Final'].apply(lambda t: str(([get_word_id(w) for w in str(t).split()] + [0]*100)[:20]) + " ...")
|
||||
|
||||
st.dataframe(df_token[['Tweet_Final', 'Detail Token', 'Padding Sequence (100)']], use_container_width=True)
|
||||
|
||||
st.markdown("---")
|
||||
st.subheader("B. Splitting 80:20 & Skenario 5 Percobaan")
|
||||
|
||||
st.markdown("""
|
||||
**Skenario Pelatihan:**
|
||||
Model dilatih menggunakan **5 Skenario Percobaan** (P1 hingga P5) dengan porsi data latih masing-masing 20%, 40%, 60%, 80%, dan 100% (dari total 80% split data latih).
|
||||
|
||||
**Penanganan Imbalance (ROS):**
|
||||
Kami menduplikasi data minoritas (Positif/Netral) secara acak (*Random Over Sampling*) di **setiap porsi data latih** hingga jumlahnya setara dengan kelas mayoritas (Negatif). Data Testing (20%) dibiarkan murni agar evaluasi tetap objektif.
|
||||
""")
|
||||
|
||||
df_train, df_test = train_test_split(df_token, test_size=0.2, random_state=42, stratify=df_token['Label'])
|
||||
kelas_mayoritas = df_train['Label'].value_counts().max()
|
||||
|
||||
col_metric1, col_metric2, col_metric3 = st.columns(3)
|
||||
col_metric1.metric("Maksimal Data Latih (80%)", f"{len(df_train):,} Sample", "Skenario P5")
|
||||
col_metric2.metric("Data Uji Tetap (20%)", f"{len(df_test):,} Sample", "Validasi Objektif")
|
||||
col_metric3.metric("Target ROS P5", f"{kelas_mayoritas}", "Per Kelas Sentimen")
|
||||
|
||||
st.success(f"✅ **Status Data:** Dataset latih telah diseimbangkan (Balanced) menggunakan teknik ROS pada tahapan pemodelan.")
|
||||
|
||||
# --- 4. ARSITEKTUR MODEL ---
|
||||
elif pilihan == "4. Arsitektur Model":
|
||||
st.header("🧠 4. Arsitektur Model: LSTM Standar")
|
||||
|
||||
st.markdown("""
|
||||
Kami menggunakan arsitektur **Long Short-Term Memory (LSTM)** yang dipadukan dengan *Keras Embedding Layer* dan fitur *Masking*.
|
||||
""")
|
||||
|
||||
c_text, c_spacer, c_img = st.columns([1.5, 0.2, 1])
|
||||
|
||||
with c_text:
|
||||
st.subheader("Rincian Layer & Fungsinya:")
|
||||
st.markdown("""
|
||||
1. **Embedding (Keras):** Mengubah indeks kata menjadi vektor padat (128 dimensi). Fitur `mask_zero=True` diaktifkan agar model murni fokus pada teks tanpa terdistraksi oleh angka padding (0) di akhir kalimat.
|
||||
2. **SpatialDropout1D (0.2):** Mematikan sebagian 1D feature maps secara acak untuk mencegah model "menghafal" data secara berlebihan (*overfitting*).
|
||||
3. **LSTM (64 Units):** Memproses urutan kata secara sekuensial (dari awal hingga akhir kalimat) agar model bisa memahami relasi dan pola frasa sentimen dengan sangat baik.
|
||||
4. **Dense Layer (32 Units):** Ekstraksi fitur tingkat tinggi menggunakan fungsi aktivasi ReLU dengan peluruhan (Dropout 0.2).
|
||||
5. **Dense Output (3 Units):** Layer akhir dengan aktivasi *Softmax* yang menghasilkan nilai probabilitas klasifikasi untuk **Negatif, Netral, dan Positif**.
|
||||
""")
|
||||
|
||||
param_data = {
|
||||
"Nama Layer": ["Embedding", "SpatialDropout", "LSTM", "Dense", "Dense Output"],
|
||||
"Output Shape": ["(None, 100, 128)", "(None, 100, 128)", "(None, 64)", "(None, 32)", "(None, 3)"],
|
||||
"Jml Parameter": ["1,280,000", "0", "49,408", "2,080", "99"]
|
||||
}
|
||||
st.dataframe(pd.DataFrame(param_data), use_container_width=True)
|
||||
|
||||
with c_spacer:
|
||||
st.empty()
|
||||
|
||||
with c_img:
|
||||
st.caption("Visualisasi Alur Data:")
|
||||
try:
|
||||
graph = graphviz.Digraph(node_attr={'shape': 'box', 'style': 'filled', 'fillcolor': '#E8F0FE'})
|
||||
graph.attr(rankdir='TB')
|
||||
|
||||
graph.node('I', 'Input Teks\n(Integer Encoded)', fillcolor='#FFEBEE')
|
||||
graph.node('E', 'Embedding Layer\n(Dimensi 128, Masking)', fillcolor='#FFF3E0')
|
||||
graph.node('L', 'LSTM Layer\n(Proses Sekuensial)', fillcolor='#E3F2FD')
|
||||
graph.node('D', 'Dense & Softmax\n(Klasifikasi 3 Kelas)', fillcolor='#E8F5E9')
|
||||
|
||||
graph.edge('I', 'E')
|
||||
graph.edge('E', 'L')
|
||||
graph.edge('L', 'D')
|
||||
|
||||
st.graphviz_chart(graph, use_container_width=True)
|
||||
except:
|
||||
st.info("Install graphviz untuk melihat diagram alir.")
|
||||
|
||||
# ==============================================================================
|
||||
# 5. EVALUASI MODEL
|
||||
# ==============================================================================
|
||||
elif pilihan == "5. Evaluasi Model":
|
||||
st.header("5. Evaluasi Performa Model (Skenario P1-P5)")
|
||||
st.markdown("Evaluasi ini mencakup perbandingan 5 skenario pelatihan berdasarkan ukuran rasio data latih (20% hingga 100%), yang diuji menggunakan **Data Testing murni (20%)**.")
|
||||
|
||||
tab_a, tab_b, tab_c = st.tabs(["📊 Metrik (Model P5)", "📈 Perbandingan 5 Skenario", "📉 Detail Learning Curve"])
|
||||
|
||||
# --- TAB A: TABEL ANGKA ---
|
||||
with tab_a:
|
||||
st.subheader("1. Classification Report (Model P5)")
|
||||
st.markdown("""
|
||||
- **Precision:** Ketepatan prediksi model (Meminimalisir salah tebak positif palsu).
|
||||
- **Recall:** Kelengkapan prediksi (Meminimalisir salah tebak negatif palsu).
|
||||
- **F1-Score:** Rata-rata harmonis antara Precision dan Recall.
|
||||
""")
|
||||
|
||||
path_perf = 'model/Tabel_Performa_LSTM.csv'
|
||||
if not os.path.exists(path_perf): path_perf = 'Tabel_Performa_LSTM.csv'
|
||||
|
||||
if os.path.exists(path_perf):
|
||||
df_perf = pd.read_csv(path_perf, index_col=0)
|
||||
st.table(
|
||||
df_perf.style.highlight_max(axis=0, props='background-color: #FFEB3B; color: black; font-weight: bold')
|
||||
)
|
||||
if 'accuracy' in df_perf.index:
|
||||
acc = df_perf.loc['accuracy', 'f1-score']
|
||||
st.metric("Akurasi Total (Data Testing P5)", f"{acc*100:.2f}%")
|
||||
else:
|
||||
st.warning("⚠️ File 'Tabel_Performa_LSTM.csv' belum tersedia.")
|
||||
|
||||
st.markdown("---")
|
||||
st.subheader("2. Confusion Matrix (Model P5)")
|
||||
path_cm = 'model/Data_Confusion_Matrix.csv'
|
||||
if os.path.exists(path_cm):
|
||||
df_cm_data = pd.read_csv(path_cm)
|
||||
if 'y_true' in df_cm_data.columns and 'y_pred' in df_cm_data.columns:
|
||||
labels = ['Negatif', 'Netral', 'Positif']
|
||||
cm = confusion_matrix(df_cm_data['y_true'], df_cm_data['y_pred'])
|
||||
fig_cm = px.imshow(cm, text_auto=True, labels=dict(x="Prediksi Model", y="Label Aktual (Asli)", color="Jumlah Data"), x=labels, y=labels, color_continuous_scale='Blues')
|
||||
fig_cm.update_layout(title="Matrix Kebenaran Prediksi P5")
|
||||
st.plotly_chart(fig_cm, use_container_width=True)
|
||||
else:
|
||||
st.warning("⚠️ File 'Data_Confusion_Matrix.csv' tidak ditemukan.")
|
||||
|
||||
# --- TAB B: BAR CHART PERBANDINGAN SKENARIO (DINAMIS DARI CSV) ---
|
||||
with tab_b:
|
||||
st.subheader("Perbandingan Akurasi Skenario P1 hingga P5")
|
||||
st.markdown("Grafik interaktif ini menunjukkan bahwa semakin besar porsi data latih yang diberikan, maka kemampuan model dalam mengklasifikasi sentimen cenderung semakin baik.")
|
||||
|
||||
path_akurasi = 'model/Akurasi_Skenario.csv'
|
||||
if os.path.exists(path_akurasi):
|
||||
df_acc_skenario = pd.read_csv(path_akurasi)
|
||||
rata_rata = df_acc_skenario['Akurasi'].mean()
|
||||
|
||||
# Buat label gabungan P1 (20%), dst
|
||||
df_acc_skenario['Label_X'] = df_acc_skenario['Skenario'] + " (" + df_acc_skenario['Porsi_Data'] + ")"
|
||||
|
||||
fig_bar = px.bar(
|
||||
df_acc_skenario, x='Label_X', y='Akurasi',
|
||||
text='Akurasi',
|
||||
color='Skenario',
|
||||
color_discrete_sequence=px.colors.qualitative.Set1,
|
||||
title="Persentase Akurasi per Skenario Data Latih",
|
||||
labels={'Label_X': 'Skenario (Porsi Data Latih)', 'Akurasi': 'Akurasi (%)'}
|
||||
)
|
||||
|
||||
fig_bar.update_traces(texttemplate='%{text:.2f}%', textposition='outside')
|
||||
fig_bar.add_hline(y=rata_rata, line_dash="dot", line_color="red", annotation_text=f"Rata-rata: {rata_rata:.2f}%")
|
||||
fig_bar.update_layout(yaxis_range=[0, 100], showlegend=False)
|
||||
|
||||
st.plotly_chart(fig_bar, use_container_width=True)
|
||||
else:
|
||||
st.warning("⚠️ File 'Akurasi_Skenario.csv' belum tersedia. Harap export dari Colab.")
|
||||
|
||||
# --- TAB C: KURVA PEMBELAJARAN SEMUA SKENARIO (DINAMIS DARI CSV) ---
|
||||
with tab_c:
|
||||
st.subheader("Grafik Pergerakan Learning Curve")
|
||||
st.info("Pilih skenario di bawah ini untuk melihat detail pergerakan Akurasi dan Loss-nya secara interaktif.")
|
||||
|
||||
path_hist_semua = 'model/Riwayat_Training_Semua.csv'
|
||||
if os.path.exists(path_hist_semua):
|
||||
df_all_hist = pd.read_csv(path_hist_semua)
|
||||
|
||||
# Opsi interaktif untuk memilih Skenario
|
||||
skenario_pilihan = st.selectbox("Pilih Skenario:", ['P1', 'P2', 'P3', 'P4', 'P5'], index=4)
|
||||
|
||||
# Filter data berdasarkan skenario yang dipilih
|
||||
df_hist_filter = df_all_hist[df_all_hist['Skenario'] == skenario_pilihan]
|
||||
|
||||
col_chart1, col_chart2 = st.columns(2)
|
||||
|
||||
with col_chart1:
|
||||
fig_acc_line = go.Figure()
|
||||
fig_acc_line.add_trace(go.Scatter(x=df_hist_filter['Epoch'], y=df_hist_filter['accuracy'], mode='lines+markers', name='Train Acc'))
|
||||
fig_acc_line.add_trace(go.Scatter(x=df_hist_filter['Epoch'], y=df_hist_filter['val_accuracy'], mode='lines+markers', name='Val Acc'))
|
||||
fig_acc_line.update_layout(title=f"Akurasi ({skenario_pilihan})", xaxis_title="Epochs", yaxis_title="Akurasi", hovermode="x unified")
|
||||
st.plotly_chart(fig_acc_line, use_container_width=True)
|
||||
|
||||
with col_chart2:
|
||||
fig_loss_line = go.Figure()
|
||||
fig_loss_line.add_trace(go.Scatter(x=df_hist_filter['Epoch'], y=df_hist_filter['loss'], mode='lines+markers', name='Train Loss', line=dict(color='orange')))
|
||||
fig_loss_line.add_trace(go.Scatter(x=df_hist_filter['Epoch'], y=df_hist_filter['val_loss'], mode='lines+markers', name='Val Loss', line=dict(color='red')))
|
||||
fig_loss_line.update_layout(title=f"Loss ({skenario_pilihan})", xaxis_title="Epochs", yaxis_title="Loss", hovermode="x unified")
|
||||
st.plotly_chart(fig_loss_line, use_container_width=True)
|
||||
else:
|
||||
st.warning("⚠️ File 'Riwayat_Training_Semua.csv' belum tersedia. Harap export dari Colab.")
|
||||
|
||||
# ==============================================================================
|
||||
# 6. TOPIC MODELING (LDA)
|
||||
# ==============================================================================
|
||||
elif pilihan == "6. Topic Modeling (LDA)":
|
||||
st.header("6. Topic Modeling (LDA)")
|
||||
st.markdown("""
|
||||
**Tujuan:** Menggali "Apa yang sebenarnya dibicarakan publik?" di balik masing-masing sentimen menggunakan metode **Latent Dirichlet Allocation (LDA)**.
|
||||
""")
|
||||
|
||||
# --- BAGIAN A: METRIK EVALUASI (COHERENCE SCORE) ---
|
||||
st.subheader("A. Optimasi Jumlah Topik (Coherence Score)")
|
||||
st.info("💡 Grafik ini menunjukkan bagaimana model menentukan jumlah topik (K) terbaik secara ilmiah berdasarkan skor *Coherence c_v* tertinggi.")
|
||||
|
||||
col_lda1, col_lda2 = st.columns([2, 1])
|
||||
|
||||
with col_lda1:
|
||||
path_coherence = 'model/Nilai_Coherence.csv'
|
||||
if not os.path.exists(path_coherence): path_coherence = 'Nilai_Coherence.csv'
|
||||
|
||||
if os.path.exists(path_coherence):
|
||||
df_coh = pd.read_csv(path_coherence)
|
||||
|
||||
# Plot Line Chart
|
||||
fig_coh = px.line(df_coh, x='Num_Topics', y='Coherence_Score', markers=True,
|
||||
title="Pergerakan Nilai Coherence Score",
|
||||
labels={'Num_Topics': 'Jumlah Topik', 'Coherence_Score': 'Skor Koherensi (c_v)'})
|
||||
|
||||
max_score = df_coh['Coherence_Score'].max()
|
||||
best_topic_num = df_coh.loc[df_coh['Coherence_Score'].idxmax(), 'Num_Topics']
|
||||
|
||||
fig_coh.add_annotation(x=best_topic_num, y=max_score,
|
||||
text=f"Optimal: {int(best_topic_num)} Topik",
|
||||
showarrow=True, arrowhead=1)
|
||||
|
||||
st.plotly_chart(fig_coh, use_container_width=True)
|
||||
else:
|
||||
st.warning("⚠️ File 'Nilai_Coherence.csv' tidak ditemukan.")
|
||||
|
||||
with col_lda2:
|
||||
st.markdown("### 📝 Interpretasi:")
|
||||
st.write("""
|
||||
Algoritma mesin bekerja dengan mencari pola kata yang sering muncul bersamaan di dalam satu dokumen teks.
|
||||
|
||||
**Coherence Score** bertugas untuk mengukur seberapa masuk akal ("nyambung") kumpulan kata-kata dalam satu topik. Semakin tinggi skornya, maka topik tersebut akan semakin mudah diinterpretasikan oleh pembaca/manusia.
|
||||
""")
|
||||
|
||||
st.markdown("---")
|
||||
|
||||
# --- BAGIAN B: VISUALISASI TOPIK (BAR CHART DARI CSV) ---
|
||||
st.subheader("B. Visualisasi Kata Kunci per Topik")
|
||||
st.write("Berikut adalah distribusi kata-kata kunci dominan yang mewakili setiap topik berdasarkan prediksi sentimen data *testing*.")
|
||||
|
||||
path_lda = 'model/Hasil_Analisis_Topik_LDA.csv'
|
||||
if not os.path.exists(path_lda): path_lda = 'Hasil_Analisis_Topik_LDA.csv'
|
||||
|
||||
if os.path.exists(path_lda):
|
||||
try:
|
||||
df_lda = pd.read_csv(path_lda)
|
||||
|
||||
# Fungsi Parsing Teks dari format CSV
|
||||
def parse_lda_string(text_data):
    """Convert a comma-separated keyword string into a chart-ready DataFrame.

    Args:
        text_data: One cell of the LDA result CSV; expected to be a string
            such as ``"dana, bos, guru"``.  ``None`` / NaN (a missing cell)
            is treated as "no keywords".

    Returns:
        A DataFrame with columns ``Kata`` (keyword) and ``Bobot`` (an
        artificial rank weight).  The first keyword gets the highest weight
        and rows are sorted by ascending weight, so a horizontal bar chart
        lists the first keyword at the top.  The frame is empty when there
        are no keywords.
    """
    kolom = ['Kata', 'Bobot']

    # A missing CSV cell arrives as None/NaN; str() would silently turn it
    # into the bogus keyword "None"/"nan" and plot it as a real topic word.
    if text_data is None or text_data is pd.NA:
        return pd.DataFrame(columns=kolom)
    if isinstance(text_data, float) and pd.isna(text_data):
        return pd.DataFrame(columns=kolom)

    # Split on commas and drop blank fragments (e.g. from "a,,b" or "a, ").
    kata_kunci = [
        potongan.strip()
        for potongan in str(text_data).split(',')
        if potongan.strip()
    ]
    if not kata_kunci:
        return pd.DataFrame(columns=kolom)

    df_res = pd.DataFrame({'Kata': kata_kunci})
    # Artificial weight derived purely from position: N for the first
    # keyword down to 1 for the last.
    df_res['Bobot'] = range(len(df_res), 0, -1)
    return df_res.sort_values(by='Bobot', ascending=True)
|
||||
|
||||
# Tabs untuk Topik
|
||||
t_neg, t_net, t_pos = st.tabs(["🔴 Topik Negatif", "⚪ Topik Netral", "🟢 Topik Positif"])
|
||||
mapping = {'negatif': t_neg, 'netral': t_net, 'positif': t_pos}
|
||||
|
||||
for sentimen, tab in mapping.items():
|
||||
with tab:
|
||||
# Filter CSV berdasarkan sentimen
|
||||
df_subset = df_lda[df_lda['Sentimen'].str.lower() == sentimen]
|
||||
|
||||
if df_subset.empty:
|
||||
st.warning(f"Belum ada data ekstraksi topik untuk sentimen {sentimen.upper()}.")
|
||||
else:
|
||||
col_t1, col_t2 = st.columns(2)
|
||||
|
||||
# Tampilkan Topik dengan 2 kolom berjajar
|
||||
for idx, row in df_subset.iterrows():
|
||||
topik_ke = row['Topik Ke']
|
||||
df_chart = parse_lda_string(row['Kata Kunci'])
|
||||
|
||||
if not df_chart.empty:
|
||||
fig = px.bar(
|
||||
df_chart, x='Bobot', y='Kata', orientation='h',
|
||||
title=f"<b>Topik {topik_ke}</b>",
|
||||
color='Bobot',
|
||||
color_continuous_scale='Reds' if sentimen == 'negatif' else 'Greys' if sentimen == 'netral' else 'Greens'
|
||||
)
|
||||
# Sembunyikan X-axis karena ini hanya bobot representasi urutan
|
||||
fig.update_layout(height=280, showlegend=False, xaxis_title=None, xaxis_visible=False)
|
||||
|
||||
if idx % 2 == 0:
|
||||
with col_t1: st.plotly_chart(fig, use_container_width=True)
|
||||
else:
|
||||
with col_t2: st.plotly_chart(fig, use_container_width=True)
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Gagal memproses visualisasi data LDA: {e}")
|
||||
else:
|
||||
st.warning("⚠️ File 'Hasil_Analisis_Topik_LDA.csv' belum tersedia di dalam folder model.")
|
||||
|
|
@ -0,0 +1,339 @@
|
|||
import streamlit as st
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import plotly.express as px
|
||||
from wordcloud import WordCloud
|
||||
import os
|
||||
import math
|
||||
|
||||
def render_visualisasi():
    """Render the "Dashboard Visualisasi Data" Streamlit page.

    Reads ``data/Data_Lengkap_Tokenisasi.csv`` and draws, in order:
    the sentiment pie chart and daily trend line, the word clouds, the LDA
    topic keyword charts, and the data explorer / model evaluation tabs.
    Supporting CSV files are looked up under ``model/`` (some with a
    fallback to the working directory); a missing file produces an on-page
    warning instead of an exception.

    Returns:
        None.  The function returns early, after showing ``st.error``, when
        the dataset file or its ``Label`` column is missing.
    """
    st.title("📈 Dashboard Visualisasi Data")
    st.markdown("Analisis visual interaktif terhadap data opini publik terkait kebijakan anggaran pendidikan.")

    # ==========================================================================
    # 1. LOAD MAIN DATA
    # ==========================================================================
    file_path = 'data/Data_Lengkap_Tokenisasi.csv'

    if not os.path.exists(file_path):
        st.error(f"❌ File dataset tidak ditemukan di: {file_path}")
        return

    df = pd.read_csv(file_path)

    if 'Label' not in df.columns:
        st.error("❌ Kolom 'Label' tidak ditemukan dalam CSV.")
        return
    # Normalised label (lower-case, trimmed) used by the pie chart and the
    # data explorer filter.
    df['Label_Clean'] = df['Label'].astype(str).str.lower().str.strip()

    # Derive a plain date column from whichever timestamp column exists.
    if 'created_at' in df.columns:
        df['Tanggal'] = pd.to_datetime(df['created_at']).dt.date
    elif 'Tanggal' in df.columns:
        df['Tanggal'] = pd.to_datetime(df['Tanggal']).dt.date
    else:
        st.warning("⚠️ Kolom tanggal tidak ditemukan. Grafik tren waktu mungkin tidak muncul.")

    # ==========================================================================
    # 2. SENTIMENT DISTRIBUTION (PIE & TREND)
    # ==========================================================================
    st.subheader("📊 Distribusi & Polaritas Sentimen")

    col_pie, col_bar = st.columns([1, 1.5])

    # --- A. PIE CHART ---
    with col_pie:
        df_pie = df['Label_Clean'].value_counts().reset_index()
        df_pie.columns = ['Sentimen', 'Jumlah']

        fig_pie = px.pie(
            df_pie,
            names='Sentimen',
            values='Jumlah',
            hole=0.4,
            color='Sentimen',
            color_discrete_map={'negatif':'#FF4B4B', 'netral':'#808495', 'positif':'#00CC96'},
            title="Persentase Sentimen"
        )
        fig_pie.update_layout(showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=-0.3, xanchor="center", x=0.5))
        st.plotly_chart(fig_pie, use_container_width=True)

    # --- B. TIME TREND ---
    with col_bar:
        if 'Tanggal' in df.columns:
            # Fixed reporting window for the daily trend chart.
            start_date = pd.to_datetime("2025-02-01").date()
            end_date = pd.to_datetime("2025-03-31").date()

            df_filtered = df[
                (df['Tanggal'] >= start_date) &
                (df['Tanggal'] <= end_date)
            ]

            kolom_label = 'Label' if 'Label' in df_filtered.columns else 'Label_Clean'

            df_trend = df_filtered.groupby(['Tanggal', kolom_label]).size().reset_index(name='Jumlah')

            fig_trend = px.line(
                df_trend,
                x='Tanggal',
                y='Jumlah',
                color=kolom_label,
                markers=True,
                # The raw label column may use different spellings/casing,
                # so every variant is mapped to the same colour.
                color_discrete_map={
                    'negatif':'#FF4B4B', 'netral':'#808495', 'positif':'#00CC96',
                    'Negatif':'#FF4B4B', 'Netral':'#808495', 'Positif':'#00CC96',
                    'negative':'#FF4B4B', 'neutral':'#808495', 'positive':'#00CC96'
                },
                title="Tren Sentimen Harian (Feb - Mar 2025)"
            )

            fig_trend.update_xaxes(range=[start_date, end_date])
            fig_trend.update_layout(xaxis_title="Tanggal", yaxis_title="Jumlah Tweet", hovermode="x unified", legend=dict(orientation="h", y=1.1))

            st.plotly_chart(fig_trend, use_container_width=True)
        else:
            st.info("Data Tanggal tidak tersedia untuk menampilkan tren.")

    # ==========================================================================
    # 3. WORDCLOUD
    # ==========================================================================
    st.subheader("☁️ WordCloud: Representasi Visual Teks")
    st.write("Kata-kata yang paling sering muncul dalam setiap kategori.")

    def _tampilkan_wordcloud(wc):
        """Show a generated WordCloud on the page and release its figure."""
        fig, ax = plt.subplots(figsize=(10, 5))
        try:
            ax.imshow(wc, interpolation='bilinear')
            ax.axis("off")
            st.pyplot(fig)
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this, figures pile up on each Streamlit rerun.
            plt.close(fig)

    def generate_wc(text, colormap):
        """Build and display a word cloud from free text.

        Shows a warning and does nothing when ``text`` is not a non-blank
        string; any error raised while generating is reported via
        ``st.error``.
        """
        if not isinstance(text, str) or not text.strip():
            st.warning("⚠️ Tidak ada data teks yang cukup.")
            return

        with st.spinner("Sedang menggambar WordCloud..."):
            try:
                wc = WordCloud(width=800, height=400, background_color='white', colormap=colormap, max_words=100).generate(text)
                _tampilkan_wordcloud(wc)
            except Exception as e:
                st.error(f"Error WordCloud: {e}")

    def generate_wc_from_freq(file_path, colormap):
        """Build and display a word cloud from a word-frequency CSV.

        The CSV is read with columns ``Word`` and ``Frequency``.  A missing
        file produces a warning; a read/generation error is reported via
        ``st.error``.
        """
        if not os.path.exists(file_path):
            st.warning(f"⚠️ File frekuensi belum tersedia: {file_path}")
            return

        try:
            df_freq = pd.read_csv(file_path)
            # generate_from_frequencies takes a {word: frequency} mapping.
            freq_dict = dict(zip(df_freq['Word'], df_freq['Frequency']))

            with st.spinner("Merender WordCloud instan dari CSV..."):
                wc = WordCloud(width=800, height=400, background_color='white', colormap=colormap, max_words=100)
                wc.generate_from_frequencies(freq_dict)
                _tampilkan_wordcloud(wc)
        except Exception as e:
            st.error(f"Error memproses file CSV WordFreq: {e}")

    tab_mentah, tab_bersih, tab_neg, tab_net, tab_pos = st.tabs([
        "Data Mentah", "Data Bersih", "Negatif", "Netral", "Positif"
    ])

    with tab_mentah:
        st.caption("Data dari kolom 'Teks Tweet' (Original)")
        # Guarded like the 'Tweet_Final' tab below so a CSV without this
        # column shows a warning instead of raising KeyError.
        if 'Teks Tweet' in df.columns:
            generate_wc(" ".join(df['Teks Tweet'].dropna().astype(str)), 'cividis')
        else:
            st.warning("Kolom Teks Tweet tidak ada.")

    with tab_bersih:
        st.caption("Data dari kolom 'Tweet_Final' (Preprocessed)")
        if 'Tweet_Final' in df.columns:
            generate_wc(" ".join(df['Tweet_Final'].dropna().astype(str)), 'viridis')
        else:
            st.warning("Kolom Tweet_Final tidak ada.")

    # The per-sentiment clouds are drawn from the word-frequency CSV files.
    with tab_neg:
        st.caption("Kata dominan sentimen NEGATIF (Sumber: WordFreq_Negatif.csv)")
        generate_wc_from_freq('model/WordFreq_Negatif.csv', 'Reds')

    with tab_net:
        st.caption("Kata dominan sentimen NETRAL (Sumber: WordFreq_Netral.csv)")
        generate_wc_from_freq('model/WordFreq_Netral.csv', 'Greys')

    with tab_pos:
        st.caption("Kata dominan sentimen POSITIF (Sumber: WordFreq_Positif.csv)")
        generate_wc_from_freq('model/WordFreq_Positif.csv', 'Greens')

    st.markdown("---")

    # ==========================================================================
    # 4. TOPIC MODELING
    # ==========================================================================
    st.subheader("📌 4. Topic Modeling (LDA) & Kata Kunci")
    st.write("Ekstraksi topik dominan dari hasil algoritma Latent Dirichlet Allocation (LDA).")

    path_lda = 'model/Hasil_Analisis_Topik_LDA.csv'
    if not os.path.exists(path_lda):
        path_lda = 'Hasil_Analisis_Topik_LDA.csv'

    if os.path.exists(path_lda):
        try:
            df_lda = pd.read_csv(path_lda)

            def parse_lda_string(text_data):
                """Turn a comma-separated keyword cell into a Kata/Bobot frame.

                ``Bobot`` is an artificial weight based only on position
                (first keyword highest); rows are sorted ascending so the
                first keyword ends up at the top of a horizontal bar chart.
                Missing cells (None/NaN) yield an empty frame.
                """
                kolom = ['Kata', 'Bobot']
                # str(NaN) would otherwise become the fake keyword "nan".
                if text_data is None or text_data is pd.NA:
                    return pd.DataFrame(columns=kolom)
                if isinstance(text_data, float) and pd.isna(text_data):
                    return pd.DataFrame(columns=kolom)

                kata_kunci = [
                    potongan.strip()
                    for potongan in str(text_data).split(',')
                    if potongan.strip()
                ]
                if not kata_kunci:
                    return pd.DataFrame(columns=kolom)

                df_res = pd.DataFrame({'Kata': kata_kunci})
                df_res['Bobot'] = range(len(df_res), 0, -1)
                return df_res.sort_values(by='Bobot', ascending=True)

            t_neg, t_net, t_pos = st.tabs(["🔴 Topik Negatif", "⚪ Topik Netral", "🟢 Topik Positif"])
            mapping = {'negatif': t_neg, 'netral': t_net, 'positif': t_pos}
            skala_warna = {'negatif': 'Reds', 'netral': 'Greys', 'positif': 'Greens'}

            for sentimen, tab in mapping.items():
                with tab:
                    df_subset = df_lda[df_lda['Sentimen'].str.lower() == sentimen]

                    if df_subset.empty:
                        st.warning(f"Belum ada data topik untuk {sentimen}.")
                        continue

                    for _, row in df_subset.iterrows():
                        topik_ke = row['Topik Ke']
                        df_chart = parse_lda_string(row['Kata Kunci'])

                        if df_chart.empty:
                            continue

                        fig = px.bar(
                            df_chart, x='Bobot', y='Kata', orientation='h',
                            title=f"<b>Topik {topik_ke}</b>",
                            color='Bobot',
                            color_continuous_scale=skala_warna[sentimen]
                        )
                        # The x axis only carries the artificial rank
                        # weight, so it is hidden.
                        fig.update_layout(height=300, showlegend=False, xaxis_title=None, xaxis_visible=False)
                        st.plotly_chart(fig, use_container_width=True)
                        st.divider()
        except Exception as e:
            st.error(f"Gagal memproses data LDA: {e}")
    else:
        st.warning("⚠️ File 'Hasil_Analisis_Topik_LDA.csv' belum tersedia di folder model.")

    # ==========================================================================
    # 5. DATA EXPLORER & MODEL EVALUATION
    # ==========================================================================
    st.subheader("🔍 Data Explorer & Evaluasi Model")

    tab_data, tab_eval = st.tabs(["Data Explorer", "Tabel Performa (Evaluasi)"])

    # --- TAB 1: DATA EXPLORER ---
    with tab_data:
        col_f1, col_f2 = st.columns([1, 2])
        with col_f1:
            filter_label = st.selectbox("Filter Sentimen:", ['Semua', 'negatif', 'netral', 'positif'])
        with col_f2:
            search_keyword = st.text_input("Cari Tweet:", "")

        cols_available = [c for c in ['created_at', 'username', 'Teks Tweet', 'Label_Clean'] if c in df.columns]
        df_show = df[cols_available].copy()

        rename_map = {'created_at': 'Tanggal', 'username': 'Username', 'Label_Clean': 'Label'}
        df_show = df_show.rename(columns=rename_map)

        if filter_label != 'Semua' and 'Label' in df_show.columns:
            df_show = df_show[df_show['Label'] == filter_label]

        if search_keyword and 'Teks Tweet' in df_show.columns:
            # regex=False: the keyword is user-typed text; treating it as a
            # pattern would raise re.error on input such as "(" or "*".
            cocok = df_show['Teks Tweet'].str.contains(search_keyword, case=False, na=False, regex=False)
            df_show = df_show[cocok]

        # Renumber from 1 so the displayed row numbers are contiguous.
        df_show.index = range(1, len(df_show) + 1)

        baris_per_halaman = 20
        total_data = len(df_show)
        total_halaman = math.ceil(total_data / baris_per_halaman)

        if total_data > 0:
            c_nav, c_stat = st.columns([1, 3])
            with c_nav:
                halaman = st.number_input("Halaman", min_value=1, max_value=max(1, total_halaman), step=1)
            with c_stat:
                st.write("")
                st.caption(f"Menampilkan **{total_data}** Data (Halaman {halaman} dari {total_halaman})")

            start_idx = (halaman - 1) * baris_per_halaman
            end_idx = start_idx + baris_per_halaman
            df_page = df_show.iloc[start_idx:end_idx]

            st.dataframe(df_page, use_container_width=True)
        else:
            st.warning("Data tidak ditemukan.")

    # --- TAB 2: EVALUATION TABLE & CONFUSION MATRIX ---
    with tab_eval:
        st.subheader("1. Tabel Performa (Classification Report)")
        st.markdown("""
        Metrik evaluasi model berdasarkan data testing (20%):
        * **Precision**: Ketepatan tebakan.
        * **Recall**: Kemampuan menemukan data yang relevan.
        * **F1-Score**: Rata-rata harmonis (Paling penting untuk data tidak seimbang).
        """)

        path_perf = 'model/Tabel_Performa_LSTM.csv'
        if not os.path.exists(path_perf):
            path_perf = 'Tabel_Performa_LSTM.csv'

        if os.path.exists(path_perf):
            try:
                df_perf = pd.read_csv(path_perf, index_col=0)
                st.table(
                    df_perf.style.highlight_max(axis=0, props='background-color: #FFEB3B; color: black; font-weight: bold')
                )
            except Exception as e:
                st.error(f"Gagal memuat tabel evaluasi: {e}")
        else:
            st.warning("⚠️ File 'Tabel_Performa_LSTM.csv' belum tersedia.")

        st.divider()

        st.subheader("2. Confusion Matrix")
        st.markdown("Visualisasi ini menunjukkan **detail kesalahan prediksi**. Sumbu Y adalah Label Asli, Sumbu X adalah Prediksi Model.")

        path_cm = 'model/Data_Confusion_Matrix.csv'
        if not os.path.exists(path_cm):
            path_cm = 'Data_Confusion_Matrix.csv'

        if os.path.exists(path_cm):
            try:
                df_cm_data = pd.read_csv(path_cm)

                if 'y_true' in df_cm_data.columns and 'y_pred' in df_cm_data.columns:
                    from sklearn.metrics import confusion_matrix

                    # NOTE(review): the axis names assume exactly three
                    # classes whose sorted order is Negatif, Netral, Positif
                    # - confirm against how y_true/y_pred are encoded.
                    labels = ['Negatif', 'Netral', 'Positif']
                    cm = confusion_matrix(df_cm_data['y_true'], df_cm_data['y_pred'])

                    fig_cm = px.imshow(
                        cm,
                        text_auto=True,
                        labels=dict(x="Prediksi Model", y="Label Aktual (Asli)", color="Jumlah Data"),
                        x=labels,
                        y=labels,
                        color_continuous_scale='Blues',
                        aspect="auto"
                    )
                    fig_cm.update_layout(title="Confusion Matrix Heatmap")
                    st.plotly_chart(fig_cm, use_container_width=True)

                    # Diagonal = correct predictions; total = all test rows.
                    total_benar = np.trace(cm)
                    total_uji = np.sum(cm)
                    # Guard against an all-zero matrix to avoid a NaN figure.
                    akurasi_cm = (total_benar / total_uji) * 100 if total_uji else 0.0
                    st.caption(f"💡 **Interpretasi:** Dari total **{total_uji}** data testing, model berhasil menebak benar sebanyak **{total_benar}** data ({akurasi_cm:.2f}%).")
                else:
                    st.error("Format CSV Confusion Matrix salah. Harus ada kolom 'y_true' dan 'y_pred'.")
            except Exception as e:
                st.error(f"Gagal memproses Confusion Matrix: {e}")
        else:
            st.info("ℹ️ **Data Confusion Matrix belum tersedia.** Silakan jalankan kode penyimpanan `Data_Confusion_Matrix.csv` di Google Colab (Bagian Evaluasi).")
|
||||
Loading…
Reference in New Issue