first commit

This commit is contained in:
renaldiendrawan 2026-05-21 13:01:53 +07:00
commit 77e1092ceb
34 changed files with 16196 additions and 0 deletions

View File

@ -0,0 +1,33 @@
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm",
"customizations": {
"codespaces": {
"openFiles": [
"README.md",
"app.py"
]
},
"vscode": {
"settings": {},
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},
"updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
"postAttachCommand": {
"server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
},
"portsAttributes": {
"8501": {
"label": "Application",
"onAutoForward": "openPreview"
}
},
"forwardPorts": [
8501
]
}

1
.python-version Normal file
View File

@ -0,0 +1 @@
3.10

20
README.md Normal file
View File

@ -0,0 +1,20 @@
# Analisis Sentimen Kebijakan Anggaran Pendidikan 🎓
Aplikasi ini adalah dashboard interaktif untuk menganalisis sentimen masyarakat di Twitter mengenai isu pemotongan dan efisiensi anggaran pendidikan (Dana BOS, PIP, KIP Kuliah).
## 🚀 Fitur Utama
* **Sentiment Analysis:** Menggunakan model Deep Learning **Bi-LSTM**.
* **Topic Modeling:** Ekstraksi topik pembicaraan menggunakan **LDA**.
* **Interactive Dashboard:** Visualisasi data real-time dengan **Streamlit** & **Plotly**.
## 🛠️ Tech Stack
* Python 3.10
* TensorFlow / Keras
* Streamlit
* Pandas & NumPy
* Sastrawi (Preprocessing)
## 📦 Cara Menjalankan (Local)
1. Clone repository ini.
2. Install library: `pip install -r requirements.txt`.
3. Jalankan: `streamlit run app.py`.

Binary file not shown.

88
app.py Normal file
View File

@ -0,0 +1,88 @@
import streamlit as st
from streamlit_option_menu import option_menu
# --- IMPORT MODUL LOKAL ---
from utils import load_resources
from views.beranda import render_beranda
from views.visualisasi import render_visualisasi
from views.proses_data import render_proses_data
from views.analisis_teks import render_analisis_teks
from views.analisis_csv import render_analisis_csv
# ==============================================================================
# 1. PAGE CONFIGURATION SETUP
# ==============================================================================
# Page-level settings are collected in one mapping and handed to Streamlit in
# a single call. This is the first st.* call issued by this script.
# NOTE(review): the imported view modules are not visible here — confirm none
# of them issues a Streamlit command at import time before this call.
page_settings = {
    "page_title": "Dashboard Analisis Sentimen Isu Efisiensi Anggaran Sektor Pendidikan",
    "page_icon": "🎓",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Load the LSTM model and its tokenizer; both are handed to the text and CSV
# analysis pages in the routing section of this script.
model, tokenizer = load_resources()
# ==============================================================================
# 2. SIDEBAR NAVIGATION (LEFT MENU)
# ==============================================================================
# Raw HTML fragments rendered in the sidebar with unsafe_allow_html=True.
# Their content lines are kept flush-left so the markup text is unchanged.
sidebar_title_html = """
<h2 style='text-align: center; margin-top: 10px; margin-bottom: 5px; font-weight: 800; font-size: 26px; line-height: 1.2;'>
Sistem Analisis Sentimen
</h2>
<p style='text-align: center; color: gray; font-size: 14px;'>
Kebijakan Efisiensi Anggaran Pendidikan
</p>
"""

sidebar_footer_html = """
<div style='text-align: center; color: gray; font-size: 13px; margin-top: 25px; margin-bottom: 40px;'>
© 2026 - Skripsi<br>
<b>Renaldi Endrawan</b>
</div>
"""

# Menu entries and their icons, index-aligned. The labels are the exact
# strings the routing section compares `selected` against.
menu_labels = ["Beranda", "Visualisasi", "Proses Data", "Analisis Teks", "Analisis File CSV"]
menu_icons = ["house", "bar-chart", "gear", "chat-text", "file-earmark-spreadsheet"]

with st.sidebar:
    # Header illustration, placed in the middle of three columns (1 : 1.5 : 1)
    # so it appears centred with empty space on both sides.
    # NOTE(review): recent Streamlit releases document `use_column_width` as
    # deprecated — confirm against the pinned Streamlit version before changing.
    _, illustration_col, _ = st.columns([1, 1.5, 1])
    with illustration_col:
        st.image("images/data_analytics.png", use_column_width=True)

    st.markdown(sidebar_title_html, unsafe_allow_html=True)
    st.markdown("---")

    # `selected` holds the label of the chosen menu entry; the first entry is
    # the default.
    selected = option_menu(
        menu_title="Menu Utama",
        options=menu_labels,
        icons=menu_icons,
        menu_icon="cast",
        default_index=0,
        styles={
            # Blue background for the active menu item.
            "nav-link-selected": {"background-color": "#007BFF"}
        },
    )

    st.markdown("---")
    st.markdown("<br>", unsafe_allow_html=True)

    # Footer logo, again centred via the middle of three columns (1 : 5 : 1).
    _, logo_col, _ = st.columns([1, 5, 1])
    with logo_col:
        st.image("images/logo_jti.png", use_column_width=True)

    st.markdown(sidebar_footer_html, unsafe_allow_html=True)
# ==============================================================================
# 3. PAGE ROUTING (RENDER CONTENT)
# ==============================================================================
# Map each sidebar label to a zero-argument callable that renders that page.
# The two analysis pages receive the loaded model and tokenizer; the lambdas
# defer those calls until the matching page is actually selected.
page_renderers = {
    "Beranda": render_beranda,
    "Visualisasi": render_visualisasi,
    "Proses Data": render_proses_data,
    "Analisis Teks": lambda: render_analisis_teks(model, tokenizer),
    "Analisis File CSV": lambda: render_analisis_csv(model, tokenizer),
}

# An unrecognised selection renders nothing, as with the if/elif chain.
render_page = page_renderers.get(selected)
if render_page is not None:
    render_page()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
[
{
"Waktu": "2026-01-31 16:07:05",
"Teks Asli": "Sangat kecewa dana BOS telat cair berbulan-bulan, gaji guru honorer jadi tertahan. Parah banget kinerjanya!",
"Teks Bersih": "sangat kecewa dana bos telat cair berbulanbulan gaji guru honorer jadi tertahan parah banget kinerjanya",
"Label": "Negatif",
"Keyakinan (%)": 57.3
},
{
"Waktu": "2026-01-31 16:07:32",
"Teks Asli": "Mendikbudristek hari ini resmi membahas kebijakan efisiensi anggaran pendidikan tahun 2025. Skema pencairan dana BOS dan tunjangan guru akan mengalami sedikit perubahan mekanisme.",
"Teks Bersih": "mendikbudristek hari ini resmi membahas kebijakan efisiensi anggaran pendidikan tahun skema pencairan dana bos dan tunjangan guru akan mengalami sedikit perubahan mekanisme",
"Label": "Netral",
"Keyakinan (%)": 66.16
},
{
"Waktu": "2026-01-31 16:09:13",
"Teks Asli": "Sangat setuju dengan langkah efisiensi anggaran BOS ini. Daripada bocor dikorupsi oknum sekolah, lebih baik diawasi ketat dan disalurkan tepat sasaran. Pendidikan Indonesia pasti lebih maju!",
"Teks Bersih": "sangat setuju dengan langkah efisiensi anggaran bos ini daripada bocor dikorupsi oknum sekolah lebih baik diawasi ketat dan disalurkan tepat sasaran pendidikan indonesia pasti lebih maju",
"Label": "Positif",
"Keyakinan (%)": 99.93
},
{
"Waktu": "2026-04-06 08:17:31",
"Teks Asli": "Sangat kecewa dengan kebijakan efisiensi ini. Anggaran KIP Kuliah dipotong drastis, banyak mahasiswa dari keluarga tidak mampu terancam putus kuliah. Pemerintah sama sekali tidak pro rakyat kecil! \ud83d\ude21",
"Teks Bersih": "sangat kecewa dengan kebijakan efisiensi ini anggaran kip kuliah dipotong drastis banyak mahasiswa dari keluarga tidak mampu terancam putus kuliah pemerintah sama sekali tidak pro rakyat kecil",
"Label": "Negatif",
"Keyakinan (%)": 91.82
}
]

BIN
images/data_analytics.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

BIN
images/logo_jti.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

View File

@ -0,0 +1,6 @@
Skenario,Porsi_Data,Akurasi
P1,20%,78.33333333333333
P2,40%,80.83333333333333
P3,60%,80.55555555555556
P4,80%,82.08333333333333
P5,100%,84.33333333333334
1 Skenario Porsi_Data Akurasi
2 P1 20% 78.33333333333333
3 P2 40% 80.83333333333333
4 P3 60% 80.55555555555556
5 P4 80% 82.08333333333333
6 P5 100% 84.33333333333334

View File

@ -0,0 +1,301 @@
y_true,y_pred
0,0
0,0
2,2
2,2
2,2
1,0
2,2
0,0
0,0
2,2
1,1
0,0
0,0
0,0
0,0
1,0
0,0
0,0
0,0
0,0
0,0
2,2
2,2
1,1
1,1
2,2
2,2
0,0
2,2
1,1
0,0
0,0
0,0
0,0
2,2
2,2
1,1
2,2
1,0
1,1
0,1
1,0
0,0
1,1
1,0
1,2
2,2
1,0
1,1
0,0
2,2
0,0
1,0
2,1
1,1
1,1
0,0
1,1
0,0
1,1
0,0
1,1
0,0
0,0
1,1
2,2
2,2
1,0
1,1
0,0
1,0
0,0
0,2
1,1
0,0
1,1
2,2
0,0
0,0
0,0
0,0
1,0
2,2
2,2
0,1
0,0
1,1
2,2
2,2
0,0
0,0
0,0
0,0
1,0
1,1
1,1
0,0
1,1
1,0
0,0
0,0
0,1
0,0
0,0
2,2
1,1
0,0
2,1
2,2
0,0
1,1
1,1
0,0
1,1
0,0
0,0
0,0
0,0
0,0
0,0
1,0
0,0
2,2
0,0
2,2
0,0
0,0
0,0
2,2
0,0
0,0
2,2
0,0
1,1
0,0
0,0
0,0
0,0
2,0
1,1
2,2
2,2
0,0
1,0
1,1
1,1
0,1
0,1
1,1
0,0
2,2
2,2
0,0
0,0
0,0
1,1
2,2
0,0
1,1
1,1
0,1
2,2
0,0
0,0
1,1
0,0
0,0
0,0
0,0
1,2
1,1
0,0
1,0
1,1
1,1
2,2
0,0
2,2
0,0
0,0
0,2
0,0
2,0
2,2
0,0
1,1
2,2
0,0
1,1
1,1
1,1
1,0
0,1
1,0
0,1
0,0
2,2
0,0
2,0
0,0
0,0
0,0
0,0
2,1
2,2
0,0
2,2
2,2
1,1
1,0
1,1
2,2
0,0
0,0
0,0
1,0
2,2
2,2
2,2
1,1
0,0
1,1
2,2
2,2
2,2
0,0
1,1
2,2
1,1
0,0
2,2
1,1
1,1
1,1
1,1
1,1
2,2
1,0
1,1
1,1
1,1
1,1
1,1
0,0
2,0
0,0
1,1
1,1
1,1
1,1
0,0
1,0
2,2
2,1
0,1
2,2
2,2
1,1
2,2
0,0
2,2
1,1
1,1
2,2
2,2
0,0
2,2
1,1
1,0
0,0
1,1
0,0
1,1
0,0
0,1
0,0
1,1
1,1
1,1
0,0
2,2
0,0
2,2
0,1
2,1
0,0
0,0
2,2
1,1
0,0
0,0
0,0
0,0
2,2
0,0
1,0
0,0
2,2
0,0
0,0
1 y_true y_pred
2 0 0
3 0 0
4 2 2
5 2 2
6 2 2
7 1 0
8 2 2
9 0 0
10 0 0
11 2 2
12 1 1
13 0 0
14 0 0
15 0 0
16 0 0
17 1 0
18 0 0
19 0 0
20 0 0
21 0 0
22 0 0
23 2 2
24 2 2
25 1 1
26 1 1
27 2 2
28 2 2
29 0 0
30 2 2
31 1 1
32 0 0
33 0 0
34 0 0
35 0 0
36 2 2
37 2 2
38 1 1
39 2 2
40 1 0
41 1 1
42 0 1
43 1 0
44 0 0
45 1 1
46 1 0
47 1 2
48 2 2
49 1 0
50 1 1
51 0 0
52 2 2
53 0 0
54 1 0
55 2 1
56 1 1
57 1 1
58 0 0
59 1 1
60 0 0
61 1 1
62 0 0
63 1 1
64 0 0
65 0 0
66 1 1
67 2 2
68 2 2
69 1 0
70 1 1
71 0 0
72 1 0
73 0 0
74 0 2
75 1 1
76 0 0
77 1 1
78 2 2
79 0 0
80 0 0
81 0 0
82 0 0
83 1 0
84 2 2
85 2 2
86 0 1
87 0 0
88 1 1
89 2 2
90 2 2
91 0 0
92 0 0
93 0 0
94 0 0
95 1 0
96 1 1
97 1 1
98 0 0
99 1 1
100 1 0
101 0 0
102 0 0
103 0 1
104 0 0
105 0 0
106 2 2
107 1 1
108 0 0
109 2 1
110 2 2
111 0 0
112 1 1
113 1 1
114 0 0
115 1 1
116 0 0
117 0 0
118 0 0
119 0 0
120 0 0
121 0 0
122 1 0
123 0 0
124 2 2
125 0 0
126 2 2
127 0 0
128 0 0
129 0 0
130 2 2
131 0 0
132 0 0
133 2 2
134 0 0
135 1 1
136 0 0
137 0 0
138 0 0
139 0 0
140 2 0
141 1 1
142 2 2
143 2 2
144 0 0
145 1 0
146 1 1
147 1 1
148 0 1
149 0 1
150 1 1
151 0 0
152 2 2
153 2 2
154 0 0
155 0 0
156 0 0
157 1 1
158 2 2
159 0 0
160 1 1
161 1 1
162 0 1
163 2 2
164 0 0
165 0 0
166 1 1
167 0 0
168 0 0
169 0 0
170 0 0
171 1 2
172 1 1
173 0 0
174 1 0
175 1 1
176 1 1
177 2 2
178 0 0
179 2 2
180 0 0
181 0 0
182 0 2
183 0 0
184 2 0
185 2 2
186 0 0
187 1 1
188 2 2
189 0 0
190 1 1
191 1 1
192 1 1
193 1 0
194 0 1
195 1 0
196 0 1
197 0 0
198 2 2
199 0 0
200 2 0
201 0 0
202 0 0
203 0 0
204 0 0
205 2 1
206 2 2
207 0 0
208 2 2
209 2 2
210 1 1
211 1 0
212 1 1
213 2 2
214 0 0
215 0 0
216 0 0
217 1 0
218 2 2
219 2 2
220 2 2
221 1 1
222 0 0
223 1 1
224 2 2
225 2 2
226 2 2
227 0 0
228 1 1
229 2 2
230 1 1
231 0 0
232 2 2
233 1 1
234 1 1
235 1 1
236 1 1
237 1 1
238 2 2
239 1 0
240 1 1
241 1 1
242 1 1
243 1 1
244 1 1
245 0 0
246 2 0
247 0 0
248 1 1
249 1 1
250 1 1
251 1 1
252 0 0
253 1 0
254 2 2
255 2 1
256 0 1
257 2 2
258 2 2
259 1 1
260 2 2
261 0 0
262 2 2
263 1 1
264 1 1
265 2 2
266 2 2
267 0 0
268 2 2
269 1 1
270 1 0
271 0 0
272 1 1
273 0 0
274 1 1
275 0 0
276 0 1
277 0 0
278 1 1
279 1 1
280 1 1
281 0 0
282 2 2
283 0 0
284 2 2
285 0 1
286 2 1
287 0 0
288 0 0
289 2 2
290 1 1
291 0 0
292 0 0
293 0 0
294 0 0
295 2 2
296 0 0
297 1 0
298 0 0
299 2 2
300 0 0
301 0 0

View File

@ -0,0 +1,10 @@
Sentimen,Topik Ke,Kata Kunci
Negatif,1,"pendidikan, anggaran, efisiensi, yang, di, tidak, orang"
Negatif,2,"pendidikan, anggaran, tidak, efisiensi, ini, dana, dan"
Negatif,3,"yang, tidak, efisiensi, pendidikan, anggaran, itu, ini"
Netral,1,"dan, efisiensi, anggaran, guru, ya, kementerian, ini"
Netral,2,"anggaran, efisiensi, kuliah, kip, pendidikan, tidak, dan"
Netral,3,"pendidikan, efisiensi, anggaran, pip, dan, tidak, cair"
Positif,1,"untuk, ini, anggaran, pendidikan, tidak, di, bagus"
Positif,2,"pendidikan, untuk, anggaran, indonesia, benar, efisiensi, yang"
Positif,3,"ini, efisiensi, anggaran, dan, pemerintah, tidak, kip"
1 Sentimen Topik Ke Kata Kunci
2 Negatif 1 pendidikan, anggaran, efisiensi, yang, di, tidak, orang
3 Negatif 2 pendidikan, anggaran, tidak, efisiensi, ini, dana, dan
4 Negatif 3 yang, tidak, efisiensi, pendidikan, anggaran, itu, ini
5 Netral 1 dan, efisiensi, anggaran, guru, ya, kementerian, ini
6 Netral 2 anggaran, efisiensi, kuliah, kip, pendidikan, tidak, dan
7 Netral 3 pendidikan, efisiensi, anggaran, pip, dan, tidak, cair
8 Positif 1 untuk, ini, anggaran, pendidikan, tidak, di, bagus
9 Positif 2 pendidikan, untuk, anggaran, indonesia, benar, efisiensi, yang
10 Positif 3 ini, efisiensi, anggaran, dan, pemerintah, tidak, kip

Binary file not shown.

10
model/Nilai_Coherence.csv Normal file
View File

@ -0,0 +1,10 @@
Num_Topics,Coherence_Score
2,0.31239850490929644
3,0.2878041079491884
4,0.3032298786129941
5,0.30100283745683337
6,0.3880164824328067
7,0.3987580925505836
8,0.3785593889346621
9,0.38313774362229563
10,0.46718575350823743
1 Num_Topics Coherence_Score
2 2 0.31239850490929644
3 3 0.2878041079491884
4 4 0.3032298786129941
5 5 0.30100283745683337
6 6 0.3880164824328067
7 7 0.3987580925505836
8 8 0.3785593889346621
9 9 0.38313774362229563
10 10 0.46718575350823743

View File

@ -0,0 +1,63 @@
accuracy,loss,val_accuracy,val_loss,learning_rate,Epoch,Skenario
0.38141027092933655,1.09345543384552,0.6166666746139526,1.080513834953308,0.0010000000474974513,1,P1
0.7211538553237915,1.0516897439956665,0.7166666388511658,1.018355131149292,0.0010000000474974513,2,P1
0.7596153616905212,0.8945255875587463,0.6833333373069763,0.7797202467918396,0.0010000000474974513,3,P1
0.7371794581413269,0.5743359923362732,0.7166666388511658,0.7589436173439026,0.0010000000474974513,4,P1
0.7692307829856873,0.4817313253879547,0.7333333492279053,0.7645898461341858,0.0010000000474974513,5,P1
0.8653846383094788,0.387156218290329,0.7666666507720947,0.7059115767478943,0.0010000000474974513,6,P1
0.9487179517745972,0.26129141449928284,0.7833333611488342,0.699984610080719,0.0010000000474974513,7,P1
0.9839743375778198,0.1384553611278534,0.800000011920929,0.7633869051933289,0.0010000000474974513,8,P1
0.9935897588729858,0.07908293604850769,0.8166666626930237,0.8693917393684387,0.0010000000474974513,9,P1
0.9935897588729858,0.04564093425869942,0.800000011920929,0.9836347103118896,0.0010000000474974513,10,P1
1.0,0.025794249027967453,0.7833333611488342,1.0316051244735718,0.0005000000237487257,11,P1
0.9967948794364929,0.028194565325975418,0.7833333611488342,1.0789639949798584,0.0005000000237487257,12,P1
0.9967948794364929,0.021308064460754395,0.7833333611488342,1.1356014013290405,0.0005000000237487257,13,P1
1.0,0.016758248209953308,0.7833333611488342,1.1703376770019531,0.0002500000118743628,14,P1
1.0,0.016384366899728775,0.7833333611488342,1.210790753364563,0.0002500000118743628,15,P1
0.5560897588729858,1.069062352180481,0.6583333611488342,0.9929993748664856,0.0010000000474974513,1,P2
0.6666666865348816,0.7357844114303589,0.6916666626930237,0.6130390167236328,0.0010000000474974513,2,P2
0.8397436141967773,0.48598557710647583,0.7666666507720947,0.5232926607131958,0.0010000000474974513,3,P2
0.9375,0.31453579664230347,0.7833333611488342,0.5067169070243835,0.0010000000474974513,4,P2
0.9695512652397156,0.16148658096790314,0.8083333373069763,0.45324838161468506,0.0010000000474974513,5,P2
0.9855769276618958,0.0931703969836235,0.8166666626930237,0.5186069011688232,0.0010000000474974513,6,P2
0.9903846383094788,0.07859157770872116,0.8083333373069763,0.7238136529922485,0.0010000000474974513,7,P2
0.9967948794364929,0.030187053605914116,0.7916666865348816,0.8180117011070251,0.0010000000474974513,8,P2
0.9983974099159241,0.020236998796463013,0.8166666626930237,0.7661744952201843,0.0005000000237487257,9,P2
0.9967948794364929,0.015788376331329346,0.824999988079071,0.7735798358917236,0.0005000000237487257,10,P2
0.9967948794364929,0.012403394095599651,0.824999988079071,0.80744469165802,0.0005000000237487257,11,P2
0.9983974099159241,0.010184520855545998,0.824999988079071,0.8309624195098877,0.0002500000118743628,12,P2
0.9967948794364929,0.012382127344608307,0.8083333373069763,0.925710141658783,0.0002500000118743628,13,P2
0.5260915756225586,1.0014312267303467,0.6222222447395325,0.7828471660614014,0.0010000000474974513,1,P3
0.6656017303466797,0.609158456325531,0.6944444179534912,0.6250120401382446,0.0010000000474974513,2,P3
0.7507987022399902,0.49529990553855896,0.7388888597488403,0.554004430770874,0.0010000000474974513,3,P3
0.9105431437492371,0.3310180902481079,0.8055555820465088,0.46935632824897766,0.0010000000474974513,4,P3
0.9712460041046143,0.1286899894475937,0.7722222208976746,0.7690128684043884,0.0010000000474974513,5,P3
0.98296058177948,0.05941445380449295,0.7777777910232544,0.8493516445159912,0.0010000000474974513,6,P3
0.9968051314353943,0.029140625149011612,0.7833333611488342,1.0082753896713257,0.0010000000474974513,7,P3
0.9978700876235962,0.01636233739554882,0.7833333611488342,1.0569945573806763,0.0005000000237487257,8,P3
0.9968051314353943,0.01626773364841938,0.7611111402511597,1.1638100147247314,0.0005000000237487257,9,P3
0.9957401752471924,0.015717728063464165,0.7833333611488342,1.028610110282898,0.0005000000237487257,10,P3
0.9989350438117981,0.009835228323936462,0.8055555820465088,1.0765795707702637,0.0002500000118743628,11,P3
0.9978700876235962,0.009616348892450333,0.7833333611488342,1.1071890592575073,0.0002500000118743628,12,P3
0.5899280309677124,0.9616552591323853,0.6833333373069763,0.62132328748703,0.0010000000474974513,1,P4
0.7082334160804749,0.5806796550750732,0.7916666865348816,0.534578263759613,0.0010000000474974513,2,P4
0.8832933902740479,0.369579553604126,0.8083333373069763,0.4666915237903595,0.0010000000474974513,3,P4
0.9600319862365723,0.1569712609052658,0.8208333253860474,0.464290052652359,0.0010000000474974513,4,P4
0.9848121404647827,0.07800274342298508,0.6833333373069763,0.8866328001022339,0.0010000000474974513,5,P4
0.9928057789802551,0.041098933666944504,0.7958333492279053,0.804090142250061,0.0010000000474974513,6,P4
0.9920064210891724,0.03647830709815025,0.8041666746139526,0.6013585329055786,0.0010000000474974513,7,P4
0.996802568435669,0.016663840040564537,0.8208333253860474,0.7018534541130066,0.0005000000237487257,8,P4
0.9992006421089172,0.010782836936414242,0.8166666626930237,0.7502598166465759,0.0005000000237487257,9,P4
0.9992006421089172,0.0074098482728004456,0.8125,0.7980797290802002,0.0005000000237487257,10,P4
1.0,0.0051342034712433815,0.8166666626930237,0.8163005113601685,0.0002500000118743628,11,P4
1.0,0.005056099500507116,0.8166666626930237,0.8405582308769226,0.0002500000118743628,12,P4
0.575815737247467,0.9292432069778442,0.7333333492279053,0.7067171931266785,0.0010000000474974513,1,P5
0.8202175498008728,0.4824707508087158,0.8433333039283752,0.4428572952747345,0.0010000000474974513,2,P5
0.944337785243988,0.18990492820739746,0.800000011920929,0.5153856873512268,0.0010000000474974513,3,P5
0.9782469868659973,0.08659510314464569,0.8299999833106995,0.5515473484992981,0.0010000000474974513,4,P5
0.9865642786026001,0.05353087931871414,0.8633333444595337,0.5090053081512451,0.0010000000474974513,5,P5
0.9948816299438477,0.022251253947615623,0.8500000238418579,0.5737677216529846,0.0005000000237487257,6,P5
0.9955214262008667,0.020851323381066322,0.8433333039283752,0.6206527948379517,0.0005000000237487257,7,P5
0.9948816299438477,0.017847692593932152,0.8100000023841858,0.9818077683448792,0.0005000000237487257,8,P5
0.9948816299438477,0.01693383976817131,0.8433333039283752,0.6270163059234619,0.0002500000118743628,9,P5
0.9968010187149048,0.013330518268048763,0.8399999737739563,0.6716769933700562,0.0002500000118743628,10,P5
1 accuracy loss val_accuracy val_loss learning_rate Epoch Skenario
2 0.38141027092933655 1.09345543384552 0.6166666746139526 1.080513834953308 0.0010000000474974513 1 P1
3 0.7211538553237915 1.0516897439956665 0.7166666388511658 1.018355131149292 0.0010000000474974513 2 P1
4 0.7596153616905212 0.8945255875587463 0.6833333373069763 0.7797202467918396 0.0010000000474974513 3 P1
5 0.7371794581413269 0.5743359923362732 0.7166666388511658 0.7589436173439026 0.0010000000474974513 4 P1
6 0.7692307829856873 0.4817313253879547 0.7333333492279053 0.7645898461341858 0.0010000000474974513 5 P1
7 0.8653846383094788 0.387156218290329 0.7666666507720947 0.7059115767478943 0.0010000000474974513 6 P1
8 0.9487179517745972 0.26129141449928284 0.7833333611488342 0.699984610080719 0.0010000000474974513 7 P1
9 0.9839743375778198 0.1384553611278534 0.800000011920929 0.7633869051933289 0.0010000000474974513 8 P1
10 0.9935897588729858 0.07908293604850769 0.8166666626930237 0.8693917393684387 0.0010000000474974513 9 P1
11 0.9935897588729858 0.04564093425869942 0.800000011920929 0.9836347103118896 0.0010000000474974513 10 P1
12 1.0 0.025794249027967453 0.7833333611488342 1.0316051244735718 0.0005000000237487257 11 P1
13 0.9967948794364929 0.028194565325975418 0.7833333611488342 1.0789639949798584 0.0005000000237487257 12 P1
14 0.9967948794364929 0.021308064460754395 0.7833333611488342 1.1356014013290405 0.0005000000237487257 13 P1
15 1.0 0.016758248209953308 0.7833333611488342 1.1703376770019531 0.0002500000118743628 14 P1
16 1.0 0.016384366899728775 0.7833333611488342 1.210790753364563 0.0002500000118743628 15 P1
17 0.5560897588729858 1.069062352180481 0.6583333611488342 0.9929993748664856 0.0010000000474974513 1 P2
18 0.6666666865348816 0.7357844114303589 0.6916666626930237 0.6130390167236328 0.0010000000474974513 2 P2
19 0.8397436141967773 0.48598557710647583 0.7666666507720947 0.5232926607131958 0.0010000000474974513 3 P2
20 0.9375 0.31453579664230347 0.7833333611488342 0.5067169070243835 0.0010000000474974513 4 P2
21 0.9695512652397156 0.16148658096790314 0.8083333373069763 0.45324838161468506 0.0010000000474974513 5 P2
22 0.9855769276618958 0.0931703969836235 0.8166666626930237 0.5186069011688232 0.0010000000474974513 6 P2
23 0.9903846383094788 0.07859157770872116 0.8083333373069763 0.7238136529922485 0.0010000000474974513 7 P2
24 0.9967948794364929 0.030187053605914116 0.7916666865348816 0.8180117011070251 0.0010000000474974513 8 P2
25 0.9983974099159241 0.020236998796463013 0.8166666626930237 0.7661744952201843 0.0005000000237487257 9 P2
26 0.9967948794364929 0.015788376331329346 0.824999988079071 0.7735798358917236 0.0005000000237487257 10 P2
27 0.9967948794364929 0.012403394095599651 0.824999988079071 0.80744469165802 0.0005000000237487257 11 P2
28 0.9983974099159241 0.010184520855545998 0.824999988079071 0.8309624195098877 0.0002500000118743628 12 P2
29 0.9967948794364929 0.012382127344608307 0.8083333373069763 0.925710141658783 0.0002500000118743628 13 P2
30 0.5260915756225586 1.0014312267303467 0.6222222447395325 0.7828471660614014 0.0010000000474974513 1 P3
31 0.6656017303466797 0.609158456325531 0.6944444179534912 0.6250120401382446 0.0010000000474974513 2 P3
32 0.7507987022399902 0.49529990553855896 0.7388888597488403 0.554004430770874 0.0010000000474974513 3 P3
33 0.9105431437492371 0.3310180902481079 0.8055555820465088 0.46935632824897766 0.0010000000474974513 4 P3
34 0.9712460041046143 0.1286899894475937 0.7722222208976746 0.7690128684043884 0.0010000000474974513 5 P3
35 0.98296058177948 0.05941445380449295 0.7777777910232544 0.8493516445159912 0.0010000000474974513 6 P3
36 0.9968051314353943 0.029140625149011612 0.7833333611488342 1.0082753896713257 0.0010000000474974513 7 P3
37 0.9978700876235962 0.01636233739554882 0.7833333611488342 1.0569945573806763 0.0005000000237487257 8 P3
38 0.9968051314353943 0.01626773364841938 0.7611111402511597 1.1638100147247314 0.0005000000237487257 9 P3
39 0.9957401752471924 0.015717728063464165 0.7833333611488342 1.028610110282898 0.0005000000237487257 10 P3
40 0.9989350438117981 0.009835228323936462 0.8055555820465088 1.0765795707702637 0.0002500000118743628 11 P3
41 0.9978700876235962 0.009616348892450333 0.7833333611488342 1.1071890592575073 0.0002500000118743628 12 P3
42 0.5899280309677124 0.9616552591323853 0.6833333373069763 0.62132328748703 0.0010000000474974513 1 P4
43 0.7082334160804749 0.5806796550750732 0.7916666865348816 0.534578263759613 0.0010000000474974513 2 P4
44 0.8832933902740479 0.369579553604126 0.8083333373069763 0.4666915237903595 0.0010000000474974513 3 P4
45 0.9600319862365723 0.1569712609052658 0.8208333253860474 0.464290052652359 0.0010000000474974513 4 P4
46 0.9848121404647827 0.07800274342298508 0.6833333373069763 0.8866328001022339 0.0010000000474974513 5 P4
47 0.9928057789802551 0.041098933666944504 0.7958333492279053 0.804090142250061 0.0010000000474974513 6 P4
48 0.9920064210891724 0.03647830709815025 0.8041666746139526 0.6013585329055786 0.0010000000474974513 7 P4
49 0.996802568435669 0.016663840040564537 0.8208333253860474 0.7018534541130066 0.0005000000237487257 8 P4
50 0.9992006421089172 0.010782836936414242 0.8166666626930237 0.7502598166465759 0.0005000000237487257 9 P4
51 0.9992006421089172 0.0074098482728004456 0.8125 0.7980797290802002 0.0005000000237487257 10 P4
52 1.0 0.0051342034712433815 0.8166666626930237 0.8163005113601685 0.0002500000118743628 11 P4
53 1.0 0.005056099500507116 0.8166666626930237 0.8405582308769226 0.0002500000118743628 12 P4
54 0.575815737247467 0.9292432069778442 0.7333333492279053 0.7067171931266785 0.0010000000474974513 1 P5
55 0.8202175498008728 0.4824707508087158 0.8433333039283752 0.4428572952747345 0.0010000000474974513 2 P5
56 0.944337785243988 0.18990492820739746 0.800000011920929 0.5153856873512268 0.0010000000474974513 3 P5
57 0.9782469868659973 0.08659510314464569 0.8299999833106995 0.5515473484992981 0.0010000000474974513 4 P5
58 0.9865642786026001 0.05353087931871414 0.8633333444595337 0.5090053081512451 0.0010000000474974513 5 P5
59 0.9948816299438477 0.022251253947615623 0.8500000238418579 0.5737677216529846 0.0005000000237487257 6 P5
60 0.9955214262008667 0.020851323381066322 0.8433333039283752 0.6206527948379517 0.0005000000237487257 7 P5
61 0.9948816299438477 0.017847692593932152 0.8100000023841858 0.9818077683448792 0.0005000000237487257 8 P5
62 0.9948816299438477 0.01693383976817131 0.8433333039283752 0.6270163059234619 0.0002500000118743628 9 P5
63 0.9968010187149048 0.013330518268048763 0.8399999737739563 0.6716769933700562 0.0002500000118743628 10 P5

View File

@ -0,0 +1,7 @@
,precision,recall,f1-score,support
negatif,0.8125,0.9,0.8540145985401459,130.0
netral,0.8160919540229885,0.7395833333333334,0.7759562841530054,96.0
positif,0.9420289855072463,0.8783783783783784,0.9090909090909091,74.0
accuracy,0.8433333333333334,0.8433333333333334,0.8433333333333334,300.0
macro avg,0.8568736465100782,0.8393205705705706,0.8463539305946868,300.0
weighted avg,0.8455999083791437,0.8433333333333334,0.8426214278721159,300.0
1 precision recall f1-score support
2 negatif 0.8125 0.9 0.8540145985401459 130.0
3 netral 0.8160919540229885 0.7395833333333334 0.7759562841530054 96.0
4 positif 0.9420289855072463 0.8783783783783784 0.9090909090909091 74.0
5 accuracy 0.8433333333333334 0.8433333333333334 0.8433333333333334 300.0
6 macro avg 0.8568736465100782 0.8393205705705706 0.8463539305946868 300.0
7 weighted avg 0.8455999083791437 0.8433333333333334 0.8426214278721159 300.0

101
model/WordFreq_Negatif.csv Normal file
View File

@ -0,0 +1,101 @@
Word,Frequency
pendidikan,519
anggaran,501
efisiensi,439
yang,400
tidak,361
di,248
dan,225
dana,205
ini,187
ada,167
itu,133
saja,131
tapi,116
sudah,110
untuk,107
dipotong,106
juga,97
sekolah,97
pemotongan,96
kalau,96
kena,96
kuliah,94
karena,93
banget,93
buat,92
dari,91
bisa,91
ya,90
jadi,88
guru,84
ke,78
dengan,76
mau,76
pemerintah,74
kip,72
sama,71
mahasiswa,69
bos,67
indonesia,67
pip,64
banyak,62
saya,61
sih,60
orang,58
kampus,56
enggak,56
malah,54
sampai,53
kesehatan,52
apa,52
sekarang,51
nya,51
masih,50
pada,49
makan,49
tuh,49
lagi,49
rakyat,46
negara,46
akan,45
bikin,45
beasiswa,45
potong,44
gua,44
dapat,44
aku,44
bukan,43
memang,43
anak,43
bagaimana,42
tolak,42
kayak,40
gratis,40
dikurangi,39
mana,38
terus,37
demo,36
padahal,36
amp,35
kebijakan,35
semua,34
lebih,34
dipangkas,34
kok,34
jangan,34
mereka,34
tahu,34
gue,33
dia,32
benar,32
tahun,31
kan,31
naik,31
hal,30
gaji,30
makin,30
cuma,30
uu,30
lu,29
belum,29
1 Word Frequency
2 pendidikan 519
3 anggaran 501
4 efisiensi 439
5 yang 400
6 tidak 361
7 di 248
8 dan 225
9 dana 205
10 ini 187
11 ada 167
12 itu 133
13 saja 131
14 tapi 116
15 sudah 110
16 untuk 107
17 dipotong 106
18 juga 97
19 sekolah 97
20 pemotongan 96
21 kalau 96
22 kena 96
23 kuliah 94
24 karena 93
25 banget 93
26 buat 92
27 dari 91
28 bisa 91
29 ya 90
30 jadi 88
31 guru 84
32 ke 78
33 dengan 76
34 mau 76
35 pemerintah 74
36 kip 72
37 sama 71
38 mahasiswa 69
39 bos 67
40 indonesia 67
41 pip 64
42 banyak 62
43 saya 61
44 sih 60
45 orang 58
46 kampus 56
47 enggak 56
48 malah 54
49 sampai 53
50 kesehatan 52
51 apa 52
52 sekarang 51
53 nya 51
54 masih 50
55 pada 49
56 makan 49
57 tuh 49
58 lagi 49
59 rakyat 46
60 negara 46
61 akan 45
62 bikin 45
63 beasiswa 45
64 potong 44
65 gua 44
66 dapat 44
67 aku 44
68 bukan 43
69 memang 43
70 anak 43
71 bagaimana 42
72 tolak 42
73 kayak 40
74 gratis 40
75 dikurangi 39
76 mana 38
77 terus 37
78 demo 36
79 padahal 36
80 amp 35
81 kebijakan 35
82 semua 34
83 lebih 34
84 dipangkas 34
85 kok 34
86 jangan 34
87 mereka 34
88 tahu 34
89 gue 33
90 dia 32
91 benar 32
92 tahun 31
93 kan 31
94 naik 31
95 hal 30
96 gaji 30
97 makin 30
98 cuma 30
99 uu 30
100 lu 29
101 belum 29

101
model/WordFreq_Netral.csv Normal file
View File

@ -0,0 +1,101 @@
Word,Frequency
anggaran,299
efisiensi,269
pendidikan,218
dan,172
tidak,151
yang,125
di,114
pip,94
ini,89
kuliah,88
kip,87
dana,78
cair,66
beasiswa,65
mahasiswa,61
untuk,60
ada,60
indonesia,53
pemotongan,53
ke,49
itu,47
kena,46
dari,46
ya,42
guru,41
sudah,40
bantuan,37
tapi,34
pada,34
bos,34
program,34
akan,34
sekolah,32
pintar,30
aku,29
dengan,28
sri,28
mulyani,28
tetap,28
cek,28
bisa,28
juga,26
cara,26
tinggi,25
pemerintah,25
tahun,25
kebijakan,24
dampak,24
tunjangan,24
sama,24
dapat,23
pastikan,23
sampai,23
penerima,22
jadi,22
bahwa,21
orang,21
dalam,21
simak,20
atau,20
kapan,19
menegaskan,19
gaji,19
kesehatan,18
rp,18
februari,18
kartu,17
soal,17
prabowo,17
hingga,17
lagi,17
masih,17
karena,17
kalau,17
kita,17
baru,16
pemangkasan,16
daerah,16
oleh,16
apa,16
menteri,15
seperti,15
memastikan,15
aksi,15
terdampak,15
belum,15
kampus,15
presiden,15
terbaru,15
kali,15
buat,14
terkait,14
sektor,14
mendiktisaintek,14
semua,14
sih,14
kan,13
informasi,13
kak,13
ukt,13
1 Word Frequency
2 anggaran 299
3 efisiensi 269
4 pendidikan 218
5 dan 172
6 tidak 151
7 yang 125
8 di 114
9 pip 94
10 ini 89
11 kuliah 88
12 kip 87
13 dana 78
14 cair 66
15 beasiswa 65
16 mahasiswa 61
17 untuk 60
18 ada 60
19 indonesia 53
20 pemotongan 53
21 ke 49
22 itu 47
23 kena 46
24 dari 46
25 ya 42
26 guru 41
27 sudah 40
28 bantuan 37
29 tapi 34
30 pada 34
31 bos 34
32 program 34
33 akan 34
34 sekolah 32
35 pintar 30
36 aku 29
37 dengan 28
38 sri 28
39 mulyani 28
40 tetap 28
41 cek 28
42 bisa 28
43 juga 26
44 cara 26
45 tinggi 25
46 pemerintah 25
47 tahun 25
48 kebijakan 24
49 dampak 24
50 tunjangan 24
51 sama 24
52 dapat 23
53 pastikan 23
54 sampai 23
55 penerima 22
56 jadi 22
57 bahwa 21
58 orang 21
59 dalam 21
60 simak 20
61 atau 20
62 kapan 19
63 menegaskan 19
64 gaji 19
65 kesehatan 18
66 rp 18
67 februari 18
68 kartu 17
69 soal 17
70 prabowo 17
71 hingga 17
72 lagi 17
73 masih 17
74 karena 17
75 kalau 17
76 kita 17
77 baru 16
78 pemangkasan 16
79 daerah 16
80 oleh 16
81 apa 16
82 menteri 15
83 seperti 15
84 memastikan 15
85 aksi 15
86 terdampak 15
87 belum 15
88 kampus 15
89 presiden 15
90 terbaru 15
91 kali 15
92 buat 14
93 terkait 14
94 sektor 14
95 mendiktisaintek 14
96 semua 14
97 sih 14
98 kan 13
99 informasi 13
100 kak 13
101 ukt 13

101
model/WordFreq_Positif.csv Normal file
View File

@ -0,0 +1,101 @@
Word,Frequency
anggaran,305
pendidikan,279
efisiensi,268
ini,249
yang,219
untuk,216
tidak,157
pemerintah,120
dan,120
kebijakan,115
lebih,96
dana,92
sangat,90
indonesia,80
benar,80
di,80
jadi,79
ada,76
masa,72
maju,72
tepat,69
kip,67
matang,66
kuliah,64
bisa,63
dengan,62
beasiswa,56
langkah,54
proyek,53
kualitas,50
bukan,47
terus,46
program,44
akan,44
sudah,41
sekolah,41
nyata,40
sasaran,40
bagus,39
mendukung,39
saya,39
membutuhkan,38
guru,38
honorer,38
depan,38
sekali,37
memperbaiki,37
kerja,37
penghematan,36
dialihkan,36
fasilitas,36
adalah,36
demi,36
tertinggal,36
daerah,36
lagi,35
makin,35
solusi,34
pelajar,34
terbaik,34
kesejahteraan,34
digunakan,34
dari,33
pemotongan,33
memikirkan,33
transisi,33
dihemat,33
optimis,33
jelas,32
pemborosan,32
pemangkasan,31
membuat,31
terima,31
kasih,31
apresiasi,31
efisien,31
perlu,31
tingginya,31
harus,31
setinggi,31
transparan,31
tanpa,30
tetap,29
penting,29
mantap,28
tapi,28
ke,28
bos,27
guna,27
kebocoran,27
tersalurkan,26
berani,26
krusial,26
pada,26
bebas,26
korupsi,26
kini,26
mengefisiensikan,26
setuju,26
mahal,26
1 Word Frequency
2 anggaran 305
3 pendidikan 279
4 efisiensi 268
5 ini 249
6 yang 219
7 untuk 216
8 tidak 157
9 pemerintah 120
10 dan 120
11 kebijakan 115
12 lebih 96
13 dana 92
14 sangat 90
15 indonesia 80
16 benar 80
17 di 80
18 jadi 79
19 ada 76
20 masa 72
21 maju 72
22 tepat 69
23 kip 67
24 matang 66
25 kuliah 64
26 bisa 63
27 dengan 62
28 beasiswa 56
29 langkah 54
30 proyek 53
31 kualitas 50
32 bukan 47
33 terus 46
34 program 44
35 akan 44
36 sudah 41
37 sekolah 41
38 nyata 40
39 sasaran 40
40 bagus 39
41 mendukung 39
42 saya 39
43 membutuhkan 38
44 guru 38
45 honorer 38
46 depan 38
47 sekali 37
48 memperbaiki 37
49 kerja 37
50 penghematan 36
51 dialihkan 36
52 fasilitas 36
53 adalah 36
54 demi 36
55 tertinggal 36
56 daerah 36
57 lagi 35
58 makin 35
59 solusi 34
60 pelajar 34
61 terbaik 34
62 kesejahteraan 34
63 digunakan 34
64 dari 33
65 pemotongan 33
66 memikirkan 33
67 transisi 33
68 dihemat 33
69 optimis 33
70 jelas 32
71 pemborosan 32
72 pemangkasan 31
73 membuat 31
74 terima 31
75 kasih 31
76 apresiasi 31
77 efisien 31
78 perlu 31
79 tingginya 31
80 harus 31
81 setinggi 31
82 transparan 31
83 tanpa 30
84 tetap 29
85 penting 29
86 mantap 28
87 tapi 28
88 ke 28
89 bos 27
90 guna 27
91 kebocoran 27
92 tersalurkan 26
93 berani 26
94 krusial 26
95 pada 26
96 bebas 26
97 korupsi 26
98 kini 26
99 mengefisiensikan 26
100 setuju 26
101 mahal 26

File diff suppressed because one or more lines are too long

1
packages.txt Normal file
View File

@ -0,0 +1 @@
graphviz

14
requirements.txt Normal file
View File

@ -0,0 +1,14 @@
streamlit==1.28.0
tensorflow==2.12.0
pandas==1.5.3
numpy==1.23.5
plotly==5.15.0
matplotlib==3.7.1
seaborn==0.12.2
wordcloud==1.9.2
Sastrawi==1.0.1
streamlit-option-menu==0.3.6
graphviz==0.20.1
h5py==3.8.0
scikit-learn==1.2.2
gensim==4.3.1

150
utils.py Normal file
View File

@ -0,0 +1,150 @@
import pandas as pd
import numpy as np
import json
import h5py
import re
import pickle
import os
import streamlit as st
import tensorflow as tf
try:
from tensorflow.keras.utils import pad_sequences
except ImportError:
from tensorflow.keras.preprocessing.sequence import pad_sequences
# ==============================================================================
# 1. KONFIGURASI GLOBAL
# ==============================================================================
MAX_SEQUENCE_LENGTH = 100
MODEL_PATH = 'model/Model_Sentiment_LSTM.h5'
TOKENIZER_JSON_PATH = 'model/tokenizer_sentiment.json'
TOKENIZER_PICKLE_PATH = 'model/tokenizer_sentiment.pickle'
# ==============================================================================
# 2. PATCHING MODEL
# ==============================================================================
def recursive_fix_config(config):
    """Patch a (nested) model config so it can be parsed by different TF versions.

    Lists are rebuilt element by element. Dictionaries are modified in place:
    a ``batch_shape`` key is renamed to ``batch_input_shape``, a ``dtype``
    value that is a dict or whose string form contains ``Policy`` is replaced
    by ``'float32'``, and every value is then processed recursively. Any other
    object is returned untouched.

    Args:
        config: A dict, a list, or a leaf value taken from a model config.

    Returns:
        The patched structure (the same dict object when a dict was given).
    """
    if isinstance(config, list):
        return list(map(recursive_fix_config, config))
    if not isinstance(config, dict):
        return config

    # Rename the key; the renamed entry ends up last in the dict.
    if 'batch_shape' in config:
        config['batch_input_shape'] = config.pop('batch_shape')

    # Collapse dtype policy objects into a plain dtype name.
    if 'dtype' in config:
        dtype_value = config['dtype']
        if isinstance(dtype_value, dict) or 'Policy' in str(dtype_value):
            config['dtype'] = 'float32'

    # Only existing keys are reassigned, so the dict size never changes here.
    for key in list(config):
        config[key] = recursive_fix_config(config[key])
    return config
# ==============================================================================
# 3. LOAD RESOURCES (MODEL & TOKENIZER)
# ==============================================================================
@st.cache_resource
def load_resources():
    """Load the sentiment model and its tokenizer.

    The model is first loaded directly from ``MODEL_PATH``. If that fails, the
    architecture JSON stored in the HDF5 attribute ``model_config`` is patched
    with ``recursive_fix_config``, the model is rebuilt from it and the weights
    are loaded from the same file. The tokenizer is read from
    ``TOKENIZER_JSON_PATH`` when that file exists, otherwise from
    ``TOKENIZER_PICKLE_PATH``.

    Returns:
        tuple: ``(model, tokenizer)`` on success, or ``(None, None)`` after the
        problem has been reported with ``st.error``.
    """
    model = None
    tokenizer = None

    # --- A. LOAD MODEL ---
    if not os.path.exists(MODEL_PATH):
        st.error(f"❌ File model tidak ditemukan di: {MODEL_PATH}")
        return None, None

    try:
        model = tf.keras.models.load_model(MODEL_PATH, compile=False)
    except Exception:
        # Deliberate best-effort fallback: rebuild the model from a patched
        # copy of the config embedded in the HDF5 file.
        try:
            with h5py.File(MODEL_PATH, mode='r') as f:
                model_config_str = f.attrs.get('model_config')
            if isinstance(model_config_str, bytes):
                model_config_str = model_config_str.decode('utf-8')
            model_config_dict = json.loads(model_config_str)
            fixed_config = recursive_fix_config(model_config_dict)
            model = tf.keras.models.model_from_json(json.dumps(fixed_config))
            # The HDF5 handle above is already closed before the weights load.
            model.load_weights(MODEL_PATH)
        except Exception as e:
            st.error(f"❌ Gagal memuat model: {e}")
            return None, None

    # --- B. LOAD TOKENIZER ---
    try:
        if os.path.exists(TOKENIZER_JSON_PATH):
            with open(TOKENIZER_JSON_PATH, 'r', encoding='utf-8') as f:
                content = f.read()
            try:
                parsed_json = json.loads(content)
                if isinstance(parsed_json, str):
                    # The file held a JSON-encoded string (double-encoded config).
                    input_tokenizer = parsed_json
                else:
                    input_tokenizer = json.dumps(parsed_json)
            except ValueError:
                # Not valid JSON on its own: hand the raw text to Keras as-is.
                input_tokenizer = content
            tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(input_tokenizer)
        elif os.path.exists(TOKENIZER_PICKLE_PATH):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only ship tokenizer pickles produced by the project itself.
            with open(TOKENIZER_PICKLE_PATH, 'rb') as handle:
                tokenizer = pickle.load(handle)
        else:
            st.error("❌ File Tokenizer tidak ditemukan.")
            return None, None
    except Exception as e:
        st.error(f"❌ Gagal memuat tokenizer: {e}")
        return None, None

    return model, tokenizer
# ==============================================================================
# 4. PREPROCESSING TEKS
# ==============================================================================
slang_dict = {
'bgt': 'banget', 'yg': 'yang', 'gak': 'tidak', 'ga': 'tidak',
'kalo': 'kalau', 'kl': 'kalau', 'dr': 'dari', 'krn': 'karena',
'jd': 'jadi', 'sdh': 'sudah', 'aja': 'saja', 'dgn': 'dengan',
'tdk': 'tidak', 'tp': 'tapi', 'sy': 'saya', 'utk': 'untuk',
'd': 'di', 'blm': 'belum', 'jgn': 'jangan', 'gw': 'saya',
'lo': 'kamu', 'sm': 'sama', 'tau': 'tahu', 'kpn': 'kapan',
'bs': 'bisa', 'lbh': 'lebih', 'kmrn': 'kemarin',
'nggak': 'tidak', 'enggak': 'tidak', 'gk': 'tidak',
'kaga': 'tidak', 'tak': 'tidak', 'g': 'tidak',
'bener': 'benar', 'bnr': 'benar', 'msh': 'masih',
'udah': 'sudah', 'sprt': 'seperti', 'opr': 'operasional',
'tlg': 'tolong', 'bkn': 'bukan', 'aq': 'aku', 'km': 'kamu', 'dlm': 'dalam'
}
def clean_text(text):
if not isinstance(text, str): return ""
text = text.lower()
text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
text = re.sub(r'@\w+', '', text)
text = re.sub(r'#\w+', '', text)
text = re.sub(r'\d+', '', text)
text = re.sub(r'[^\w\s]', ' ', text)
text = re.sub(r'\s+', ' ', text).strip()
words = text.split()
normalized_words = [slang_dict.get(w, w) for w in words]
return " ".join(normalized_words)
# ==============================================================================
# 5. PREDIKSI
# ==============================================================================
def predict_sentiment(text, model, tokenizer):
    """Classify one text as Negatif, Netral or Positif.

    The text is cleaned with ``clean_text``, converted to a sequence by the
    tokenizer, post-padded/truncated to ``MAX_SEQUENCE_LENGTH`` and passed to
    ``model.predict``. The label is the class with the highest score.

    Args:
        text: Raw input text.
        model: Object exposing ``predict(padded, verbose=0)``.
        tokenizer: Object exposing ``texts_to_sequences``.

    Returns:
        tuple: ``(label, confidence, prediction, cleaned_text)``. ``confidence``
        is a plain Python ``float`` percentage and ``prediction`` is the first
        row returned by ``model.predict``. When the text is empty or the model
        or tokenizer is missing, ``("Error", 0.0, [0, 0, 0], text)`` is returned.
    """
    if not text or model is None or tokenizer is None:
        return "Error", 0.0, [0, 0, 0], text

    cleaned_text = clean_text(text)
    seq = tokenizer.texts_to_sequences([cleaned_text])
    padded = pad_sequences(seq, maxlen=MAX_SEQUENCE_LENGTH, padding='post', truncating='post')
    prediction = model.predict(padded, verbose=0)[0]

    labels = ['Negatif', 'Netral', 'Positif']
    label_idx = int(np.argmax(prediction))
    label = labels[label_idx]
    # Convert to a built-in float: NumPy scalars such as float32 are not
    # JSON-serializable, and callers persist this value with json.dump.
    confidence = float(prediction[label_idx]) * 100
    return label, confidence, prediction, cleaned_text

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

207
views/analisis_csv.py Normal file
View File

@ -0,0 +1,207 @@
import streamlit as st
import pandas as pd
import plotly.express as px
import altair as alt
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np
from utils import predict_sentiment
def render_analisis_csv(model, tokenizer):
    """Render the batch page: upload a CSV, classify every row, show the results.

    The uploaded file must end in ``.csv`` and contain a ``Teks Tweet`` column.
    After the user presses the start button each row is classified with
    ``predict_sentiment``; the resulting frame is kept in
    ``st.session_state['batch_results']`` and rendered in three tabs (table and
    download, statistics, word cloud).

    Args:
        model: Sentiment model, forwarded unchanged to ``predict_sentiment``.
        tokenizer: Tokenizer, forwarded unchanged to ``predict_sentiment``.
    """
    st.title("📂 Analisis File CSV (Batch)")
    st.markdown("Unggah file data (CSV) yang berisi ribuan komentar, dan biarkan AI menganalisis sentimennya secara massal.")
    st.info("💡 **Panduan Upload:** Pastikan file CSV Anda memiliki kolom bernama **Teks Tweet** yang berisi teks/opini. Jika namanya berbeda, mohon ubah terlebih dahulu di Excel.")

    # 1. Session-state initialisation
    if 'batch_results' not in st.session_state:
        st.session_state['batch_results'] = None
    if 'original_text_col' not in st.session_state:
        st.session_state['original_text_col'] = None

    # ==========================================================================
    # 2. File upload area
    # ==========================================================================
    uploaded_file = st.file_uploader("Upload File CSV di sini:")

    if uploaded_file is None:
        # Removing the file also discards the previous results.
        st.session_state['batch_results'] = None
        st.session_state['original_text_col'] = None

    if uploaded_file is not None:
        # Extension check is done by hand so a wrong file type yields an
        # explicit error message instead of being filtered by the uploader.
        if not uploaded_file.name.lower().endswith('.csv'):
            st.error("❌ **Error:** Format file tidak didukung! Sistem hanya dapat memproses file berekstensi **.csv**.")
            return

        try:
            df_upload = pd.read_csv(uploaded_file)

            # Validation 1: the file must contain at least one row.
            if df_upload.empty:
                st.error("❌ File CSV yang Anda unggah kosong (0 baris). Silakan periksa kembali file Anda.")
                return

            # Validation 2: the required column must exist (case-sensitive).
            KOLOM_WAJIB = "Teks Tweet"
            if KOLOM_WAJIB not in df_upload.columns:
                st.error(f"❌ **Error Format:** File CSV Anda tidak memiliki kolom bernama **'{KOLOM_WAJIB}'**.")
                st.warning(f"Perbaiki file Anda: Buka di Excel, ubah nama kolom yang berisi teks opini menjadi '{KOLOM_WAJIB}', simpan kembali sebagai CSV, lalu unggah ulang.")
                return

            st.markdown("---")
            st.subheader("⚙️ Konfigurasi Analisis")
            text_col = KOLOM_WAJIB
            st.success(f"✅ Kolom target **'{text_col}'** ditemukan! Total Data: **{len(df_upload)} baris**.")

            if st.button("🚀 Mulai Proses Analisis", type="primary", use_container_width=True):
                with st.spinner('🤖 AI sedang memproses... Mohon tunggu.'):
                    # Replace NaN by empty strings before processing.
                    df_upload[text_col] = df_upload[text_col].fillna("")

                    results_label, results_clean = [], []
                    my_bar = st.progress(0, text="Memproses data...")
                    total_data = len(df_upload)
                    error_count = 0

                    # Progress is driven by position, not by the index label.
                    for done, teks in enumerate(df_upload[text_col].astype(str), start=1):
                        if not teks.strip():
                            # Empty rows skip the model and are labelled neutral.
                            results_label.append("Netral")
                            results_clean.append("")
                        else:
                            try:
                                lbl, _conf, _probs, cln = predict_sentiment(teks, model, tokenizer)
                            except Exception:
                                lbl, cln = "Error", "GAGAL DIPROSES"
                            # predict_sentiment can also report "Error" without
                            # raising; count both kinds of failure.
                            if lbl == "Error":
                                error_count += 1
                            results_label.append(lbl)
                            results_clean.append(cln)

                        persen = done / total_data
                        my_bar.progress(persen, text=f"Selesai: {done} dari {total_data} data ({int(persen*100)}%)")

                    # Store the results alongside the uploaded data.
                    df_upload['Teks_Bersih'] = results_clean
                    df_upload['Prediksi_Sentimen'] = results_label
                    st.session_state['batch_results'] = df_upload
                    st.session_state['original_text_col'] = text_col

                    if error_count > 0:
                        st.warning(f"⚠️ Analisis selesai, namun ada **{error_count} baris yang gagal diproses** (ditandai dengan label 'Error').")
                    else:
                        st.success("✅ Semua data berhasil dianalisis tanpa masalah!")

        except pd.errors.EmptyDataError:
            st.error("❌ **Error:** File CSV kosong atau format rusak.")
        except pd.errors.ParserError:
            st.error("❌ **Error Parsing:** Susunan koma (delimiter) pada file CSV berantakan. Harap simpan ulang file Excel ke format CSV.")
        except Exception as e:
            st.error(f"❌ **Kesalahan Sistem:** Terjadi masalah yang tidak terduga: `{e}`")

    # ==========================================================================
    # 3. Prediction results area
    # ==========================================================================
    if st.session_state['batch_results'] is not None:
        st.markdown("---")
        df_final = st.session_state['batch_results'].copy()
        df_final.index = range(1, len(df_final) + 1)  # 1-based row numbers
        kolom_asli = st.session_state['original_text_col']
        df_final['Prediksi_Sentimen'] = df_final['Prediksi_Sentimen'].astype(str).str.strip().str.title()

        tab1, tab2, tab3 = st.tabs(["📋 Tabel Hasil", "📊 Statistik & Grafik", "☁️ WordCloud"])

        # --- Tab 1: result table ---
        with tab1:
            st.subheader("📋 Pratinjau Data Hasil Analisis")
            st.dataframe(df_final, use_container_width=True)
            st.write("")
            csv = df_final.to_csv(index=False).encode('utf-8')
            st.download_button("📥 Download Hasil Lengkap (CSV)", data=csv, file_name="Hasil_Analisis_Batch.csv", mime="text/csv")

        # --- Tab 2: statistics & charts ---
        with tab2:
            st.subheader("📊 Statistik Sentimen Data Baru")
            count_res = df_final['Prediksi_Sentimen'].value_counts().reset_index()
            count_res.columns = ['Sentimen', 'Jumlah']

            warna_map = pd.DataFrame({
                'Sentimen': ['Positif', 'Netral', 'Negatif'],
                'Warna': ['#00CC96', '#808495', '#FF4B4B']
            })
            # Inner merge: only the three known labels reach the bar chart.
            chart_data = count_res.merge(warna_map, on='Sentimen')

            col_stat1, col_stat2 = st.columns(2)
            with col_stat1:
                st.caption("Distribusi Jumlah")
                c = alt.Chart(chart_data).mark_bar().encode(
                    x=alt.X('Sentimen', sort=['Negatif', 'Netral', 'Positif']),
                    y='Jumlah',
                    color=alt.Color('Sentimen', scale=alt.Scale(domain=['Positif', 'Netral', 'Negatif'], range=['#00CC96', '#808495', '#FF4B4B']), legend=None),
                    tooltip=['Sentimen', 'Jumlah']
                ).properties(height=350)
                st.altair_chart(c, use_container_width=True)
            with col_stat2:
                st.caption("Proporsi Persentase")
                fig_pie = px.pie(count_res, names='Sentimen', values='Jumlah', hole=0.4,
                                 color='Sentimen', color_discrete_map={'Negatif':'#FF4B4B', 'Netral':'#808495', 'Positif':'#00CC96'})
                st.plotly_chart(fig_pie, use_container_width=True)

        # --- Tab 3: word cloud ---
        with tab3:
            st.subheader("☁️ WordCloud: Representasi Visual Teks")
            pilihan_wc = [
                "1. Data Mentah",
                "2. Data Bersih (Preprocessed)",
                "3. Sentimen NEGATIF",
                "4. Sentimen NETRAL",
                "5. Sentimen POSITIF"
            ]
            sent_choice = st.selectbox("Pilih Kategori Teks (Langsung Berubah):", pilihan_wc)

            filter_sentimen = df_final['Prediksi_Sentimen'].str.lower()
            text_wc = ""
            tema_warna = 'viridis'

            if "Mentah" in sent_choice:
                text_wc = " ".join(df_final[kolom_asli].astype(str))
                tema_warna = "cividis"
            elif "Bersih" in sent_choice:
                text_wc = " ".join(df_final['Teks_Bersih'].astype(str))
                tema_warna = "viridis"
            elif "NEGATIF" in sent_choice:
                text_wc = " ".join(df_final[filter_sentimen == 'negatif']['Teks_Bersih'].astype(str))
                tema_warna = "Reds"
            elif "NETRAL" in sent_choice:
                text_wc = " ".join(df_final[filter_sentimen == 'netral']['Teks_Bersih'].astype(str))
                tema_warna = "Greys"
            elif "POSITIF" in sent_choice:
                text_wc = " ".join(df_final[filter_sentimen == 'positif']['Teks_Bersih'].astype(str))
                tema_warna = "Greens"

            # Draw the word cloud
            if not text_wc.strip():
                st.warning("⚠️ Tidak ada data untuk kategori ini di file Anda.")
            else:
                with st.spinner("Menggambar WordCloud..."):
                    try:
                        wc = WordCloud(width=800, height=400, background_color='white', colormap=tema_warna, max_words=100).generate(text_wc)
                    except ValueError:
                        # WordCloud raises when no usable word is left.
                        wc = None
                    if wc is None:
                        st.warning("⚠️ Tidak ada data untuk kategori ini di file Anda.")
                    else:
                        wc_image = wc.to_image()
                        wc_array = np.array(wc_image)
                        fig_wc, ax = plt.subplots(figsize=(10, 5))
                        ax.imshow(wc_array, interpolation='bilinear')
                        ax.axis("off")
                        st.pyplot(fig_wc)
                        # Release the figure; pyplot otherwise keeps one alive
                        # for every rerun of the page.
                        plt.close(fig_wc)

239
views/analisis_teks.py Normal file
View File

@ -0,0 +1,239 @@
import streamlit as st
import pandas as pd
from datetime import datetime
import json
import os
from utils import predict_sentiment
HISTORY_FILE = 'data/riwayat_analisis.json'


def load_history():
    """Read the saved analysis history from ``HISTORY_FILE``.

    Returns:
        list: The stored entries. An empty list is returned when the file does
        not exist, cannot be decoded as JSON, or does not contain a JSON list.
    """
    try:
        with open(HISTORY_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        return []
    except ValueError:
        # Covers json.JSONDecodeError and UnicodeDecodeError (corrupt file).
        return []
    # Callers append to the result, so anything but a list is unusable.
    return data if isinstance(data, list) else []
def save_history(data):
    """Write the analysis history to ``HISTORY_FILE`` as indented JSON.

    The parent directory of ``HISTORY_FILE`` is created when it is missing.

    Args:
        data: JSON-serializable history (the callers pass a list of dicts).
    """
    # Derive the directory from the configured path instead of hard-coding it,
    # so changing HISTORY_FILE cannot leave the target directory missing.
    directory = os.path.dirname(HISTORY_FILE)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(HISTORY_FILE, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)
# ==============================================================================
# Fungsi Clear sekarang menghapus Teks DAN Hasil Prediksi
# ==============================================================================
def clear_input():
    """Button callback: empty the text area and drop the last prediction."""
    resets = {
        'input_teks_analisis': "",
        'latest_result': None,
    }
    for state_key, blank_value in resets.items():
        st.session_state[state_key] = blank_value
# ==============================================================================
# RENDER HALAMAN UTAMA
# ==============================================================================
def render_analisis_teks(model, tokenizer):
    """Render the single-text page: input box, prediction result and history.

    A successful prediction is stored in ``st.session_state['latest_result']``
    and appended to the history, which is persisted with ``save_history``.
    History rows can be searched, selected, deleted (after a confirmation
    step) and downloaded as CSV.

    Args:
        model: Sentiment model, forwarded unchanged to ``predict_sentiment``.
        tokenizer: Tokenizer, forwarded unchanged to ``predict_sentiment``.
    """
    st.title("💬 Analisis Sentimen (Single Text)")
    st.markdown("Ketikkan kalimat opini terkait kebijakan efisiensi anggaran pendidikan, dan biarkan AI memprediksi sentimennya secara *real-time*.")

    # 1. Session-state initialisation
    if 'history_analisis' not in st.session_state:
        st.session_state['history_analisis'] = load_history()
    if 'latest_result' not in st.session_state:
        st.session_state['latest_result'] = None
    if 'show_confirm' not in st.session_state:
        st.session_state['show_confirm'] = False
    if 'rows_to_delete' not in st.session_state:
        st.session_state['rows_to_delete'] = []
    if 'input_teks_analisis' not in st.session_state:
        st.session_state['input_teks_analisis'] = ""

    # ==========================================================================
    # 2. Text input and buttons
    # ==========================================================================
    input_text = st.text_area(
        "Masukkan Teks Opini di sini:",
        height=150,
        placeholder="Contoh: Sangat kecewa anggaran KIP Kuliah dipotong...",
        key='input_teks_analisis'
    )

    # Emptying the box by hand also hides the previous result.
    if not input_text.strip():
        st.session_state['latest_result'] = None

    col_btn1, col_btn2, col_spacer = st.columns([2, 2, 6])
    with col_btn1:
        btn_analisis = st.button("🔍 Analisis Sekarang", type="primary", use_container_width=True)
    with col_btn2:
        st.button("🧹 Bersihkan Teks", on_click=clear_input, use_container_width=True)

    if btn_analisis:
        if input_text.strip():
            with st.spinner('🤖 Model LSTM sedang memproses teks...'):
                label, confidence, probs, clean_txt = predict_sentiment(input_text, model, tokenizer)

            if label == "Error":
                # predict_sentiment signals a missing model/tokenizer with this
                # label; do not present or store it as a neutral prediction.
                st.session_state['latest_result'] = None
                st.error("❌ Model atau tokenizer tidak tersedia, teks tidak dapat dianalisis.")
            else:
                # Built-in floats keep the values JSON-serializable whatever
                # NumPy scalar type the model returned.
                probabilitas_bersih = [float(p) for p in probs]
                keyakinan = float(confidence)

                st.session_state['latest_result'] = {
                    "label": label,
                    "confidence": keyakinan,
                    "probs": probabilitas_bersih,
                    "clean_txt": clean_txt
                }

                waktu_sekarang = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                new_entry = {
                    "Waktu": waktu_sekarang,
                    "Teks Asli": input_text,
                    "Teks Bersih": clean_txt,
                    "Label": label,
                    "Keyakinan (%)": round(keyakinan, 2)
                }
                st.session_state['history_analisis'].append(new_entry)
                save_history(st.session_state['history_analisis'])
        else:
            st.warning("⚠️ Mohon masukkan teks terlebih dahulu.")

    # ==========================================================================
    # 3. Prediction result
    # ==========================================================================
    if st.session_state['latest_result']:
        res = st.session_state['latest_result']
        st.markdown("---")
        col_res1, col_res2 = st.columns([1, 2])

        with col_res1:
            st.subheader("🎯 Hasil Prediksi")
            if res['label'] == "Positif":
                st.success("**🟢 SENTIMEN POSITIF**")
            elif res['label'] == "Negatif":
                st.error("**🔴 SENTIMEN NEGATIF**")
            else:
                st.warning("**⚪ SENTIMEN NETRAL**")
            st.metric("Tingkat Keyakinan (Confidence)", f"{res['confidence']:.2f}%")

        with col_res2:
            st.subheader("📊 Distribusi Probabilitas")
            st.caption("Detail perhitungan matematis model (Total 100%)")
            st.write(f"🔴 **Negatif:** {res['probs'][0]*100:.1f}%")
            st.progress(res['probs'][0])
            st.write(f"⚪ **Netral:** {res['probs'][1]*100:.1f}%")
            st.progress(res['probs'][1])
            st.write(f"🟢 **Positif:** {res['probs'][2]*100:.1f}%")
            st.progress(res['probs'][2])

        st.markdown("#### 🔍 Teks Hasil Preprocessing (Cleaning & Normalisasi)")
        st.info(f"{res['clean_txt']}")

    st.markdown("---")

    # ==========================================================================
    # 4. History
    # ==========================================================================
    st.subheader("📚 Riwayat Analisis")

    if len(st.session_state['history_analisis']) > 0:
        # 1. Prepare the data, newest entry first
        df_history = pd.DataFrame(st.session_state['history_analisis'])
        df_display = df_history.iloc[::-1].reset_index(drop=True)
        if 'Pilih' not in df_display.columns:
            df_display.insert(0, "Pilih", False)

        # 2. Filter & select all
        c_search, c_all = st.columns([3, 1])
        with c_search:
            q = st.text_input("Cari:", placeholder="Filter riwayat...", label_visibility="collapsed")
        with c_all:
            if st.checkbox("Pilih Semua"):
                df_display['Pilih'] = True

        if q:
            df_display = df_display[df_display['Teks Asli'].str.contains(q, case=False, na=False)]

        # 3. Editable table (only the selection checkbox is meant to change)
        with st.container():
            edited_df = st.data_editor(
                df_display,
                column_config={
                    "Pilih": st.column_config.CheckboxColumn("Hapus?", width="small", default=False),
                    "Waktu": st.column_config.TextColumn("Waktu", disabled=True),
                    "Teks Asli": st.column_config.TextColumn("Teks Tweet", disabled=True),
                    "Label": st.column_config.TextColumn("Prediksi", disabled=True),
                    "Keyakinan (%)": st.column_config.NumberColumn("Score", format="%.2f%%", disabled=True)
                },
                hide_index=True,
                use_container_width=True,
                key="history_editor"
            )

        # 4. Action buttons
        selected_rows = edited_df[edited_df['Pilih'] == True]
        count = len(selected_rows)

        # Reserved slot so the confirmation appears above the buttons.
        popup_placeholder = st.empty()

        # --- Row 1: delete buttons ---
        col_del_1, col_del_2 = st.columns(2)
        with col_del_1:
            if st.button(f"🗑️ Hapus ({count}) Item", type="primary", disabled=count==0, use_container_width=True):
                # Rows are identified by their timestamp string.
                st.session_state['rows_to_delete'] = selected_rows['Waktu'].tolist()
                st.session_state['show_confirm'] = True
        with col_del_2:
            if st.button("🚨 Hapus Semua", type="secondary", use_container_width=True):
                st.session_state['rows_to_delete'] = "ALL"
                st.session_state['show_confirm'] = True

        # --- Row 2: download button ---
        st.write("")
        csv_data = df_display.drop(columns=['Pilih']).to_csv(index=False).encode('utf-8')
        st.download_button(
            label="📥 Download CSV (Backup Data Riwayat)",
            data=csv_data,
            file_name="Riwayat_Analisis.csv",
            mime="text/csv",
            use_container_width=True
        )

        # --- 5. Confirmation pop-up ---
        if st.session_state.get('show_confirm', False):
            with popup_placeholder.container():
                st.markdown("---")
                msg = "SEMUA DATA" if st.session_state['rows_to_delete'] == "ALL" else f"{len(st.session_state['rows_to_delete'])} DATA TERPILIH"

                with st.chat_message("assistant", avatar="⚠️"):
                    st.write(f"**KONFIRMASI:** Apakah Anda yakin ingin menghapus **{msg}**?")
                    col_yes, col_no = st.columns([1, 4])
                    with col_yes:
                        if st.button("✅ Ya, Hapus"):
                            if st.session_state['rows_to_delete'] == "ALL":
                                st.session_state['history_analisis'] = []
                            else:
                                targets = st.session_state['rows_to_delete']
                                st.session_state['history_analisis'] = [
                                    item for item in st.session_state['history_analisis']
                                    if item['Waktu'] not in targets
                                ]
                            save_history(st.session_state['history_analisis'])
                            st.session_state['show_confirm'] = False
                            st.session_state['rows_to_delete'] = []
                            st.success("Berhasil dihapus!")
                            st.rerun()
                    with col_no:
                        if st.button("❌ Batal"):
                            st.session_state['show_confirm'] = False
                            st.session_state['rows_to_delete'] = []
                            st.rerun()
                st.markdown("---")
    else:
        st.info("📝 Belum ada riwayat analisis.")

62
views/beranda.py Normal file
View File

@ -0,0 +1,62 @@
import streamlit as st
import pandas as pd
import os
def render_beranda():
    """Render the landing page: model summary, methodology and feature list.

    The accuracy figure is read from ``model/Tabel_Performa_LSTM.csv`` (row
    ``accuracy``, column ``f1-score``) and shown as a percentage; ``0.0`` is
    shown when the file or that cell cannot be read.
    """
    st.title("🎓 Selamat Datang di Sistem Analisis Sentimen")
    st.markdown("### Kebijakan Efisiensi Anggaran Pendidikan (2025)")
    st.markdown("Sistem ini dikembangkan untuk menganalisis opini masyarakat di media sosial X (Twitter) terkait isu pemotongan atau efisiensi anggaran di sektor pendidikan, seperti Dana BOS, PIP, KIP Kuliah, dan Tunjangan Guru.")
    st.markdown("---")

    # ==========================================================================
    # Data retrieval
    # ==========================================================================
    # 1. Testing accuracy
    try:
        df_perf = pd.read_csv('model/Tabel_Performa_LSTM.csv', index_col=0)
        akurasi_testing = round(df_perf.loc['accuracy', 'f1-score'] * 100, 2)
    except (OSError, KeyError, ValueError, TypeError):
        # Missing/unreadable file, missing row or column, or a non-numeric cell.
        akurasi_testing = 0.0

    # --- 1. Model metric summary ---
    st.subheader("📊 Ringkasan Model Machine Learning")
    m1, m2, m3, m4 = st.columns(4)
    m1.metric("Arsitektur", "LSTM", "Deep Learning")
    m2.metric("Akurasi Model", f"{akurasi_testing}%", "Data Testing P5")
    m3.metric("Pembagian Data", "80 : 20", "Latih : Uji")
    m4.metric("Metode Ekstraksi", "LDA", "Topic Modeling")
    st.markdown("---")

    # --- 2. Research methodology ---
    st.subheader("🛠️ Metodologi & Arsitektur Sistem")
    col_metode1, col_metode2 = st.columns([1, 1])
    with col_metode1:
        # The literal's lines are left unindented so the text is unchanged.
        st.markdown("""
**Tahapan Pemrosesan:**
1. **Crawling Data:** Pengambilan data via Tweet Harvest (Feb-Mar 2025).
2. **Preprocessing:** Case folding, Cleaning, Tokenizing dan Normalisasi Slang. *(Tanpa Stopword & Stemming agar urutan konteks kalimat tetap utuh)*.
3. **Word Embedding:** Standard Keras Embedding (Dimensi 128) dengan fitur *Masking*.
4. **Deep Learning:** Model **Long Short-Term Memory (LSTM)** biasa untuk klasifikasi sentimen (Negatif, Netral, Positif).
5. **Topic Modeling:** Latent Dirichlet Allocation (LDA) untuk mengetahui topik dominan.
""")
    with col_metode2:
        st.info("**Mengapa menggunakan LSTM?** \n\nPenggunaan algoritma LSTM yang dipadukan dengan *Keras Embedding* terbukti lebih ringan dari segi komputasi namun tetap optimal dalam menangkap pola konteks kalimat secara sekuensial (berurutan). Fitur *Masking* memastikan padding kalimat tidak merusak makna sentimen.")
        st.success(f"**Hasil Pelatihan Model:** \nMelalui 5 tahapan percobaan (skenario 20% hingga 100% data latih), Akurasi Testing pada skenario P5 (100% data) mencapai **{akurasi_testing}%**. Ini menunjukkan model mampu memprediksi data baru dengan sangat baik.")
    st.markdown("---")

    # --- 3. System features ---
    st.subheader("✨ Fitur Utama Sistem")
    f1, f2, f3 = st.columns(3)
    with f1:
        st.success("**1. Dashboard Visualisasi**\n\nMenampilkan tren waktu, WordCloud, dan distribusi sentimen masyarakat secara interaktif.")
    with f2:
        st.warning("**2. Analisis Teks Langsung**\n\nPengguna dapat mengetikkan kalimat opini baru dan model akan memprediksi sentimennya secara *real-time*.")
    with f3:
        st.info("**3. Analisis File CSV**\n\nMengunggah data komentar/tweet dalam jumlah banyak sekaligus untuk dianalisis massal.")

445
views/proses_data.py Normal file
View File

@ -0,0 +1,445 @@
import streamlit as st
import pandas as pd
import numpy as np
import os
import hashlib
from sklearn.model_selection import train_test_split
import graphviz
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix
@st.cache_data
def load_data(file_path):
    """Read a CSV file into a DataFrame, falling back to an empty frame.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        pandas.DataFrame: The parsed file, or an empty DataFrame when the file
        cannot be opened or parsed.
    """
    try:
        return pd.read_csv(file_path)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: pandas
        # EmptyDataError / ParserError and text decoding errors.
        return pd.DataFrame()
def render_proses_data():
st.title("⚙️ Tahapan Proses Data & Modeling")
st.markdown("Berikut adalah dokumentasi teknis alur pengolahan data dari mentah hingga evaluasi model, disertai penjelasan metodologi.")
# LOAD DATA
df_mentah = load_data('data/Data_Lengkap_Tokenisasi.csv')
# ==============================================================================
# NAVIGASI
# ==============================================================================
opsi_tahapan = [
"1. Crawling Data",
"2. Preprocessing",
"3. Persiapan Data Latih",
"4. Arsitektur Model",
"5. Evaluasi Model",
"6. Topic Modeling (LDA)"
]
pilihan = st.radio("Pilih Tahapan Proses:", options=opsi_tahapan, horizontal=True, label_visibility="collapsed")
st.markdown("---")
# ==============================================================================
# KONTEN TAHAPAN
# ==============================================================================
# --- 1. CRAWLING DATA ---
if pilihan == "1. Crawling Data":
st.header("1. Pengumpulan Data (Crawling)")
st.info("Tools: **Tweet-Harvest (Node.js)** API Scraper")
st.success(f"✅ Total Data Terkumpul: **{len(df_mentah):,} Data** (Setelah Deduplikasi)")
st.warning("⚠️ **Catatan Imbalance:** Distribusi sentimen awal tidak seimbang, ditangani dengan ROS (Random Over Sampling) pada tahap Training.")
st.markdown("### 📋 Kriteria Pengambilan Data")
st.markdown("""
- **Platform**: X (Twitter)
- **Periode**: 01 Februari 2025 - 31 Maret 2025
- **Filter Sistem**: Hanya Bahasa Indonesia (`lang:id`) & Mengabaikan Retweet (`-is:retweet`).
**Kata Kunci (Search Queries):**
**1. Core Keywords (Isu Utama):**
* `"efisiensi anggaran pendidikan" lang:id -is:retweet`
* `"pemotongan anggaran pendidikan" lang:id -is:retweet`
* `"anggaran pendidikan dikurangi" lang:id -is:retweet`
**2. Program Spesifik:**
* `("dana BOS" OR "bantuan operasional sekolah") ("dipotong" OR "dikurangi" OR "efisiensi" OR "kurang") lang:id -is:retweet`
* `("PIP" OR "program indonesia pintar") ("dipotong" OR "dikurangi" OR "efisiensi" OR "cair") lang:id -is:retweet`
* `("KIP Kuliah" OR "kartu indonesia pintar") ("dipotong" OR "dikurangi" OR "efisiensi" OR "sulit") lang:id -is:retweet`
* `("tunjangan guru" OR "sertifikasi guru") ("dipotong" OR "dikurangi" OR "efisiensi" OR "telat") lang:id -is:retweet`
**3. Kombinasi Isu Umum:**
* `(anggaran OR dana) (pendidikan OR sekolah OR kampus OR guru) (efisiensi OR potong OR dikurangi OR berkurang) lang:id -is:retweet`
- **Proses Lanjutan**: Deduplikasi (Hapus ID & Teks yang berulang).
""")
st.markdown("### 🔍 Preview Data Mentah")
if not df_mentah.empty:
search_mentah = st.text_input("Cari kata dalam Tweet (Mentah):", placeholder="Contoh: dana bos", key="cari_mentah")
if search_mentah:
df_tampil = df_mentah[df_mentah['Teks Tweet'].str.contains(search_mentah, case=False, na=False)].copy()
else:
df_tampil = df_mentah.copy()
df_tampil = df_tampil[['created_at', 'username', 'Teks Tweet']].rename(columns={'created_at': 'Created At', 'username': 'Username'})
df_tampil.index = range(1, len(df_tampil) + 1)
st.dataframe(df_tampil, use_container_width=True, height=250)
# --- 2. PREPROCESSING ---
elif pilihan == "2. Preprocessing":
st.header("2. Preprocessing Teks")
st.markdown("""
**Tujuan:** Mengubah data teks tidak terstruktur menjadi format bersih yang siap diproses mesin.
Pada penelitian ini, kami memutuskan untuk **TIDAK MELAKUKAN Stemming & Stopword Removal**.
* **Alasan:** Model Deep Learning (seperti LSTM) membutuhkan konteks kalimat utuh untuk memahami nuansa sentimen (contoh: kata *"tidak"* sangat penting untuk membalikkan makna *"suka"* menjadi *"tidak suka"*). Menghapus *stopword* justru dapat merusak tata bahasa yang akan dibaca oleh model secara sekuensial.
""")
with st.expander(" Rincian 5 Langkah Preprocessing", expanded=True):
st.markdown("""
1. **Case Folding:** Menyeragamkan huruf menjadi kecil (*lowercase*).
2. **Cleaning:** Menghapus elemen non-teks (URL, Mention `@`, Hashtag `#`, Angka, Tanda Baca).
3. **Tokenizing:** Memecah kalimat menjadi potongan kata per kata.
4. **Normalisasi Slang:** Mengubah kata tidak baku (*bgt, gk, sy*) menjadi baku (*banget, tidak, saya*) menggunakan kamus *lexicon*.
5. **Detokenizing:** Menggabungkan kata kembali menjadi kalimat utuh.
""")
st.subheader("🔍 Komparasi Sebelum vs Sesudah")
if not df_mentah.empty:
search_pre = st.text_input("Cari kata (Hasil Akhir):", placeholder="Contoh: guru", key="cari_pre")
cols = ['Teks Tweet', 'Tweet_CaseFolded', 'Tweet_Cleaned', 'Tweet_Tokenized', 'Tweet_Normalized', 'Tweet_Final']
cols_exist = [c for c in cols if c in df_mentah.columns]
df_tampil_pre = df_mentah[cols_exist].copy()
if search_pre:
df_tampil_pre = df_tampil_pre[df_tampil_pre['Tweet_Final'].str.contains(search_pre, case=False, na=False)]
df_tampil_pre.index = range(1, len(df_tampil_pre) + 1)
st.dataframe(df_tampil_pre, use_container_width=True, height=400)
else:
st.warning("Data preprocessing belum tersedia.")
# --- 3. PERSIAPAN DATA LATIH ---
elif pilihan == "3. Persiapan Data Latih":
st.header("3. Transformasi & Splitting Data")
st.markdown("""
Agar teks dapat diproses oleh Neural Network, data harus diubah menjadi bentuk numerik (vektor).
Selain itu, dilakukan penyeimbangan data agar model tidak bias.
""")
st.subheader("A. Tokenization & Padding")
st.write("Setiap kata unik dalam dataset diberi ID angka. Karena panjang tweet berbeda-beda, kita lakukan **Padding (Post)** agar semua input memiliki panjang seragam (**100 kata**). Angka 0 di akhir akan diabaikan oleh fitur *Masking* pada model.")
if not df_mentah.empty and 'Label' in df_mentah.columns:
df_token = df_mentah.dropna(subset=['Label']).copy()
# Helper simulasi token
def get_word_id(word):
    """Return a stable pseudo token ID in the range 1..3000 for ``word``.

    This only simulates a tokenizer vocabulary: the ID is derived from the
    MD5 digest of the UTF-8 encoded word, so the same word always maps to
    the same number.
    """
    digest_bytes = hashlib.md5(word.encode()).digest()
    # Big-endian interpretation of the digest equals int(hexdigest, 16).
    digest_value = int.from_bytes(digest_bytes, "big")
    return digest_value % 3000 + 1
df_token['Detail Token'] = df_token['Tweet_Final'].apply(lambda t: ", ".join([f"{w}:{get_word_id(w)}" for w in str(t).split()[:10]]))
df_token['Padding Sequence (100)'] = df_token['Tweet_Final'].apply(lambda t: str(([get_word_id(w) for w in str(t).split()] + [0]*100)[:20]) + " ...")
st.dataframe(df_token[['Tweet_Final', 'Detail Token', 'Padding Sequence (100)']], use_container_width=True)
st.markdown("---")
st.subheader("B. Splitting 80:20 & Skenario 5 Percobaan")
st.markdown("""
**Skenario Pelatihan:**
Model dilatih menggunakan **5 Skenario Percobaan** (P1 hingga P5) dengan porsi data latih masing-masing 20%, 40%, 60%, 80%, dan 100% (dari total 80% split data latih).
**Penanganan Imbalance (ROS):**
Kami menduplikasi data minoritas (Positif/Netral) secara acak (*Random Over Sampling*) di **setiap porsi data latih** hingga jumlahnya setara dengan kelas mayoritas (Negatif). Data Testing (20%) dibiarkan murni agar evaluasi tetap objektif.
""")
df_train, df_test = train_test_split(df_token, test_size=0.2, random_state=42, stratify=df_token['Label'])
kelas_mayoritas = df_train['Label'].value_counts().max()
col_metric1, col_metric2, col_metric3 = st.columns(3)
col_metric1.metric("Maksimal Data Latih (80%)", f"{len(df_train):,} Sample", "Skenario P5")
col_metric2.metric("Data Uji Tetap (20%)", f"{len(df_test):,} Sample", "Validasi Objektif")
col_metric3.metric("Target ROS P5", f"{kelas_mayoritas}", "Per Kelas Sentimen")
st.success(f"✅ **Status Data:** Dataset latih telah diseimbangkan (Balanced) menggunakan teknik ROS pada tahapan pemodelan.")
# --- 4. ARSITEKTUR MODEL ---
elif pilihan == "4. Arsitektur Model":
st.header("🧠 4. Arsitektur Model: LSTM Standar")
st.markdown("""
Kami menggunakan arsitektur **Long Short-Term Memory (LSTM)** yang dipadukan dengan *Keras Embedding Layer* dan fitur *Masking*.
""")
c_text, c_spacer, c_img = st.columns([1.5, 0.2, 1])
with c_text:
st.subheader("Rincian Layer & Fungsinya:")
st.markdown("""
1. **Embedding (Keras):** Mengubah indeks kata menjadi vektor padat (128 dimensi). Fitur `mask_zero=True` diaktifkan agar model murni fokus pada teks tanpa terdistraksi oleh angka padding (0) di akhir kalimat.
2. **SpatialDropout1D (0.2):** Mematikan sebagian 1D feature maps secara acak untuk mencegah model "menghafal" data secara berlebihan (*overfitting*).
3. **LSTM (64 Units):** Memproses urutan kata secara sekuensial (dari awal hingga akhir kalimat) agar model bisa memahami relasi dan pola frasa sentimen dengan sangat baik.
4. **Dense Layer (32 Units):** Ekstraksi fitur tingkat tinggi menggunakan fungsi aktivasi ReLU dengan peluruhan (Dropout 0.2).
5. **Dense Output (3 Units):** Layer akhir dengan aktivasi *Softmax* yang menghasilkan nilai probabilitas klasifikasi untuk **Negatif, Netral, dan Positif**.
""")
param_data = {
"Nama Layer": ["Embedding", "SpatialDropout", "LSTM", "Dense", "Dense Output"],
"Output Shape": ["(None, 100, 128)", "(None, 100, 128)", "(None, 64)", "(None, 32)", "(None, 3)"],
"Jml Parameter": ["1,280,000", "0", "49,408", "2,080", "99"]
}
st.dataframe(pd.DataFrame(param_data), use_container_width=True)
with c_spacer:
st.empty()
with c_img:
st.caption("Visualisasi Alur Data:")
try:
graph = graphviz.Digraph(node_attr={'shape': 'box', 'style': 'filled', 'fillcolor': '#E8F0FE'})
graph.attr(rankdir='TB')
graph.node('I', 'Input Teks\n(Integer Encoded)', fillcolor='#FFEBEE')
graph.node('E', 'Embedding Layer\n(Dimensi 128, Masking)', fillcolor='#FFF3E0')
graph.node('L', 'LSTM Layer\n(Proses Sekuensial)', fillcolor='#E3F2FD')
graph.node('D', 'Dense & Softmax\n(Klasifikasi 3 Kelas)', fillcolor='#E8F5E9')
graph.edge('I', 'E')
graph.edge('E', 'L')
graph.edge('L', 'D')
st.graphviz_chart(graph, use_container_width=True)
except:
st.info("Install graphviz untuk melihat diagram alir.")
# ==============================================================================
# 5. EVALUASI MODEL
# ==============================================================================
elif pilihan == "5. Evaluasi Model":
st.header("5. Evaluasi Performa Model (Skenario P1-P5)")
st.markdown("Evaluasi ini mencakup perbandingan 5 skenario pelatihan berdasarkan ukuran rasio data latih (20% hingga 100%), yang diuji menggunakan **Data Testing murni (20%)**.")
tab_a, tab_b, tab_c = st.tabs(["📊 Metrik (Model P5)", "📈 Perbandingan 5 Skenario", "📉 Detail Learning Curve"])
# --- TAB A: TABEL ANGKA ---
with tab_a:
st.subheader("1. Classification Report (Model P5)")
st.markdown("""
- **Precision:** Ketepatan prediksi model (Meminimalisir salah tebak positif palsu).
- **Recall:** Kelengkapan prediksi (Meminimalisir salah tebak negatif palsu).
- **F1-Score:** Rata-rata harmonis antara Precision dan Recall.
""")
path_perf = 'model/Tabel_Performa_LSTM.csv'
if not os.path.exists(path_perf): path_perf = 'Tabel_Performa_LSTM.csv'
if os.path.exists(path_perf):
df_perf = pd.read_csv(path_perf, index_col=0)
st.table(
df_perf.style.highlight_max(axis=0, props='background-color: #FFEB3B; color: black; font-weight: bold')
)
if 'accuracy' in df_perf.index:
acc = df_perf.loc['accuracy', 'f1-score']
st.metric("Akurasi Total (Data Testing P5)", f"{acc*100:.2f}%")
else:
st.warning("⚠️ File 'Tabel_Performa_LSTM.csv' belum tersedia.")
st.markdown("---")
st.subheader("2. Confusion Matrix (Model P5)")
path_cm = 'model/Data_Confusion_Matrix.csv'
if os.path.exists(path_cm):
df_cm_data = pd.read_csv(path_cm)
if 'y_true' in df_cm_data.columns and 'y_pred' in df_cm_data.columns:
labels = ['Negatif', 'Netral', 'Positif']
cm = confusion_matrix(df_cm_data['y_true'], df_cm_data['y_pred'])
fig_cm = px.imshow(cm, text_auto=True, labels=dict(x="Prediksi Model", y="Label Aktual (Asli)", color="Jumlah Data"), x=labels, y=labels, color_continuous_scale='Blues')
fig_cm.update_layout(title="Matrix Kebenaran Prediksi P5")
st.plotly_chart(fig_cm, use_container_width=True)
else:
st.warning("⚠️ File 'Data_Confusion_Matrix.csv' tidak ditemukan.")
# --- TAB B: BAR CHART PERBANDINGAN SKENARIO (DINAMIS DARI CSV) ---
with tab_b:
st.subheader("Perbandingan Akurasi Skenario P1 hingga P5")
st.markdown("Grafik interaktif ini menunjukkan bahwa semakin besar porsi data latih yang diberikan, maka kemampuan model dalam mengklasifikasi sentimen cenderung semakin baik.")
path_akurasi = 'model/Akurasi_Skenario.csv'
if os.path.exists(path_akurasi):
df_acc_skenario = pd.read_csv(path_akurasi)
rata_rata = df_acc_skenario['Akurasi'].mean()
# Buat label gabungan P1 (20%), dst
df_acc_skenario['Label_X'] = df_acc_skenario['Skenario'] + " (" + df_acc_skenario['Porsi_Data'] + ")"
fig_bar = px.bar(
df_acc_skenario, x='Label_X', y='Akurasi',
text='Akurasi',
color='Skenario',
color_discrete_sequence=px.colors.qualitative.Set1,
title="Persentase Akurasi per Skenario Data Latih",
labels={'Label_X': 'Skenario (Porsi Data Latih)', 'Akurasi': 'Akurasi (%)'}
)
fig_bar.update_traces(texttemplate='%{text:.2f}%', textposition='outside')
fig_bar.add_hline(y=rata_rata, line_dash="dot", line_color="red", annotation_text=f"Rata-rata: {rata_rata:.2f}%")
fig_bar.update_layout(yaxis_range=[0, 100], showlegend=False)
st.plotly_chart(fig_bar, use_container_width=True)
else:
st.warning("⚠️ File 'Akurasi_Skenario.csv' belum tersedia. Harap export dari Colab.")
# --- TAB C: KURVA PEMBELAJARAN SEMUA SKENARIO (DINAMIS DARI CSV) ---
with tab_c:
st.subheader("Grafik Pergerakan Learning Curve")
st.info("Pilih skenario di bawah ini untuk melihat detail pergerakan Akurasi dan Loss-nya secara interaktif.")
path_hist_semua = 'model/Riwayat_Training_Semua.csv'
if os.path.exists(path_hist_semua):
df_all_hist = pd.read_csv(path_hist_semua)
# Opsi interaktif untuk memilih Skenario
skenario_pilihan = st.selectbox("Pilih Skenario:", ['P1', 'P2', 'P3', 'P4', 'P5'], index=4)
# Filter data berdasarkan skenario yang dipilih
df_hist_filter = df_all_hist[df_all_hist['Skenario'] == skenario_pilihan]
col_chart1, col_chart2 = st.columns(2)
with col_chart1:
fig_acc_line = go.Figure()
fig_acc_line.add_trace(go.Scatter(x=df_hist_filter['Epoch'], y=df_hist_filter['accuracy'], mode='lines+markers', name='Train Acc'))
fig_acc_line.add_trace(go.Scatter(x=df_hist_filter['Epoch'], y=df_hist_filter['val_accuracy'], mode='lines+markers', name='Val Acc'))
fig_acc_line.update_layout(title=f"Akurasi ({skenario_pilihan})", xaxis_title="Epochs", yaxis_title="Akurasi", hovermode="x unified")
st.plotly_chart(fig_acc_line, use_container_width=True)
with col_chart2:
fig_loss_line = go.Figure()
fig_loss_line.add_trace(go.Scatter(x=df_hist_filter['Epoch'], y=df_hist_filter['loss'], mode='lines+markers', name='Train Loss', line=dict(color='orange')))
fig_loss_line.add_trace(go.Scatter(x=df_hist_filter['Epoch'], y=df_hist_filter['val_loss'], mode='lines+markers', name='Val Loss', line=dict(color='red')))
fig_loss_line.update_layout(title=f"Loss ({skenario_pilihan})", xaxis_title="Epochs", yaxis_title="Loss", hovermode="x unified")
st.plotly_chart(fig_loss_line, use_container_width=True)
else:
st.warning("⚠️ File 'Riwayat_Training_Semua.csv' belum tersedia. Harap export dari Colab.")
# ==============================================================================
# 6. TOPIC MODELING (LDA)
# ==============================================================================
elif pilihan == "6. Topic Modeling (LDA)":
st.header("6. Topic Modeling (LDA)")
st.markdown("""
**Tujuan:** Menggali "Apa yang sebenarnya dibicarakan publik?" di balik masing-masing sentimen menggunakan metode **Latent Dirichlet Allocation (LDA)**.
""")
# --- BAGIAN A: METRIK EVALUASI (COHERENCE SCORE) ---
st.subheader("A. Optimasi Jumlah Topik (Coherence Score)")
st.info("💡 Grafik ini menunjukkan bagaimana model menentukan jumlah topik (K) terbaik secara ilmiah berdasarkan skor *Coherence c_v* tertinggi.")
col_lda1, col_lda2 = st.columns([2, 1])
with col_lda1:
path_coherence = 'model/Nilai_Coherence.csv'
if not os.path.exists(path_coherence): path_coherence = 'Nilai_Coherence.csv'
if os.path.exists(path_coherence):
df_coh = pd.read_csv(path_coherence)
# Plot Line Chart
fig_coh = px.line(df_coh, x='Num_Topics', y='Coherence_Score', markers=True,
title="Pergerakan Nilai Coherence Score",
labels={'Num_Topics': 'Jumlah Topik', 'Coherence_Score': 'Skor Koherensi (c_v)'})
max_score = df_coh['Coherence_Score'].max()
best_topic_num = df_coh.loc[df_coh['Coherence_Score'].idxmax(), 'Num_Topics']
fig_coh.add_annotation(x=best_topic_num, y=max_score,
text=f"Optimal: {int(best_topic_num)} Topik",
showarrow=True, arrowhead=1)
st.plotly_chart(fig_coh, use_container_width=True)
else:
st.warning("⚠️ File 'Nilai_Coherence.csv' tidak ditemukan.")
with col_lda2:
st.markdown("### 📝 Interpretasi:")
st.write("""
Algoritma mesin bekerja dengan mencari pola kata yang sering muncul bersamaan di dalam satu dokumen teks.
**Coherence Score** bertugas untuk mengukur seberapa masuk akal ("nyambung") kumpulan kata-kata dalam satu topik. Semakin tinggi skornya, maka topik tersebut akan semakin mudah diinterpretasikan oleh pembaca/manusia.
""")
st.markdown("---")
# --- BAGIAN B: VISUALISASI TOPIK (BAR CHART DARI CSV) ---
st.subheader("B. Visualisasi Kata Kunci per Topik")
st.write("Berikut adalah distribusi kata-kata kunci dominan yang mewakili setiap topik berdasarkan prediksi sentimen data *testing*.")
path_lda = 'model/Hasil_Analisis_Topik_LDA.csv'
if not os.path.exists(path_lda): path_lda = 'Hasil_Analisis_Topik_LDA.csv'
if os.path.exists(path_lda):
try:
df_lda = pd.read_csv(path_lda)
# Fungsi Parsing Teks dari format CSV
def parse_lda_string(text_data):
    """Turn a comma-separated keyword string into a DataFrame for a bar chart.

    Args:
        text_data: Content of one 'Kata Kunci' cell, e.g. ``"guru, dana, bos"``.
            ``None`` or NaN (how pandas represents an empty CSV cell) is
            treated as "no keywords".

    Returns:
        A DataFrame with columns 'Kata' (keyword) and 'Bobot' (an artificial,
        rank-based weight: the first keyword gets the largest value), sorted
        by 'Bobot' ascending. An empty DataFrame when there are no keywords.
    """
    # str(NaN) would otherwise yield the bogus keyword "nan" in the chart.
    if text_data is None or (isinstance(text_data, float) and pd.isna(text_data)):
        return pd.DataFrame()

    # Keywords arrive already cleaned and comma-separated; drop empty pieces.
    stripped = (piece.strip() for piece in str(text_data).split(','))
    data_items = [{'Kata': word} for word in stripped if word]

    df_res = pd.DataFrame(data_items)
    if not df_res.empty:
        # The CSV carries no real weights, so the position in the list is used
        # as a stand-in weight purely to give the horizontal bars a length.
        df_res['Bobot'] = range(len(df_res), 0, -1)
        df_res = df_res.sort_values(by='Bobot', ascending=True)
    return df_res
# Tabs untuk Topik
t_neg, t_net, t_pos = st.tabs(["🔴 Topik Negatif", "⚪ Topik Netral", "🟢 Topik Positif"])
mapping = {'negatif': t_neg, 'netral': t_net, 'positif': t_pos}
for sentimen, tab in mapping.items():
with tab:
# Filter CSV berdasarkan sentimen
df_subset = df_lda[df_lda['Sentimen'].str.lower() == sentimen]
if df_subset.empty:
st.warning(f"Belum ada data ekstraksi topik untuk sentimen {sentimen.upper()}.")
else:
col_t1, col_t2 = st.columns(2)
# Tampilkan Topik dengan 2 kolom berjajar
for idx, row in df_subset.iterrows():
topik_ke = row['Topik Ke']
df_chart = parse_lda_string(row['Kata Kunci'])
if not df_chart.empty:
fig = px.bar(
df_chart, x='Bobot', y='Kata', orientation='h',
title=f"<b>Topik {topik_ke}</b>",
color='Bobot',
color_continuous_scale='Reds' if sentimen == 'negatif' else 'Greys' if sentimen == 'netral' else 'Greens'
)
# Sembunyikan X-axis karena ini hanya bobot representasi urutan
fig.update_layout(height=280, showlegend=False, xaxis_title=None, xaxis_visible=False)
if idx % 2 == 0:
with col_t1: st.plotly_chart(fig, use_container_width=True)
else:
with col_t2: st.plotly_chart(fig, use_container_width=True)
except Exception as e:
st.error(f"Gagal memproses visualisasi data LDA: {e}")
else:
st.warning("⚠️ File 'Hasil_Analisis_Topik_LDA.csv' belum tersedia di dalam folder model.")

339
views/visualisasi.py Normal file
View File

@ -0,0 +1,339 @@
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from wordcloud import WordCloud
import os
import math
def _resolve_model_file(filename):
    """Return 'model/<filename>' if it exists, else '<filename>' if it exists, else None."""
    for candidate in (f'model/{filename}', filename):
        if os.path.exists(candidate):
            return candidate
    return None


def _load_visual_data(file_path):
    """Load the tweet dataset and derive the 'Label_Clean' and 'Tanggal' columns.

    Returns:
        The DataFrame, or ``None`` (after showing an error in the UI) when the
        file or its 'Label' column is missing.
    """
    if not os.path.exists(file_path):
        st.error(f"❌ File dataset tidak ditemukan di: {file_path}")
        return None

    df = pd.read_csv(file_path)
    if 'Label' not in df.columns:
        st.error("❌ Kolom 'Label' tidak ditemukan dalam CSV.")
        return None
    df['Label_Clean'] = df['Label'].astype(str).str.lower().str.strip()

    # 'created_at' takes precedence over an existing 'Tanggal' column.
    if 'created_at' in df.columns:
        df['Tanggal'] = pd.to_datetime(df['created_at']).dt.date
    elif 'Tanggal' in df.columns:
        df['Tanggal'] = pd.to_datetime(df['Tanggal']).dt.date
    else:
        st.warning("⚠️ Kolom tanggal tidak ditemukan. Grafik tren waktu mungkin tidak muncul.")
    return df


def _render_sentiment_distribution(df):
    """Draw the sentiment donut chart and the daily sentiment trend line."""
    st.subheader("📊 Distribusi & Polaritas Sentimen")
    col_pie, col_bar = st.columns([1, 1.5])

    # --- A. Pie chart ---
    with col_pie:
        df_pie = df['Label_Clean'].value_counts().reset_index()
        df_pie.columns = ['Sentimen', 'Jumlah']
        fig_pie = px.pie(
            df_pie,
            names='Sentimen',
            values='Jumlah',
            hole=0.4,
            color='Sentimen',
            color_discrete_map={'negatif': '#FF4B4B', 'netral': '#808495', 'positif': '#00CC96'},
            title="Persentase Sentimen",
        )
        fig_pie.update_layout(
            showlegend=True,
            legend=dict(orientation="h", yanchor="bottom", y=-0.3, xanchor="center", x=0.5),
        )
        st.plotly_chart(fig_pie, use_container_width=True)

    # --- B. Trend over time ---
    with col_bar:
        if 'Tanggal' not in df.columns:
            st.info("Data Tanggal tidak tersedia untuk menampilkan tren.")
            return

        # The trend chart is limited to a fixed Feb-Mar 2025 window.
        start_date = pd.to_datetime("2025-02-01").date()
        end_date = pd.to_datetime("2025-03-31").date()
        in_window = (df['Tanggal'] >= start_date) & (df['Tanggal'] <= end_date)
        df_filtered = df[in_window]

        kolom_label = 'Label' if 'Label' in df_filtered.columns else 'Label_Clean'
        df_trend = df_filtered.groupby(['Tanggal', kolom_label]).size().reset_index(name='Jumlah')
        fig_trend = px.line(
            df_trend,
            x='Tanggal',
            y='Jumlah',
            color=kolom_label,
            markers=True,
            # The raw 'Label' column is not normalised, so several spellings
            # of each class are mapped to the same colour.
            color_discrete_map={
                'negatif': '#FF4B4B', 'netral': '#808495', 'positif': '#00CC96',
                'Negatif': '#FF4B4B', 'Netral': '#808495', 'Positif': '#00CC96',
                'negative': '#FF4B4B', 'neutral': '#808495', 'positive': '#00CC96',
            },
            title="Tren Sentimen Harian (Feb - Mar 2025)",
        )
        fig_trend.update_xaxes(range=[start_date, end_date])
        fig_trend.update_layout(
            xaxis_title="Tanggal",
            yaxis_title="Jumlah Tweet",
            hovermode="x unified",
            legend=dict(orientation="h", y=1.1),
        )
        st.plotly_chart(fig_trend, use_container_width=True)


def _new_wordcloud(colormap):
    """Build the WordCloud object shared by all word-cloud tabs."""
    return WordCloud(width=800, height=400, background_color='white', colormap=colormap, max_words=100)


def _show_wordcloud(wc):
    """Render an already generated WordCloud with matplotlib, then free the figure."""
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wc, interpolation='bilinear')
        ax.axis("off")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure registered until it is closed; without
        # this, each render would leave another figure in memory.
        plt.close(fig)


def _wordcloud_from_text(text, colormap):
    """Generate and display a word cloud from raw text; warn when text is empty."""
    if not isinstance(text, str) or not text.strip():
        st.warning("⚠️ Tidak ada data teks yang cukup.")
        return
    with st.spinner("Sedang menggambar WordCloud..."):
        try:
            _show_wordcloud(_new_wordcloud(colormap).generate(text))
        except Exception as e:
            st.error(f"Error WordCloud: {e}")


def _wordcloud_from_freq_csv(file_path, colormap):
    """Display a word cloud from a CSV that has 'Word' and 'Frequency' columns."""
    if not os.path.exists(file_path):
        st.warning(f"⚠️ File frekuensi belum tersedia: {file_path}")
        return
    try:
        df_freq = pd.read_csv(file_path)
        # generate_from_frequencies expects a {word: frequency} mapping.
        freq_dict = dict(zip(df_freq['Word'], df_freq['Frequency']))
        with st.spinner("Merender WordCloud instan dari CSV..."):
            wc = _new_wordcloud(colormap)
            wc.generate_from_frequencies(freq_dict)
            _show_wordcloud(wc)
    except Exception as e:
        st.error(f"Error memproses file CSV WordFreq: {e}")


def _render_wordclouds(df):
    """Render the five word-cloud tabs (raw, cleaned, and one per sentiment)."""
    st.subheader("☁️ WordCloud: Representasi Visual Teks")
    st.write("Kata-kata yang paling sering muncul dalam setiap kategori.")

    tab_mentah, tab_bersih, tab_neg, tab_net, tab_pos = st.tabs([
        "Data Mentah", "Data Bersih", "Negatif", "Netral", "Positif"
    ])

    with tab_mentah:
        st.caption("Data dari kolom 'Teks Tweet' (Original)")
        # Guarded like 'Tweet_Final' below so a missing column does not crash the page.
        if 'Teks Tweet' in df.columns:
            _wordcloud_from_text(" ".join(df['Teks Tweet'].dropna().astype(str)), 'cividis')
        else:
            st.warning("Kolom Teks Tweet tidak ada.")

    with tab_bersih:
        st.caption("Data dari kolom 'Tweet_Final' (Preprocessed)")
        if 'Tweet_Final' in df.columns:
            _wordcloud_from_text(" ".join(df['Tweet_Final'].dropna().astype(str)), 'viridis')
        else:
            st.warning("Kolom Tweet_Final tidak ada.")

    # Per-sentiment clouds are built from pre-computed word-frequency CSV files.
    freq_tabs = (
        (tab_neg, "Kata dominan sentimen NEGATIF (Sumber: WordFreq_Negatif.csv)",
         'model/WordFreq_Negatif.csv', 'Reds'),
        (tab_net, "Kata dominan sentimen NETRAL (Sumber: WordFreq_Netral.csv)",
         'model/WordFreq_Netral.csv', 'Greys'),
        (tab_pos, "Kata dominan sentimen POSITIF (Sumber: WordFreq_Positif.csv)",
         'model/WordFreq_Positif.csv', 'Greens'),
    )
    for tab, caption, freq_path, cmap in freq_tabs:
        with tab:
            st.caption(caption)
            _wordcloud_from_freq_csv(freq_path, cmap)

    st.markdown("---")


def _parse_lda_keywords(text_data):
    """Turn a comma-separated keyword string into a ('Kata', 'Bobot') DataFrame.

    'Bobot' is an artificial rank-based weight (first keyword gets the largest
    value); rows are sorted by it ascending. Missing values (None / NaN) and
    strings without keywords yield an empty DataFrame.
    """
    # str(NaN) would otherwise produce the bogus keyword "nan".
    if text_data is None or (isinstance(text_data, float) and pd.isna(text_data)):
        return pd.DataFrame()
    stripped = (piece.strip() for piece in str(text_data).split(','))
    df_res = pd.DataFrame([{'Kata': word} for word in stripped if word])
    if not df_res.empty:
        df_res['Bobot'] = range(len(df_res), 0, -1)
        df_res = df_res.sort_values(by='Bobot', ascending=True)
    return df_res


def _render_topics():
    """Render one horizontal keyword bar chart per LDA topic, grouped by sentiment."""
    st.subheader("📌 4. Topic Modeling (LDA) & Kata Kunci")
    st.write("Ekstraksi topik dominan dari hasil algoritma Latent Dirichlet Allocation (LDA).")

    path_lda = _resolve_model_file('Hasil_Analisis_Topik_LDA.csv')
    if path_lda is None:
        st.warning("⚠️ File 'Hasil_Analisis_Topik_LDA.csv' belum tersedia di folder model.")
        return

    try:
        df_lda = pd.read_csv(path_lda)
        t_neg, t_net, t_pos = st.tabs(["🔴 Topik Negatif", "⚪ Topik Netral", "🟢 Topik Positif"])
        sections = (
            ('negatif', t_neg, 'Reds'),
            ('netral', t_net, 'Greys'),
            ('positif', t_pos, 'Greens'),
        )
        sentimen_lower = df_lda['Sentimen'].str.lower()
        for sentimen, tab, color_scale in sections:
            with tab:
                df_subset = df_lda[sentimen_lower == sentimen]
                if df_subset.empty:
                    st.warning(f"Belum ada data topik untuk {sentimen}.")
                    continue
                for _, row in df_subset.iterrows():
                    topik_ke = row['Topik Ke']
                    df_chart = _parse_lda_keywords(row['Kata Kunci'])
                    if df_chart.empty:
                        continue
                    fig = px.bar(
                        df_chart, x='Bobot', y='Kata', orientation='h',
                        title=f"<b>Topik {topik_ke}</b>",
                        color='Bobot',
                        color_continuous_scale=color_scale,
                    )
                    # The x-axis is hidden because 'Bobot' only encodes keyword order.
                    fig.update_layout(height=300, showlegend=False, xaxis_title=None, xaxis_visible=False)
                    st.plotly_chart(fig, use_container_width=True)
                    st.divider()
    except Exception as e:
        st.error(f"Gagal memproses data LDA: {e}")


def _render_data_explorer(df):
    """Render the filterable, searchable, paginated tweet table."""
    col_f1, col_f2 = st.columns([1, 2])
    with col_f1:
        filter_label = st.selectbox("Filter Sentimen:", ['Semua', 'negatif', 'netral', 'positif'])
    with col_f2:
        search_keyword = st.text_input("Cari Tweet:", "")

    cols_available = [c for c in ['created_at', 'username', 'Teks Tweet', 'Label_Clean'] if c in df.columns]
    rename_map = {'created_at': 'Tanggal', 'username': 'Username', 'Label_Clean': 'Label'}
    df_show = df[cols_available].rename(columns=rename_map)

    if filter_label != 'Semua' and 'Label' in df_show.columns:
        df_show = df_show[df_show['Label'] == filter_label]
    if search_keyword and 'Teks Tweet' in df_show.columns:
        # regex=False: the keyword is user input and must be matched literally;
        # otherwise characters such as "(" raise re.error and "." matches anything.
        matches = df_show['Teks Tweet'].str.contains(search_keyword, case=False, na=False, regex=False)
        df_show = df_show[matches]

    # 1-based row numbers for display.
    df_show.index = range(1, len(df_show) + 1)

    baris_per_halaman = 20
    total_data = len(df_show)
    total_halaman = math.ceil(total_data / baris_per_halaman)

    if total_data == 0:
        st.warning("Data tidak ditemukan.")
        return

    c_nav, c_stat = st.columns([1, 3])
    with c_nav:
        halaman = st.number_input("Halaman", min_value=1, max_value=max(1, total_halaman), step=1)
    with c_stat:
        st.write("")
        st.caption(f"Menampilkan **{total_data}** Data (Halaman {halaman} dari {total_halaman})")

    start_idx = (halaman - 1) * baris_per_halaman
    end_idx = start_idx + baris_per_halaman
    st.dataframe(df_show.iloc[start_idx:end_idx], use_container_width=True)


def _render_evaluation():
    """Render the classification-report table and the confusion-matrix heatmap."""
    st.subheader("1. Tabel Performa (Classification Report)")
    st.markdown(
        "\n"
        "Metrik evaluasi model berdasarkan data testing (20%):\n"
        "* **Precision**: Ketepatan tebakan.\n"
        "* **Recall**: Kemampuan menemukan data yang relevan.\n"
        "* **F1-Score**: Rata-rata harmonis (Paling penting untuk data tidak seimbang).\n"
    )

    path_perf = _resolve_model_file('Tabel_Performa_LSTM.csv')
    if path_perf is None:
        st.warning("⚠️ File 'Tabel_Performa_LSTM.csv' belum tersedia.")
    else:
        try:
            df_perf = pd.read_csv(path_perf, index_col=0)
            st.table(
                df_perf.style.highlight_max(
                    axis=0, props='background-color: #FFEB3B; color: black; font-weight: bold'
                )
            )
        except Exception as e:
            st.error(f"Gagal memuat tabel evaluasi: {e}")

    st.divider()
    st.subheader("2. Confusion Matrix")
    st.markdown("Visualisasi ini menunjukkan **detail kesalahan prediksi**. Sumbu Y adalah Label Asli, Sumbu X adalah Prediksi Model.")

    path_cm = _resolve_model_file('Data_Confusion_Matrix.csv')
    if path_cm is None:
        st.info(" **Data Confusion Matrix belum tersedia.** Silakan jalankan kode penyimpanan `Data_Confusion_Matrix.csv` di Google Colab (Bagian Evaluasi).")
        return

    try:
        df_cm_data = pd.read_csv(path_cm)
        if 'y_true' not in df_cm_data.columns or 'y_pred' not in df_cm_data.columns:
            st.error("Format CSV Confusion Matrix salah. Harus ada kolom 'y_true' dan 'y_pred'.")
            return

        # Imported lazily inside the try so a missing scikit-learn install is
        # reported in the UI instead of breaking the whole page.
        from sklearn.metrics import confusion_matrix

        # NOTE(review): assumes exactly three classes appear in the CSV, in the
        # order Negatif/Netral/Positif - confirm against the export notebook.
        labels = ['Negatif', 'Netral', 'Positif']
        cm = confusion_matrix(df_cm_data['y_true'], df_cm_data['y_pred'])
        fig_cm = px.imshow(
            cm,
            text_auto=True,
            labels=dict(x="Prediksi Model", y="Label Aktual (Asli)", color="Jumlah Data"),
            x=labels,
            y=labels,
            color_continuous_scale='Blues',
            aspect="auto",
        )
        fig_cm.update_layout(title="Confusion Matrix Heatmap")
        st.plotly_chart(fig_cm, use_container_width=True)

        # Diagonal = correct predictions; accuracy = trace / total.
        total_benar = np.trace(cm)
        total_data = np.sum(cm)
        akurasi_cm = (total_benar / total_data) * 100
        st.caption(f"💡 **Interpretasi:** Dari total **{total_data}** data testing, model berhasil menebak benar sebanyak **{total_benar}** data ({akurasi_cm:.2f}%).")
    except Exception as e:
        st.error(f"Gagal memproses Confusion Matrix: {e}")


def render_visualisasi():
    """Render the "Dashboard Visualisasi Data" Streamlit page.

    Sections, in display order: sentiment distribution and daily trend,
    word clouds, LDA topic keywords, and a data explorer with model
    evaluation tables. Rendering stops early (after an on-page error) when
    the main dataset or its 'Label' column is missing.
    """
    st.title("📈 Dashboard Visualisasi Data")
    st.markdown("Analisis visual interaktif terhadap data opini publik terkait kebijakan anggaran pendidikan.")

    df = _load_visual_data('data/Data_Lengkap_Tokenisasi.csv')
    if df is None:
        return

    _render_sentiment_distribution(df)
    _render_wordclouds(df)
    _render_topics()

    st.subheader("🔍 Data Explorer & Evaluasi Model")
    tab_data, tab_eval = st.tabs(["Data Explorer", "Tabel Performa (Evaluasi)"])
    with tab_data:
        _render_data_explorer(df)
    with tab_eval:
        _render_evaluation()