feat: add filter dashboard

This commit is contained in:
ja'far shodiq 2025-04-11 17:49:34 +08:00
parent f5b0c56cdd
commit f05dd566f6
8 changed files with 5599 additions and 8419 deletions

4
.gitignore vendored
View File

@ -1,3 +1,5 @@
/Pengumpulan Data/data-analisis/datasets-balance.csv
/Pengumpulan Data/data-analisis/datasets-balanced.csv
/Pengumpulan Data/data-analisis/datasets-tfidf.csv
/Pengumpulan Data/data-analisis/datasets-tfidf.csv
/Pengumpulan Data/
/Pengumpulan Data

View File

@ -13,5 +13,5 @@ Dashboard ini berisi analisis sentimen komentar netizen di Twitter terhadap gaji
""")
app.add_app("Home", frontend.app)
app.add_app("Test", test.app)
# app.add_app("Test", test.app)
app.run()

View File

@ -5,7 +5,9 @@ from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import plotly.graph_objects as go
import os
from backend import get_pivot_sentiment, get_label_counts, get_keyword_sentiment_distribution, get_avg_metrics, generate_wordclouds
from datetime import timedelta
def inject_css():
css_path = os.path.join(os.path.dirname(__file__), "style.css")
@ -15,24 +17,83 @@ def inject_css():
def app():
inject_css()
# Placeholder Data (Will be replaced with actual backend data)
# Load semua data dengan format tanggal yang benar
sentiment_data = pd.read_csv(
'datasets/datasets-keyword-label.csv',
parse_dates=['created_at'],
infer_datetime_format=True
date_format="%d %b %Y" # Format yang sesuai dengan "30 Dec 2024"
)
# data wordcloud
# Tidak perlu normalisasi timezone karena sudah menggunakan format tanpa jam
# sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at']).dt.tz_localize(None)
# Tambahkan filter di sidebar
with st.sidebar:
st.markdown("## 🔍 Filter Dashboard")
# Mendapatkan tanggal minimum dan maksimum dari dataset
min_date = sentiment_data['created_at'].min().date()
max_date = sentiment_data['created_at'].max().date()
# Menampilkan opsi filter waktu
st.markdown("### ⏱️ Filter Waktu")
# Filter waktu menggunakan slider tanggal
date_range = st.slider(
"Pilih Rentang Waktu:",
min_value=min_date,
max_value=max_date,
value=(min_date, max_date),
format="DD-MMM-YYYY"
)
# Convert date range to datetime for filtering
start_datetime = pd.Timestamp(date_range[0])
end_datetime = pd.Timestamp(date_range[1]) + timedelta(days=1) - timedelta(seconds=1) # end of day
# Menampilkan periode waktu yang dipilih
st.info(f"Menampilkan data dari: {start_datetime.strftime('%d %B %Y')} hingga {end_datetime.strftime('%d %B %Y')}")
# Filter keyword
st.markdown("### 🏷️ Filter Keyword")
keywords = sorted(sentiment_data['keyword'].unique())
selected_keywords = st.multiselect(
"Pilih Keyword",
options=keywords,
default=keywords
)
# # Filter sentimen
# st.markdown("### 😊 Filter Sentimen")
# sentiments = ['Positif', 'Negatif', 'Netral']
# selected_sentiments = st.multiselect(
# "Pilih Sentimen",
# options=sentiments,
# default=sentiments
# )
# Terapkan filter
filtered_data = sentiment_data[
(sentiment_data['created_at'] >= start_datetime) &
(sentiment_data['created_at'] <= end_datetime) &
(sentiment_data['keyword'].isin(selected_keywords)) #&
# (sentiment_data['label'].isin(selected_sentiments))
]
# # Tampilkan jumlah data yang ditampilkan
# st.write(f"Menampilkan {len(filtered_data)} dari {len(sentiment_data)} data")
# Data wordcloud - tetap menggunakan semua data
wordcloud_data = pd.read_csv('datasets/word_count_labeled.csv')
# performance data
# Performance data - tetap menggunakan semua data
performance_data = pd.read_csv('datasets/evaluation_results_combine.csv')
# Row 1: Pie Chart
col1, col2 = st.columns(2)
with col1:
st.subheader('Sentiment Distribution')
label_counts = get_label_counts(sentiment_data) # Panggil fungsi backend
label_counts = get_label_counts(filtered_data)
pie_fig = px.pie(label_counts, names='label', values='count')
pie_fig.update_traces(textinfo='percent+label')
pie_fig.update_layout(showlegend=False)
@ -41,17 +102,19 @@ def app():
# Sentiment distribution by keyword
with col2:
st.subheader('Keyword Sentiment Distribution')
keyword_sentiment_counts = get_keyword_sentiment_distribution(sentiment_data) # Panggil fungsi backend
keyword_sentiment_counts = get_keyword_sentiment_distribution(filtered_data)
# Mengatur urutan kategori label di frontend
keyword_sentiment_counts['label'] = pd.Categorical(keyword_sentiment_counts['label'], categories=['Negatif', 'Positif', 'Netral'], ordered=True)
keyword_sentiment_counts['label'] = pd.Categorical(keyword_sentiment_counts['label'],
categories=['Negatif', 'Positif', 'Netral'],
ordered=True)
# Membuat bar chart dengan urutan label yang diatur
bar_fig = px.bar(keyword_sentiment_counts, x='keyword', y='count', color='label', barmode='group',
category_orders={'label': ['Negatif', 'Positif', 'Netral']})
st.plotly_chart(bar_fig, use_container_width=True)
# Row 2: Wordclouds
# Row 2: Wordclouds (tidak difilter)
label_colors = {
'positif': 'green',
'negatif': 'red',
@ -68,7 +131,7 @@ def app():
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
st.pyplot(plt)
# cols = st.columns(len(wordclouds))
# for col, (label, wordcloud) in zip(cols, wordclouds.items()):
# with col:
@ -81,29 +144,44 @@ def app():
# margin=dict(l=0, r=0, t=0, b=0)
# )
# st.plotly_chart(fig, use_container_height=False)
st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
# Row 3: Line Chart & Grouped Bar Chart
col5, col6 = st.columns(2)
with col5:
st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
st.subheader('Sentiment Trends Over Time')
pivot_sentiment = get_pivot_sentiment(sentiment_data)
pivot_sentiment = get_pivot_sentiment(filtered_data)
line_fig = px.line(pivot_sentiment, x='Year', y=['Negatif', 'Positif', 'Netral'], markers=True)
st.plotly_chart(line_fig, use_container_width=True)
with col6:
st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
st.subheader('Model Performance')
avg_metrics_df = get_avg_metrics(performance_data) # Panggil fungsi backend
avg_metrics_df = get_avg_metrics(performance_data)
bar_group_fig = px.bar(avg_metrics_df.melt(id_vars='model', value_vars=['Accuracy', 'Precision', 'Recall', 'F1-score']),
x='variable', y='value', color='model', barmode='group')
st.plotly_chart(bar_group_fig, use_container_width=True)
# with col8:
# Data table
st.subheader('Data Tables')
columns_to_display = ['created_at', 'full_text', 'keyword', 'label']
st.dataframe(sentiment_data[columns_to_display], use_container_width=True)
columns_to_display = ['created_at', 'cleanning_text', 'keyword', 'label']
display_df = filtered_data[columns_to_display].copy()
# Urutkan data berdasarkan tanggal (created_at) dari yang terbaru
display_df = display_df.sort_values(by='created_at', ascending=False)
# Format kolom created_at menjadi format DD-BULAN-YYYY (contoh: 30-December-2024)
display_df['created_at'] = display_df['created_at'].dt.strftime('%d-%B-%Y')
display_df = display_df.rename(columns={
'created_at': 'Tanggal',
'cleanning_text': 'Tweet',
'keyword': 'Kata Kunci',
'label': 'Sentimen'
})
st.dataframe(display_df, hide_index=True, use_container_width=True)
if __name__ == "__main__":
app()

View File

@ -10,22 +10,22 @@ def app():
# Tombol untuk melakukan prediksi
if st.button('Prediksi Sentimen'):
if model_choice == 'SVM':
model_path = 'models/svm_model.pkl'
vectorizer_path = 'models/datasets-tfidf.pkl'
elif model_choice == 'Naive Bayes':
model_path = 'models/nb_model.pkl'
vectorizer_path = 'models/datasets-tfidf.pkl'
elif model_choice == 'KNN':
model_path = 'models/knn_model.pkl'
vectorizer_path = 'models/datasets-tfidf.pkl'
# if model_choice == 'SVM':
# model_path = 'models/svm_model.pkl'
# vectorizer_path = 'models/datasets-tfidf.pkl'
# elif model_choice == 'Naive Bayes':
# model_path = 'models/nb_model.pkl'
# vectorizer_path = 'models/datasets-tfidf.pkl'
# elif model_choice == 'KNN':
# model_path = 'models/knn_model.pkl'
# vectorizer_path = 'models/datasets-tfidf.pkl'
# Load model dan vectorizer
model, vectorizer = load_model_and_vectorizer(model_path, vectorizer_path)
# model, vectorizer = load_model_and_vectorizer(model_path, vectorizer_path)
# Prediksi sentimen
prediction = predict_sentiment(model, vectorizer, user_input)
st.write(f'#### Prediksi Sentimen: {prediction}')
# prediction = predict_sentiment(model, vectorizer, user_input)
st.write(f'#### Prediksi Sentimen:')
if __name__ == '__main__':

View File

@ -24,9 +24,12 @@ def get_yearly_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
Menghasilkan DataFrame dengan jumlah label sentimen per tahun.
"""
# Pastikan kolom 'created_at' bertipe datetime
sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'], utc=True)
# Tidak perlu parameter utc karena kita menggunakan tanggal tanpa timezone
sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'])
# Ekstraksi tahun dari kolom 'created_at'
sentiment_data['year'] = sentiment_data['created_at'].dt.year
# Group by tahun dan label, lalu hitung jumlahnya
yearly_sentiment = sentiment_data.groupby(['year', 'label']).size().reset_index(name='count')
# Mengatur urutan kategori label
@ -52,7 +55,8 @@ def get_pivot_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
Menghasilkan DataFrame pivot_sentiment dengan jumlah label positif, netral, dan negatif per tahun.
"""
# Pastikan kolom 'created_at' bertipe datetime
sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'], utc=True)
# Tidak perlu parameter utc karena kita menggunakan tanggal tanpa timezone
sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'])
# Ekstraksi tahun dari kolom 'created_at'
sentiment_data['Year'] = sentiment_data['created_at'].dt.year

File diff suppressed because it is too large Load Diff

View File

@ -81,7 +81,7 @@ class MultiApp:
)
# Render sidebar content
show_sidebar()
# show_sidebar()
# Eksekusi app function
app['function']()

View File

@ -52,7 +52,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -60,23 +60,23 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 8369 entries, 0 to 8368\n",
"RangeIndex: 5387 entries, 0 to 5386\n",
"Data columns (total 11 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 created_at 8369 non-null datetime64[ns, UTC]\n",
" 1 full_text 8369 non-null object \n",
" 2 keyword 8369 non-null object \n",
" 3 cleanning_text 8369 non-null object \n",
" 4 case_folding 8369 non-null object \n",
" 5 convert_slang_word 8369 non-null object \n",
" 6 filtering 8369 non-null object \n",
" 7 tokenizing 8369 non-null object \n",
" 8 stemming 8369 non-null object \n",
" 9 score 8369 non-null int64 \n",
" 10 label 8369 non-null object \n",
" 0 created_at 5387 non-null datetime64[ns, UTC]\n",
" 1 full_text 5387 non-null object \n",
" 2 keyword 5387 non-null object \n",
" 3 cleanning_text 5387 non-null object \n",
" 4 case_folding 5387 non-null object \n",
" 5 convert_slang_word 5387 non-null object \n",
" 6 filtering 5387 non-null object \n",
" 7 tokenizing 5387 non-null object \n",
" 8 stemming 5387 non-null object \n",
" 9 score 5387 non-null int64 \n",
" 10 label 5387 non-null object \n",
"dtypes: datetime64[ns, UTC](1), int64(1), object(9)\n",
"memory usage: 719.3+ KB\n"
"memory usage: 463.1+ KB\n"
]
}
],
@ -85,7 +85,7 @@
"import datetime\n",
"import pandas as pd\n",
"\n",
"datasets = pd.read_csv('datasets/datasets-keyword-label.csv') \n",
"datasets = pd.read_csv('datasets/datasets-keyword-labels.csv') \n",
"datasets['created_at'] = pd.to_datetime(datasets['created_at'], format=\"%a %b %d %H:%M:%S %z %Y\")\n",
"\n",
"datasets.info()\n"
@ -93,11 +93,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"datasets.to_csv('datasets/datasets-keyword-label.csv', index=False)"
"datasets.to_csv('datasets/datasets-keyword-labels.csv', index=False)"
]
},
{
@ -364,6 +364,84 @@
"# Cetak hasil prediksi\n",
"print(\"Sentimen Prediksi:\", predicted_class[0])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" created_at\n",
"0 30 Dec 2024\n",
"1 30 Dec 2024\n",
"2 29 Dec 2024\n",
"3 28 Dec 2024\n",
"4 28 Dec 2024\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Baca file CSV\n",
"df = pd.read_csv('datasets/datasets-keyword-labels.csv')\n",
"\n",
"# Pastikan kolom created_at dikonversi ke format datetime\n",
"df['created_at'] = pd.to_datetime(df['created_at'])\n",
"\n",
"# Format ulang tanggal untuk hanya menampilkan tanggal, bulan, dan tahun\n",
"df['created_at'] = df['created_at'].dt.strftime('%d %b %Y')\n",
"\n",
"# Simpan kembali ke CSV\n",
"df.to_csv('datasets/datasets-keyword-label.csv', index=False)\n",
"\n",
"# Tampilkan beberapa baris untuk memverifikasi\n",
"print(df[['created_at']].head())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 5387 entries, 0 to 5386\n",
"Data columns (total 11 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 created_at 5387 non-null object\n",
" 1 full_text 5387 non-null object\n",
" 2 keyword 5387 non-null object\n",
" 3 cleanning_text 5387 non-null object\n",
" 4 case_folding 5387 non-null object\n",
" 5 convert_slang_word 5387 non-null object\n",
" 6 filtering 5387 non-null object\n",
" 7 tokenizing 5387 non-null object\n",
" 8 stemming 5387 non-null object\n",
" 9 score 5387 non-null int64 \n",
" 10 label 5387 non-null object\n",
"dtypes: int64(1), object(10)\n",
"memory usage: 463.1+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {