✨ feat: add filter dashboard
This commit is contained in:
parent f5b0c56cdd
commit f05dd566f6
@@ -1,3 +1,5 @@
-/Pengumpulan Data/data-analisis/datasets-balance.csv
+/Pengumpulan Data/data-analisis/datasets-balanced.csv
 /Pengumpulan Data/data-analisis/datasets-tfidf.csv
-/Pengumpulan Data/
+/Pengumpulan Data
@@ -13,5 +13,5 @@ Dashboard ini berisi analisis sentimen komentar netizen di Twitter terhadap gaji
 """)

 app.add_app("Home", frontend.app)
-app.add_app("Test", test.app)
+# app.add_app("Test", test.app)
 app.run()
@@ -5,7 +5,9 @@ from plotly.subplots import make_subplots
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
+import plotly.graph_objects as go
 import os
 from backend import get_pivot_sentiment, get_label_counts, get_keyword_sentiment_distribution, get_avg_metrics, generate_wordclouds
+from datetime import timedelta

 def inject_css():
     css_path = os.path.join(os.path.dirname(__file__), "style.css")
@@ -15,24 +17,83 @@ def inject_css():
 def app():
     inject_css()

-    # Placeholder Data (Will be replaced with actual backend data)
+    # Load all data with the correct date format
     sentiment_data = pd.read_csv(
         'datasets/datasets-keyword-label.csv',
         parse_dates=['created_at'],
-        infer_datetime_format=True
+        date_format="%d %b %Y"  # Format matching "30 Dec 2024"
     )

-    # wordcloud data
+    # No timezone normalization needed because the format no longer includes a time of day
+    # sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at']).dt.tz_localize(None)
+
+    # Add filters to the sidebar
+    with st.sidebar:
+        st.markdown("## 🔍 Filter Dashboard")
+
+        # Get the minimum and maximum dates from the dataset
+        min_date = sentiment_data['created_at'].min().date()
+        max_date = sentiment_data['created_at'].max().date()
+
+        # Show the time-filter options
+        st.markdown("### ⏱️ Filter Waktu")
+
+        # Time filter using a date slider
+        date_range = st.slider(
+            "Pilih Rentang Waktu:",
+            min_value=min_date,
+            max_value=max_date,
+            value=(min_date, max_date),
+            format="DD-MMM-YYYY"
+        )
+
+        # Convert the date range to datetime for filtering
+        start_datetime = pd.Timestamp(date_range[0])
+        end_datetime = pd.Timestamp(date_range[1]) + timedelta(days=1) - timedelta(seconds=1)  # end of day
+
+        # Show the selected time period
+        st.info(f"Menampilkan data dari: {start_datetime.strftime('%d %B %Y')} hingga {end_datetime.strftime('%d %B %Y')}")
+
+        # Keyword filter
+        st.markdown("### 🏷️ Filter Keyword")
+        keywords = sorted(sentiment_data['keyword'].unique())
+        selected_keywords = st.multiselect(
+            "Pilih Keyword",
+            options=keywords,
+            default=keywords
+        )
+
+        # # Sentiment filter
+        # st.markdown("### 😊 Filter Sentimen")
+        # sentiments = ['Positif', 'Negatif', 'Netral']
+        # selected_sentiments = st.multiselect(
+        #     "Pilih Sentimen",
+        #     options=sentiments,
+        #     default=sentiments
+        # )
+
+    # Apply the filters
+    filtered_data = sentiment_data[
+        (sentiment_data['created_at'] >= start_datetime) &
+        (sentiment_data['created_at'] <= end_datetime) &
+        (sentiment_data['keyword'].isin(selected_keywords))  # &
+        # (sentiment_data['label'].isin(selected_sentiments))
+    ]
+
+    # # Show how many rows are displayed
+    # st.write(f"Menampilkan {len(filtered_data)} dari {len(sentiment_data)} data")
+
+    # Wordcloud data - still uses the full dataset
     wordcloud_data = pd.read_csv('datasets/word_count_labeled.csv')

-    # performance data
+    # Performance data - still uses the full dataset
     performance_data = pd.read_csv('datasets/evaluation_results_combine.csv')

     # Row 1: Pie Chart
     col1, col2 = st.columns(2)
     with col1:
         st.subheader('Sentiment Distribution')
-        label_counts = get_label_counts(sentiment_data)  # Call the backend function
+        label_counts = get_label_counts(filtered_data)
         pie_fig = px.pie(label_counts, names='label', values='count')
         pie_fig.update_traces(textinfo='percent+label')
         pie_fig.update_layout(showlegend=False)
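At its core, the new sidebar filter is plain pandas boolean masking over the slider's date range and the selected keywords. Below is a minimal sketch of that pattern outside Streamlit; the sample rows and keyword values are made up for illustration:

import pandas as pd
from datetime import date, timedelta

# Made-up sample of the two filtered columns
df = pd.DataFrame({
    'created_at': pd.to_datetime(['28 Dec 2024', '29 Dec 2024', '30 Dec 2024'], format='%d %b %Y'),
    'keyword': ['gaji', 'umr', 'gaji'],
})

# Values a date slider / multiselect might return
date_range = (date(2024, 12, 28), date(2024, 12, 29))
selected_keywords = ['gaji']

start_datetime = pd.Timestamp(date_range[0])
end_datetime = pd.Timestamp(date_range[1]) + timedelta(days=1) - timedelta(seconds=1)  # inclusive end of day

filtered = df[
    (df['created_at'] >= start_datetime) &
    (df['created_at'] <= end_datetime) &
    (df['keyword'].isin(selected_keywords))
]
print(filtered)  # only the 28 Dec 2024 row with keyword 'gaji' survives both filters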
@@ -41,17 +102,19 @@ def app():
     # Sentiment distribution by model
     with col2:
         st.subheader('Keyword Sentiment Distribution')
-        keyword_sentiment_counts = get_keyword_sentiment_distribution(sentiment_data)  # Call the backend function
+        keyword_sentiment_counts = get_keyword_sentiment_distribution(filtered_data)

         # Set the label category order on the frontend
-        keyword_sentiment_counts['label'] = pd.Categorical(keyword_sentiment_counts['label'], categories=['Negatif', 'Positif', 'Netral'], ordered=True)
+        keyword_sentiment_counts['label'] = pd.Categorical(keyword_sentiment_counts['label'],
+                                                           categories=['Negatif', 'Positif', 'Netral'],
+                                                           ordered=True)

         # Build the bar chart with the configured label order
         bar_fig = px.bar(keyword_sentiment_counts, x='keyword', y='count', color='label', barmode='group',
                          category_orders={'label': ['Negatif', 'Positif', 'Netral']})
         st.plotly_chart(bar_fig, use_container_width=True)

-    # Row 2: Wordclouds
+    # Row 2: Wordclouds (not filtered)
     label_colors = {
         'positif': 'green',
         'negatif': 'red',
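The reordering above relies on pandas' ordered Categorical dtype: once the label column carries an explicit category order, sorting (and plotting libraries that respect categoricals) follows it. A small self-contained illustration with invented counts:

import pandas as pd

counts = pd.DataFrame({'label': ['Netral', 'Positif', 'Negatif'], 'count': [3, 5, 2]})

# Declare an explicit, ordered set of categories for the labels
counts['label'] = pd.Categorical(counts['label'],
                                 categories=['Negatif', 'Positif', 'Netral'],
                                 ordered=True)

print(counts.sort_values('label'))  # rows now follow Negatif, Positif, Netral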
@@ -68,7 +131,7 @@ def app():
         plt.imshow(wordcloud, interpolation='bilinear')
         plt.axis('off')
         st.pyplot(plt)

     # cols = st.columns(len(wordclouds))
     # for col, (label, wordcloud) in zip(cols, wordclouds.items()):
     #     with col:
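For context, rendering a word cloud through matplotlib follows the same pattern as the snippet above. A minimal self-contained sketch with made-up text (in the app itself the wordcloud objects presumably come from generate_wordclouds, imported from backend):

from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Made-up text; word frequencies drive word size
text = "gaji gaji umr kerja layak gaji kerja"
wordcloud = WordCloud(width=400, height=200, background_color='white').generate(text)

plt.imshow(wordcloud, interpolation='bilinear')  # the WordCloud object renders as an image array
plt.axis('off')
plt.show()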
@@ -81,29 +144,44 @@ def app():
     # margin=dict(l=0, r=0, t=0, b=0)
     # )
     # st.plotly_chart(fig, use_container_height=False)

     st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)

     # Row 3: Line Chart & Grouped Bar Chart
     col5, col6 = st.columns(2)

     with col5:
         st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
         st.subheader('Sentiment Trends Over Time')
-        pivot_sentiment = get_pivot_sentiment(sentiment_data)
+        pivot_sentiment = get_pivot_sentiment(filtered_data)
         line_fig = px.line(pivot_sentiment, x='Year', y=['Negatif', 'Positif', 'Netral'], markers=True)
         st.plotly_chart(line_fig, use_container_width=True)

     with col6:
         st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
         st.subheader('Model Performance')
-        avg_metrics_df = get_avg_metrics(performance_data)  # Call the backend function
+        avg_metrics_df = get_avg_metrics(performance_data)
         bar_group_fig = px.bar(avg_metrics_df.melt(id_vars='model', value_vars=['Accuracy', 'Precision', 'Recall', 'F1-score']),
                                x='variable', y='value', color='model', barmode='group')
         st.plotly_chart(bar_group_fig, use_container_width=True)

     # with col8:
     # Data table
     st.subheader('Data Tables')
-    columns_to_display = ['created_at', 'full_text', 'keyword', 'label']
-    st.dataframe(sentiment_data[columns_to_display], use_container_width=True)
+    columns_to_display = ['created_at', 'cleanning_text', 'keyword', 'label']
+    display_df = filtered_data[columns_to_display].copy()
+
+    # Sort the data by date (created_at), newest first
+    display_df = display_df.sort_values(by='created_at', ascending=False)
+
+    # Format the created_at column as DD-MONTH-YYYY (example: 30-December-2024)
+    display_df['created_at'] = display_df['created_at'].dt.strftime('%d-%B-%Y')
+
+    display_df = display_df.rename(columns={
+        'created_at': 'Tanggal',
+        'cleanning_text': 'Tweet',
+        'keyword': 'Kata Kunci',
+        'label': 'Sentimen'
+    })
+
+    st.dataframe(display_df, hide_index=True, use_container_width=True)


 if __name__ == "__main__":
     app()
@@ -10,22 +10,22 @@ def app():

     # Button to run the prediction
     if st.button('Prediksi Sentimen'):
-        if model_choice == 'SVM':
-            model_path = 'models/svm_model.pkl'
-            vectorizer_path = 'models/datasets-tfidf.pkl'
-        elif model_choice == 'Naive Bayes':
-            model_path = 'models/nb_model.pkl'
-            vectorizer_path = 'models/datasets-tfidf.pkl'
-        elif model_choice == 'KNN':
-            model_path = 'models/knn_model.pkl'
-            vectorizer_path = 'models/datasets-tfidf.pkl'
+        # if model_choice == 'SVM':
+        #     model_path = 'models/svm_model.pkl'
+        #     vectorizer_path = 'models/datasets-tfidf.pkl'
+        # elif model_choice == 'Naive Bayes':
+        #     model_path = 'models/nb_model.pkl'
+        #     vectorizer_path = 'models/datasets-tfidf.pkl'
+        # elif model_choice == 'KNN':
+        #     model_path = 'models/knn_model.pkl'
+        #     vectorizer_path = 'models/datasets-tfidf.pkl'

         # Load the model and vectorizer
-        model, vectorizer = load_model_and_vectorizer(model_path, vectorizer_path)
+        # model, vectorizer = load_model_and_vectorizer(model_path, vectorizer_path)

         # Predict the sentiment
-        prediction = predict_sentiment(model, vectorizer, user_input)
-        st.write(f'#### Prediksi Sentimen: {prediction}')
+        # prediction = predict_sentiment(model, vectorizer, user_input)
+        st.write(f'#### Prediksi Sentimen:')


 if __name__ == '__main__':
@@ -24,9 +24,12 @@ def get_yearly_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
     Returns a DataFrame with the counts of sentiment labels per year.
     """
     # Make sure the 'created_at' column has a datetime dtype
-    sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'], utc=True)
+    # The utc parameter is no longer needed because the dates carry no timezone
+    sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'])

     # Extract the year from the 'created_at' column
     sentiment_data['year'] = sentiment_data['created_at'].dt.year

     # Group by year and label, then count
     yearly_sentiment = sentiment_data.groupby(['year', 'label']).size().reset_index(name='count')
     # Set the order of the label categories
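Dropping utc=True here only changes the resulting dtype: parsing plain "dd Mon YYYY" strings yields a naive datetime64 column, while utc=True would make it timezone-aware. A quick illustration with invented values:

import pandas as pd

s = pd.Series(['30 Dec 2024', '29 Dec 2024'])
naive = pd.to_datetime(s, format='%d %b %Y')            # datetime64[ns], no timezone
aware = pd.to_datetime(s, format='%d %b %Y', utc=True)  # datetime64[ns, UTC]
print(naive.dtype, aware.dtype)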
@@ -52,7 +55,8 @@ def get_pivot_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
     Returns a pivot_sentiment DataFrame with the counts of positive, neutral, and negative labels per year.
     """
    # Make sure the 'created_at' column has a datetime dtype
-    sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'], utc=True)
+    # The utc parameter is no longer needed because the dates carry no timezone
+    sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'])

     # Extract the year from the 'created_at' column
     sentiment_data['Year'] = sentiment_data['created_at'].dt.year

File diff suppressed because it is too large
@@ -81,7 +81,7 @@ class MultiApp:
         )

         # Render sidebar content
-        show_sidebar()
+        # show_sidebar()

         # Execute the app function
         app['function']()
@@ -52,7 +52,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -60,23 +60,23 @@
      "output_type": "stream",
      "text": [
       "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 8369 entries, 0 to 8368\n",
+      "RangeIndex: 5387 entries, 0 to 5386\n",
       "Data columns (total 11 columns):\n",
       " #   Column              Non-Null Count  Dtype              \n",
       "---  ------              --------------  -----              \n",
-      " 0   created_at          8369 non-null   datetime64[ns, UTC]\n",
-      " 1   full_text           8369 non-null   object             \n",
-      " 2   keyword             8369 non-null   object             \n",
-      " 3   cleanning_text      8369 non-null   object             \n",
-      " 4   case_folding        8369 non-null   object             \n",
-      " 5   convert_slang_word  8369 non-null   object             \n",
-      " 6   filtering           8369 non-null   object             \n",
-      " 7   tokenizing          8369 non-null   object             \n",
-      " 8   stemming            8369 non-null   object             \n",
-      " 9   score               8369 non-null   int64              \n",
-      " 10  label               8369 non-null   object             \n",
+      " 0   created_at          5387 non-null   datetime64[ns, UTC]\n",
+      " 1   full_text           5387 non-null   object             \n",
+      " 2   keyword             5387 non-null   object             \n",
+      " 3   cleanning_text      5387 non-null   object             \n",
+      " 4   case_folding        5387 non-null   object             \n",
+      " 5   convert_slang_word  5387 non-null   object             \n",
+      " 6   filtering           5387 non-null   object             \n",
+      " 7   tokenizing          5387 non-null   object             \n",
+      " 8   stemming            5387 non-null   object             \n",
+      " 9   score               5387 non-null   int64              \n",
+      " 10  label               5387 non-null   object             \n",
       "dtypes: datetime64[ns, UTC](1), int64(1), object(9)\n",
-      "memory usage: 719.3+ KB\n"
+      "memory usage: 463.1+ KB\n"
      ]
     }
    ],
@@ -85,7 +85,7 @@
    "import datetime\n",
    "import pandas as pd\n",
    "\n",
-   "datasets = pd.read_csv('datasets/datasets-keyword-label.csv') \n",
+   "datasets = pd.read_csv('datasets/datasets-keyword-labels.csv') \n",
    "datasets['created_at'] = pd.to_datetime(datasets['created_at'], format=\"%a %b %d %H:%M:%S %z %Y\")\n",
    "\n",
    "datasets.info()\n"
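The format string in this cell matches Twitter's raw created_at layout (weekday, month, day, time, UTC offset, year). A one-line sanity check with an invented timestamp:

import pandas as pd

# 30 Dec 2024 is a Monday, so this made-up value matches the format string
ts = pd.to_datetime('Mon Dec 30 10:15:00 +0000 2024', format='%a %b %d %H:%M:%S %z %Y')
print(ts)  # 2024-12-30 10:15:00+00:00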
@@ -93,11 +93,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
-    "datasets.to_csv('datasets/datasets-keyword-label.csv', index=False)"
+    "datasets.to_csv('datasets/datasets-keyword-labels.csv', index=False)"
    ]
   },
   {
@@ -364,6 +364,84 @@
    "# Print the prediction result\n",
    "print(\"Sentimen Prediksi:\", predicted_class[0])"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": 11,
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "    created_at\n",
+     "0  30 Dec 2024\n",
+     "1  30 Dec 2024\n",
+     "2  29 Dec 2024\n",
+     "3  28 Dec 2024\n",
+     "4  28 Dec 2024\n"
+    ]
+   }
+  ],
+  "source": [
+   "import pandas as pd\n",
+   "\n",
+   "# Read the CSV file\n",
+   "df = pd.read_csv('datasets/datasets-keyword-labels.csv')\n",
+   "\n",
+   "# Make sure the created_at column is converted to datetime\n",
+   "df['created_at'] = pd.to_datetime(df['created_at'])\n",
+   "\n",
+   "# Reformat the date to show only day, month, and year\n",
+   "df['created_at'] = df['created_at'].dt.strftime('%d %b %Y')\n",
+   "\n",
+   "# Save back to CSV\n",
+   "df.to_csv('datasets/datasets-keyword-label.csv', index=False)\n",
+   "\n",
+   "# Show a few rows to verify\n",
+   "print(df[['created_at']].head())"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 13,
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "<class 'pandas.core.frame.DataFrame'>\n",
+     "RangeIndex: 5387 entries, 0 to 5386\n",
+     "Data columns (total 11 columns):\n",
+     " #   Column              Non-Null Count  Dtype \n",
+     "---  ------              --------------  ----- \n",
+     " 0   created_at          5387 non-null   object\n",
+     " 1   full_text           5387 non-null   object\n",
+     " 2   keyword             5387 non-null   object\n",
+     " 3   cleanning_text      5387 non-null   object\n",
+     " 4   case_folding        5387 non-null   object\n",
+     " 5   convert_slang_word  5387 non-null   object\n",
+     " 6   filtering           5387 non-null   object\n",
+     " 7   tokenizing          5387 non-null   object\n",
+     " 8   stemming            5387 non-null   object\n",
+     " 9   score               5387 non-null   int64 \n",
+     " 10  label               5387 non-null   object\n",
+     "dtypes: int64(1), object(10)\n",
+     "memory usage: 463.1+ KB\n"
+    ]
+   }
+  ],
+  "source": [
+   "df.info()"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": []
+ }
 ],
 "metadata": {
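Worth noting: strftime('%d %b %Y') in the added cell turns created_at into plain strings (hence the object dtype in the df.info() output above), which is why the dashboard re-parses the column with date_format="%d %b %Y" when loading it. A tiny round-trip sketch with sample dates:

import pandas as pd

df = pd.DataFrame({'created_at': pd.to_datetime(['2024-12-30', '2024-12-29'])})
df['created_at'] = df['created_at'].dt.strftime('%d %b %Y')  # now plain strings
print(df['created_at'].dtype)   # object

df['created_at'] = pd.to_datetime(df['created_at'], format='%d %b %Y')  # re-parsed on load
print(df['created_at'].dtype)   # datetime64[ns]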