"""Streamlit dashboard page showing sentiment-analysis results per keyword."""

import os
from datetime import timedelta

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from plotly.subplots import make_subplots
from wordcloud import WordCloud

from backend import (
    generate_wordclouds,
    get_avg_metrics,
    get_keyword_sentiment_distribution,
    get_label_counts,
    get_pivot_sentiment,
)

# Display order of the sentiment labels, shared by every chart on the page.
_SENTIMENT_ORDER = ('Negatif', 'Positif', 'Netral')


def inject_css() -> None:
    """Inject the ``style.css`` file located next to this module into the page.

    The stylesheet text is wrapped in a ``<style>`` element and emitted
    through ``st.markdown`` with ``unsafe_allow_html=True``.

    Raises:
        OSError: If ``style.css`` cannot be opened.
    """
    css_path = os.path.join(os.path.dirname(__file__), "style.css")
    with open(css_path, encoding="utf-8") as f:
        # The file contents must actually be read and embedded; an empty
        # markdown string would leave the page unstyled.
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


def _sidebar_filters(sentiment_data):
    """Render the sidebar filter widgets and return the user's selection.

    Args:
        sentiment_data: DataFrame with a datetime ``created_at`` column and a
            ``keyword`` column.

    Returns:
        A ``(start_datetime, end_datetime, selected_keywords)`` tuple where the
        two timestamps bound the chosen date range inclusively.
    """
    with st.sidebar:
        st.markdown("## 🔍 Filter Dashboard")

        # The slider is bounded by the earliest and latest dates in the data.
        min_date = sentiment_data['created_at'].min().date()
        max_date = sentiment_data['created_at'].max().date()

        st.markdown("### ⏱️ Filter Waktu")
        date_range = st.slider(
            "Pilih Rentang Waktu:",
            min_value=min_date,
            max_value=max_date,
            value=(min_date, max_date),
            format="DD-MMM-YYYY",
        )

        start_datetime = pd.Timestamp(date_range[0])
        # Extend the upper bound to the last second of the selected day so the
        # whole day is included by the `<=` comparison.
        end_datetime = (
            pd.Timestamp(date_range[1]) + timedelta(days=1) - timedelta(seconds=1)
        )

        st.info(f"Menampilkan data dari: {start_datetime.strftime('%d %B %Y')} hingga {end_datetime.strftime('%d %B %Y')}")

        st.markdown("### 🏷️ Filter Keyword")
        # Drop missing keywords first: sorting a mix of str and NaN raises
        # TypeError.
        keywords = sorted(sentiment_data['keyword'].dropna().unique())
        selected_keywords = st.multiselect(
            "Pilih Keyword",
            options=keywords,
            default=keywords,
        )

    return start_datetime, end_datetime, selected_keywords


def _render_wordclouds(wordclouds) -> None:
    """Render one word cloud per label, side by side in equal columns.

    Args:
        wordclouds: Mapping of label to an image-like word cloud object, as
            returned by ``generate_wordclouds``.
    """
    if not wordclouds:
        # st.columns() rejects a column count of zero.
        return

    cols = st.columns(len(wordclouds))
    for col, (label, wordcloud) in zip(cols, wordclouds.items()):
        with col:
            st.subheader(f'Word Cloud {label}')
            # Use an explicit figure instead of the global pyplot state, and
            # close it afterwards so figures do not pile up across reruns.
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.imshow(wordcloud, interpolation='bilinear')
            ax.axis('off')
            st.pyplot(fig)
            plt.close(fig)


def app() -> None:
    """Build the whole dashboard page.

    Loads the labelled tweet dataset, applies the sidebar date/keyword filters
    and renders, in order: sentiment pie chart and per-keyword bar chart, word
    clouds, sentiment trend and model-performance charts, and a data table.
    """
    inject_css()

    # Dates in the CSV look like "30 Dec 2024"; they carry no time or timezone
    # component, so no timezone normalisation is needed.
    sentiment_data = pd.read_csv(
        'datasets/datasets-keyword-label.csv',
        parse_dates=['created_at'],
        date_format="%d %b %Y",
    )

    start_datetime, end_datetime, selected_keywords = _sidebar_filters(sentiment_data)

    in_date_range = (
        (sentiment_data['created_at'] >= start_datetime)
        & (sentiment_data['created_at'] <= end_datetime)
    )
    has_selected_keyword = sentiment_data['keyword'].isin(selected_keywords)
    filtered_data = sentiment_data[in_date_range & has_selected_keyword]

    # Word-cloud and model-performance datasets are deliberately not filtered.
    wordcloud_data = pd.read_csv('datasets/word_count_labeled.csv')
    performance_data = pd.read_csv('datasets/evaluation_results_combine.csv')

    # Row 1: overall sentiment share and per-keyword sentiment counts.
    col1, col2 = st.columns(2)

    with col1:
        st.subheader('Sentiment Distribution')
        label_counts = get_label_counts(filtered_data)
        pie_fig = px.pie(label_counts, names='label', values='count')
        pie_fig.update_traces(textinfo='percent+label')
        pie_fig.update_layout(showlegend=False)
        st.plotly_chart(pie_fig, use_container_width=True)

    with col2:
        st.subheader('Keyword Sentiment Distribution')
        keyword_sentiment_counts = get_keyword_sentiment_distribution(filtered_data)

        # Fix the label order so bars and legend always appear in the same
        # sequence regardless of the order in the data.
        keyword_sentiment_counts['label'] = pd.Categorical(
            keyword_sentiment_counts['label'],
            categories=list(_SENTIMENT_ORDER),
            ordered=True,
        )
        bar_fig = px.bar(
            keyword_sentiment_counts,
            x='keyword',
            y='count',
            color='label',
            barmode='group',
            category_orders={'label': list(_SENTIMENT_ORDER)},
        )
        st.plotly_chart(bar_fig, use_container_width=True)

    # Row 2: word clouds (unfiltered data).
    label_colors = {
        'positif': 'green',
        'negatif': 'red',
        'netral': 'gray',
    }
    wordclouds = generate_wordclouds(wordcloud_data, label_colors)
    _render_wordclouds(wordclouds)

    # NOTE(review): this literal contains only a line break; if an HTML spacer
    # element was intended here, restore it.
    st.markdown('\n', unsafe_allow_html=True)

    # Row 3: sentiment trend over time and model performance comparison.
    col5, col6 = st.columns(2)

    with col5:
        st.subheader('Sentiment Trends Over Time')
        pivot_sentiment = get_pivot_sentiment(filtered_data)
        line_fig = px.line(
            pivot_sentiment,
            x='Year',
            y=list(_SENTIMENT_ORDER),
            markers=True,
        )
        st.plotly_chart(line_fig, use_container_width=True)

    with col6:
        st.subheader('Model Performance')
        avg_metrics_df = get_avg_metrics(performance_data)
        # Reshape to long form so each metric becomes a group of bars, one bar
        # per model.
        metrics_long = avg_metrics_df.melt(
            id_vars='model',
            value_vars=['Accuracy', 'Precision', 'Recall', 'F1-score'],
        )
        bar_group_fig = px.bar(
            metrics_long,
            x='variable',
            y='value',
            color='model',
            barmode='group',
        )
        st.plotly_chart(bar_group_fig, use_container_width=True)

    # Data table of the filtered rows, newest first.
    st.subheader('Data Tables')
    columns_to_display = ['created_at', 'cleanning_text', 'keyword', 'label']
    display_df = filtered_data[columns_to_display].copy()
    # Sort while the column is still datetime; after formatting it is text.
    display_df = display_df.sort_values(by='created_at', ascending=False)
    # Shown as day-month name-year, e.g. 30-December-2024.
    display_df['created_at'] = display_df['created_at'].dt.strftime('%d-%B-%Y')
    display_df = display_df.rename(columns={
        'created_at': 'Tanggal',
        'cleanning_text': 'Tweet',
        'keyword': 'Kata Kunci',
        'label': 'Sentimen',
    })

    st.dataframe(display_df, hide_index=True, use_container_width=True)


if __name__ == "__main__":
    app()