✨ feat: add filter dashboard

2025-04-11 17:49:34 +08:00 · 2025-04-11 17:49:34 +08:00 · f05dd566f6
parent f5b0c56cdd
commit f05dd566f6
8 changed files with 5599 additions and 8419 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
 /Pengumpulan Data/data-analisis/datasets-balance.csv
 /Pengumpulan Data/data-analisis/datasets-balanced.csv
-/Pengumpulan Data/data-analisis/datasets-tfidf.csv
+/Pengumpulan Data/data-analisis/datasets-tfidf.csv
+/Pengumpulan Data/
+/Pengumpulan Data
--- a/dashboard/app.py
+++ b/dashboard/app.py
@ -13,5 +13,5 @@ Dashboard ini berisi analisis sentimen komentar netizen di Twitter terhadap gaji
 """)

 app.add_app("Home", frontend.app)
-app.add_app("Test", test.app)
+# app.add_app("Test", test.app)
 app.run()
--- a/dashboard/apps/frontend.py
+++ b/dashboard/apps/frontend.py
@ -5,7 +5,9 @@ from plotly.subplots import make_subplots
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 import plotly.graph_objects as go
+import os
 from backend import get_pivot_sentiment, get_label_counts, get_keyword_sentiment_distribution, get_avg_metrics, generate_wordclouds
+from datetime import timedelta

 def inject_css():
    css_path = os.path.join(os.path.dirname(__file__), "style.css")
@ -15,24 +17,83 @@ def inject_css():
 def app():
    inject_css()

-    # Placeholder Data (Will be replaced with actual backend data)
+    # Load semua data dengan format tanggal yang benar
    sentiment_data = pd.read_csv(
        'datasets/datasets-keyword-label.csv',
        parse_dates=['created_at'],
-        infer_datetime_format=True
+        date_format="%d %b %Y"  # Format yang sesuai dengan "30 Dec 2024"
    )
    
-    # data wordcloud
+    # Tidak perlu normalisasi timezone karena sudah menggunakan format tanpa jam
+    # sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at']).dt.tz_localize(None)
+    
+    # Tambahkan filter di sidebar
+    with st.sidebar:
+        st.markdown("## 🔍 Filter Dashboard")
+        
+        # Mendapatkan tanggal minimum dan maksimum dari dataset
+        min_date = sentiment_data['created_at'].min().date()
+        max_date = sentiment_data['created_at'].max().date()
+        
+        # Menampilkan opsi filter waktu
+        st.markdown("### ⏱️ Filter Waktu")
+        
+        # Filter waktu menggunakan slider tanggal
+        date_range = st.slider(
+            "Pilih Rentang Waktu:",
+            min_value=min_date,
+            max_value=max_date,
+            value=(min_date, max_date),
+            format="DD-MMM-YYYY"
+        )
+        
+        # Convert date range to datetime for filtering
+        start_datetime = pd.Timestamp(date_range[0])
+        end_datetime = pd.Timestamp(date_range[1]) + timedelta(days=1) - timedelta(seconds=1)  # end of day
+        
+        # Menampilkan periode waktu yang dipilih
+        st.info(f"Menampilkan data dari: {start_datetime.strftime('%d %B %Y')} hingga {end_datetime.strftime('%d %B %Y')}")
+        
+        # Filter keyword
+        st.markdown("### 🏷️ Filter Keyword")
+        keywords = sorted(sentiment_data['keyword'].unique())
+        selected_keywords = st.multiselect(
+            "Pilih Keyword",
+            options=keywords,
+            default=keywords
+        )
+        
+        # # Filter sentimen
+        # st.markdown("### 😊 Filter Sentimen")
+        # sentiments = ['Positif', 'Negatif', 'Netral']
+        # selected_sentiments = st.multiselect(
+        #     "Pilih Sentimen",
+        #     options=sentiments,
+        #     default=sentiments
+        # )
+
+    # Terapkan filter
+    filtered_data = sentiment_data[
+        (sentiment_data['created_at'] >= start_datetime) & 
+        (sentiment_data['created_at'] <= end_datetime) &
+        (sentiment_data['keyword'].isin(selected_keywords)) #&
+        # (sentiment_data['label'].isin(selected_sentiments))
+    ]
+    
+    # # Tampilkan jumlah data yang ditampilkan
+    # st.write(f"Menampilkan {len(filtered_data)} dari {len(sentiment_data)} data")
+    
+    # Data wordcloud - tetap menggunakan semua data
    wordcloud_data = pd.read_csv('datasets/word_count_labeled.csv')
    
-    # performance data
+    # Performance data - tetap menggunakan semua data
    performance_data = pd.read_csv('datasets/evaluation_results_combine.csv')
    
    # Row 1: Pie Chart 
    col1, col2 = st.columns(2)
    with col1:
        st.subheader('Sentiment Distribution')
-        label_counts = get_label_counts(sentiment_data)  # Panggil fungsi backend
+        label_counts = get_label_counts(filtered_data)
        pie_fig = px.pie(label_counts, names='label', values='count')
        pie_fig.update_traces(textinfo='percent+label')
        pie_fig.update_layout(showlegend=False)
@ -41,17 +102,19 @@ def app():
    # Distribusi Sentimen berdasarkan Model
    with col2:        
        st.subheader('Keyword Sentiment Distribution')
-        keyword_sentiment_counts = get_keyword_sentiment_distribution(sentiment_data)  # Panggil fungsi backend
+        keyword_sentiment_counts = get_keyword_sentiment_distribution(filtered_data)
        
        # Mengatur urutan kategori label di frontend
-        keyword_sentiment_counts['label'] = pd.Categorical(keyword_sentiment_counts['label'], categories=['Negatif', 'Positif', 'Netral'], ordered=True)
+        keyword_sentiment_counts['label'] = pd.Categorical(keyword_sentiment_counts['label'], 
+                                                            categories=['Negatif', 'Positif', 'Netral'], 
+                                                            ordered=True)
        
        # Membuat bar chart dengan urutan label yang diatur
        bar_fig = px.bar(keyword_sentiment_counts, x='keyword', y='count', color='label', barmode='group',
                        category_orders={'label': ['Negatif', 'Positif', 'Netral']})
        st.plotly_chart(bar_fig, use_container_width=True)

-    # Row 2: Wordclouds
+    # Row 2: Wordclouds (tidak difilter)
    label_colors = {
        'positif': 'green',
        'negatif': 'red',
@ -68,7 +131,7 @@ def app():
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            st.pyplot(plt)
-    
+            
    # cols = st.columns(len(wordclouds))
    # for col, (label, wordcloud) in zip(cols, wordclouds.items()):
    #     with col:
@ -81,29 +144,44 @@ def app():
    #             margin=dict(l=0, r=0, t=0, b=0)
    #             )
    #         st.plotly_chart(fig, use_container_height=False)
+            
+            st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)

    # Row 3: Line Chart & Grouped Bar Chart
    col5, col6 = st.columns(2)
    
    with col5:
-        st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
        st.subheader('Sentiment Trends Over Time')
-        pivot_sentiment = get_pivot_sentiment(sentiment_data)
+        pivot_sentiment = get_pivot_sentiment(filtered_data)
        line_fig = px.line(pivot_sentiment, x='Year', y=['Negatif', 'Positif', 'Netral'], markers=True)
        st.plotly_chart(line_fig, use_container_width=True)
    
    with col6:
-        st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
        st.subheader('Model Performance')
-        avg_metrics_df = get_avg_metrics(performance_data)  # Panggil fungsi backend
+        avg_metrics_df = get_avg_metrics(performance_data)
        bar_group_fig = px.bar(avg_metrics_df.melt(id_vars='model', value_vars=['Accuracy', 'Precision', 'Recall', 'F1-score']),
                            x='variable', y='value', color='model', barmode='group')
        st.plotly_chart(bar_group_fig, use_container_width=True)

-    # with col8:
+    # Data table
    st.subheader('Data Tables')
-    columns_to_display = ['created_at', 'full_text', 'keyword', 'label']
-    st.dataframe(sentiment_data[columns_to_display], use_container_width=True)
+    columns_to_display = ['created_at', 'cleanning_text', 'keyword', 'label']
+    display_df = filtered_data[columns_to_display].copy()
+    
+    # Urutkan data berdasarkan tanggal (created_at) dari yang terbaru
+    display_df = display_df.sort_values(by='created_at', ascending=False)
+    
+    # Format kolom created_at menjadi format DD-BULAN-YYYY (contoh: 30-December-2024)
+    display_df['created_at'] = display_df['created_at'].dt.strftime('%d-%B-%Y')
+    
+    display_df = display_df.rename(columns={
+    'created_at': 'Tanggal',
+    'cleanning_text': 'Tweet',
+    'keyword': 'Kata Kunci',
+    'label': 'Sentimen'
+    })
+
+    st.dataframe(display_df, hide_index=True, use_container_width=True)

 if __name__ == "__main__":
    app()
--- a/dashboard/apps/test.py
+++ b/dashboard/apps/test.py
@ -10,22 +10,22 @@ def app():

    # Tombol untuk melakukan prediksi
    if st.button('Prediksi Sentimen'):
-        if model_choice == 'SVM':
-            model_path = 'models/svm_model.pkl'
-            vectorizer_path = 'models/datasets-tfidf.pkl'
-        elif model_choice == 'Naive Bayes':
-            model_path = 'models/nb_model.pkl'
-            vectorizer_path = 'models/datasets-tfidf.pkl'
-        elif model_choice == 'KNN':
-            model_path = 'models/knn_model.pkl'
-            vectorizer_path = 'models/datasets-tfidf.pkl'
+        # if model_choice == 'SVM':
+        #     model_path = 'models/svm_model.pkl'
+        #     vectorizer_path = 'models/datasets-tfidf.pkl'
+        # elif model_choice == 'Naive Bayes':
+        #     model_path = 'models/nb_model.pkl'
+        #     vectorizer_path = 'models/datasets-tfidf.pkl'
+        # elif model_choice == 'KNN':
+        #     model_path = 'models/knn_model.pkl'
+        #     vectorizer_path = 'models/datasets-tfidf.pkl'
            
        # Load model dan vectorizer
-        model, vectorizer = load_model_and_vectorizer(model_path, vectorizer_path)
+        # model, vectorizer = load_model_and_vectorizer(model_path, vectorizer_path)
        
        # Prediksi sentimen
-        prediction = predict_sentiment(model, vectorizer, user_input)
-        st.write(f'#### Prediksi Sentimen: {prediction}')
+        # prediction = predict_sentiment(model, vectorizer, user_input)
+        st.write(f'#### Prediksi Sentimen:')

    
 if __name__ == '__main__':
--- a/dashboard/backend.py
+++ b/dashboard/backend.py
@ -24,9 +24,12 @@ def get_yearly_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    Menghasilkan DataFrame dengan jumlah label sentimen per tahun.
    """
    # Pastikan kolom 'created_at' bertipe datetime
-    sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'], utc=True)
+    # Tidak perlu parameter utc karena kita menggunakan tanggal tanpa timezone
+    sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'])
+    
    # Ekstraksi tahun dari kolom 'created_at'
    sentiment_data['year'] = sentiment_data['created_at'].dt.year
+    
    # Group by tahun dan label, lalu hitung jumlahnya
    yearly_sentiment = sentiment_data.groupby(['year', 'label']).size().reset_index(name='count')
    # Mengatur urutan kategori label
@ -52,7 +55,8 @@ def get_pivot_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    Menghasilkan DataFrame pivot_sentiment dengan jumlah label positif, netral, dan negatif per tahun.
    """
    # Pastikan kolom 'created_at' bertipe datetime
-    sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'], utc=True)
+    # Tidak perlu parameter utc karena kita menggunakan tanggal tanpa timezone
+    sentiment_data['created_at'] = pd.to_datetime(sentiment_data['created_at'])

    # Ekstraksi tahun dari kolom 'created_at'
    sentiment_data['Year'] = sentiment_data['created_at'].dt.year
--- a/dashboard/datasets/datasets-keyword-label.csv
+++ b/dashboard/datasets/datasets-keyword-label.csv
--- a/dashboard/multiapp.py
+++ b/dashboard/multiapp.py
@ -81,7 +81,7 @@ class MultiApp:
        )
        
        # Render sidebar content
-        show_sidebar()
+        # show_sidebar()
        
        # Eksekusi app function
        app['function']()
--- a/dashboard/testing.ipynb
+++ b/dashboard/testing.ipynb
@ -52,7 +52,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@ -60,23 +60,23 @@
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 8369 entries, 0 to 8368\n",
+      "RangeIndex: 5387 entries, 0 to 5386\n",
      "Data columns (total 11 columns):\n",
      " #   Column              Non-Null Count  Dtype              \n",
      "---  ------              --------------  -----              \n",
-      " 0   created_at          8369 non-null   datetime64[ns, UTC]\n",
-      " 1   full_text           8369 non-null   object             \n",
-      " 2   keyword             8369 non-null   object             \n",
-      " 3   cleanning_text      8369 non-null   object             \n",
-      " 4   case_folding        8369 non-null   object             \n",
-      " 5   convert_slang_word  8369 non-null   object             \n",
-      " 6   filtering           8369 non-null   object             \n",
-      " 7   tokenizing          8369 non-null   object             \n",
-      " 8   stemming            8369 non-null   object             \n",
-      " 9   score               8369 non-null   int64              \n",
-      " 10  label               8369 non-null   object             \n",
+      " 0   created_at          5387 non-null   datetime64[ns, UTC]\n",
+      " 1   full_text           5387 non-null   object             \n",
+      " 2   keyword             5387 non-null   object             \n",
+      " 3   cleanning_text      5387 non-null   object             \n",
+      " 4   case_folding        5387 non-null   object             \n",
+      " 5   convert_slang_word  5387 non-null   object             \n",
+      " 6   filtering           5387 non-null   object             \n",
+      " 7   tokenizing          5387 non-null   object             \n",
+      " 8   stemming            5387 non-null   object             \n",
+      " 9   score               5387 non-null   int64              \n",
+      " 10  label               5387 non-null   object             \n",
      "dtypes: datetime64[ns, UTC](1), int64(1), object(9)\n",
-      "memory usage: 719.3+ KB\n"
+      "memory usage: 463.1+ KB\n"
     ]
    }
   ],
@ -85,7 +85,7 @@
    "import datetime\n",
    "import pandas as pd\n",
    "\n",
-    "datasets = pd.read_csv('datasets/datasets-keyword-label.csv') \n",
+    "datasets = pd.read_csv('datasets/datasets-keyword-labels.csv') \n",
    "datasets['created_at'] = pd.to_datetime(datasets['created_at'], format=\"%a %b %d %H:%M:%S %z %Y\")\n",
    "\n",
    "datasets.info()\n"
@ -93,11 +93,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
-    "datasets.to_csv('datasets/datasets-keyword-label.csv', index=False)"
+    "datasets.to_csv('datasets/datasets-keyword-labels.csv', index=False)"
   ]
  },
  {
@ -364,6 +364,84 @@
    "# Cetak hasil prediksi\n",
    "print(\"Sentimen Prediksi:\", predicted_class[0])"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    created_at\n",
+      "0  30 Dec 2024\n",
+      "1  30 Dec 2024\n",
+      "2  29 Dec 2024\n",
+      "3  28 Dec 2024\n",
+      "4  28 Dec 2024\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Baca file CSV\n",
+    "df = pd.read_csv('datasets/datasets-keyword-labels.csv')\n",
+    "\n",
+    "# Pastikan kolom created_at dikonversi ke format datetime\n",
+    "df['created_at'] = pd.to_datetime(df['created_at'])\n",
+    "\n",
+    "# Format ulang tanggal untuk hanya menampilkan tanggal, bulan, dan tahun\n",
+    "df['created_at'] = df['created_at'].dt.strftime('%d %b %Y')\n",
+    "\n",
+    "# Simpan kembali ke CSV\n",
+    "df.to_csv('datasets/datasets-keyword-label.csv', index=False)\n",
+    "\n",
+    "# Tampilkan beberapa baris untuk memverifikasi\n",
+    "print(df[['created_at']].head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 5387 entries, 0 to 5386\n",
+      "Data columns (total 11 columns):\n",
+      " #   Column              Non-Null Count  Dtype \n",
+      "---  ------              --------------  ----- \n",
+      " 0   created_at          5387 non-null   object\n",
+      " 1   full_text           5387 non-null   object\n",
+      " 2   keyword             5387 non-null   object\n",
+      " 3   cleanning_text      5387 non-null   object\n",
+      " 4   case_folding        5387 non-null   object\n",
+      " 5   convert_slang_word  5387 non-null   object\n",
+      " 6   filtering           5387 non-null   object\n",
+      " 7   tokenizing          5387 non-null   object\n",
+      " 8   stemming            5387 non-null   object\n",
+      " 9   score               5387 non-null   int64 \n",
+      " 10  label               5387 non-null   object\n",
+      "dtypes: int64(1), object(10)\n",
+      "memory usage: 463.1+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {