amankan skripsi

laptop rusak
2025-03-28 15:16:08 +08:00 · 2025-03-28 15:16:08 +08:00 · 13e9d04a47
commit 13e9d04a47
12 changed files with 19584 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+/Pengumpulan Data
--- a/dashboard/app.py
+++ b/dashboard/app.py
@ -0,0 +1,17 @@
+import streamlit as st
+from multiapp import MultiApp
+from apps import frontend,test 
+
+# Page-wide settings for the dashboard.
+st.set_page_config(layout='wide', page_title='Sentiment Analysis Dashboard')
+
+st.markdown("""
+# Sentiment Analysis Dashboard
+Dashboard ini berisi analisis sentimen komentar netizen di Twitter terhadap gaji dan kesehatan mental generasi Z.
+""")
+
+# Register every page with the sidebar navigation, then render the selected one.
+app = MultiApp()
+for page_title, page in (("Home", frontend.app), ("Test", test.app)):
+    app.add_app(page_title, page)
+app.run()
--- a/dashboard/apps/frontend.py
+++ b/dashboard/apps/frontend.py
@ -0,0 +1,109 @@
+import streamlit as st
+import plotly.express as px
+import pandas as pd
+from plotly.subplots import make_subplots
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+import plotly.graph_objects as go
+from backend import get_pivot_sentiment, get_label_counts, get_keyword_sentiment_distribution, get_avg_metrics, generate_wordclouds
+
+def inject_css():
+    """Inject ``style.css`` (the file next to this module) into the page.
+
+    The stylesheet is read from disk and emitted inside a ``<style>`` tag
+    through ``st.markdown`` with ``unsafe_allow_html=True``.
+
+    Raises:
+        OSError: if ``style.css`` cannot be opened.
+    """
+    # Imported here because this module's import block does not provide
+    # ``os``; without it the path lookup below raises NameError.
+    import os
+
+    css_path = os.path.join(os.path.dirname(__file__), "style.css")
+    # Explicit encoding so the result does not depend on the platform default.
+    with open(css_path, encoding="utf-8") as f:
+        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
+
+def app():
+    """Render the "Home" page of the dashboard.
+
+    Loads the labelled comments, the per-label word counts and the model
+    evaluation results from CSV, then draws the sentiment pie chart, the
+    per-keyword bar chart, one word cloud per label, the yearly trend line,
+    the model-metric comparison and finally the raw data table.
+
+    The CSV paths are relative, so they are resolved against the current
+    working directory of the Streamlit process.
+    """
+    inject_css()
+
+    # ``infer_datetime_format`` is intentionally not passed: it is deprecated
+    # since pandas 2.0, where format inference is already the default.
+    sentiment_data = pd.read_csv(
+        'datasets/datasets-keyword-label.csv',
+        parse_dates=['created_at'],
+    )
+
+    # Word frequencies per label, used for the word clouds.
+    wordcloud_data = pd.read_csv('datasets/word_count_labeled.csv')
+
+    # Evaluation results, one row per model.
+    performance_data = pd.read_csv('datasets/evaluation_results_combine.csv')
+
+    # Row 1: pie chart (left) and per-keyword bar chart (right).
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader('Sentiment Distribution')
+        label_counts = get_label_counts(sentiment_data)
+        pie_fig = px.pie(label_counts, names='label', values='count')
+        pie_fig.update_traces(textinfo='percent+label')
+        pie_fig.update_layout(showlegend=False)
+        st.plotly_chart(pie_fig, use_container_width=True)
+
+    # Sentiment distribution per keyword.
+    with col2:
+        st.subheader('Keyword Sentiment Distribution')
+        keyword_sentiment_counts = get_keyword_sentiment_distribution(sentiment_data)
+
+        # Fix the label order used by the chart.
+        keyword_sentiment_counts['label'] = pd.Categorical(keyword_sentiment_counts['label'], categories=['Negatif', 'Positif', 'Netral'], ordered=True)
+
+        bar_fig = px.bar(keyword_sentiment_counts, x='keyword', y='count', color='label', barmode='group',
+                        category_orders={'label': ['Negatif', 'Positif', 'Netral']})
+        st.plotly_chart(bar_fig, use_container_width=True)
+
+    # Row 2: one word cloud per label.
+    label_colors = {
+        'positif': 'green',
+        'negatif': 'red',
+        'netral': 'gray'
+    }
+
+    wordclouds = generate_wordclouds(wordcloud_data, label_colors)
+
+    # st.columns() needs at least one column, so skip the row when there is
+    # nothing to draw.
+    if wordclouds:
+        cols = st.columns(len(wordclouds))
+        for col, (label, wordcloud) in zip(cols, wordclouds.items()):
+            with col:
+                st.subheader(f'Word Cloud {label}')
+                # Use an explicit figure and close it after rendering;
+                # otherwise every Streamlit rerun leaves another pyplot
+                # figure open.
+                fig, ax = plt.subplots(figsize=(10, 6))
+                ax.imshow(wordcloud, interpolation='bilinear')
+                ax.axis('off')
+                st.pyplot(fig)
+                plt.close(fig)
+
+    # Row 3: yearly trend line (left) and model metrics (right).
+    col5, col6 = st.columns(2)
+
+    with col5:
+        # Empty div styled by style.css to add spacing above the chart.
+        st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
+        st.subheader('Sentiment Trends Over Time')
+        pivot_sentiment = get_pivot_sentiment(sentiment_data)
+        line_fig = px.line(pivot_sentiment, x='Year', y=['Negatif', 'Positif', 'Netral'], markers=True)
+        st.plotly_chart(line_fig, use_container_width=True)
+
+    with col6:
+        st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
+        st.subheader('Model Performance')
+        avg_metrics_df = get_avg_metrics(performance_data)
+        # Long format: one row per (model, metric) pair for the grouped bars.
+        bar_group_fig = px.bar(avg_metrics_df.melt(id_vars='model', value_vars=['Accuracy', 'Precision', 'Recall', 'F1-score']),
+                            x='variable', y='value', color='model', barmode='group')
+        st.plotly_chart(bar_group_fig, use_container_width=True)
+
+    # Raw data table across the full width.
+    st.subheader('Data Tables')
+    columns_to_display = ['created_at', 'full_text', 'keyword', 'label']
+    st.dataframe(sentiment_data[columns_to_display], use_container_width=True)
+
+
+if __name__ == "__main__":
+    app()
--- a/dashboard/apps/style.css
+++ b/dashboard/apps/style.css
@ -0,0 +1,3 @@
+/* Spacer class: frontend.py inserts an empty <div class="column-costum">
+   at the top of both row-3 columns. The spelling must match the class
+   name used there. */
+.column-costum {
+    margin-top: 5rem; /* Adjust the value as needed */
+}
--- a/dashboard/apps/test.py
+++ b/dashboard/apps/test.py
@ -0,0 +1,32 @@
+import streamlit as st
+from backend import load_model_and_vectorizer, predict_sentiment
+
+def app():
+    """Render the "Test" page: classify free text with a selected model.
+
+    The user picks a model and types a text; pressing the button loads the
+    matching model and the shared TF-IDF vectorizer, then shows the
+    predicted sentiment. Empty input, a failed load and a failed prediction
+    are reported in the UI instead of being displayed as ``None``.
+    """
+    # Model file per selectbox entry; all models share one vectorizer file.
+    model_paths = {
+        'SVM': 'models/svm_model.pkl',
+        'Naive Bayes': 'models/nb_model.pkl',
+        'KNN': 'models/knn_model.pkl',
+    }
+    vectorizer_path = 'models/datasets-tfidf.pkl'
+
+    # Model selection and free-text input.
+    model_choice = st.selectbox('Pilih Model', list(model_paths))
+    user_input = st.text_area('Masukkan teks untuk analisis sentimen')
+
+    # Nothing to do until the user asks for a prediction.
+    if not st.button('Prediksi Sentimen'):
+        return
+
+    if not user_input.strip():
+        st.warning('Masukkan teks terlebih dahulu.')
+        return
+
+    # load_model_and_vectorizer() signals failure with (None, None).
+    model, vectorizer = load_model_and_vectorizer(model_paths[model_choice], vectorizer_path)
+    if model is None or vectorizer is None:
+        st.error('Model atau vectorizer gagal dimuat.')
+        return
+
+    # predict_sentiment() signals failure with None.
+    prediction = predict_sentiment(model, vectorizer, user_input)
+    if prediction is None:
+        st.error('Prediksi sentimen gagal.')
+        return
+
+    st.write(f'#### Prediksi Sentimen: {prediction}')
+
+
+if __name__ == '__main__':
+    app()
--- a/dashboard/backend.py
+++ b/dashboard/backend.py
@ -0,0 +1,132 @@
+import pandas as pd
+from wordcloud import WordCloud, get_single_color_func
+import joblib
+
+def get_label_counts(sentiment_data: pd.DataFrame) -> pd.DataFrame:
+    """Count how many rows carry each value of the ``label`` column.
+
+    Returns a frame with the columns ``label`` and ``count``, in the order
+    produced by ``Series.value_counts`` (most frequent first).
+    """
+    per_label = sentiment_data['label'].value_counts()
+    return per_label.rename_axis('label').reset_index(name='count')
+
+def get_sentiment_distribution(y_pred: pd.DataFrame) -> pd.DataFrame:
+    """Count how often each value occurs in the ``predicted_label`` column.
+
+    Returns a frame with the columns ``label`` and ``count``, in the order
+    produced by ``Series.value_counts`` (most frequent first).
+    """
+    tally = y_pred['predicted_label'].value_counts()
+    return pd.DataFrame({'label': tally.index, 'count': tally.values})
+
+
+def get_yearly_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
+    """Count sentiment labels per calendar year.
+
+    The year is taken from ``created_at`` after converting it to UTC
+    datetimes. The input frame is left untouched: the year is attached to a
+    temporary copy instead of being written back into the caller's data.
+
+    Returns:
+        A frame with the columns ``year``, ``label`` and ``count``, sorted
+        by year and then by label in the order Negatif, Positif, Netral.
+        Labels outside these three categories become NaN.
+    """
+    years = pd.to_datetime(sentiment_data['created_at'], utc=True).dt.year
+
+    yearly_sentiment = (
+        sentiment_data.assign(year=years)
+        .groupby(['year', 'label'])
+        .size()
+        .reset_index(name='count')
+    )
+
+    # An ordered categorical makes the sort follow the display order rather
+    # than alphabetical order.
+    yearly_sentiment['label'] = pd.Categorical(
+        yearly_sentiment['label'],
+        categories=['Negatif', 'Positif', 'Netral'],
+        ordered=True,
+    )
+    return yearly_sentiment.sort_values(by=['year', 'label'])
+
+def get_keyword_sentiment_distribution(sentiment_data: pd.DataFrame) -> pd.DataFrame:
+    """Count rows for every (keyword, label) combination.
+
+    Returns a frame with the columns ``keyword``, ``label`` and ``count``;
+    ``label`` is an ordered categorical with the order Negatif, Positif,
+    Netral.
+    """
+    label_dtype = pd.CategoricalDtype(['Negatif', 'Positif', 'Netral'], ordered=True)
+
+    counts = (
+        sentiment_data
+        .groupby(['keyword', 'label'])
+        .size()
+        .reset_index(name='count')
+    )
+    counts['label'] = counts['label'].astype(label_dtype)
+    return counts
+
+def get_pivot_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
+    """Build a year-by-label table of sentiment counts.
+
+    The year is taken from ``created_at`` after converting it to UTC
+    datetimes. The input frame is left untouched: the year is attached to a
+    temporary copy instead of being written back into the caller's data.
+
+    Returns:
+        A frame with a ``Year`` column plus one column per label found in
+        the data; year/label combinations that never occur are filled
+        with 0.
+    """
+    years = pd.to_datetime(sentiment_data['created_at'], utc=True).dt.year
+
+    # Long format first: one row per (Year, label) with its row count.
+    yearly_sentiment = (
+        sentiment_data.assign(Year=years)
+        .groupby(['Year', 'label'])
+        .size()
+        .reset_index(name='count')
+    )
+
+    # Wide format: every label becomes its own column.
+    pivot_sentiment = yearly_sentiment.pivot(index='Year', columns='label', values='count').fillna(0)
+
+    # Expose 'Year' as a regular column again.
+    return pivot_sentiment.reset_index()
+
+def extract_avg_metrics(report: str) -> dict:
+    """Extract accuracy and macro-averaged metrics from a classification report.
+
+    Args:
+        report: The report as the text of a Python dict literal, containing
+            an ``accuracy`` entry and a ``macro avg`` entry with
+            ``precision``, ``recall`` and ``f1-score``.
+
+    Returns:
+        A dict with the keys ``Accuracy``, ``Precision``, ``Recall`` and
+        ``F1-score``.
+
+    Raises:
+        ValueError, SyntaxError: if ``report`` is not a plain Python literal.
+        KeyError: if an expected entry is missing from the report.
+    """
+    import ast
+
+    # literal_eval only accepts literals, so text coming from the CSV can
+    # never execute code the way eval() would.
+    report_dict = ast.literal_eval(report)
+    macro_avg = report_dict['macro avg']
+    return {
+        'Accuracy': report_dict['accuracy'],
+        'Precision': macro_avg['precision'],
+        'Recall': macro_avg['recall'],
+        'F1-score': macro_avg['f1-score']
+    }
+
+def get_avg_metrics(performance_data: pd.DataFrame) -> pd.DataFrame:
+    """Build a per-model table of Accuracy, Precision, Recall and F1-score.
+
+    Each entry of the ``Classification Report`` column is parsed with
+    ``extract_avg_metrics``. The parsed values are kept in a local Series;
+    the input column is not overwritten, so the function can be called
+    again on the same frame.
+
+    Returns:
+        A frame with one column per extracted metric plus a ``model``
+        column, indexed like ``performance_data``.
+    """
+    parsed_reports = performance_data['Classification Report'].apply(extract_avg_metrics)
+
+    # Expand each metrics dict into its own set of columns.
+    avg_metrics_df = parsed_reports.apply(pd.Series)
+    avg_metrics_df['model'] = performance_data['model']
+    return avg_metrics_df
+
+def generate_wordclouds(wordcloud_data: pd.DataFrame, label_colors: dict) -> dict:
+    """Build one word cloud per label found in ``wordcloud_data``.
+
+    Args:
+        wordcloud_data: Frame with the columns ``label``, ``word`` and
+            ``count``.
+        label_colors: Maps a label to the single colour used for its cloud.
+            An exact key match wins; otherwise the lookup is retried
+            ignoring case, so keys such as 'positif' also serve a
+            'Positif' label.
+
+    Returns:
+        A dict mapping each label (in order of first appearance) to its
+        generated ``WordCloud``.
+
+    Raises:
+        KeyError: if no colour is configured for a label.
+    """
+    # Case-insensitive fallback table for labels whose casing differs from
+    # the keys of ``label_colors``.
+    colors_by_folded_label = {
+        str(key).casefold(): color for key, color in label_colors.items()
+    }
+
+    wordclouds = {}
+    for label in wordcloud_data['label'].unique():
+        if label in label_colors:
+            color = label_colors[label]
+        else:
+            try:
+                color = colors_by_folded_label[str(label).casefold()]
+            except KeyError:
+                raise KeyError(f"No colour configured for label {label!r}") from None
+
+        # word -> frequency mapping for the rows of this label.
+        words = wordcloud_data[wordcloud_data['label'] == label].set_index('word')['count'].to_dict()
+        wordclouds[label] = WordCloud(
+            width=1000,
+            height=500,
+            background_color='white',
+            color_func=get_single_color_func(color),
+        ).generate_from_frequencies(words)
+    return wordclouds
+
+# ======================================
+# Model loading and sentiment prediction
+# ======================================
+
+# NOTE: this load runs at import time, so importing this module fails when
+# 'models/datasets-tfidf.pkl' cannot be loaded from the current working
+# directory. The functions below receive their vectorizer as an argument
+# and do not read this global.
+vectorizer = joblib.load('models/datasets-tfidf.pkl')
+
+def load_model_and_vectorizer(model_path, vectorizer_path):
+    """Load a model and a text vectorizer with joblib.
+
+    Returns ``(model, vectorizer)``. If anything fails while loading, the
+    error is printed and ``(None, None)`` is returned instead.
+    """
+    try:
+        loaded = tuple(joblib.load(path) for path in (model_path, vectorizer_path))
+    except Exception as e:
+        print(f"Error loading model or vectorizer: {e}")
+        return None, None
+    return loaded
+
+def predict_sentiment(model, text_vectorizer, text):
+    """Predict the sentiment of a single text with the given model.
+
+    The text is transformed by ``text_vectorizer`` as a one-element batch
+    and the first prediction of ``model`` is returned. If anything fails,
+    the error is printed and ``None`` is returned.
+    """
+    try:
+        return model.predict(text_vectorizer.transform([text]))[0]
+    except Exception as e:
+        print(f"Error predicting sentiment: {e}")
+        return None
--- a/dashboard/datasets/datasets-keyword-label.csv
+++ b/dashboard/datasets/datasets-keyword-label.csv
--- a/dashboard/datasets/evaluation_results_combine.csv
+++ b/dashboard/datasets/evaluation_results_combine.csv
@ -0,0 +1,4 @@
+model,Confusion Matrix,Classification Report,Cross-Validation Scores,Mean CV Score,Std Dev CV Score
+SVM,[500  71  20  15 558  18  12  45 534],"{'Negatif': {'precision': 0.9487666034155597, 'recall': 0.8460236886632826, 'f1-score': 0.8944543828264758, 'support': 591.0}, 'Netral': {'precision': 0.827893175074184, 'recall': 0.9441624365482234, 'f1-score': 0.8822134387351779, 'support': 591.0}, 'Positif': {'precision': 0.9335664335664335, 'recall': 0.9035532994923858, 'f1-score': 0.9183147033533964, 'support': 591.0}, 'accuracy': 0.8979131415679639, 'macro avg': {'precision': 0.9034087373520592, 'recall': 0.8979131415679639, 'f1-score': 0.8983275083050167, 'support': 1773.0}, 'weighted avg': {'precision': 0.9034087373520592, 'recall': 0.8979131415679639, 'f1-score': 0.8983275083050166, 'support': 1773.0}}","[0.8155668358714044, 0.8217710095882684, 0.8798646362098139, 0.9419063733784546, 0.9187817258883249]",0.8755781161872532,0.0505587229522866
+Naïve Bayes,[225 187 179   4 579   8  48  79 464],"{'Negatif': {'precision': 0.8122743682310469, 'recall': 0.38071065989847713, 'f1-score': 0.5184331797235023, 'support': 591.0}, 'Netral': {'precision': 0.685207100591716, 'recall': 0.9796954314720813, 'f1-score': 0.8064066852367688, 'support': 591.0}, 'Positif': {'precision': 0.7127496159754224, 'recall': 0.7851099830795262, 'f1-score': 0.7471819645732689, 'support': 591.0}, 'accuracy': 0.7151720248166948, 'macro avg': {'precision': 0.7367436949327284, 'recall': 0.7151720248166948, 'f1-score': 0.6906739431778467, 'support': 1773.0}, 'weighted avg': {'precision': 0.7367436949327284, 'recall': 0.7151720248166948, 'f1-score': 0.6906739431778467, 'support': 1773.0}}","[0.6802030456852792, 0.6965595036661026, 0.6999435984207558, 0.7439368302312465, 0.7693175408911449]",0.7179921037789059,0.0332281420796022
+KNN,[ 97 347 147   1 580  10   3 120 468],"{'Negatif': {'precision': 0.9603960396039604, 'recall': 0.16412859560067683, 'f1-score': 0.28034682080924855, 'support': 591.0}, 'Netral': {'precision': 0.55396370582617, 'recall': 0.9813874788494078, 'f1-score': 0.7081807081807082, 'support': 591.0}, 'Positif': {'precision': 0.7488, 'recall': 0.7918781725888325, 'f1-score': 0.7697368421052632, 'support': 591.0}, 'accuracy': 0.6457980823463056, 'macro avg': {'precision': 0.7543865818100435, 'recall': 0.6457980823463058, 'f1-score': 0.5860881236984067, 'support': 1773.0}, 'weighted avg': {'precision': 0.7543865818100435, 'recall': 0.6457980823463056, 'f1-score': 0.5860881236984066, 'support': 1773.0}}","[0.5640157924421884, 0.5747320924985899, 0.6424139875916526, 0.757473209249859, 0.6728708403835307]",0.6423011844331642,0.0705596154664938
--- a/dashboard/datasets/word_count_labeled.csv
+++ b/dashboard/datasets/word_count_labeled.csv
--- a/dashboard/multiapp.py
+++ b/dashboard/multiapp.py
@ -0,0 +1,90 @@
+import streamlit as st
+import pandas as pd
+
+def show_sidebar():
+    """Render the static sidebar: styling, three info expanders and a disclaimer."""
+    # CSS for the navigation radio buttons and the sidebar padding.
+    nav_css = """
+        <style>
+            div[role="radiogroup"] > label > div:first-child {
+                padding: 12px;
+                border-radius: 8px;
+                margin: 8px 0;
+                transition: all 0.3s;
+            }
+            div[role="radiogroup"] > label > div:first-child:hover {
+                background: #f0f2f6;
+            }
+            .sidebar .sidebar-content {
+                padding: 4rem 1rem !important;
+            }
+        </style>
+        """
+
+    # (title, expanded, markdown body) for every expander, in display order.
+    sections = (
+        ("📌 **Panduan Penggunaan**", True, """
+            1. 🖼️ Pilih model analisis sentimen yang diinginkan
+            2. ✍️ Masukkan teks yang ingin dianalisis
+            3. ⏳ Klik tombol prediksi dan tunggu hasil analisis
+            4. 📊 Hasil analisis akan ditampilkan di layar
+            """),
+        ("📊 **Statistik Model**", False, """
+            - **SVM:**
+              - Akurasi: 90%
+            - **Naive Bayes:**
+              - Akurasi: 72%
+            - **KNN:**
+              - Akurasi: 65%
+            """),
+        ("ℹ️ **Informasi Teknis**", False, """
+            - **🧠 Model yang digunakan:**
+              - SVM, Naive Bayes, KNN
+            - **📁 Dataset:**
+              - Dataset komentar Twitter tentang gaji dan kesehatan mental
+            - **🔄 Teknik Preprocessing:**
+              - Tokenization, Stopword Removal, TF-IDF Vectorization
+            - **⚙️ Optimizer:**
+              - SVM: Default
+              - Naive Bayes: Default
+              - KNN: Default
+            """),
+    )
+
+    disclaimer = """
+        **Disclaimer Medis:**  
+        Hasil analisis ini bersifat informatif awal dan tidak menggantikan diagnosis medis profesional. 
+        Selalu konsultasikan dengan dokter spesialis untuk pemeriksaan lengkap.
+        """
+
+    with st.sidebar:
+        st.markdown(nav_css, unsafe_allow_html=True)
+
+        for title, expanded, body in sections:
+            with st.expander(title, expanded=expanded):
+                st.markdown(body)
+
+        st.markdown("---")
+        st.warning(disclaimer)
+
+class MultiApp:
+    """Small page switcher: registers page functions and runs the selected one."""
+
+    def __init__(self):
+        # Registered pages as {"title": str, "function": callable} dicts,
+        # kept in registration order.
+        self.apps = []
+
+    def add_app(self, title, func):
+        """Register a page.
+
+        Args:
+            title: Text shown for the page in the sidebar navigation.
+            func: Callable without arguments that renders the page.
+        """
+        self.apps.append({
+            "title": title,
+            "function": func
+        })
+
+    def run(self):
+        """Draw the sidebar navigation and render the selected page."""
+        st.sidebar.markdown("## 🧭 Navigasi Aplikasi")
+
+        if not self.apps:
+            # Without registered pages there is nothing to select or call;
+            # only the static sidebar content is shown.
+            show_sidebar()
+            return
+
+        # The label is hidden via label_visibility, but it is kept non-empty
+        # because Streamlit discourages empty widget labels.
+        selected = st.sidebar.radio(
+            'Navigasi',
+            self.apps,
+            format_func=lambda entry: f"👉 {entry['title']}",
+            label_visibility="collapsed"
+        )
+
+        # Static sidebar content below the navigation.
+        show_sidebar()
+
+        # Render the chosen page.
+        selected['function']()
+
+if __name__ == "__main__":
+    # No ``app`` callable is defined in this module, so calling it here would
+    # raise NameError. Exit with a pointer to the real entry point instead.
+    raise SystemExit(
+        "multiapp.py is a helper module; start the dashboard with `streamlit run app.py`."
+    )
--- a/dashboard/requirements.txt
+++ b/dashboard/requirements.txt
@ -0,0 +1,8 @@
+streamlit
+pandas
+matplotlib
+plotly
+seaborn
+wordcloud
+# NOTE(review): app.py imports MultiApp from the local multiapp.py module;
+# confirm that this PyPI requirement is really intended.
+multiapp
+scikit-learn
+# backend.py imports joblib directly, so it is declared explicitly.
+joblib
--- a/dashboard/testing.ipynb
+++ b/dashboard/testing.ipynb