commit
13e9d04a47
|
@ -0,0 +1 @@
|
|||
/Pengumpulan Data
|
|
@ -0,0 +1,17 @@
|
|||
import streamlit as st
|
||||
from multiapp import MultiApp
|
||||
from apps import frontend,test
|
||||
|
||||
# Configure the browser tab title and use the wide page layout.
st.set_page_config(page_title='Sentiment Analysis Dashboard', layout='wide')

app = MultiApp()

# Dashboard heading plus a short description, written before the pages are registered.
st.markdown("""
# Sentiment Analysis Dashboard
Dashboard ini berisi analisis sentimen komentar netizen di Twitter terhadap gaji dan kesehatan mental generasi Z.
""")

# Register every page as (navigation title, render callable), then let the
# router draw the navigation and run the selected page.
for page_title, page_func in (("Home", frontend.app), ("Test", test.app)):
    app.add_app(page_title, page_func)
app.run()
|
|
@ -0,0 +1,109 @@
|
|||
import streamlit as st
|
||||
import plotly.express as px
|
||||
import pandas as pd
|
||||
from plotly.subplots import make_subplots
|
||||
import matplotlib.pyplot as plt
|
||||
from wordcloud import WordCloud
|
||||
import plotly.graph_objects as go
|
||||
from backend import get_pivot_sentiment, get_label_counts, get_keyword_sentiment_distribution, get_avg_metrics, generate_wordclouds
|
||||
|
||||
def inject_css():
    """Inject the stylesheet stored next to this module into the page.

    Reads ``style.css`` from the directory containing this file and emits its
    content inside a ``<style>`` tag through ``st.markdown``.

    Raises:
        OSError: If ``style.css`` cannot be opened.
    """
    # Imported locally: this module does not import ``os`` at file level, so
    # referencing ``os.path`` without it raises NameError.
    import os

    css_path = os.path.join(os.path.dirname(__file__), "style.css")
    # Explicit encoding keeps the read independent of the platform default.
    with open(css_path, encoding="utf-8") as f:
        css = f.read()
    # unsafe_allow_html lets the raw <style> tag through instead of escaping it.
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
|
||||
|
||||
def app():
    """Render the dashboard home page.

    Loads three CSV files from ``datasets/`` and draws, top to bottom: the
    sentiment pie chart next to the per-keyword bar chart, one word cloud per
    label, the yearly trend line chart next to the model-performance bar
    chart, and finally the raw data table.
    """
    inject_css()

    # Labelled tweets; `created_at` is parsed while loading.
    # `infer_datetime_format` is intentionally not passed: pandas deprecated it
    # in 2.0, where consistent format inference is already the default.
    sentiment_data = pd.read_csv(
        'datasets/datasets-keyword-label.csv',
        parse_dates=['created_at'],
    )

    # Word frequencies per label, used for the word clouds.
    wordcloud_data = pd.read_csv('datasets/word_count_labeled.csv')

    # Per-model evaluation results.
    performance_data = pd.read_csv('datasets/evaluation_results_combine.csv')

    # Row 1: sentiment pie chart and per-keyword distribution
    col1, col2 = st.columns(2)
    with col1:
        st.subheader('Sentiment Distribution')
        label_counts = get_label_counts(sentiment_data)
        pie_fig = px.pie(label_counts, names='label', values='count')
        pie_fig.update_traces(textinfo='percent+label')
        pie_fig.update_layout(showlegend=False)
        st.plotly_chart(pie_fig, use_container_width=True)

    with col2:
        st.subheader('Keyword Sentiment Distribution')
        keyword_sentiment_counts = get_keyword_sentiment_distribution(sentiment_data)

        # Fix the label order used by the chart.
        keyword_sentiment_counts['label'] = pd.Categorical(
            keyword_sentiment_counts['label'],
            categories=['Negatif', 'Positif', 'Netral'],
            ordered=True,
        )

        bar_fig = px.bar(
            keyword_sentiment_counts,
            x='keyword',
            y='count',
            color='label',
            barmode='group',
            category_orders={'label': ['Negatif', 'Positif', 'Netral']},
        )
        st.plotly_chart(bar_fig, use_container_width=True)

    # Row 2: one word cloud per label
    label_colors = {
        'positif': 'green',
        'negatif': 'red',
        'netral': 'gray',
    }
    wordclouds = generate_wordclouds(wordcloud_data, label_colors)

    # st.columns() needs a positive column count, so skip the row when there
    # is nothing to draw.
    if wordclouds:
        cols = st.columns(len(wordclouds))
        for col, (label, wordcloud) in zip(cols, wordclouds.items()):
            with col:
                st.subheader(f'Word Cloud {label}')
                # Use an explicit Figure rather than the global pyplot state,
                # and close it afterwards so figures do not pile up across
                # Streamlit reruns.
                fig, ax = plt.subplots(figsize=(10, 6))
                ax.imshow(wordcloud, interpolation='bilinear')
                ax.axis('off')
                st.pyplot(fig)
                plt.close(fig)

    # Row 3: yearly trend line chart and model-performance bar chart
    col5, col6 = st.columns(2)

    with col5:
        # Empty spacer element styled by the injected stylesheet.
        st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
        st.subheader('Sentiment Trends Over Time')
        pivot_sentiment = get_pivot_sentiment(sentiment_data)
        line_fig = px.line(pivot_sentiment, x='Year', y=['Negatif', 'Positif', 'Netral'], markers=True)
        st.plotly_chart(line_fig, use_container_width=True)

    with col6:
        st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
        st.subheader('Model Performance')
        avg_metrics_df = get_avg_metrics(performance_data)
        # Long format: one row per (model, metric) pair for the grouped bars.
        metrics_long = avg_metrics_df.melt(
            id_vars='model',
            value_vars=['Accuracy', 'Precision', 'Recall', 'F1-score'],
        )
        bar_group_fig = px.bar(metrics_long, x='variable', y='value', color='model', barmode='group')
        st.plotly_chart(bar_group_fig, use_container_width=True)

    # Raw data table
    st.subheader('Data Tables')
    columns_to_display = ['created_at', 'full_text', 'keyword', 'label']
    st.dataframe(sentiment_data[columns_to_display], use_container_width=True)
|
||||
|
||||
# Render this page on its own when the file is executed as the main script.
if __name__ == "__main__":
    app()
|
|
@ -0,0 +1,3 @@
|
|||
/* Top spacing for the empty <div class="column-costum"> elements that the
   dashboard page emits at the start of its third-row columns. */
.column-costum {
    margin-top: 5rem; /* Adjust the value as needed */
}
|
|
@ -0,0 +1,32 @@
|
|||
import streamlit as st
|
||||
from backend import load_model_and_vectorizer, predict_sentiment
|
||||
|
||||
def app():
    """Render the page that classifies user-entered text with a chosen model.

    Shows a model selector, a text area and a button. When the button is
    pressed, the selected pickled model and the shared TF-IDF vectorizer are
    loaded and the predicted label is written to the page. Empty input, a
    failed load, or a failed prediction is reported to the user instead of
    printing ``None`` as the result.
    """
    # Pickled classifier file for every selectable model name.
    model_paths = {
        'SVM': 'models/svm_model.pkl',
        'Naive Bayes': 'models/nb_model.pkl',
        'KNN': 'models/knn_model.pkl',
    }
    # All models use the same fitted vectorizer.
    vectorizer_path = 'models/datasets-tfidf.pkl'

    # Model selector
    model_choice = st.selectbox('Pilih Model', list(model_paths))

    # Text entered by the user
    user_input = st.text_area('Masukkan teks untuk analisis sentimen')

    # Nothing more to do until the predict button is pressed.
    if not st.button('Prediksi Sentimen'):
        return

    if not user_input.strip():
        st.warning('Masukkan teks terlebih dahulu.')
        return

    # The loader signals failure by returning (None, None).
    model, vectorizer = load_model_and_vectorizer(model_paths[model_choice], vectorizer_path)
    if model is None or vectorizer is None:
        st.error('Model atau vectorizer gagal dimuat.')
        return

    # The predictor signals failure by returning None.
    prediction = predict_sentiment(model, vectorizer, user_input)
    if prediction is None:
        st.error('Prediksi sentimen gagal.')
        return

    st.write(f'#### Prediksi Sentimen: {prediction}')
|
||||
|
||||
|
||||
# Render this page on its own when the file is executed as the main script.
if __name__ == '__main__':
    app()
|
|
@ -0,0 +1,132 @@
|
|||
import pandas as pd
|
||||
from wordcloud import WordCloud, get_single_color_func
|
||||
import joblib
|
||||
|
||||
def get_label_counts(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    """Count how often each value of the ``label`` column occurs.

    Returns:
        Frame with columns ``label`` and ``count``, in the order produced by
        ``Series.value_counts`` (most frequent first).
    """
    frequencies = sentiment_data['label'].value_counts()
    # Name the index and the values explicitly while turning them into columns.
    return frequencies.rename_axis('label').reset_index(name='count')
|
||||
|
||||
def get_sentiment_distribution(y_pred: pd.DataFrame) -> pd.DataFrame:
    """Count how often each value of the ``predicted_label`` column occurs.

    Returns:
        Frame with columns ``label`` and ``count``, in the order produced by
        ``Series.value_counts`` (most frequent first).
    """
    frequencies = y_pred['predicted_label'].value_counts()
    # Name the index and the values explicitly while turning them into columns.
    return frequencies.rename_axis('label').reset_index(name='count')
|
||||
|
||||
|
||||
def get_yearly_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    """Count sentiment labels per calendar year.

    Args:
        sentiment_data: Frame with a ``created_at`` column that
            ``pd.to_datetime`` can convert and a ``label`` column. The frame
            is left unmodified.

    Returns:
        Long-format frame with columns ``year``, ``label`` (ordered
        categorical: Negatif < Positif < Netral) and ``count``, sorted by
        year and then by label. Labels outside those three categories
        become NaN.
    """
    # Derive the year on a separate Series so the caller's frame keeps its
    # original columns and dtypes.
    years = pd.to_datetime(sentiment_data['created_at'], utc=True).dt.year
    working = sentiment_data[['label']].assign(year=years)

    # Number of rows for every (year, label) pair.
    yearly_sentiment = working.groupby(['year', 'label']).size().reset_index(name='count')

    # Impose the label order that the sort below relies on.
    yearly_sentiment['label'] = pd.Categorical(
        yearly_sentiment['label'],
        categories=['Negatif', 'Positif', 'Netral'],
        ordered=True,
    )
    return yearly_sentiment.sort_values(by=['year', 'label'])
|
||||
|
||||
def get_keyword_sentiment_distribution(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    """Count rows for every (keyword, label) combination.

    Returns:
        Frame with columns ``keyword``, ``label`` and ``count`` where
        ``label`` is an ordered categorical (Negatif < Positif < Netral).
    """
    pair_sizes = sentiment_data.groupby(['keyword', 'label']).size()
    counts = pair_sizes.rename('count').reset_index()

    # Replace the plain label column with its ordered-categorical version.
    ordered_labels = pd.Categorical(
        counts['label'],
        categories=['Negatif', 'Positif', 'Netral'],
        ordered=True,
    )
    return counts.assign(label=ordered_labels)
|
||||
|
||||
def get_pivot_sentiment(
    sentiment_data: pd.DataFrame,
    expected_labels: tuple = ('Negatif', 'Positif', 'Netral'),
) -> pd.DataFrame:
    """Build a wide table of label counts per year.

    Args:
        sentiment_data: Frame with a ``created_at`` column that
            ``pd.to_datetime`` can convert and a ``label`` column. The frame
            is left unmodified.
        expected_labels: Labels that must exist as columns in the result;
            any that never occur in the data are added with a count of 0 so
            that callers plotting fixed label columns do not fail.

    Returns:
        Frame with a ``Year`` column followed by one column per label holding
        the number of rows for that year (0 where a label did not occur).
    """
    # Derive the year on a separate Series so the caller's frame keeps its
    # original columns and dtypes.
    years = pd.to_datetime(sentiment_data['created_at'], utc=True).dt.year
    working = sentiment_data[['label']].assign(Year=years)

    # Number of rows for every (Year, label) pair.
    yearly_sentiment = working.groupby(['Year', 'label']).size().reset_index(name='count')

    # One column per label; combinations that never occurred become 0.
    pivot_sentiment = yearly_sentiment.pivot(index='Year', columns='label', values='count').fillna(0)

    for label in expected_labels:
        if label not in pivot_sentiment.columns:
            pivot_sentiment[label] = 0

    # Expose 'Year' as a regular column instead of the index.
    return pivot_sentiment.reset_index()
|
||||
|
||||
def extract_avg_metrics(report: "str | dict") -> dict:
    """Pull accuracy and the macro-averaged metrics out of a classification report.

    Args:
        report: The report either as the text of a Python dict literal (as
            stored in the ``Classification Report`` CSV column) or as an
            already-parsed dict.

    Returns:
        Dict with the keys ``Accuracy``, ``Precision``, ``Recall`` and
        ``F1-score``; the last three come from the ``macro avg`` entry.

    Raises:
        ValueError, SyntaxError: If the text is not a valid Python literal.
        KeyError: If ``accuracy`` or a ``macro avg`` metric is missing.
    """
    # Imported locally so the function does not depend on a file-level import.
    import ast

    if isinstance(report, dict):
        report_dict = report
    else:
        # SECURITY: the text comes from a data file, so it is parsed with
        # literal_eval (literals only) instead of eval, which would execute
        # arbitrary expressions embedded in the CSV.
        report_dict = ast.literal_eval(report)

    macro_avg = report_dict['macro avg']
    return {
        'Accuracy': report_dict['accuracy'],
        'Precision': macro_avg['precision'],
        'Recall': macro_avg['recall'],
        'F1-score': macro_avg['f1-score'],
    }
|
||||
|
||||
def get_avg_metrics(performance_data: pd.DataFrame) -> pd.DataFrame:
    """Build a table of averaged evaluation metrics per model.

    Args:
        performance_data: Frame with a ``Classification Report`` column (one
            report per row, in the form ``extract_avg_metrics`` accepts) and
            a ``model`` column. The frame is left unmodified, so the function
            can be called repeatedly on the same data.

    Returns:
        Frame indexed like ``performance_data`` with the metric columns
        returned by ``extract_avg_metrics`` plus a ``model`` column.
    """
    # Keep the parsed reports in a local Series instead of writing them back
    # into the caller's column.
    metrics = performance_data['Classification Report'].apply(extract_avg_metrics)

    # One column per metric key, aligned with the input rows.
    avg_metrics_df = pd.DataFrame(metrics.tolist(), index=performance_data.index)
    avg_metrics_df['model'] = performance_data['model']
    return avg_metrics_df
|
||||
|
||||
def generate_wordclouds(wordcloud_data: pd.DataFrame, label_colors: dict) -> dict:
    """Build one single-colour word cloud per distinct label.

    Args:
        wordcloud_data: Frame with ``label``, ``word`` and ``count`` columns.
        label_colors: Maps every label to the colour used for its cloud.

    Returns:
        Dict from label to the generated ``WordCloud``, keyed in the order
        the labels first appear in ``wordcloud_data``.

    Raises:
        KeyError: If a label has no entry in ``label_colors``.
    """
    clouds = {}
    label_column = wordcloud_data['label']
    for current in label_column.unique():
        # word -> count mapping for the rows carrying this label.
        rows = wordcloud_data[label_column == current]
        frequencies = rows.set_index('word')['count'].to_dict()

        painter = get_single_color_func(label_colors[current])
        cloud = WordCloud(width=1000, height=500, background_color='white', color_func=painter)
        clouds[current] = cloud.generate_from_frequencies(frequencies)
    return clouds
|
||||
|
||||
# ======================================
# Model loading and sentiment prediction
# ======================================

# NOTE(review): this load runs at import time, so importing this module fails
# unless 'models/datasets-tfidf.pkl' can be loaded relative to the current
# working directory. The functions below take the vectorizer as a parameter
# and do not read this global — confirm whether other code still needs it.
vectorizer = joblib.load('models/datasets-tfidf.pkl')
|
||||
|
||||
def load_model_and_vectorizer(model_path, vectorizer_path):
    """Load a model and a text vectorizer with ``joblib.load``.

    Args:
        model_path: Path of the serialized model.
        vectorizer_path: Path of the serialized vectorizer.

    Returns:
        ``(model, vectorizer)`` on success. If loading either file raises,
        the error is printed and ``(None, None)`` is returned instead.
    """
    try:
        # The model is loaded first, then the vectorizer.
        model, text_vectorizer = (
            joblib.load(path) for path in (model_path, vectorizer_path)
        )
    except Exception as load_error:
        print(f"Error loading model or vectorizer: {load_error}")
        return None, None
    return model, text_vectorizer
|
||||
|
||||
def predict_sentiment(model, text_vectorizer, text):
    """Predict the label of a single text.

    Args:
        model: Object exposing ``predict``.
        text_vectorizer: Object exposing ``transform``.
        text: The text to classify.

    Returns:
        The first element of the model's prediction for the vectorized text.
        If vectorizing or predicting raises, the error is printed and
        ``None`` is returned.
    """
    try:
        # The vectorizer is given a one-element batch, so the single
        # prediction is taken from position 0.
        features = text_vectorizer.transform([text])
        return model.predict(features)[0]
    except Exception as predict_error:
        print(f"Error predicting sentiment: {predict_error}")
        return None
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,4 @@
|
|||
model,Confusion Matrix,Classification Report,Cross-Validation Scores,Mean CV Score,Std Dev CV Score
|
||||
SVM,[500 71 20 15 558 18 12 45 534],"{'Negatif': {'precision': 0.9487666034155597, 'recall': 0.8460236886632826, 'f1-score': 0.8944543828264758, 'support': 591.0}, 'Netral': {'precision': 0.827893175074184, 'recall': 0.9441624365482234, 'f1-score': 0.8822134387351779, 'support': 591.0}, 'Positif': {'precision': 0.9335664335664335, 'recall': 0.9035532994923858, 'f1-score': 0.9183147033533964, 'support': 591.0}, 'accuracy': 0.8979131415679639, 'macro avg': {'precision': 0.9034087373520592, 'recall': 0.8979131415679639, 'f1-score': 0.8983275083050167, 'support': 1773.0}, 'weighted avg': {'precision': 0.9034087373520592, 'recall': 0.8979131415679639, 'f1-score': 0.8983275083050166, 'support': 1773.0}}","[0.8155668358714044, 0.8217710095882684, 0.8798646362098139, 0.9419063733784546, 0.9187817258883249]",0.8755781161872532,0.0505587229522866
|
||||
Naïve Bayes,[225 187 179 4 579 8 48 79 464],"{'Negatif': {'precision': 0.8122743682310469, 'recall': 0.38071065989847713, 'f1-score': 0.5184331797235023, 'support': 591.0}, 'Netral': {'precision': 0.685207100591716, 'recall': 0.9796954314720813, 'f1-score': 0.8064066852367688, 'support': 591.0}, 'Positif': {'precision': 0.7127496159754224, 'recall': 0.7851099830795262, 'f1-score': 0.7471819645732689, 'support': 591.0}, 'accuracy': 0.7151720248166948, 'macro avg': {'precision': 0.7367436949327284, 'recall': 0.7151720248166948, 'f1-score': 0.6906739431778467, 'support': 1773.0}, 'weighted avg': {'precision': 0.7367436949327284, 'recall': 0.7151720248166948, 'f1-score': 0.6906739431778467, 'support': 1773.0}}","[0.6802030456852792, 0.6965595036661026, 0.6999435984207558, 0.7439368302312465, 0.7693175408911449]",0.7179921037789059,0.0332281420796022
|
||||
KNN,[ 97 347 147 1 580 10 3 120 468],"{'Negatif': {'precision': 0.9603960396039604, 'recall': 0.16412859560067683, 'f1-score': 0.28034682080924855, 'support': 591.0}, 'Netral': {'precision': 0.55396370582617, 'recall': 0.9813874788494078, 'f1-score': 0.7081807081807082, 'support': 591.0}, 'Positif': {'precision': 0.7488, 'recall': 0.7918781725888325, 'f1-score': 0.7697368421052632, 'support': 591.0}, 'accuracy': 0.6457980823463056, 'macro avg': {'precision': 0.7543865818100435, 'recall': 0.6457980823463058, 'f1-score': 0.5860881236984067, 'support': 1773.0}, 'weighted avg': {'precision': 0.7543865818100435, 'recall': 0.6457980823463056, 'f1-score': 0.5860881236984066, 'support': 1773.0}}","[0.5640157924421884, 0.5747320924985899, 0.6424139875916526, 0.757473209249859, 0.6728708403835307]",0.6423011844331642,0.0705596154664938
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,90 @@
|
|||
import streamlit as st
|
||||
import pandas as pd
|
||||
|
||||
def show_sidebar():
    """Render the static sidebar content.

    Emits, in order: the CSS for the navigation radio group, three expanders
    (usage guide, model statistics, technical information), a horizontal
    rule and a disclaimer warning box.
    """
    # Styling for the navigation radio group and sidebar padding.
    navigation_css = """
<style>
div[role="radiogroup"] > label > div:first-child {
padding: 12px;
border-radius: 8px;
margin: 8px 0;
transition: all 0.3s;
}
div[role="radiogroup"] > label > div:first-child:hover {
background: #f0f2f6;
}
.sidebar .sidebar-content {
padding: 4rem 1rem !important;
}
</style>
"""

    usage_guide = """
1. 🖼️ Pilih model analisis sentimen yang diinginkan
2. ✍️ Masukkan teks yang ingin dianalisis
3. ⏳ Klik tombol prediksi dan tunggu hasil analisis
4. 📊 Hasil analisis akan ditampilkan di layar
"""

    model_statistics = """
- **SVM:**
- Akurasi: 90%
- **Naive Bayes:**
- Akurasi: 72%
- **KNN:**
- Akurasi: 65%
"""

    technical_info = """
- **🧠 Model yang digunakan:**
- SVM, Naive Bayes, KNN
- **📁 Dataset:**
- Dataset komentar Twitter tentang gaji dan kesehatan mental
- **🔄 Teknik Preprocessing:**
- Tokenization, Stopword Removal, TF-IDF Vectorization
- **⚙️ Optimizer:**
- SVM: Default
- Naive Bayes: Default
- KNN: Default
"""

    disclaimer = """
**Disclaimer Medis:**
Hasil analisis ini bersifat informatif awal dan tidak menggantikan diagnosis medis profesional.
Selalu konsultasikan dengan dokter spesialis untuk pemeriksaan lengkap.
"""

    with st.sidebar:
        st.markdown(navigation_css, unsafe_allow_html=True)

        # Only the usage guide starts expanded.
        with st.expander("📌 **Panduan Penggunaan**", expanded=True):
            st.markdown(usage_guide)

        with st.expander("📊 **Statistik Model**"):
            st.markdown(model_statistics)

        with st.expander("ℹ️ **Informasi Teknis**"):
            st.markdown(technical_info)

        st.markdown("---")
        st.warning(disclaimer)
|
||||
|
||||
class MultiApp:
    """Sidebar-driven router that renders one registered page at a time."""

    def __init__(self):
        # Registered pages in insertion order; each entry is a dict with the
        # keys "title" and "function".
        self.apps = []

    def add_app(self, title, func):
        """Register a page.

        Args:
            title: Text shown for the page in the sidebar navigation.
            func: Callable invoked without arguments to render the page.
        """
        self.apps.append({
            "title": title,
            "function": func,
        })

    def run(self):
        """Draw the sidebar navigation and render the selected page."""
        st.sidebar.markdown("## 🧭 Navigasi Aplikasi")

        if not self.apps:
            # Without registered pages there is nothing to select or run;
            # say so instead of failing when indexing the selection.
            st.sidebar.info("Belum ada halaman yang terdaftar.")
            show_sidebar()
            return

        # A non-empty label is supplied for accessibility; it stays hidden
        # because label_visibility is "collapsed".
        selected = st.sidebar.radio(
            'Navigasi',
            self.apps,
            format_func=lambda entry: f"👉 {entry['title']}",
            label_visibility="collapsed",
        )

        # Static sidebar content goes below the navigation.
        show_sidebar()

        # Run the selected page's render function.
        selected['function']()
|
||||
|
||||
if __name__ == "__main__":
    # This module defines only `show_sidebar` and `MultiApp`; it has no
    # module-level `app` to call. Exit with a clear message instead of
    # failing with NameError when the file is run directly.
    raise SystemExit(
        "multiapp.py is a helper module; start the dashboard from its Streamlit entry script."
    )
|
|
@ -0,0 +1,8 @@
|
|||
streamlit
|
||||
pandas
|
||||
matplotlib
|
||||
plotly
|
||||
seaborn
|
||||
wordcloud
|
||||
multiapp
|
||||
scikit-learn
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue