amankan skripsi

laptop rusak
This commit is contained in:
Ja'far Shodiq 2025-03-28 15:16:08 +08:00
commit 13e9d04a47
12 changed files with 19584 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/Pengumpulan Data

17
dashboard/app.py Normal file
View File

@ -0,0 +1,17 @@
import streamlit as st
from multiapp import MultiApp
from apps import frontend,test
# Configure the browser tab title and use the full page width.
st.set_page_config(page_title='Sentiment Analysis Dashboard', layout='wide')
# Container that collects the pages and renders the sidebar selector.
app = MultiApp()
# Page header shown above whichever page is selected.
st.markdown("""
# Sentiment Analysis Dashboard
Dashboard ini berisi analisis sentimen komentar netizen di Twitter terhadap gaji dan kesehatan mental generasi Z.
""")
# Register the pages; the titles are the entries shown in the sidebar.
app.add_app("Home", frontend.app)
app.add_app("Test", test.app)
# Draw the navigation and execute the selected page function.
app.run()

109
dashboard/apps/frontend.py Normal file
View File

@ -0,0 +1,109 @@
import streamlit as st
import plotly.express as px
import pandas as pd
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import plotly.graph_objects as go
from backend import get_pivot_sentiment, get_label_counts, get_keyword_sentiment_distribution, get_avg_metrics, generate_wordclouds
def inject_css():
    """Inline the sibling ``style.css`` file into the current Streamlit page.

    The stylesheet is looked up next to this module, so the call does not
    depend on the process working directory.
    """
    # Local import: this module's import header does not bring `os` into
    # scope, so the original body failed with NameError on first use.
    import os

    css_path = os.path.join(os.path.dirname(__file__), "style.css")
    with open(css_path, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
def app():
    """Render the main dashboard page.

    Loads the labelled tweets, the per-label word counts and the model
    evaluation results from the ``datasets/`` directory (relative to the
    working directory) and draws, top to bottom: the sentiment pie chart and
    per-keyword bar chart, one word cloud per label, the yearly sentiment
    trend next to the model-performance chart, and the raw data table.
    """
    inject_css()

    # Datasets backing every chart on this page.
    # `infer_datetime_format` is deprecated in pandas and therefore omitted.
    sentiment_data = pd.read_csv(
        'datasets/datasets-keyword-label.csv',
        parse_dates=['created_at'],
    )
    # Word-cloud data
    wordcloud_data = pd.read_csv('datasets/word_count_labeled.csv')
    # Model performance data
    performance_data = pd.read_csv('datasets/evaluation_results_combine.csv')

    # Row 1: pie chart and per-keyword bar chart
    col1, col2 = st.columns(2)
    with col1:
        st.subheader('Sentiment Distribution')
        label_counts = get_label_counts(sentiment_data)
        pie_fig = px.pie(label_counts, names='label', values='count')
        pie_fig.update_traces(textinfo='percent+label')
        pie_fig.update_layout(showlegend=False)
        st.plotly_chart(pie_fig, use_container_width=True)

    # Sentiment distribution per keyword
    with col2:
        st.subheader('Keyword Sentiment Distribution')
        keyword_sentiment_counts = get_keyword_sentiment_distribution(sentiment_data)
        # Fix the label category order used by the chart.
        keyword_sentiment_counts['label'] = pd.Categorical(keyword_sentiment_counts['label'], categories=['Negatif', 'Positif', 'Netral'], ordered=True)
        # Grouped bar chart drawn in that label order.
        bar_fig = px.bar(keyword_sentiment_counts, x='keyword', y='count', color='label', barmode='group',
                         category_orders={'label': ['Negatif', 'Positif', 'Netral']})
        st.plotly_chart(bar_fig, use_container_width=True)

    # Row 2: word clouds
    label_colors = {
        'positif': 'green',
        'negatif': 'red',
        'netral': 'gray'
    }
    wordclouds = generate_wordclouds(wordcloud_data, label_colors)
    # st.columns() rejects a spec of 0, so skip the row when there is nothing
    # to draw.
    if wordclouds:
        cols = st.columns(len(wordclouds))
        for col, (label, wordcloud) in zip(cols, wordclouds.items()):
            with col:
                st.subheader(f'Word Cloud {label}')
                # Draw on an explicit figure instead of pyplot's global state
                # and close it afterwards so figures do not pile up across
                # reruns of the script.
                fig, ax = plt.subplots(figsize=(10, 6))
                ax.imshow(wordcloud, interpolation='bilinear')
                ax.axis('off')
                st.pyplot(fig)
                plt.close(fig)

    # Row 3: line chart and grouped bar chart
    col5, col6 = st.columns(2)
    with col5:
        # Empty div styled by style.css to add vertical spacing.
        st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
        st.subheader('Sentiment Trends Over Time')
        pivot_sentiment = get_pivot_sentiment(sentiment_data)
        line_fig = px.line(pivot_sentiment, x='Year', y=['Negatif', 'Positif', 'Netral'], markers=True)
        st.plotly_chart(line_fig, use_container_width=True)
    with col6:
        st.markdown('<div class="column-costum"></div>', unsafe_allow_html=True)
        st.subheader('Model Performance')
        avg_metrics_df = get_avg_metrics(performance_data)
        bar_group_fig = px.bar(avg_metrics_df.melt(id_vars='model', value_vars=['Accuracy', 'Precision', 'Recall', 'F1-score']),
                               x='variable', y='value', color='model', barmode='group')
        st.plotly_chart(bar_group_fig, use_container_width=True)

    # Raw data table
    st.subheader('Data Tables')
    columns_to_display = ['created_at', 'full_text', 'keyword', 'label']
    st.dataframe(sentiment_data[columns_to_display], use_container_width=True)
# Render this page directly when the file is executed as a script.
if __name__ == "__main__":
    app()

3
dashboard/apps/style.css Normal file
View File

@ -0,0 +1,3 @@
/* Spacer class for the empty <div> elements emitted by apps/frontend.py.
   The spelling must stay in sync with the class name used there. */
.column-costum {
    margin-top: 5rem; /* Adjust the value as needed */
}

32
dashboard/apps/test.py Normal file
View File

@ -0,0 +1,32 @@
import streamlit as st
from backend import load_model_and_vectorizer, predict_sentiment
def app():
    """Render the interactive prediction page.

    The user picks one of the trained models, enters a text and presses the
    button; the predicted sentiment label is then written to the page.
    Empty input, a model/vectorizer that cannot be loaded, and a failed
    prediction are reported to the user instead of being shown as ``None``.
    """
    # Pickled classifier for each selectable model; all of them share the
    # same TF-IDF vectorizer.
    model_files = {
        'SVM': 'models/svm_model.pkl',
        'Naive Bayes': 'models/nb_model.pkl',
        'KNN': 'models/knn_model.pkl',
    }
    vectorizer_path = 'models/datasets-tfidf.pkl'

    # Model selector
    model_choice = st.selectbox('Pilih Model', list(model_files))
    # Free-text input from the user
    user_input = st.text_area('Masukkan teks untuk analisis sentimen')

    # Nothing to do until the predict button is pressed.
    if not st.button('Prediksi Sentimen'):
        return

    if not user_input or not user_input.strip():
        st.warning('Masukkan teks terlebih dahulu.')
        return

    # The backend loader returns (None, None) when loading fails.
    model, vectorizer = load_model_and_vectorizer(model_files[model_choice], vectorizer_path)
    if model is None or vectorizer is None:
        st.error('Model atau vectorizer gagal dimuat.')
        return

    # The backend predictor returns None when prediction fails.
    prediction = predict_sentiment(model, vectorizer, user_input)
    if prediction is None:
        st.error('Prediksi sentimen gagal.')
        return

    st.write(f'#### Prediksi Sentimen: {prediction}')
# Render this page directly when the file is executed as a script.
if __name__ == '__main__':
    app()

132
dashboard/backend.py Normal file
View File

@ -0,0 +1,132 @@
import pandas as pd
from wordcloud import WordCloud, get_single_color_func
import joblib
def get_label_counts(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    """Count how many rows carry each sentiment label.

    Returns a two-column frame (``label``, ``count``) ordered from the most
    to the least frequent label.
    """
    frequencies = sentiment_data['label'].value_counts()
    # Name the index and the values explicitly so the output columns are
    # always ``label`` and ``count``.
    return frequencies.rename_axis('label').reset_index(name='count')
def get_sentiment_distribution(y_pred: pd.DataFrame) -> pd.DataFrame:
    """Count how often each label occurs in a frame of predictions.

    Reads the ``predicted_label`` column and returns a two-column frame
    (``label``, ``count``) ordered from the most to the least frequent label.
    """
    frequencies = y_pred['predicted_label'].value_counts()
    # Name the index and the values explicitly so the output columns are
    # always ``label`` and ``count``.
    return frequencies.rename_axis('label').reset_index(name='count')
def get_yearly_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    """Count sentiment labels per calendar year.

    The year is taken from ``created_at`` after converting it to UTC.
    The caller's frame is left untouched: neither ``created_at`` is rewritten
    nor a ``year`` column added to it.

    Returns a long-format frame with columns ``year``, ``label`` and
    ``count``, sorted by year and then by label in the order
    Negatif, Positif, Netral.
    """
    label_order = ['Negatif', 'Positif', 'Netral']

    # Work on a derived frame so the input is not mutated.
    timestamps = pd.to_datetime(sentiment_data['created_at'], utc=True)
    working = sentiment_data[['label']].assign(year=timestamps.dt.year)

    # One row per (year, label) combination that actually occurs.
    yearly_sentiment = working.groupby(['year', 'label']).size().reset_index(name='count')

    # An ordered categorical makes the sort follow label_order, not the alphabet.
    yearly_sentiment['label'] = pd.Categorical(yearly_sentiment['label'], categories=label_order, ordered=True)
    return yearly_sentiment.sort_values(by=['year', 'label'])
def get_keyword_sentiment_distribution(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    """Count sentiment labels for every keyword.

    Returns a long-format frame with columns ``keyword``, ``label`` and
    ``count``; ``label`` is an ordered categorical with the order
    Negatif, Positif, Netral.
    """
    label_order = ['Negatif', 'Positif', 'Netral']
    grouped = sentiment_data.groupby(['keyword', 'label'])
    counts = grouped.size().reset_index(name='count')
    # Impose the label order on the result.
    counts['label'] = pd.Categorical(counts['label'], categories=label_order, ordered=True)
    return counts
def get_pivot_sentiment(sentiment_data: pd.DataFrame) -> pd.DataFrame:
    """Build a wide table of label counts per calendar year.

    The year is taken from ``created_at`` after converting it to UTC.
    The caller's frame is left untouched: neither ``created_at`` is rewritten
    nor a ``Year`` column added to it.

    Returns a frame with a ``Year`` column followed by one column per label
    that occurs in the data; year/label combinations without rows are 0.
    """
    # Work on a derived frame so the input is not mutated.
    timestamps = pd.to_datetime(sentiment_data['created_at'], utc=True)
    working = sentiment_data[['label']].assign(Year=timestamps.dt.year)

    # Count rows per (Year, label) pair.
    yearly_sentiment = working.groupby(['Year', 'label']).size().reset_index(name='count')

    # Turn each label into its own column; missing combinations become 0.
    pivot_sentiment = yearly_sentiment.pivot(index='Year', columns='label', values='count').fillna(0)

    # Expose 'Year' as a regular column rather than the index.
    return pivot_sentiment.reset_index()
def extract_avg_metrics(report: str) -> dict:
    """Pull the headline metrics out of one classification report.

    Args:
        report: The report as the text of a Python dict literal (the form
            stored in the ``Classification Report`` CSV column). A dict that
            has already been parsed is accepted as well.

    Returns:
        A dict with the keys ``Accuracy``, ``Precision``, ``Recall`` and
        ``F1-score``; the last three are the report's macro averages.

    Raises:
        ValueError, SyntaxError: If the text is not a valid Python literal.
        KeyError: If the report lacks ``accuracy`` or ``macro avg``.
    """
    # Local import keeps this function self-contained.
    import ast

    if isinstance(report, dict):
        report_dict = report
    else:
        # literal_eval only accepts literals, so text coming from a data file
        # cannot execute code the way eval() would allow.
        report_dict = ast.literal_eval(report)

    macro_avg = report_dict['macro avg']
    return {
        'Accuracy': report_dict['accuracy'],
        'Precision': macro_avg['precision'],
        'Recall': macro_avg['recall'],
        'F1-score': macro_avg['f1-score']
    }
def get_avg_metrics(performance_data: pd.DataFrame) -> pd.DataFrame:
    """Summarise each model's classification report as one row of metrics.

    Reads the ``Classification Report`` and ``model`` columns. The input
    frame is not modified, so the function can be called repeatedly on the
    same data.

    Returns:
        A frame with the columns ``Accuracy``, ``Precision``, ``Recall``,
        ``F1-score`` and ``model``, sharing the input's index.
    """
    metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-score']

    # Parse into a separate Series instead of overwriting the input column.
    parsed = performance_data['Classification Report'].apply(extract_avg_metrics)

    # Expand the per-row dicts into columns; naming the columns keeps the
    # layout stable even when there are no rows.
    avg_metrics_df = pd.DataFrame(parsed.tolist(), index=performance_data.index, columns=metric_names)
    avg_metrics_df['model'] = performance_data['model']
    return avg_metrics_df
def generate_wordclouds(wordcloud_data: pd.DataFrame, label_colors: dict) -> dict:
    """Build one single-colour word cloud per label.

    ``wordcloud_data`` supplies the ``label``, ``word`` and ``count``
    columns; ``label_colors`` maps each label to the colour of its cloud.
    Returns a dict from label to the generated WordCloud, in order of first
    appearance of the label.
    """
    clouds_by_label = {}
    for current_label in wordcloud_data['label'].unique():
        rows = wordcloud_data[wordcloud_data['label'] == current_label]
        # word -> frequency mapping for this label only
        frequencies = rows.set_index('word')['count'].to_dict()
        painter = get_single_color_func(label_colors[current_label])
        cloud = WordCloud(width=1000, height=500, background_color='white', color_func=painter)
        clouds_by_label[current_label] = cloud.generate_from_frequencies(frequencies)
    return clouds_by_label
# ======================================
# Model loading and sentiment prediction
# ======================================
# NOTE(review): the TF-IDF vectorizer is loaded as a side effect of importing
# this module. The functions visible in this file take the vectorizer as an
# argument and do not read this global — confirm whether it is still needed.
vectorizer = joblib.load('models/datasets-tfidf.pkl')
def load_model_and_vectorizer(model_path, vectorizer_path):
    """Load a classifier and its text vectorizer with joblib.

    Returns a ``(model, vectorizer)`` tuple. If either file cannot be
    loaded, the error is printed and ``(None, None)`` is returned.
    """
    try:
        loaded_model = joblib.load(model_path)
        loaded_vectorizer = joblib.load(vectorizer_path)
    except Exception as e:
        # Best effort: report the problem and let the caller handle None.
        print(f"Error loading model or vectorizer: {e}")
        return None, None
    return loaded_model, loaded_vectorizer
def predict_sentiment(model, text_vectorizer, text):
    """Predict the sentiment label of a single text.

    The text is vectorized with ``text_vectorizer.transform`` and passed to
    ``model.predict``; the first prediction is returned. If any step fails,
    the error is printed and ``None`` is returned.
    """
    try:
        # Both calls work on a batch, so wrap the text in a one-item list.
        features = text_vectorizer.transform([text])
        return model.predict(features)[0]
    except Exception as e:
        print(f"Error predicting sentiment: {e}")
        return None

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,4 @@
model,Confusion Matrix,Classification Report,Cross-Validation Scores,Mean CV Score,Std Dev CV Score
SVM,[500 71 20 15 558 18 12 45 534],"{'Negatif': {'precision': 0.9487666034155597, 'recall': 0.8460236886632826, 'f1-score': 0.8944543828264758, 'support': 591.0}, 'Netral': {'precision': 0.827893175074184, 'recall': 0.9441624365482234, 'f1-score': 0.8822134387351779, 'support': 591.0}, 'Positif': {'precision': 0.9335664335664335, 'recall': 0.9035532994923858, 'f1-score': 0.9183147033533964, 'support': 591.0}, 'accuracy': 0.8979131415679639, 'macro avg': {'precision': 0.9034087373520592, 'recall': 0.8979131415679639, 'f1-score': 0.8983275083050167, 'support': 1773.0}, 'weighted avg': {'precision': 0.9034087373520592, 'recall': 0.8979131415679639, 'f1-score': 0.8983275083050166, 'support': 1773.0}}","[0.8155668358714044, 0.8217710095882684, 0.8798646362098139, 0.9419063733784546, 0.9187817258883249]",0.8755781161872532,0.0505587229522866
Naïve Bayes,[225 187 179 4 579 8 48 79 464],"{'Negatif': {'precision': 0.8122743682310469, 'recall': 0.38071065989847713, 'f1-score': 0.5184331797235023, 'support': 591.0}, 'Netral': {'precision': 0.685207100591716, 'recall': 0.9796954314720813, 'f1-score': 0.8064066852367688, 'support': 591.0}, 'Positif': {'precision': 0.7127496159754224, 'recall': 0.7851099830795262, 'f1-score': 0.7471819645732689, 'support': 591.0}, 'accuracy': 0.7151720248166948, 'macro avg': {'precision': 0.7367436949327284, 'recall': 0.7151720248166948, 'f1-score': 0.6906739431778467, 'support': 1773.0}, 'weighted avg': {'precision': 0.7367436949327284, 'recall': 0.7151720248166948, 'f1-score': 0.6906739431778467, 'support': 1773.0}}","[0.6802030456852792, 0.6965595036661026, 0.6999435984207558, 0.7439368302312465, 0.7693175408911449]",0.7179921037789059,0.0332281420796022
KNN,[ 97 347 147 1 580 10 3 120 468],"{'Negatif': {'precision': 0.9603960396039604, 'recall': 0.16412859560067683, 'f1-score': 0.28034682080924855, 'support': 591.0}, 'Netral': {'precision': 0.55396370582617, 'recall': 0.9813874788494078, 'f1-score': 0.7081807081807082, 'support': 591.0}, 'Positif': {'precision': 0.7488, 'recall': 0.7918781725888325, 'f1-score': 0.7697368421052632, 'support': 591.0}, 'accuracy': 0.6457980823463056, 'macro avg': {'precision': 0.7543865818100435, 'recall': 0.6457980823463058, 'f1-score': 0.5860881236984067, 'support': 1773.0}, 'weighted avg': {'precision': 0.7543865818100435, 'recall': 0.6457980823463056, 'f1-score': 0.5860881236984066, 'support': 1773.0}}","[0.5640157924421884, 0.5747320924985899, 0.6424139875916526, 0.757473209249859, 0.6728708403835307]",0.6423011844331642,0.0705596154664938
1 model Confusion Matrix Classification Report Cross-Validation Scores Mean CV Score Std Dev CV Score
2 SVM [500 71 20 15 558 18 12 45 534] {'Negatif': {'precision': 0.9487666034155597, 'recall': 0.8460236886632826, 'f1-score': 0.8944543828264758, 'support': 591.0}, 'Netral': {'precision': 0.827893175074184, 'recall': 0.9441624365482234, 'f1-score': 0.8822134387351779, 'support': 591.0}, 'Positif': {'precision': 0.9335664335664335, 'recall': 0.9035532994923858, 'f1-score': 0.9183147033533964, 'support': 591.0}, 'accuracy': 0.8979131415679639, 'macro avg': {'precision': 0.9034087373520592, 'recall': 0.8979131415679639, 'f1-score': 0.8983275083050167, 'support': 1773.0}, 'weighted avg': {'precision': 0.9034087373520592, 'recall': 0.8979131415679639, 'f1-score': 0.8983275083050166, 'support': 1773.0}} [0.8155668358714044, 0.8217710095882684, 0.8798646362098139, 0.9419063733784546, 0.9187817258883249] 0.8755781161872532 0.0505587229522866
3 Naïve Bayes [225 187 179 4 579 8 48 79 464] {'Negatif': {'precision': 0.8122743682310469, 'recall': 0.38071065989847713, 'f1-score': 0.5184331797235023, 'support': 591.0}, 'Netral': {'precision': 0.685207100591716, 'recall': 0.9796954314720813, 'f1-score': 0.8064066852367688, 'support': 591.0}, 'Positif': {'precision': 0.7127496159754224, 'recall': 0.7851099830795262, 'f1-score': 0.7471819645732689, 'support': 591.0}, 'accuracy': 0.7151720248166948, 'macro avg': {'precision': 0.7367436949327284, 'recall': 0.7151720248166948, 'f1-score': 0.6906739431778467, 'support': 1773.0}, 'weighted avg': {'precision': 0.7367436949327284, 'recall': 0.7151720248166948, 'f1-score': 0.6906739431778467, 'support': 1773.0}} [0.6802030456852792, 0.6965595036661026, 0.6999435984207558, 0.7439368302312465, 0.7693175408911449] 0.7179921037789059 0.0332281420796022
4 KNN [ 97 347 147 1 580 10 3 120 468] {'Negatif': {'precision': 0.9603960396039604, 'recall': 0.16412859560067683, 'f1-score': 0.28034682080924855, 'support': 591.0}, 'Netral': {'precision': 0.55396370582617, 'recall': 0.9813874788494078, 'f1-score': 0.7081807081807082, 'support': 591.0}, 'Positif': {'precision': 0.7488, 'recall': 0.7918781725888325, 'f1-score': 0.7697368421052632, 'support': 591.0}, 'accuracy': 0.6457980823463056, 'macro avg': {'precision': 0.7543865818100435, 'recall': 0.6457980823463058, 'f1-score': 0.5860881236984067, 'support': 1773.0}, 'weighted avg': {'precision': 0.7543865818100435, 'recall': 0.6457980823463056, 'f1-score': 0.5860881236984066, 'support': 1773.0}} [0.5640157924421884, 0.5747320924985899, 0.6424139875916526, 0.757473209249859, 0.6728708403835307] 0.6423011844331642 0.0705596154664938

File diff suppressed because it is too large Load Diff

90
dashboard/multiapp.py Normal file
View File

@ -0,0 +1,90 @@
import streamlit as st
import pandas as pd
def show_sidebar():
    """Render the static sidebar content.

    Emits, inside ``st.sidebar``: a CSS snippet for the navigation radio
    group, three expanders (usage guide, model statistics, technical
    information), a divider, and a disclaimer warning.
    """
    with st.sidebar:
        # Navigation styling
        st.markdown("""
<style>
div[role="radiogroup"] > label > div:first-child {
padding: 12px;
border-radius: 8px;
margin: 8px 0;
transition: all 0.3s;
}
div[role="radiogroup"] > label > div:first-child:hover {
background: #f0f2f6;
}
.sidebar .sidebar-content {
padding: 4rem 1rem !important;
}
</style>
""", unsafe_allow_html=True)
        # Usage guide, expanded by default
        with st.expander("📌 **Panduan Penggunaan**", expanded=True):
            st.markdown("""
1. 🖼 Pilih model analisis sentimen yang diinginkan
2. Masukkan teks yang ingin dianalisis
3. Klik tombol prediksi dan tunggu hasil analisis
4. 📊 Hasil analisis akan ditampilkan di layar
""")
        # Model accuracy figures are hard-coded text here, not read from the
        # evaluation results.
        with st.expander("📊 **Statistik Model**"):
            st.markdown("""
- **SVM:**
- Akurasi: 90%
- **Naive Bayes:**
- Akurasi: 72%
- **KNN:**
- Akurasi: 65%
""")
        # Technical information
        with st.expander(" **Informasi Teknis**"):
            st.markdown("""
- **🧠 Model yang digunakan:**
- SVM, Naive Bayes, KNN
- **📁 Dataset:**
- Dataset komentar Twitter tentang gaji dan kesehatan mental
- **🔄 Teknik Preprocessing:**
- Tokenization, Stopword Removal, TF-IDF Vectorization
- ** Optimizer:**
- SVM: Default
- Naive Bayes: Default
- KNN: Default
""")
        st.markdown("---")
        # NOTE(review): the wording below refers to medical diagnosis although
        # this dashboard performs sentiment analysis — confirm it is intended.
        st.warning("""
**Disclaimer Medis:**
Hasil analisis ini bersifat informatif awal dan tidak menggantikan diagnosis medis profesional.
Selalu konsultasikan dengan dokter spesialis untuk pemeriksaan lengkap.
""")
class MultiApp:
    """Combine several Streamlit page functions behind one sidebar selector.

    Register pages with :meth:`add_app`, then call :meth:`run` once per
    script execution to draw the navigation and render the chosen page.
    """

    def __init__(self):
        # Registered pages in display order; each entry is a dict with the
        # keys "title" and "function".
        self.apps = []

    def add_app(self, title, func):
        """Register a page.

        Args:
            title: Text shown for the page in the sidebar navigation.
            func: Zero-argument callable that renders the page.
        """
        self.apps.append({
            "title": title,
            "function": func
        })

    def run(self):
        """Draw the sidebar navigation and render the selected page."""
        # Navigation heading and page selector
        st.sidebar.markdown("## 🧭 Navigasi Aplikasi")
        selected = st.sidebar.radio(
            # Streamlit discourages an empty label; supply a real one and
            # hide it through label_visibility instead.
            'Navigasi Aplikasi',
            self.apps,
            format_func=lambda entry: f"👉 {entry['title']}",
            label_visibility="collapsed"
        )
        # Static sidebar content below the navigation
        show_sidebar()
        # No selection is available when no page has been registered.
        if selected is None:
            return
        # Execute the selected page function
        selected['function']()
if __name__ == "__main__":
    # This module defines no `app` callable, so the previous `app()` call
    # raised NameError. Exit with a pointer to the real entry point instead.
    raise SystemExit("multiapp.py is a helper module; start the dashboard with `streamlit run app.py`.")

View File

@ -0,0 +1,8 @@
streamlit
pandas
matplotlib
plotly
seaborn
wordcloud
multiapp
scikit-learn

390
dashboard/testing.ipynb Normal file

File diff suppressed because one or more lines are too long