first commit
This commit is contained in:
parent
36d3c88aae
commit
c9035a115e
Binary file not shown.
|
@ -0,0 +1,47 @@
|
||||||
|
from flask import Flask, request, render_template
|
||||||
|
import pickle
|
||||||
|
from preprocessing import preprocess_text # pastikan file preprocessing.py ada di folder yang sama
|
||||||
|
|
||||||
|
# Flask application instance; index.html is served from the default
# ./templates folder.
app = Flask(__name__)

# Load the two pre-trained sentiment classifiers (scikit-learn pipelines)
# from disk once at import time.
# NOTE(review): pickle.load is only safe because these are locally produced
# model files — never load pickles from an untrusted source.
with open('model_SVM.pkl', 'rb') as f:
    model_SVM = pickle.load(f)

with open('model_NB.pkl', 'rb') as f:
    model_NB = pickle.load(f)
|
||||||
|
|
||||||
|
def classify_text(processed_text):
    """Predict the sentiment of *processed_text* with both classifiers.

    Returns a ``(svm_label, nb_label)`` tuple: the predictions of the SVM
    pipeline and the Naive Bayes pipeline respectively.
    """
    labels = [model.predict([processed_text])[0] for model in (model_SVM, model_NB)]
    return labels[0], labels[1]
|
||||||
|
|
||||||
|
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the main page; on POST, preprocess and classify the input.

    Template context passed to index.html:
        input_text      -- raw text submitted by the user ('' on GET)
        processed_steps -- dict of preprocessing stage name -> result
        result_svm      -- SVM sentiment label ('' until a POST happens)
        result_nb       -- Naive Bayes sentiment label
        tfidf_dict      -- {feature: weight} for non-zero TF-IDF entries
    """
    input_text = ''
    processed_steps = {}
    result_svm = ''
    result_nb = ''
    tfidf_dict = {}

    if request.method == 'POST':
        # .get() avoids a KeyError/400 response if the form field is missing.
        input_text = request.form.get('text', '')
        processed_steps = preprocess_text(input_text)

        # The final (stemmed) text feeds both prediction and the TF-IDF view.
        final_text = processed_steps.get("stemming", "")
        result_svm, result_nb = classify_text(final_text)

        # Pull the TF-IDF weights from the vectorizer inside the SVM pipeline
        # so the page can show which terms contributed to the prediction.
        vectorizer = model_SVM.named_steps['vectorizer']
        tfidf_vector = vectorizer.transform([final_text])
        dense = tfidf_vector.todense().tolist()[0]
        features = vectorizer.get_feature_names_out()
        tfidf_dict = {feature: round(value, 3)
                      for feature, value in zip(features, dense) if value > 0}

    return render_template('index.html',
                           input_text=input_text,
                           processed_steps=processed_steps,
                           result_svm=result_svm,
                           result_nb=result_nb,
                           tfidf_dict=tfidf_dict)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # debug=True enables the auto-reloader and the interactive debugger —
    # development only; disable before exposing this app publicly.
    app.run(debug=True)
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,105 @@
|
||||||
|
import re
|
||||||
|
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
|
||||||
|
|
||||||
|
def clean_twitter_data(text):
    """Strip Twitter artifacts from *text*.

    Removes @mentions, #hashtags, the retweet marker "RT", URLs and all
    non-alphanumeric characters, then collapses runs of whitespace.
    Returns the cleaned string.
    """
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)   # @mentions
    text = re.sub(r'#\w+', '', text)             # #hashtags
    # Bug fix: the pipeline lower-cases the text before cleansing, so the
    # old pattern r'RT[\s]+' could never match. Match case-insensitively,
    # and add word boundaries so words merely ending in "rt" (e.g. "alert")
    # are left untouched.
    text = re.sub(r'\bRT\b\s+', '', text, flags=re.IGNORECASE)
    text = re.sub(r'https?://\S+', '', text)     # URLs
    text = re.sub(r'[^A-Za-z0-9 ]', '', text)    # punctuation / symbols
    text = re.sub(r'\s+', ' ', text).strip()     # squeeze whitespace
    return text
|
||||||
|
|
||||||
|
# Slang / abbreviation -> standard Indonesian lookup table. Built once at
# import time instead of being re-created on every normalize_text() call.
_KAMUS_NORMALISASI = {
    'bi': 'bank',
    'ri': 'indonesia',
    'akn': 'akan',
    'gmn': 'bagaimana',
    'ga': 'tidak',
    'gak': 'tidak',
    'nggak': 'tidak',
    'yg': 'yang',
    'kalo': 'kalau',
    'aja': 'saja',
    'nih': 'ini',
    'dong': '',
    'banget': 'sangat',
    'bro': 'teman',
    'sis': 'teman',
    'dgn': 'dengan',
    'bgt': 'sangat',
    'blm': 'belum',
    'jgn': 'jangan',
    'tdk': 'tidak',
}


def normalize_text(text):
    """Replace slang/abbreviated tokens with their standard Indonesian form.

    Tokens mapped to '' (e.g. 'dong') are dropped entirely; previously they
    survived as empty strings and produced double/trailing spaces in the
    joined output.
    """
    normalized = (_KAMUS_NORMALISASI.get(word, word) for word in text.split())
    return ' '.join(word for word in normalized if word)
|
||||||
|
|
||||||
|
def convert_negation(text):
    """Attach each Indonesian negation word to the token that follows it.

    "tidak suka" becomes the single token "tidak-suka", so a negated word
    is a distinct feature for the classifier. A negation word with no
    following token is dropped; consecutive negation words keep only the
    last one.
    """
    negations = ('tidak', 'bukan', 'tak', 'jangan', 'belum')
    output = []
    pending = None  # negation word waiting to be glued to the next token
    for word in text.split():
        if word in negations:
            pending = word
        elif pending is not None:
            output.append(f"{pending}-{word}")
            pending = None
        else:
            output.append(word)
    return ' '.join(output)
|
||||||
|
|
||||||
|
def remove_stopwords(text):
    """Drop common Indonesian function words from *text*.

    Membership is tested case-insensitively; surviving words keep their
    original casing and order.
    """
    stopwords = {
        'yang', 'di', 'ke', 'dari', 'dan', 'atau', 'itu', 'ini', 'dengan',
        'pada', 'untuk', 'ada', 'sangat', 'dalam', 'oleh', 'karena',
    }
    kept = (word for word in text.split() if word.lower() not in stopwords)
    return ' '.join(kept)
|
||||||
|
|
||||||
|
# Cached Sastrawi stemmer. StemmerFactory().create_stemmer() is expensive
# (it loads the root-word dictionary), so build it once and reuse it instead
# of reconstructing it on every stemming() call.
_STEMMER = None


def _get_stemmer():
    """Create the Sastrawi stemmer on first use and cache it."""
    global _STEMMER
    if _STEMMER is None:
        _STEMMER = StemmerFactory().create_stemmer()
    return _STEMMER


def stemming(tokenized_text):
    """Stem each token with Sastrawi and re-join them with spaces.

    Args:
        tokenized_text: iterable of word tokens.
    Returns:
        A single space-joined string of stemmed tokens.
    """
    stemmer = _get_stemmer()
    return " ".join(stemmer.stem(w) for w in tokenized_text)
|
||||||
|
|
||||||
|
def preprocess_text(text):
    """Run the full preprocessing pipeline on *text*.

    Returns a dict mapping each stage name to its result, in execution
    order: original_text, case_folding, cleansing, normalization,
    convert_negation, stopwords, tokenizing (a list of tokens), stemming
    (a space-joined string). The "stemming" entry is what the classifiers
    consume.
    """
    steps = {}
    steps["original_text"] = text

    # 1. Case folding: lower-case everything first.
    case_folding = text.lower()
    steps["case_folding"] = case_folding

    # 2. Cleansing: drop mentions, hashtags, RT markers, URLs, punctuation.
    cleansing = clean_twitter_data(case_folding)
    steps["cleansing"] = cleansing

    # 3. Normalization: map slang/abbreviations to standard words.
    normalization = normalize_text(cleansing)
    steps["normalization"] = normalization

    # 4. Negation conversion: glue negation words to the following token.
    negation_converted = convert_negation(normalization)
    steps["convert_negation"] = negation_converted

    # 5. Stopword removal.
    no_stopwords = remove_stopwords(negation_converted)
    steps["stopwords"] = no_stopwords

    # 6. Tokenizing: split on whitespace.
    tokenizing = no_stopwords.split()
    steps["tokenizing"] = tokenizing

    # 7. Stemming: reduce each token to its root form (Sastrawi).
    stemming_result = stemming(tokenizing)
    steps["stemming"] = stemming_result

    return steps
|
|
@ -0,0 +1,90 @@
|
||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- Sentiment-analysis front end: posts the text back to "/" and renders
         the predictions, the preprocessing trace and the TF-IDF table
         provided by the Flask index() view. Styling via the Tailwind CDN. -->
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sentimen Analysis Lengkap</title>
    <script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="bg-gray-100">
    <div class="container mx-auto p-6">
        <h1 class="text-3xl font-bold text-center text-blue-600 mb-6">
            Sentimen Analysis dengan SVM & Naive Bayes
        </h1>
        <form method="post" class="bg-white p-6 rounded-lg shadow-md">
            <div class="mb-4">
                <textarea name="text" rows="4" class="w-full p-2 border rounded-lg"
                    placeholder="Masukkan teks disini...">{{ input_text }}</textarea>
            </div>
            <button type="submit" class="w-full bg-blue-500 text-white py-2 rounded-lg hover:bg-blue-600">
                Analisis
            </button>
        </form>

        {% if input_text %}
        <!-- Prediction results from both models -->
        <div class="mt-6 flex space-x-6">
            <div class="flex-1 bg-white p-4 rounded-lg shadow-md border border-gray-300">
                <h3 class="text-lg font-semibold text-center">Hasil Naive Bayes</h3>
                <p class="text-xl text-center text-blue-500 mt-2">{{ result_nb }}</p>
            </div>
            <div class="flex-1 bg-white p-4 rounded-lg shadow-md border border-gray-300">
                <h3 class="text-lg font-semibold text-center">Hasil SVM</h3>
                <p class="text-xl text-center text-blue-500 mt-2">{{ result_svm }}</p>
            </div>
        </div>

        <!-- Preprocessing pipeline: one card per stage, in execution order -->
        <div class="mt-8">
            <h2 class="text-2xl font-bold mb-4 text-center">Detail Proses Preprocessing</h2>
            {% set step_keys = ["case_folding", "cleansing", "normalization", "convert_negation", "stopwords", "tokenizing", "stemming"] %}
            <div class="flex flex-wrap items-center justify-center gap-4">
                {% for key in step_keys %}
                {% if processed_steps[key] is defined %}
                <div class="bg-white p-4 rounded-lg shadow-md border border-gray-300">
                    <h3 class="font-semibold capitalize">{{ key | replace('_', ' ') }}</h3>
                    <!-- "tokenizing" is a list; join it, otherwise print the string -->
                    {% if processed_steps[key] is iterable and processed_steps[key] is not string %}
                    <p>{{ processed_steps[key] | join(', ') }}</p>
                    {% else %}
                    <p>{{ processed_steps[key] }}</p>
                    {% endif %}
                </div>
                {% if not loop.last %}
                <div class="text-3xl font-bold">→</div>
                {% endif %}
                {% endif %}
                {% endfor %}
            </div>
        </div>

        <!-- Non-zero TF-IDF weights of the stemmed text -->
        <div class="mt-8">
            <h2 class="text-2xl font-bold mb-4 text-center">Nilai TF-IDF</h2>
            {% if tfidf_dict %}
            <div class="overflow-x-auto">
                <table class="min-w-full bg-white border border-gray-300">
                    <thead>
                        <tr>
                            <th class="px-4 py-2 border">Fitur</th>
                            <th class="px-4 py-2 border">Nilai</th>
                        </tr>
                    </thead>
                    <tbody>
                        {% for feature, value in tfidf_dict.items() %}
                        <tr>
                            <td class="px-4 py-2 border">{{ feature }}</td>
                            <td class="px-4 py-2 border">{{ value }}</td>
                        </tr>
                        {% endfor %}
                    </tbody>
                </table>
            </div>
            {% else %}
            <p class="text-center">Tidak ada nilai TF-IDF untuk ditampilkan.</p>
            {% endif %}
        </div>
        {% endif %}
    </div>
</body>
</html>
|
Loading…
Reference in New Issue