From 761583f1a3e8ebfd094961ccadc4476533e709cb Mon Sep 17 00:00:00 2001
From: NaufalHisyam18
Date: Mon, 24 Jun 2024 09:44:25 +0700
Subject: [PATCH] Create app.py

---
 Analisis-Sentimen-Twitter-Flask-main/app.py | 345 ++++++++++++++++++++
 1 file changed, 345 insertions(+)
 create mode 100644 Analisis-Sentimen-Twitter-Flask-main/app.py

diff --git a/Analisis-Sentimen-Twitter-Flask-main/app.py b/Analisis-Sentimen-Twitter-Flask-main/app.py
new file mode 100644
index 0000000..09f9e2a
--- /dev/null
+++ b/Analisis-Sentimen-Twitter-Flask-main/app.py
@@ -0,0 +1,345 @@
+from flask import Flask, render_template, request, flash
+import re, string, csv, pickle, os
+import pandas as pd
+import numpy as np
+from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
+from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory, StopWordRemover, ArrayDictionary
+import nltk
+from googletrans import Translator
+from textblob import TextBlob
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from PIL import Image
+import urllib.request
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+
+nltk.download('punkt')
+nltk.download('stopwords')
+
+
+# Twitter preprocessing
+hasil_preprocessing = []
+
+def preprocessing_twitter():
+    hasil_preprocessing.clear()
+
+    # Build the stopword remover and stemmer once, outside the row loop.
+    # Stopword list from the Sastrawi library
+    stop_factory = StopWordRemoverFactory().get_stop_words()
+    # Custom stopwords (slang, abbreviations, filler words)
+    more_stop_word = ['&', 'ad', 'ada', 'ae', 'ah', 'aja', 'ajar', 'amp', 'apa', 'aya', 'bab', 'bajo', 'bar',
+                      'bbrp', 'beda', 'begini', 'bgmn', 'bgt', 'bhw', 'biar', 'bikin', 'bilang', 'bkh', 'bkn',
+                      'bln', 'bnyk', 'brt', 'buah', 'cc', 'ckp', 'com', 'cuy', 'd', 'dab', 'dah', 'dan', 'dg',
+                      'dgn', 'di', 'dih', 'dlm', 'dm', 'dpo', 'dr', 'dri', 'duga', 'duh', 'enth', 'er', 'et',
+                      'ga', 'gak', 'gal', 'gin', 'gitu', 'gk', 'gmn', 'gs', 'gt', 'gue', 'gw', 'hah', 'hallo',
+                      'halo', 'hehe', 'hello', 'hha', 'hrs', 'https', 'ia', 'iii', 'in', 'ini', 'iw', 'jadi',
+                      'jangn', 'jd', 'jg', 'jgn', 'jls', 'kak', 'kali', 'kalo', 'kan', 'kch', 'ke', 'kena',
+                      'ket', 'kl', 'kll', 'klo', 'km', 'kmrn', 'knp', 'kok', 'kpd', 'krn', 'kui', 'lagi',
+                      'lah', 'lahh', 'lalu', 'lbh', 'lewat', 'loh', 'lu', 'mah', 'mau', 'min', 'mlkukan',
+                      'mls', 'mnw', 'mrk', 'n', 'nan', 'ni', 'nih', 'no', 'nti', 'ntt', 'ny', 'nya', 'nyg',
+                      'oleh', 'ono', 'ooooo', 'op', 'org', 'pen', 'pk', 'pun', 'qq', 'rd', 'rt', 'sama',
+                      'sbg', 'sdh', 'sdrhn', 'segera', 'sgt', 'si', 'sih', 'sj', 'so', 'sy', 't', 'tak',
+                      'tara', 'tau', 'td', 'tdk', 'thd', 'thn', 'tindkn', 'tkt', 'tp', 'tsb', 'ttg', 'ttp',
+                      'tuh', 'tv', 'u', 'upa', 'utk', 'uyu', 'viral', 'vm', 'wae', 'wah', 'wb', 'wes', 'wk',
+                      'wkwk', 'wkwkwk', 'wn', 'woiii', 'xxxx', 'ya', 'yaa', 'yah', 'ybs', 'ye', 'yg', 'ykm']
+    # Combine the library stopwords with the custom ones
+    dictionary = ArrayDictionary(stop_factory + more_stop_word)
+    stopword_remover = StopWordRemover(dictionary)
+    stemmer = StemmerFactory().create_stemmer()
+
+    # Write the preprocessing result to CSV
+    with open('static/files/Data Preprocessing.csv', 'w', newline='', encoding='utf-8') as file:
+        writer = csv.writer(file)
+
+        with open("static/files/Data Scraping.csv", "r", encoding='utf-8') as csvfile:
+            readCSV = csv.reader(csvfile, delimiter=',')
+            for row in readCSV:
+                # Cleansing: remove mentions, links, and hashtags
+                clean = ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", row[2]).split())
+                # Remove numbers
+                clean = re.sub(r"\d+", "", clean)
+                # Remove single characters
+                clean = re.sub(r"\b[a-zA-Z]\b", "", clean)
+                # Collapse multiple whitespace into a single space
+                clean = re.sub(r'\s+', ' ', clean)
+                # Remove punctuation (emoji remnants)
+                clean = clean.translate(clean.maketrans("", "", string.punctuation))
+
+                # Case folding
+                casefold = clean.casefold()
+
+                # Normalization
+                normalized_text = normalize_text(casefold)
+
+                # Tokenizing
+                tokenizing = nltk.tokenize.word_tokenize(normalized_text)
+
+                # Stopword removal, then re-tokenize the remaining text
+                stop_wr = nltk.tokenize.word_tokenize(stopword_remover.remove(normalized_text))
+
+                # Stemming
+                kalimat = ' '.join(stop_wr)
+                stemming = stemmer.stem(kalimat)
+
+                tweets = [row[0], row[1], row[2], clean, casefold, normalized_text, tokenizing, stop_wr, stemming]
+                hasil_preprocessing.append(tweets)
+                writer.writerow(tweets)
+
+    flash('Preprocessing Berhasil', 'preprocessing_data')
+
+def normalize_text(text):
+    # Normalization example: expand common abbreviations into full words.
+    # \b word boundaries keep the patterns from rewriting substrings of longer words.
+    text = re.sub(r'\bdn\b', 'dan', text)
+    text = re.sub(r'\bkm\b', 'kamu', text)
+    text = re.sub(r'\bpake\b', 'pakai', text)
+    text = re.sub(r'\bmksh\b', 'terima kasih', text)
+    text = re.sub(r'\bkrg\b', 'kurang', text)
+    text = re.sub(r'\bsm\b', 'sama', text)
+    text = re.sub(r'\bbljr\b', 'belajar', text)
+    text = re.sub(r'\bblajar\b', 'belajar', text)
+    text = re.sub(r'\bmeept\b', 'mepet', text)
+    text = re.sub(r'\bdr\b', 'dari', text)
+    text = re.sub(r'\bjg\b', 'juga', text)
+    # Add further normalizations as needed
+    return text
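+# A quick illustrative check (not part of the pipeline): with the word-boundary
+# patterns above, normalize_text("km dtg dr mana") returns "kamu dtg dari mana",
+# while a word such as "drama" that merely contains "dr" is left untouched.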
+
+# Labeling
+hasil_labeling = []
+
+def labeling_twitter():
+    translator = Translator()
+
+    # Write the labeling result to CSV
+    with open('static/files/Data Labeling.csv', 'w', newline='', encoding='utf-8') as file:
+        writer = csv.writer(file)
+
+        with open("static/files/Data Preprocessing.csv", "r", encoding='utf-8') as csvfile:
+            readCSV = csv.reader(csvfile, delimiter=',')
+            hasil_labeling.clear()
+            for row in readCSV:
+                tweet = {}
+                try:
+                    # Translate the stemmed text to English so TextBlob can score it
+                    value = translator.translate(row[8], dest='en')
+                except Exception:
+                    print("Terjadi kesalahan", flush=True)
+                    continue  # skip rows that fail to translate instead of crashing
+
+                terjemahan = value.text
+                data_label = TextBlob(terjemahan)
+
+                if data_label.sentiment.polarity > 0.0:
+                    tweet['sentiment'] = "Positif"
+                else:
+                    tweet['sentiment'] = "Negatif"
+
+                labeling = tweet['sentiment']
+                tweets = [row[1], row[8], labeling]
+                hasil_labeling.append(tweets)
+                writer.writerow(tweets)
+
+    flash('Labeling Berhasil', 'labeling_data')
+
+# Classification
+
+# DataFrames rendered on the classification page
+df = None
+df2 = None
+
+# Accuracy stays 0 until a model has been trained
+akurasi = 0
+
+def proses_klasifikasi():
+    global df
+    global df2
+    global akurasi
+    tweet = []
+    y = []
+
+    with open("static/files/Data Labeling.csv", encoding='utf-8') as csvfile:
+        readCSV = csv.reader(csvfile, delimiter=',')
+        for row in readCSV:
+            tweet.append(row[1])
+            y.append(row[2])
+
+    # Note: the vectorizer is fitted on the full corpus before the split, so
+    # test vocabulary leaks into training; fitting on the training split only
+    # would avoid this.
+    vectorizer = TfidfVectorizer()
+    vectorizer.fit(tweet)
+    x = vectorizer.transform(tweet)
+
+    # Split into 80% training and 20% testing data
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42)
+
+    # Naive Bayes
+    clf = MultinomialNB()
+    clf.fit(x_train, y_train)
+
+    predict = clf.predict(x_test)
+    report = classification_report(y_test, predict, output_dict=True)
+
+    # Save the classification report to CSV
+    clsf_report = pd.DataFrame(report).transpose()
+    clsf_report.to_csv('static/files/Data Klasifikasi.csv', index=True)
+
+    pickle.dump(vectorizer, open('static/files/vec.pkl', 'wb'))
+    pickle.dump(x, open('static/files/tfidf.pkl', 'wb'))
+    pickle.dump(clf, open('static/files/model.pkl', 'wb'))
+
+    # Confusion matrix: sklearn puts true labels on the rows and
+    # predictions on the columns
+    unique_label = np.unique([y_test, predict])
+    cmtx = pd.DataFrame(
+        confusion_matrix(y_test, predict, labels=unique_label),
+        index=['true:{:}'.format(x) for x in unique_label],
+        columns=['pred:{:}'.format(x) for x in unique_label]
+    )
+    cmtx.to_csv('static/files/Data Confusion Matrix.csv', index=True)
+
+    df = pd.read_csv('static/files/Data Confusion Matrix.csv', sep=",")
+    df.rename(columns={'Unnamed: 0': ''}, inplace=True)
+
+    df2 = pd.read_csv('static/files/Data Klasifikasi.csv', sep=",")
+    df2.rename(columns={'Unnamed: 0': ''}, inplace=True)
+
+    akurasi = round(accuracy_score(y_test, predict) * 100, 2)
+
+    # Join the tweets with spaces so words do not run together in the word cloud
+    kalimat = ' '.join(tweet)
+
+    urllib.request.urlretrieve(
+        "https://firebasestorage.googleapis.com/v0/b/sentimen-97d49.appspot.com/o/Circle-icon.png?alt=media&token=b9647ca7-dfdb-46cd-80a9-cfcaa45a1ee4", 'circle.png')
+    mask = np.array(Image.open("circle.png"))
+    wordcloud = WordCloud(width=1600, height=800,
+                          max_font_size=200, background_color='white', mask=mask)
+    wordcloud.generate(kalimat)
+    plt.figure(figsize=(12, 10))
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis("off")
+    plt.savefig('static/files/wordcloud.png')
+
+    flash('Klasifikasi Berhasil', 'klasifikasi_data')
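+
+# A minimal reuse sketch (hypothetical helper, not wired to any route): the
+# file paths match the pickle.dump calls above; everything else is illustrative.
+def prediksi_kalimat(text):
+    vec = pickle.load(open('static/files/vec.pkl', 'rb'))
+    model = pickle.load(open('static/files/model.pkl', 'rb'))
+    # The model expects the same TF-IDF features it was trained on,
+    # so the new sentence must go through the saved vectorizer.
+    return model.predict(vec.transform([text]))[0]
+# e.g. prediksi_kalimat("belajar daring sangat membantu") returns "Positif" or "Negatif"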
+
+app = Flask(__name__)
+app.config['SECRET_KEY'] = 'farez'
+
+# Upload folder
+UPLOAD_FOLDER = 'static/files'
+ALLOWED_EXTENSION = {'csv'}
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
+def allowed_file(filename):
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSION
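+# For illustration: allowed_file('tweets.csv') is True, while
+# allowed_file('tweets.txt') and allowed_file('no_extension') are False.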
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@app.route('/preprocessing', methods=['GET', 'POST'])
+def preprocessing():
+    if request.method == 'POST':
+        if request.form.get('upload') == 'Upload Data':
+            hasil_preprocessing.clear()
+            # Validate the upload before touching the file object
+            if 'file' not in request.files:
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('preprocessing.html', value=hasil_preprocessing)
+
+            file = request.files['file']
+            if file.filename == '':
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('preprocessing.html', value=hasil_preprocessing)
+
+            if not allowed_file(file.filename):
+                flash('Format file tidak diperbolehkan', 'upload_gagal')
+                return render_template('preprocessing.html', value=hasil_preprocessing)
+
+            file.filename = "Data Scraping.csv"
+            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
+            flash('File Berhasil di upload', 'upload_berhasil')
+            return render_template('preprocessing.html')
+
+        if request.form.get('preprocess') == 'Preprocessing Data':
+            preprocessing_twitter()
+            return render_template('preprocessing.html', value=hasil_preprocessing)
+
+    return render_template('preprocessing.html', value=hasil_preprocessing)
+
+@app.route('/labeling', methods=['GET', 'POST'])
+def labeling():
+    if request.method == 'POST':
+        if request.form.get('upload') == 'Upload Data':
+            hasil_labeling.clear()
+            if 'file' not in request.files:
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('labeling.html', value=hasil_labeling)
+
+            file = request.files['file']
+            if file.filename == '':
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('labeling.html', value=hasil_labeling)
+
+            if not allowed_file(file.filename):
+                flash('Format file tidak diperbolehkan', 'upload_gagal')
+                return render_template('labeling.html', value=hasil_labeling)
+
+            file.filename = "Data Preprocessing.csv"
+            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
+            flash('File Berhasil di upload', 'upload_berhasil')
+            return render_template('labeling.html')
+
+        if request.form.get('labeling') == 'Labeling Data':
+            labeling_twitter()
+            return render_template('labeling.html', value=hasil_labeling)
+
+    return render_template('labeling.html', value=hasil_labeling)
+
+@app.route('/klasifikasi', methods=['GET', 'POST'])
+def klasifikasi():
+    if request.method == 'POST':
+        if request.form.get('upload') == 'Upload Data':
+            if 'file' not in request.files:
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('klasifikasi.html')
+
+            file = request.files['file']
+            if file.filename == '':
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('klasifikasi.html')
+
+            if not allowed_file(file.filename):
+                flash('Format file tidak diperbolehkan', 'upload_gagal')
+                return render_template('klasifikasi.html')
+
+            file.filename = "Data Labeling.csv"
+            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
+            flash('File Berhasil di upload', 'upload_berhasil')
+            return render_template('klasifikasi.html')
+
+        if request.form.get('klasifikasi') == 'Klasifikasi Data':
+            proses_klasifikasi()
+            return render_template('klasifikasi.html', accuracy=akurasi,
+                                   tables=[df.to_html(classes='table table-bordered', index=False, justify='left')],
+                                   titles=df.columns.values,
+                                   tables2=[df2.to_html(classes='table table-bordered', index=False, justify='left')],
+                                   titles2=df2.columns.values)
+
+    if akurasi == 0:
+        return render_template('klasifikasi.html')
+    return render_template('klasifikasi.html', accuracy=akurasi,
+                           tables=[df.to_html(classes='table table-bordered', index=False, justify='left')],
+                           titles=df.columns.values,
+                           tables2=[df2.to_html(classes='table table-bordered', index=False, justify='left')],
+                           titles2=df2.columns.values)
+
+@app.route('/visualisasi')
+def visualisasi():
+    return render_template('visualisasi.html')
+
+if __name__ == "__main__":
+    app.run(debug=True)
\ No newline at end of file