From 761583f1a3e8ebfd094961ccadc4476533e709cb Mon Sep 17 00:00:00 2001
From: NaufalHisyam18
Date: Mon, 24 Jun 2024 09:44:25 +0700
Subject: [PATCH] Create app.py

---
 Analisis-Sentimen-Twitter-Flask-main/app.py | 345 ++++++++++++++++++++
 1 file changed, 345 insertions(+)
 create mode 100644 Analisis-Sentimen-Twitter-Flask-main/app.py

diff --git a/Analisis-Sentimen-Twitter-Flask-main/app.py b/Analisis-Sentimen-Twitter-Flask-main/app.py
new file mode 100644
index 0000000..09f9e2a
--- /dev/null
+++ b/Analisis-Sentimen-Twitter-Flask-main/app.py
@@ -0,0 +1,345 @@
+from flask import Flask, render_template, request, flash
+import re, string, csv, pickle, os
+import pandas as pd
+import numpy as np
+from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
+from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory, StopWordRemover, ArrayDictionary
+import nltk
+from googletrans import Translator
+from textblob import TextBlob
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from PIL import Image
+import urllib.request
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+
+nltk.download('punkt')
+nltk.download('stopwords')
+
+
+# Twitter preprocessing
+hasil_preprocessing = []
+
+def preprocessing_twitter():
+    hasil_preprocessing.clear()
+
+    # Build the stopword remover and stemmer once, outside the row loop.
+    # Stopword list from the Sastrawi library
+    stop_factory = StopWordRemoverFactory().get_stop_words()
+    # Custom stopwords (slang, abbreviations, filler words)
+    more_stop_word = ['&', 'ad', 'ada', 'ae', 'ah', 'aja', 'ajar', 'amp', 'apa', 'aya', 'bab', 'bajo', 'bar',
+                      'bbrp', 'beda', 'begini', 'bgmn', 'bgt', 'bhw', 'biar', 'bikin', 'bilang', 'bkh', 'bkn',
+                      'bln', 'bnyk', 'brt', 'buah', 'cc', 'ckp', 'com', 'cuy', 'd', 'dab', 'dah', 'dan', 'dg',
+                      'dgn', 'di', 'dih', 'dlm', 'dm', 'dpo', 'dr', 'dri', 'duga', 'duh', 'enth', 'er', 'et',
+                      'ga', 'gak', 'gal', 'gin', 'gitu', 'gk', 'gmn', 'gs', 'gt', 'gue', 'gw', 'hah', 'hallo',
+                      'halo', 'hehe', 'hello', 'hha', 'hrs', 'https', 'ia', 'iii', 'in', 'ini', 'iw', 'jadi',
+                      'jangn', 'jd', 'jg', 'jgn', 'jls', 'kak', 'kali', 'kalo', 'kan', 'kch', 'ke', 'kena',
+                      'ket', 'kl', 'kll', 'klo', 'km', 'kmrn', 'knp', 'kok', 'kpd', 'krn', 'kui', 'lagi',
+                      'lah', 'lahh', 'lalu', 'lbh', 'lewat', 'loh', 'lu', 'mah', 'mau', 'min', 'mlkukan',
+                      'mls', 'mnw', 'mrk', 'n', 'nan', 'ni', 'nih', 'no', 'nti', 'ntt', 'ny', 'nya', 'nyg',
+                      'oleh', 'ono', 'ooooo', 'op', 'org', 'pen', 'pk', 'pun', 'qq', 'rd', 'rt', 'sama',
+                      'sbg', 'sdh', 'sdrhn', 'segera', 'sgt', 'si', 'sih', 'sj', 'so', 'sy', 't', 'tak',
+                      'tara', 'tau', 'td', 'tdk', 'thd', 'thn', 'tindkn', 'tkt', 'tp', 'tsb', 'ttg', 'ttp',
+                      'tuh', 'tv', 'u', 'upa', 'utk', 'uyu', 'viral', 'vm', 'wae', 'wah', 'wb', 'wes', 'wk',
+                      'wkwk', 'wkwkwk', 'wn', 'woiii', 'xxxx', 'ya', 'yaa', 'yah', 'ybs', 'ye', 'yg', 'ykm']
+    # Combine the library stopwords with the custom ones
+    dictionary = ArrayDictionary(stop_factory + more_stop_word)
+    stopword_remover = StopWordRemover(dictionary)
+    stemmer = StemmerFactory().create_stemmer()
+
+    # Write the preprocessing result to CSV
+    with open('static/files/Data Preprocessing.csv', 'w', newline='', encoding='utf-8') as file:
+        writer = csv.writer(file)
+
+        with open("static/files/Data Scraping.csv", "r", encoding='utf-8') as csvfile:
+            readCSV = csv.reader(csvfile, delimiter=',')
+            for row in readCSV:
+                # Cleansing: remove mentions, links, and hashtags
+                clean = ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", row[2]).split())
+                # Remove numbers
+                clean = re.sub(r"\d+", "", clean)
+                # Remove single characters
+                clean = re.sub(r"\b[a-zA-Z]\b", "", clean)
+                # Collapse multiple whitespace into a single space
+                clean = re.sub(r'\s+', ' ', clean)
+                # Remove punctuation (emoji remnants)
+                clean = clean.translate(clean.maketrans("", "", string.punctuation))
+
+                # Case folding
+                casefold = clean.casefold()
+
+                # Normalization
+                normalized_text = normalize_text(casefold)
+
+                # Tokenizing
+                tokenizing = nltk.tokenize.word_tokenize(normalized_text)
+
+                # Stopword removal, then re-tokenize the remaining text
+                stop_wr = nltk.tokenize.word_tokenize(stopword_remover.remove(normalized_text))
+
+                # Stemming
+                kalimat = ' '.join(stop_wr)
+                stemming = stemmer.stem(kalimat)
+
+                tweets = [row[0], row[1], row[2], clean, casefold, normalized_text, tokenizing, stop_wr, stemming]
+                hasil_preprocessing.append(tweets)
+                writer.writerow(tweets)
+
+    flash('Preprocessing Berhasil', 'preprocessing_data')
+
+def normalize_text(text):
+    # Normalization example: expand common abbreviations into full words.
+    # \b word boundaries keep the patterns from rewriting substrings of longer words.
+    text = re.sub(r'\bdn\b', 'dan', text)
+    text = re.sub(r'\bkm\b', 'kamu', text)
+    text = re.sub(r'\bpake\b', 'pakai', text)
+    text = re.sub(r'\bmksh\b', 'terima kasih', text)
+    text = re.sub(r'\bkrg\b', 'kurang', text)
+    text = re.sub(r'\bsm\b', 'sama', text)
+    text = re.sub(r'\bbljr\b', 'belajar', text)
+    text = re.sub(r'\bblajar\b', 'belajar', text)
+    text = re.sub(r'\bmeept\b', 'mepet', text)
+    text = re.sub(r'\bdr\b', 'dari', text)
+    text = re.sub(r'\bjg\b', 'juga', text)
+    # Add further normalizations as needed
+    return text
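+# A quick illustrative check (not part of the pipeline): with the word-boundary
+# patterns above, normalize_text("km dtg dr mana") returns "kamu dtg dari mana",
+# while a word such as "drama" that merely contains "dr" is left untouched.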
+
+# Labeling
+hasil_labeling = []
+
+def labeling_twitter():
+    translator = Translator()
+
+    # Write the labeling result to CSV
+    with open('static/files/Data Labeling.csv', 'w', newline='', encoding='utf-8') as file:
+        writer = csv.writer(file)
+
+        with open("static/files/Data Preprocessing.csv", "r", encoding='utf-8') as csvfile:
+            readCSV = csv.reader(csvfile, delimiter=',')
+            hasil_labeling.clear()
+            for row in readCSV:
+                tweet = {}
+                try:
+                    # Translate the stemmed text to English so TextBlob can score it
+                    value = translator.translate(row[8], dest='en')
+                except Exception:
+                    print("Terjadi kesalahan", flush=True)
+                    continue  # skip rows that fail to translate instead of crashing
+
+                terjemahan = value.text
+                data_label = TextBlob(terjemahan)
+
+                if data_label.sentiment.polarity > 0.0:
+                    tweet['sentiment'] = "Positif"
+                else:
+                    tweet['sentiment'] = "Negatif"
+
+                labeling = tweet['sentiment']
+                tweets = [row[1], row[8], labeling]
+                hasil_labeling.append(tweets)
+                writer.writerow(tweets)
+
+    flash('Labeling Berhasil', 'labeling_data')
+
+# Classification
+
+# DataFrames rendered on the classification page
+df = None
+df2 = None
+
+# Accuracy stays 0 until a model has been trained
+akurasi = 0
+
+def proses_klasifikasi():
+    global df
+    global df2
+    global akurasi
+    tweet = []
+    y = []
+
+    with open("static/files/Data Labeling.csv", encoding='utf-8') as csvfile:
+        readCSV = csv.reader(csvfile, delimiter=',')
+        for row in readCSV:
+            tweet.append(row[1])
+            y.append(row[2])
+
+    # Note: the vectorizer is fitted on the full corpus before the split, so
+    # test vocabulary leaks into training; fitting on the training split only
+    # would avoid this.
+    vectorizer = TfidfVectorizer()
+    vectorizer.fit(tweet)
+    x = vectorizer.transform(tweet)
+
+    # Split into 80% training and 20% testing data
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42)
+
+    # Naive Bayes
+    clf = MultinomialNB()
+    clf.fit(x_train, y_train)
+
+    predict = clf.predict(x_test)
+    report = classification_report(y_test, predict, output_dict=True)
+
+    # Save the classification report to CSV
+    clsf_report = pd.DataFrame(report).transpose()
+    clsf_report.to_csv('static/files/Data Klasifikasi.csv', index=True)
+
+    pickle.dump(vectorizer, open('static/files/vec.pkl', 'wb'))
+    pickle.dump(x, open('static/files/tfidf.pkl', 'wb'))
+    pickle.dump(clf, open('static/files/model.pkl', 'wb'))
+
+    # Confusion matrix: sklearn puts true labels on the rows and
+    # predictions on the columns
+    unique_label = np.unique([y_test, predict])
+    cmtx = pd.DataFrame(
+        confusion_matrix(y_test, predict, labels=unique_label),
+        index=['true:{:}'.format(x) for x in unique_label],
+        columns=['pred:{:}'.format(x) for x in unique_label]
+    )
+    cmtx.to_csv('static/files/Data Confusion Matrix.csv', index=True)
+
+    df = pd.read_csv('static/files/Data Confusion Matrix.csv', sep=",")
+    df.rename(columns={'Unnamed: 0': ''}, inplace=True)
+
+    df2 = pd.read_csv('static/files/Data Klasifikasi.csv', sep=",")
+    df2.rename(columns={'Unnamed: 0': ''}, inplace=True)
+
+    akurasi = round(accuracy_score(y_test, predict) * 100, 2)
+
+    # Join the tweets with spaces so words do not run together in the word cloud
+    kalimat = ' '.join(tweet)
+
+    urllib.request.urlretrieve(
+        "https://firebasestorage.googleapis.com/v0/b/sentimen-97d49.appspot.com/o/Circle-icon.png?alt=media&token=b9647ca7-dfdb-46cd-80a9-cfcaa45a1ee4", 'circle.png')
+    mask = np.array(Image.open("circle.png"))
+    wordcloud = WordCloud(width=1600, height=800,
+                          max_font_size=200, background_color='white', mask=mask)
+    wordcloud.generate(kalimat)
+    plt.figure(figsize=(12, 10))
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis("off")
+    plt.savefig('static/files/wordcloud.png')
+
+    flash('Klasifikasi Berhasil', 'klasifikasi_data')
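+
+# A minimal reuse sketch (hypothetical helper, not wired to any route): the
+# file paths match the pickle.dump calls above; everything else is illustrative.
+def prediksi_kalimat(text):
+    vec = pickle.load(open('static/files/vec.pkl', 'rb'))
+    model = pickle.load(open('static/files/model.pkl', 'rb'))
+    # The model expects the same TF-IDF features it was trained on,
+    # so the new sentence must go through the saved vectorizer.
+    return model.predict(vec.transform([text]))[0]
+# e.g. prediksi_kalimat("belajar daring sangat membantu") returns "Positif" or "Negatif"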
+
+app = Flask(__name__)
+app.config['SECRET_KEY'] = 'farez'
+
+# Upload folder
+UPLOAD_FOLDER = 'static/files'
+ALLOWED_EXTENSION = {'csv'}
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
+def allowed_file(filename):
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSION
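+# For illustration: allowed_file('tweets.csv') is True, while
+# allowed_file('tweets.txt') and allowed_file('no_extension') are False.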
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@app.route('/preprocessing', methods=['GET', 'POST'])
+def preprocessing():
+    if request.method == 'POST':
+        if request.form.get('upload') == 'Upload Data':
+            hasil_preprocessing.clear()
+            # Validate the upload before touching the file object
+            if 'file' not in request.files:
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('preprocessing.html', value=hasil_preprocessing)
+
+            file = request.files['file']
+            if file.filename == '':
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('preprocessing.html', value=hasil_preprocessing)
+
+            if not allowed_file(file.filename):
+                flash('Format file tidak diperbolehkan', 'upload_gagal')
+                return render_template('preprocessing.html', value=hasil_preprocessing)
+
+            file.filename = "Data Scraping.csv"
+            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
+            flash('File Berhasil di upload', 'upload_berhasil')
+            return render_template('preprocessing.html')
+
+        if request.form.get('preprocess') == 'Preprocessing Data':
+            preprocessing_twitter()
+            return render_template('preprocessing.html', value=hasil_preprocessing)
+
+    return render_template('preprocessing.html', value=hasil_preprocessing)
+
+@app.route('/labeling', methods=['GET', 'POST'])
+def labeling():
+    if request.method == 'POST':
+        if request.form.get('upload') == 'Upload Data':
+            hasil_labeling.clear()
+            if 'file' not in request.files:
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('labeling.html', value=hasil_labeling)
+
+            file = request.files['file']
+            if file.filename == '':
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('labeling.html', value=hasil_labeling)
+
+            if not allowed_file(file.filename):
+                flash('Format file tidak diperbolehkan', 'upload_gagal')
+                return render_template('labeling.html', value=hasil_labeling)
+
+            file.filename = "Data Preprocessing.csv"
+            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
+            flash('File Berhasil di upload', 'upload_berhasil')
+            return render_template('labeling.html')
+
+        if request.form.get('labeling') == 'Labeling Data':
+            labeling_twitter()
+            return render_template('labeling.html', value=hasil_labeling)
+
+    return render_template('labeling.html', value=hasil_labeling)
+
+@app.route('/klasifikasi', methods=['GET', 'POST'])
+def klasifikasi():
+    if request.method == 'POST':
+        if request.form.get('upload') == 'Upload Data':
+            if 'file' not in request.files:
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('klasifikasi.html')
+
+            file = request.files['file']
+            if file.filename == '':
+                flash('File tidak boleh kosong', 'upload_gagal')
+                return render_template('klasifikasi.html')
+
+            if not allowed_file(file.filename):
+                flash('Format file tidak diperbolehkan', 'upload_gagal')
+                return render_template('klasifikasi.html')
+
+            file.filename = "Data Labeling.csv"
+            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
+            flash('File Berhasil di upload', 'upload_berhasil')
+            return render_template('klasifikasi.html')
+
+        if request.form.get('klasifikasi') == 'Klasifikasi Data':
+            proses_klasifikasi()
+            return render_template('klasifikasi.html', accuracy=akurasi,
+                                   tables=[df.to_html(classes='table table-bordered', index=False, justify='left')],
+                                   titles=df.columns.values,
+                                   tables2=[df2.to_html(classes='table table-bordered', index=False, justify='left')],
+                                   titles2=df2.columns.values)
+
+    if akurasi == 0:
+        return render_template('klasifikasi.html')
+    return render_template('klasifikasi.html', accuracy=akurasi,
+                           tables=[df.to_html(classes='table table-bordered', index=False, justify='left')],
+                           titles=df.columns.values,
+                           tables2=[df2.to_html(classes='table table-bordered', index=False, justify='left')],
+                           titles2=df2.columns.values)
+
+@app.route('/visualisasi')
+def visualisasi():
+    return render_template('visualisasi.html')
+
+if __name__ == "__main__":
+    app.run(debug=True)
\ No newline at end of file