# MIF_E31220095/codeawalflask.txt

from flask import Flask, render_template, url_for, request, flash
import tweepy
import re, string, csv, pickle, os
from os.path import join, dirname, realpath
import pandas as pd
import numpy as np
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory, StopWordRemover, ArrayDictionary
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from googletrans import Translator
from textblob import TextBlob
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from PIL import Image
import urllib.request
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from sklearn.naive_bayes import MultinomialNB
# Fetch the NLTK resources at import time: 'punkt' (tokenizer data; this file
# calls word_tokenize) and the 'stopwords' corpus.
# NOTE(review): this runs on every import of the module — presumably it needs
# network access on first run; confirm for offline deployments.
nltk.download('punkt')
nltk.download('stopwords')


# Twitter preprocessing
# Module-level list of result rows from the latest preprocessing run; it is
# cleared and refilled by preprocessing_twitter() and passed to the template
# by the /preprocessing view.
hasil_preprocessing  = []

def preprocessing_twitter():
    """Run the text-preprocessing pipeline over the scraped tweets.

    Reads ``static/files/Data Scraping.csv`` (columns 0-2 are used as date,
    username and tweet text) and, for every row, performs cleansing, case
    folding, normalisation, tokenizing, stopword removal, stemming and
    translation to English. Each result row is appended to the module-level
    ``hasil_preprocessing`` list and written to
    ``static/files/Data Preprocessing.csv``.

    Rows with fewer than three columns (for example blank lines) are skipped.
    A failed translation is recorded as the literal text "Terjemahan gagal"
    instead of aborting the run.

    NOTE(review): no header row is skipped when reading the scraping file; if
    that file carries a header it is processed like a tweet — confirm against
    the scraper output.
    """
    translator = Translator()
    hasil_preprocessing.clear()

    # These helpers do not depend on the row being processed, so build them
    # once instead of once per tweet.
    all_stopwords = StopWordRemoverFactory().get_stop_words() + ['tidak']
    stopword_remover = StopWordRemover(ArrayDictionary(all_stopwords))
    stemmer = StemmerFactory().create_stemmer()

    # Prepare the CSV file that receives the results.
    with open('static/files/Data Preprocessing.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        # Write the CSV header.
        writer.writerow([
            'Tanggal', 'Username', 'Tweet',
            'Cleansing', 'Case Folding', 'Normalisasi',
            'Tokenizing', 'Stopword', 'Stemming',
            'Translate'
        ])

        # Read the input rows from the scraping file.
        with open("static/files/Data Scraping.csv", "r", encoding='utf-8') as csvfile:
            readCSV = csv.reader(csvfile, delimiter=',')

            for row in readCSV:
                # Blank or truncated rows cannot supply date/username/tweet.
                if len(row) < 3:
                    continue

                tanggal, username, tweet = row[0], row[1], row[2]

                # --- CLEANSING ---
                clean = tweet
                clean = re.sub(r'@[A-Za-z0-9_]+', '', clean)      # drop mentions
                clean = re.sub(r'#\w+', '', clean)                # drop hashtags
                clean = re.sub(r'RT[\s]+', '', clean)             # drop the RT marker
                clean = re.sub(r'https?://\S+', '', clean)        # drop links
                clean = re.sub(r'[^A-Za-z0-9 ]', '', clean)       # keep only alphanumerics and spaces
                clean = re.sub(r'\s+', ' ', clean).strip()        # collapse repeated whitespace

                # --- CASE FOLDING ---
                casefold = clean.casefold()

                # --- NORMALISATION ---
                normalized_text = normalize_text(casefold)

                # --- TOKENIZING ---
                tokenizing = nltk.tokenize.word_tokenize(normalized_text)

                # --- STOPWORD REMOVAL ---
                stopword_removed = nltk.tokenize.word_tokenize(
                    stopword_remover.remove(normalized_text)
                )

                # --- STEMMING ---
                kalimat = ' '.join(stopword_removed)
                stemming = stemmer.stem(kalimat)

                # --- TRANSLATE ---
                # Best effort: a translation failure must not abort the whole
                # batch, but KeyboardInterrupt/SystemExit are no longer swallowed.
                try:
                    translated_text = translator.translate(stemming, dest='en').text
                except Exception:
                    translated_text = "Terjemahan gagal"

                # --- STORE ALL RESULTS ---
                tweets = [
                    tanggal, username, tweet,
                    clean, casefold, normalized_text,
                    tokenizing, stopword_removed, stemming,
                    translated_text
                ]
                hasil_preprocessing.append(tweets)
                writer.writerow(tweets)

    flash('Preprocessing Berhasil', 'preprocessing_data')

# Dictionary of non-standard words (slang, typos, loan words) and their
# standard replacements, keyed by the lower-case form to look for.
_NORMALIZATION_MAP = {
    'sdh': 'sudah',
    'yg': 'yang',
    'nggak': 'tidak',
    'gak': 'tidak',
    'bangetdari': 'banget dari',
    'vibes': 'suasana',
    'mantab': 'mantap',
    'benarsetuju': 'benar setuju',
    'ganjarmahfud': 'ganjar mahfud',
    'stylish': 'bergaya',
    'ngapusi': 'bohong',
    'gede': 'besar',
    'all in': 'yakin',
    'blokkkkk': 'goblok',
    'blokkkk': 'goblok',
    'blokkk': 'goblok',
    'blokk': 'goblok',
    'blok': 'goblok',
    'ri': 'republik indonesia',
    'kem3nangan': 'kemenangan',
    'sat set': 'cepat',
    'ala': 'dari',
    'best': 'terbaik',
    'bgttt': 'banget',
    'gue': 'saya',
    'hrs': 'harus',
    'fixed': 'tetap',
    'blom': 'belum',
    'aing': 'aku',
    'tehnologi': 'teknologi',
    'jd': 'jadi',
    'dg': 'dengan',
    'kudu': 'harus',
    'jk': 'jika',
    'problem': 'masalah',
    'iru': 'itu',
    'duit': 'uang',
    'duid': 'uang',
    'bgsd': 'bangsat',
    'jt': 'juta',
    'stop': 'berhenti',
    'ngeri': 'seram',
    'turu': 'tidur',
    'early': 'awal',
    'pertamna': 'pertamina',
    'mnurut': 'menurut',
    'trus': 'terus',
    'msh': 'masih',
    'simple': 'mudah',
    'worth': 'layak',
    'hny': 'hanya',
    'dn': 'dan',
    'jln': 'jalan',
    'bgt': 'banget',
    'ga': 'tidak',
    'text': 'teks',
    'end': 'selesai',
    'kelen': 'kalian',
    'tuk': 'untuk',
    'kk': 'kakak',
}

# One alternation over every key, anchored on word boundaries so a key only
# matches as a whole word (or whole phrase). Longest keys come first so that
# multi-word phrases win over any shorter key starting at the same position.
_NORMALIZATION_PATTERN = re.compile(
    r'\b(?:'
    + '|'.join(
        re.escape(kata)
        for kata in sorted(_NORMALIZATION_MAP, key=len, reverse=True)
    )
    + r')\b'
)


def normalize_text(text):
    """Replace non-standard words in *text* with their standard form.

    Every whole-word occurrence of a key in ``_NORMALIZATION_MAP`` is replaced
    by its mapped value in a single pass. Matching is case-sensitive and the
    keys are lower-case, so the text is expected to be case-folded already.

    Matching on word boundaries (rather than on surrounding spaces) means the
    first and last word of the text are normalised too, repeated adjacent
    words are all replaced, and keys are never rewritten inside longer words.

    Args:
        text: The case-folded text to normalise.

    Returns:
        The text with all known non-standard words replaced.
    """
    return _NORMALIZATION_PATTERN.sub(
        lambda cocok: _NORMALIZATION_MAP[cocok.group(0)], text
    )

# Five-class labeling
# Module-level list of labeled rows from the latest labeling run; it is
# cleared and refilled by labeling_twitter() and passed to the template by
# the /labeling view.
hasil_labeling = []

def _label_from_polarity(score):
    """Map a polarity score to one of the five support labels."""
    if score >= 0.6:
        return 'Sangat Mendukung'
    if score >= 0.2:
        return 'Mendukung'
    if score > -0.2:
        return 'Netral'
    if score > -0.6:
        return 'Tidak Mendukung'
    return 'Sangat Tidak Mendukung'


def labeling_twitter():
    """Label every preprocessed tweet with one of five sentiment classes.

    Reads ``static/files/Data Preprocessing.csv``, skips its header row, and
    for each data row computes the TextBlob polarity of the translated text
    (column 9). The polarity is mapped to a label and the row
    ``[date, username, tweet, stemming, translation, label]`` is appended to
    the module-level ``hasil_labeling`` list and written to
    ``static/files/Data Labeling.csv``.

    An empty input file yields an output file containing only the header.
    Rows with fewer than ten columns are skipped. If the polarity cannot be
    computed the score falls back to 0.0, which maps to 'Netral'.
    """
    hasil_labeling.clear()

    with open("static/files/Data Preprocessing.csv", "r", encoding='utf-8') as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(readCSV, None)

        with open('static/files/Data Labeling.csv', 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)

            # Header of the labeling result file.
            writer.writerow(['Tanggal', 'Username', 'Tweet', 'Stemming', 'Translate', 'Label'])

            for row in readCSV:
                # Column 9 (the translation) is required; ignore short rows.
                if len(row) < 10:
                    continue

                tanggal = row[0]
                username = row[1]
                tweet_asli = row[2]
                stemming = row[8]
                translated = row[9]  # translation result

                try:
                    score = TextBlob(translated).sentiment.polarity
                except Exception:
                    score = 0.0  # on failure, treat the tweet as neutral

                label = _label_from_polarity(score)

                hasil = [tanggal, username, tweet_asli, stemming, translated, label]
                hasil_labeling.append(hasil)
                writer.writerow(hasil)

    flash('Labeling 5 Kelas Berhasil', 'labeling_data')

# Classification

# Module-level holders for the evaluation tables; proses_klasifikasi() loads
# the confusion-matrix CSV into df and the classification-report CSV into df2.
df = None
df2 = None

# Accuracy in percent; initialised to 0 and overwritten by proses_klasifikasi().
akurasi = 0

def proses_klasifikasi():
    """Train and evaluate a Multinomial Naive Bayes sentiment classifier.

    Reads ``static/files/Data Labeling.csv`` (column 4 = translated text,
    column 5 = label), drops rows whose translation failed, vectorises the
    text with TF-IDF, and trains on an 80/20 split (``random_state=42``).

    Side effects:
        * writes the classification report to ``Data Klasifikasi.csv`` and
          the confusion matrix to ``Data Confusion Matrix.csv``;
        * pickles the vectorizer, the TF-IDF matrix and the model under
          ``static/files``;
        * downloads a mask image and saves a word cloud to
          ``static/files/wordcloud.png``;
        * updates the module-level ``df``, ``df2`` and ``akurasi``;
        * flashes a success message.
    """
    global df
    global df2
    global akurasi
    tweet = []
    y = []

    # Read the labeled data.
    with open("static/files/Data Labeling.csv", encoding='utf-8') as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        next(readCSV)  # skip the header

        for row in readCSV:
            tweet_text = row[4]  # Translate column
            label = row[5]       # Label column

            # Leave out rows whose translation failed.
            if tweet_text.lower() != "terjemahan gagal":
                tweet.append(tweet_text)
                y.append(label)

    # TF-IDF
    vectorizer = TfidfVectorizer()
    x = vectorizer.fit_transform(tweet)

    # Split into training and testing data.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )

    # Naive Bayes
    clf = MultinomialNB()
    clf.fit(x_train, y_train)
    predict = clf.predict(x_test)

    # Save the classification report as CSV.
    report = classification_report(y_test, predict, output_dict=True)
    clsf_report = pd.DataFrame(report).transpose()
    clsf_report.to_csv('static/files/Data Klasifikasi.csv', index=True)

    # Persist the vectorizer, the TF-IDF matrix and the model. The context
    # manager closes (and flushes) each file deterministically.
    artefak = (
        ('static/files/vec.pkl', vectorizer),
        ('static/files/tfidf.pkl', x),
        ('static/files/model.pkl', clf),
    )
    for path, objek in artefak:
        with open(path, 'wb') as berkas:
            pickle.dump(objek, berkas)

    # Confusion matrix. scikit-learn puts the TRUE labels on the rows and the
    # PREDICTED labels on the columns, so the axes are named accordingly.
    unique_label = np.unique([y_test, predict])
    cmtx = pd.DataFrame(
        confusion_matrix(y_test, predict, labels=unique_label),
        index=['true:{:}'.format(nama) for nama in unique_label],
        columns=['pred:{:}'.format(nama) for nama in unique_label]
    )
    cmtx.to_csv('static/files/Data Confusion Matrix.csv', index=True)

    # Read the evaluation results back for display.
    df = pd.read_csv('static/files/Data Confusion Matrix.csv', sep=",")
    df.rename(columns={'Unnamed: 0': ''}, inplace=True)

    df2 = pd.read_csv('static/files/Data Klasifikasi.csv', sep=",")
    df2.rename(columns={'Unnamed: 0': ''}, inplace=True)

    # Compute the accuracy as a percentage.
    akurasi = round(accuracy_score(y_test, predict) * 100, 2)

    # Word cloud. Join with a space so the last word of one tweet is not fused
    # with the first word of the next.
    kalimat = " ".join(tweet)
    urllib.request.urlretrieve(
        "https://firebasestorage.googleapis.com/v0/b/sentimen-97d49.appspot.com/o/Circle-icon.png?alt=media&token=b9647ca7-dfdb-46cd-80a9-cfcaa45a1ee4",
        'circle.png')
    mask = np.array(Image.open("circle.png"))
    wordcloud = WordCloud(width=1600, height=800,
                          max_font_size=200, background_color='white', mask=mask)
    wordcloud.generate(kalimat)
    gambar = plt.figure(figsize=(12, 10))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.savefig('static/files/wordcloud.png')
    # Release the figure; otherwise every run keeps one more figure in memory.
    plt.close(gambar)

    flash('Klasifikasi Berhasil', 'klasifikasi_data')


app = Flask(__name__)
# Key used by Flask to sign the session cookie that carries flash messages.
# Set the SECRET_KEY environment variable to override it; the previous
# hard-coded value remains the fallback so existing setups keep working.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'farez')


# Upload folder and the file extensions accepted for upload.
UPLOAD_FOLDER = 'static/files'
ALLOWED_EXTENSION = {'csv'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSION.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    ekstensi = filename.rsplit('.', 1)[1]
    return ekstensi.lower() in ALLOWED_EXTENSION

@app.route('/')
def index():
    """Serve the landing page rendered from ``index.html``."""
    halaman = 'index.html'
    return render_template(halaman)

@app.route('/preprocessing', methods=['GET', 'POST'])
def preprocessing():
    """Handle the preprocessing page: CSV upload and pipeline execution.

    POST with ``upload == 'Upload Data'`` validates the uploaded file and
    stores it as ``Data Scraping.csv`` in the upload folder. POST with
    ``preprocess == 'Preprocessing Data'`` runs ``preprocessing_twitter()``.
    Any other request renders the page with the current results.
    """
    if request.method == 'POST':
        if request.form.get('upload') == 'Upload Data':
            hasil_preprocessing.clear()

            # Check for a missing file part or an empty filename first, so the
            # "file must not be empty" message is actually reachable and a
            # missing part does not turn into a 400 error.
            file = request.files.get('file')
            if file is None or not file.filename:
                flash('File tidak boleh kosong', 'upload_gagal')
                return render_template('preprocessing.html', value=hasil_preprocessing)

            if not allowed_file(file.filename):
                flash('Format file tidak diperbolehkan', 'upload_gagal')
                return render_template('preprocessing.html', value=hasil_preprocessing)

            # Always store under a fixed name; the client-supplied filename is
            # never used to build the path.
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], "Data Scraping.csv"))
            flash('File Berhasil di upload', 'upload_berhasil')
            return render_template('preprocessing.html')

        if request.form.get('preprocess') == 'Preprocessing Data':
            preprocessing_twitter()
            return render_template('preprocessing.html', value=hasil_preprocessing)

    return render_template('preprocessing.html', value=hasil_preprocessing)

@app.route('/labeling', methods=['GET', 'POST'])
def labeling():
    """Handle the labeling page: CSV upload and label generation.

    POST with ``upload == 'Upload Data'`` validates the uploaded file and
    stores it as ``Data Preprocessing.csv`` in the upload folder. POST with
    ``labeling == 'Labeling Data'`` runs ``labeling_twitter()``. Any other
    request renders the page with the current results.
    """
    if request.method == 'POST':
        if request.form.get('upload') == 'Upload Data':
            hasil_labeling.clear()

            # Check for a missing file part or an empty filename first, so the
            # "file must not be empty" message is actually reachable and a
            # missing part does not turn into a 400 error.
            file = request.files.get('file')
            if file is None or not file.filename:
                flash('File tidak boleh kosong', 'upload_gagal')
                return render_template('labeling.html', value=hasil_labeling)

            if not allowed_file(file.filename):
                flash('Format file tidak diperbolehkan', 'upload_gagal')
                return render_template('labeling.html', value=hasil_labeling)

            # Always store under a fixed name; the client-supplied filename is
            # never used to build the path.
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], "Data Preprocessing.csv"))
            flash('File Berhasil di upload', 'upload_berhasil')
            return render_template('labeling.html')

        if request.form.get('labeling') == 'Labeling Data':
            labeling_twitter()
            return render_template('labeling.html', value=hasil_labeling)

    return render_template('labeling.html', value=hasil_labeling)

def _render_hasil_klasifikasi():
    """Render the classification page with accuracy and both result tables."""
    return render_template(
        'klasifikasi.html',
        accuracy=akurasi,
        tables=[df.to_html(classes='table table-bordered', index=False, justify='left')],
        titles=df.columns.values,
        tables2=[df2.to_html(classes='table table-bordered', index=False, justify='left')],
        titles2=df2.columns.values,
    )


@app.route('/klasifikasi', methods=['GET', 'POST'])
def klasifikasi():
    """Handle the classification page: CSV upload and model training.

    POST with ``upload == 'Upload Data'`` validates the uploaded file and
    stores it as ``Data Labeling.csv`` in the upload folder. POST with
    ``klasifikasi == 'Klasifikasi Data'`` runs ``proses_klasifikasi()`` and
    shows its results. Any other request shows the last results when they
    exist, otherwise the empty page.
    """
    if request.method == 'POST':
        if request.form.get('upload') == 'Upload Data':
            # Check for a missing file part or an empty filename first, so the
            # "file must not be empty" message is actually reachable and a
            # missing part does not turn into a 400 error.
            file = request.files.get('file')
            if file is None or not file.filename:
                flash('File tidak boleh kosong', 'upload_gagal')
                return render_template('klasifikasi.html')

            if not allowed_file(file.filename):
                flash('Format file tidak diperbolehkan', 'upload_gagal')
                return render_template('klasifikasi.html')

            # Always store under a fixed name; the client-supplied filename is
            # never used to build the path.
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], "Data Labeling.csv"))
            flash('File Berhasil di upload', 'upload_berhasil')
            return render_template('klasifikasi.html')

        if request.form.get('klasifikasi') == 'Klasifikasi Data':
            proses_klasifikasi()
            return _render_hasil_klasifikasi()

    # Decide on the result tables themselves rather than on the accuracy
    # value, so a genuine 0 % accuracy is still displayed.
    if df is None or df2 is None:
        return render_template('klasifikasi.html')
    return _render_hasil_klasifikasi()

@app.route('/visualisasi')
def visualisasi():
    """Serve the visualisation page rendered from ``visualisasi.html``."""
    halaman = 'visualisasi.html'
    return render_template(halaman)

# Start the Flask server only when this file is executed as a script.
# NOTE(review): debug=True is passed to app.run — confirm this entry point is
# never used for a production deployment.
if __name__ == "__main__":
    app.run(debug=True)