MIF_E31222492/Pengumpulan Data/Tugas Akhir.ipynb

5367 lines
428 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "QwCBnQJxpbPt"
},
"source": [
"# Pengumpulan Data"
]
},
{
"cell_type": "code",
"source": [
"# Attach Google Drive to the Colab runtime; the dataset paths used by the\n",
"# later cells live under this mount point.\n",
"from google.colab import drive\n",
"\n",
"DRIVE_MOUNT_POINT = '/content/drive'\n",
"drive.mount(DRIVE_MOUNT_POINT)"
],
"metadata": {
"id": "E7F7B9QVpkJK",
"outputId": "bb9ae0d3-d86e-479f-f6d1-7e4df55c67c7",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 17,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KkXH79MxpbPw"
},
"outputs": [],
"source": [
"#@title Twitter Auth Token\n",
"# The auth token is a credential and must never be stored in the notebook.\n",
"# It is taken from the TWITTER_AUTH_TOKEN environment variable when set;\n",
"# otherwise it is requested through a hidden prompt, so it does not end up\n",
"# in the cell source or its output.\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"twitter_auth_token = os.environ.get('TWITTER_AUTH_TOKEN') or getpass('Twitter auth token: ')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DIGXUll9pbPy"
},
"outputs": [],
"source": [
"# Install Node.js 20 from the NodeSource apt repository; `npx` is needed by\n",
"# the crawl cell below.\n",
"# Shell commands use the `!` escape. The `%` prefix is for IPython magics, and\n",
"# there is no `sudo`, `curl` or `node` magic, so `%sudo ...` fails.\n",
"!sudo apt-get update\n",
"!sudo apt-get install -y ca-certificates curl gnupg\n",
"!sudo mkdir -p /etc/apt/keyrings\n",
"!curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg\n",
"\n",
"# NODE_MAJOR is set and used on the same line because every `!` command runs\n",
"# in its own shell.\n",
"!NODE_MAJOR=20 && echo \"deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main\" | sudo tee /etc/apt/sources.list.d/nodesource.list\n",
"\n",
"!sudo apt-get update\n",
"!sudo apt-get install nodejs -y\n",
"\n",
"# Print the installed version as a sanity check.\n",
"!node -v"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "443732olpbP0"
},
"outputs": [],
"source": [
"# Crawl Data\n",
"import os\n",
"import subprocess\n",
"\n",
"# Search queries, one per topic (lang:id, 2020-01-01 until 2024-12-31).\n",
"# Each entry must end with a comma: without it Python silently concatenates\n",
"# adjacent string literals into one query.\n",
"keywords = [\n",
"    'gaji gen z lang:id until:2024-12-31 since:2020-01-01',\n",
"    'kesehatan mental generasi z lang:id until:2024-12-31 since:2020-01-01',\n",
"    'finansial gen z lang:id until:2024-12-31 since:2020-01-01',\n",
"]\n",
"\n",
"# Output filename for each keyword (same order as `keywords`)\n",
"filenames = [\n",
"    'dataset-gaji-gen-z.csv',\n",
"    'dataset_kesehatan_mental_generasi_z.csv',\n",
"    'dataset_finansial_gen_z.csv',\n",
"]\n",
"\n",
"# Maximum number of tweets to collect per keyword\n",
"limit = 50000\n",
"\n",
"# zip() stops at the shorter list, so make a keyword/filename mismatch loud\n",
"# instead of silently skipping a crawl.\n",
"assert len(keywords) == len(filenames), 'keywords and filenames must pair up one-to-one'\n",
"\n",
"# Run one crawl per keyword\n",
"for keyword, filename in zip(keywords, filenames):\n",
"    # An argument list (no shell) passes the query and the token verbatim,\n",
"    # without any shell quoting or interpolation.\n",
"    result = subprocess.run([\n",
"        'npx', '-y', 'tweet-harvest@2.6.1',\n",
"        '-o', filename,\n",
"        '-s', keyword,\n",
"        '--tab', 'LATEST',\n",
"        '-l', str(limit),\n",
"        '--token', twitter_auth_token,\n",
"    ])\n",
"    # Keep going on failure (as before), but report it instead of hiding it.\n",
"    if result.returncode != 0:\n",
"        print(f'tweet-harvest exited with code {result.returncode} for keyword: {keyword}')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Fa_zUkx_pbP1"
},
"source": [
"# Pra-pemrosesan data"
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "Op1SYYqvvRwE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "OtOOY93LpbP2",
"outputId": "7d40a52c-bed3-4227-93b8-9941a28847ac",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 4600
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" conversation_id_str created_at favorite_count \\\n",
"0 1873627304632758640 Mon Dec 30 07:08:54 +0000 2024 0 \n",
"1 1873552074291765383 Mon Dec 30 02:09:58 +0000 2024 0 \n",
"2 1873198670558183506 Sun Dec 29 02:45:40 +0000 2024 0 \n",
"3 1872963394632101930 Sat Dec 28 11:10:45 +0000 2024 123 \n",
"4 1872907838714229142 Sat Dec 28 07:30:00 +0000 2024 0 \n",
".. ... ... ... \n",
"446 1287968595243405314 Tue Jul 28 04:30:17 +0000 2020 2 \n",
"447 1273488173532938240 Thu Jun 18 06:29:17 +0000 2020 0 \n",
"448 1235524166519885825 Thu Mar 05 11:14:50 +0000 2020 3 \n",
"449 1215396692926746625 Thu Jan 09 22:15:27 +0000 2020 0 \n",
"450 1215395587614359553 Thu Jan 09 22:11:03 +0000 2020 0 \n",
"\n",
" full_text id_str \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... 1873627304632758640 \n",
"1 4 Langkah Sederhana Investasi untuk Gen Z Capa... 1873552074291765383 \n",
"2 4 Langkah Mudah Mulai Investasi Buat Gen Z Men... 1873198670558183506 \n",
"3 Assalamu alaikum Sahabat Syariah! Tantangan so... 1872963394632101930 \n",
"4 Finansial yang sehat itu bukan cuma soal punya... 1872907838714229142 \n",
".. ... ... \n",
"446 Acara yang berbasis online ini merupakan sebua... 1287968595243405314 \n",
"447 @detiksport Kalo masalahnya finansial bisa dim... 1273503030978068480 \n",
"448 Tahukah Smart People jika Generasi Z tercatat ... 1235524166519885825 \n",
"449 Yrl Mendekati usia 20 tahun gatau kenapa banya... 1215396692926746625 \n",
"450 Mendekati usia 20 tahun gatau kenapa banyak ba... 1215395587614359553 \n",
"\n",
" image_url in_reply_to_screen_name \\\n",
"0 NaN NaN \n",
"1 https://pbs.twimg.com/media/GgAxaBXawAAaveA.jpg NaN \n",
"2 NaN NaN \n",
"3 https://pbs.twimg.com/media/Gf4aRbyawAgqR32.jpg NaN \n",
"4 https://pbs.twimg.com/media/Gf22shuagAA-GV5.jpg NaN \n",
".. ... ... \n",
"446 NaN NaN \n",
"447 NaN detiksport \n",
"448 https://pbs.twimg.com/media/ESV2rbvUwAIiilw.jpg NaN \n",
"449 NaN NaN \n",
"450 NaN NaN \n",
"\n",
" lang location quote_count reply_count retweet_count \\\n",
"0 in Indonesia 0 0 0 \n",
"1 in Indonesia 0 0 0 \n",
"2 in Indonesia 0 0 0 \n",
"3 in Jakarta, Indonesia 0 4 55 \n",
"4 in Jakarta, Indonesia 0 1 1 \n",
".. ... ... ... ... ... \n",
"446 in www.marketeers.com 0 0 0 \n",
"447 in Kota Bandung, Jawa Barat 0 0 0 \n",
"448 in DKI Jakarta 0 1 0 \n",
"449 in RULES TAP LINK ‼️ 0 3 1 \n",
"450 in NaN 0 0 0 \n",
"\n",
" tweet_url user_id_str \\\n",
"0 https://x.com/JavanicaPost/status/187362730463... 1846016698920685568 \n",
"1 https://x.com/digivestasi/status/1873552074291... 1450395130792329223 \n",
"2 https://x.com/ilhamiasnawi/status/187319867055... 305657272 \n",
"3 https://x.com/bankbsi_id/status/18729633946321... 141500996 \n",
"4 https://x.com/CIMBNiaga/status/187290783871422... 949032626 \n",
".. ... ... \n",
"446 https://x.com/the_marketeers/status/1287968595... 79957943 \n",
"447 https://x.com/faithprogress/status/12735030309... 81836775 \n",
"448 https://x.com/tuguinsurance/status/12355241665... 2217154892 \n",
"449 https://x.com/your2rl/status/1215396692926746625 1601505026 \n",
"450 https://x.com/wipilarpa/status/121539558761435... 1024610447847116802 \n",
"\n",
" username \n",
"0 JavanicaPost \n",
"1 digivestasi \n",
"2 ilhamiasnawi \n",
"3 bankbsi_id \n",
"4 CIMBNiaga \n",
".. ... \n",
"446 the_marketeers \n",
"447 faithprogress \n",
"448 tuguinsurance \n",
"449 your2rl \n",
"450 wipilarpa \n",
"\n",
"[451 rows x 15 columns]"
],
"text/html": [
"\n",
" <div id=\"df-0ee037d3-b2c2-468e-9ebe-10e372be8a57\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>conversation_id_str</th>\n",
" <th>created_at</th>\n",
" <th>favorite_count</th>\n",
" <th>full_text</th>\n",
" <th>id_str</th>\n",
" <th>image_url</th>\n",
" <th>in_reply_to_screen_name</th>\n",
" <th>lang</th>\n",
" <th>location</th>\n",
" <th>quote_count</th>\n",
" <th>reply_count</th>\n",
" <th>retweet_count</th>\n",
" <th>tweet_url</th>\n",
" <th>user_id_str</th>\n",
" <th>username</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1873627304632758640</td>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>0</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>1873627304632758640</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Indonesia</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>https://x.com/JavanicaPost/status/187362730463...</td>\n",
" <td>1846016698920685568</td>\n",
" <td>JavanicaPost</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1873552074291765383</td>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>0</td>\n",
" <td>4 Langkah Sederhana Investasi untuk Gen Z Capa...</td>\n",
" <td>1873552074291765383</td>\n",
" <td>https://pbs.twimg.com/media/GgAxaBXawAAaveA.jpg</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Indonesia</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>https://x.com/digivestasi/status/1873552074291...</td>\n",
" <td>1450395130792329223</td>\n",
" <td>digivestasi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1873198670558183506</td>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>0</td>\n",
" <td>4 Langkah Mudah Mulai Investasi Buat Gen Z Men...</td>\n",
" <td>1873198670558183506</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Indonesia</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>https://x.com/ilhamiasnawi/status/187319867055...</td>\n",
" <td>305657272</td>\n",
" <td>ilhamiasnawi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1872963394632101930</td>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>123</td>\n",
" <td>Assalamu alaikum Sahabat Syariah! Tantangan so...</td>\n",
" <td>1872963394632101930</td>\n",
" <td>https://pbs.twimg.com/media/Gf4aRbyawAgqR32.jpg</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Jakarta, Indonesia</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>55</td>\n",
" <td>https://x.com/bankbsi_id/status/18729633946321...</td>\n",
" <td>141500996</td>\n",
" <td>bankbsi_id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1872907838714229142</td>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>0</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>1872907838714229142</td>\n",
" <td>https://pbs.twimg.com/media/Gf22shuagAA-GV5.jpg</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Jakarta, Indonesia</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>https://x.com/CIMBNiaga/status/187290783871422...</td>\n",
" <td>949032626</td>\n",
" <td>CIMBNiaga</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>446</th>\n",
" <td>1287968595243405314</td>\n",
" <td>Tue Jul 28 04:30:17 +0000 2020</td>\n",
" <td>2</td>\n",
" <td>Acara yang berbasis online ini merupakan sebua...</td>\n",
" <td>1287968595243405314</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>www.marketeers.com</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>https://x.com/the_marketeers/status/1287968595...</td>\n",
" <td>79957943</td>\n",
" <td>the_marketeers</td>\n",
" </tr>\n",
" <tr>\n",
" <th>447</th>\n",
" <td>1273488173532938240</td>\n",
" <td>Thu Jun 18 06:29:17 +0000 2020</td>\n",
" <td>0</td>\n",
" <td>@detiksport Kalo masalahnya finansial bisa dim...</td>\n",
" <td>1273503030978068480</td>\n",
" <td>NaN</td>\n",
" <td>detiksport</td>\n",
" <td>in</td>\n",
" <td>Kota Bandung, Jawa Barat</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>https://x.com/faithprogress/status/12735030309...</td>\n",
" <td>81836775</td>\n",
" <td>faithprogress</td>\n",
" </tr>\n",
" <tr>\n",
" <th>448</th>\n",
" <td>1235524166519885825</td>\n",
" <td>Thu Mar 05 11:14:50 +0000 2020</td>\n",
" <td>3</td>\n",
" <td>Tahukah Smart People jika Generasi Z tercatat ...</td>\n",
" <td>1235524166519885825</td>\n",
" <td>https://pbs.twimg.com/media/ESV2rbvUwAIiilw.jpg</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>DKI Jakarta</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>https://x.com/tuguinsurance/status/12355241665...</td>\n",
" <td>2217154892</td>\n",
" <td>tuguinsurance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>449</th>\n",
" <td>1215396692926746625</td>\n",
" <td>Thu Jan 09 22:15:27 +0000 2020</td>\n",
" <td>0</td>\n",
" <td>Yrl Mendekati usia 20 tahun gatau kenapa banya...</td>\n",
" <td>1215396692926746625</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>RULES TAP LINK ‼️</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>https://x.com/your2rl/status/1215396692926746625</td>\n",
" <td>1601505026</td>\n",
" <td>your2rl</td>\n",
" </tr>\n",
" <tr>\n",
" <th>450</th>\n",
" <td>1215395587614359553</td>\n",
" <td>Thu Jan 09 22:11:03 +0000 2020</td>\n",
" <td>0</td>\n",
" <td>Mendekati usia 20 tahun gatau kenapa banyak ba...</td>\n",
" <td>1215395587614359553</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>https://x.com/wipilarpa/status/121539558761435...</td>\n",
" <td>1024610447847116802</td>\n",
" <td>wipilarpa</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>451 rows × 15 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0ee037d3-b2c2-468e-9ebe-10e372be8a57')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-0ee037d3-b2c2-468e-9ebe-10e372be8a57 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-0ee037d3-b2c2-468e-9ebe-10e372be8a57');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-d068a85e-c6ad-486d-a0e7-e0f31be8b605\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d068a85e-c6ad-486d-a0e7-e0f31be8b605')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-d068a85e-c6ad-486d-a0e7-e0f31be8b605 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" <div id=\"id_9139d825-9147-43e3-9776-f1c8808c560b\">\n",
" <style>\n",
" .colab-df-generate {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-generate:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
" <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df1')\"\n",
" title=\"Generate code using this dataframe.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
" </svg>\n",
" </button>\n",
" <script>\n",
" (() => {\n",
" const buttonEl =\n",
" document.querySelector('#id_9139d825-9147-43e3-9776-f1c8808c560b button.colab-df-generate');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" buttonEl.onclick = () => {\n",
" google.colab.notebook.generateWithVariable('df1');\n",
" }\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df1",
"summary": "{\n \"name\": \"df1\",\n \"rows\": 451,\n \"fields\": [\n {\n \"column\": \"conversation_id_str\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 137338101189843440,\n \"min\": 1215395587614359553,\n \"max\": 1873627304632758640,\n \"num_unique_values\": 430,\n \"samples\": [\n 1290467073399955461,\n 1846151521060507687,\n 1795394936793379161\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 449,\n \"samples\": [\n \"Tue Oct 03 09:36:19 +0000 2023\",\n \"Mon Aug 28 13:00:36 +0000 2023\",\n \"Thu Aug 29 07:29:44 +0000 2024\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"favorite_count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 101,\n \"min\": 0,\n \"max\": 1817,\n \"num_unique_values\": 36,\n \"samples\": [\n 10,\n 5,\n 1817\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 447,\n \"samples\": [\n \"Ini Tantangan Finansial Bagi Gen Z : Okezone Economy https://t.co/VtCMM7P7fL\",\n \"@MiniGoldID Jadilah generasi millenial &amp; Gen Z yang smart finansial untuk ciptakan masa depan yang cerah secerah kemilau MiniGold..\",\n \"Gen Z dan Milenial Lebih Mengutamakan Work-Life Balance daripada Finansial Apa Alasannya? 
- Bisnis Muda https://t.co/l7MKmgnnft #bisnismuda\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"id_str\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 137334203568879136,\n \"min\": 1215395587614359553,\n \"max\": 1873627304632758640,\n \"num_unique_values\": 451,\n \"samples\": [\n 1696063147482943613,\n 1854206852722307481,\n 1630849872302866434\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"image_url\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 128,\n \"samples\": [\n \"https://pbs.twimg.com/media/GR8h4KCbcAEcaiE.jpg\",\n \"https://pbs.twimg.com/media/GX6SF7caUAEWQfS.jpg\",\n \"https://pbs.twimg.com/media/Ga83N2ubQAAGkjq.jpg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"in_reply_to_screen_name\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 128,\n \"samples\": [\n \"KemenkeuRI\",\n \"_haye_\",\n \"babinyeleneh\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"lang\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"in\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 129,\n \"samples\": [\n \"\\u2764\\ufe0f\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"quote_count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 13,\n \"num_unique_values\": 9,\n \"samples\": [\n 13\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"reply_count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 0,\n \"max\": 135,\n \"num_unique_values\": 15,\n \"samples\": [\n 15\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"retweet_count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13,\n \"min\": 0,\n \"max\": 208,\n \"num_unique_values\": 20,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tweet_url\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 451,\n \"samples\": [\n \"https://x.com/Bisniscom/status/1696063147482943613\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_id_str\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 735132847075928192,\n \"min\": 18129942,\n \"max\": 1862874585315811328,\n \"num_unique_values\": 341,\n \"samples\": [\n 1217102261014294528\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"username\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 341,\n \"samples\": [\n \"matabicara_\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 2
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Load the six crawl results and tag every row with the search keyword it\n",
"# came from: df1-df3 are read from REAL-DATA, df4-df6 from 'data matang'.\n",
"df1 = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/REAL-DATA/REAL-dataset_finansial_gen_z.csv').assign(keyword='finansial gen z')\n",
"df2 = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/REAL-DATA/REAL-dataset_gaji_gen_z.csv').assign(keyword='gaji gen z')\n",
"df3 = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/REAL-DATA/REAL-dataset_kesehatan_mental_generasi_z.csv').assign(keyword='kesehatan mental generasi z')\n",
"df4 = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data matang/dataset_finansial_gen_z.csv').assign(keyword='finansial gen z')\n",
"df5 = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data matang/dataset_gaji_gen_z.csv').assign(keyword='gaji gen z')\n",
"df6 = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data matang/dataset_kesehatan_mental_generasi_z-TGL.csv').assign(keyword='kesehatan mental generasi z')\n",
"\n",
"# Stack the two sources of each keyword on top of each other\n",
"df_finansial = pd.concat([df1, df4], ignore_index=True)\n",
"df_gaji = pd.concat([df2, df5], ignore_index=True)\n",
"df_kesehatan_mental = pd.concat([df3, df6], ignore_index=True)\n",
"\n",
"# Combine the per-keyword frames into one frame with a fresh 0..n-1 index\n",
"per_keyword_frames = [df_finansial, df_gaji, df_kesehatan_mental]\n",
"merge_df = pd.concat(per_keyword_frames, ignore_index=True)\n",
"\n",
"# Save the merged, still uncleaned data\n",
"merge_df.to_csv(\n",
"    '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-kotor.csv',\n",
"    index=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "EzxWJ80fpbP4",
"outputId": "6d58b284-2276-47ef-fa0b-8c1dd5958c39",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 585
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"conversation_id_str 8369\n",
"created_at 8369\n",
"favorite_count 8369\n",
"full_text 8369\n",
"id_str 8369\n",
"image_url 692\n",
"in_reply_to_screen_name 4689\n",
"lang 8369\n",
"location 4065\n",
"quote_count 8369\n",
"reply_count 8369\n",
"retweet_count 8369\n",
"tweet_url 8369\n",
"user_id_str 8369\n",
"username 8369\n",
"keyword 8369\n",
"dtype: int64"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>conversation_id_str</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>created_at</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>favorite_count</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>full_text</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>id_str</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>image_url</th>\n",
" <td>692</td>\n",
" </tr>\n",
" <tr>\n",
" <th>in_reply_to_screen_name</th>\n",
" <td>4689</td>\n",
" </tr>\n",
" <tr>\n",
" <th>lang</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>location</th>\n",
" <td>4065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>quote_count</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>reply_count</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>retweet_count</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tweet_url</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user_id_str</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>username</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>keyword</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div><br><label><b>dtype:</b> int64</label>"
]
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"# Reload the merged raw dataset and show the non-null count of every column.\n",
"raw_dataset_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-kotor.csv'\n",
"df = pd.read_csv(raw_dataset_path)\n",
"df.count()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"id": "M_-HgONJpbP8",
"outputId": "881c8cf9-6b3a-4287-a0ea-1d047b566443",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 178
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"created_at 8369\n",
"full_text 8369\n",
"keyword 8369\n",
"dtype: int64"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>created_at</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>full_text</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>keyword</th>\n",
" <td>8369</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div><br><label><b>dtype:</b> int64</label>"
]
},
"metadata": {},
"execution_count": 9
}
],
"source": [
"# Keep only the columns used by the analysis.\n",
"# Selecting by name instead of dropping by position does not depend on the\n",
"# column order of the CSV, and the cell can be re-run without an IndexError.\n",
"# .copy() gives an independent frame, so later in-place edits do not trigger\n",
"# pandas' SettingWithCopyWarning.\n",
"df = df[['created_at', 'full_text', 'keyword']].copy()\n",
"df.count()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "oufVMghCpbP-",
"outputId": "06d2cff5-a00a-4668-a2e7-25051a81bf7b",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 178
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"created_at 5387\n",
"full_text 5387\n",
"keyword 5387\n",
"dtype: int64"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>created_at</th>\n",
" <td>5387</td>\n",
" </tr>\n",
" <tr>\n",
" <th>full_text</th>\n",
" <td>5387</td>\n",
" </tr>\n",
" <tr>\n",
" <th>keyword</th>\n",
" <td>5387</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div><br><label><b>dtype:</b> int64</label>"
]
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"# Remove repeated tweets first, then rows whose tweet text is missing.\n",
"df = (\n",
"    df.drop_duplicates(subset=['full_text'])\n",
"      .dropna(subset=['full_text'])\n",
")\n",
"\n",
"# Save the result; the cleansing section reloads this file.\n",
"df.to_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets.csv', index=False)\n",
"df.count()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_AjsHPfMpbP-"
},
"source": [
"## cleansing data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "EcBqIvJVpbP_",
"outputId": "f303b7eb-4e6f-43d8-ef84-d77a0fdb3087",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 293
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" created_at \\\n",
"0 Mon Dec 30 07:08:54 +0000 2024 \n",
"1 Mon Dec 30 02:09:58 +0000 2024 \n",
"2 Sun Dec 29 02:45:40 +0000 2024 \n",
"3 Sat Dec 28 11:10:45 +0000 2024 \n",
"4 Sat Dec 28 07:30:00 +0000 2024 \n",
"\n",
" full_text keyword \n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... finansial gen z \n",
"1 4 Langkah Sederhana Investasi untuk Gen Z Capa... finansial gen z \n",
"2 4 Langkah Mudah Mulai Investasi Buat Gen Z Men... finansial gen z \n",
"3 Assalamu alaikum Sahabat Syariah! Tantangan so... finansial gen z \n",
"4 Finansial yang sehat itu bukan cuma soal punya... finansial gen z "
],
"text/html": [
"\n",
" <div id=\"df-cce5170f-51de-46d6-b8bb-6cbb75a5a244\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>created_at</th>\n",
" <th>full_text</th>\n",
" <th>keyword</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>finansial gen z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>4 Langkah Sederhana Investasi untuk Gen Z Capa...</td>\n",
" <td>finansial gen z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>4 Langkah Mudah Mulai Investasi Buat Gen Z Men...</td>\n",
" <td>finansial gen z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>Assalamu alaikum Sahabat Syariah! Tantangan so...</td>\n",
" <td>finansial gen z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial gen z</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cce5170f-51de-46d6-b8bb-6cbb75a5a244')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-cce5170f-51de-46d6-b8bb-6cbb75a5a244 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-cce5170f-51de-46d6-b8bb-6cbb75a5a244');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-88529e77-c011-44c5-83cb-7126964f3676\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-88529e77-c011-44c5-83cb-7126964f3676')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-88529e77-c011-44c5-83cb-7126964f3676 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5387,\n \"fields\": [\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5229,\n \"samples\": [\n \"Sun Jul 09 21:01:42 +0000 2023\",\n \"Wed May 22 16:50:31 +0000 2024\",\n \"Sun Oct 11 09:39:07 +0000 2020\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5387,\n \"samples\": [\n \"@worksfess ni yg nanya pasti gen Z. kalo lo expect gaji 2 dijit ngaca dulu yaa please :). kalo masih anak kemaren sore struggle dulu nyet.\",\n \"Dari data dapat disimpulkan sebenarnya gen Z punya minat yang cukup besar menjadi PNS maupun korporat swasta. Berbeda dengan milenial cenderung lebih tinggi menjadi PNS. Hal ini karena milenial berorientasi pada gaji dan tunjangan untuk tabungan masa depan. #IDNTimesLife https://t.co/4wKYGFPnFC\",\n \"Khususnya di tengah masa pandemi seperti ini menurut survei banyak dari generasi z yang merasakan dampak dari pandemi ini kepada kesehatan mental mereka. Generasi yang lebih tua dianggap lebih tangguh karena dari segi finansial ataupun pola pikir yang sudah lebih dewasa.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"keyword\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"finansial gen z\",\n \"gaji gen z\",\n \"kesehatan mental generasi z\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 3
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Reload the de-duplicated tweets from Drive and preview the first rows.\n",
"dataset_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets.csv'\n",
"df = pd.read_csv(dataset_path)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "ZfLlmtcvpbQA",
"outputId": "172c3819-cee2-4fc6-a4b0-6accf5962f62",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 379
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" created_at \\\n",
"0 Mon Dec 30 07:08:54 +0000 2024 \n",
"1 Mon Dec 30 02:09:58 +0000 2024 \n",
"2 Sun Dec 29 02:45:40 +0000 2024 \n",
"3 Sat Dec 28 11:10:45 +0000 2024 \n",
"4 Sat Dec 28 07:30:00 +0000 2024 \n",
"\n",
" full_text keyword \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... finansial gen z \n",
"1 4 Langkah Sederhana Investasi untuk Gen Z Capa... finansial gen z \n",
"2 4 Langkah Mudah Mulai Investasi Buat Gen Z Men... finansial gen z \n",
"3 Assalamu alaikum Sahabat Syariah! Tantangan so... finansial gen z \n",
"4 Finansial yang sehat itu bukan cuma soal punya... finansial gen z \n",
"\n",
" cleanning_text \n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 Langkah Sederhana Investasi untuk Gen Z Capai ... \n",
"2 Langkah Mudah Mulai Investasi Buat Gen Z Menuj... \n",
"3 Assalamu alaikum Sahabat Syariah Tantangan sos... \n",
"4 Finansial yang sehat itu bukan cuma soal punya... "
],
"text/html": [
"\n",
" <div id=\"df-5f981857-2c3b-4029-ba08-e307d799d28c\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>created_at</th>\n",
" <th>full_text</th>\n",
" <th>keyword</th>\n",
" <th>cleanning_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>4 Langkah Sederhana Investasi untuk Gen Z Capa...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Sederhana Investasi untuk Gen Z Capai ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>4 Langkah Mudah Mulai Investasi Buat Gen Z Men...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Mudah Mulai Investasi Buat Gen Z Menuj...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>Assalamu alaikum Sahabat Syariah! Tantangan so...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Assalamu alaikum Sahabat Syariah Tantangan sos...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5f981857-2c3b-4029-ba08-e307d799d28c')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-5f981857-2c3b-4029-ba08-e307d799d28c button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-5f981857-2c3b-4029-ba08-e307d799d28c');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-587eedde-d54a-4983-b0b3-5c94a7c0cc81\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-587eedde-d54a-4983-b0b3-5c94a7c0cc81')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-587eedde-d54a-4983-b0b3-5c94a7c0cc81 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5387,\n \"fields\": [\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5229,\n \"samples\": [\n \"Sun Jul 09 21:01:42 +0000 2023\",\n \"Wed May 22 16:50:31 +0000 2024\",\n \"Sun Oct 11 09:39:07 +0000 2020\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5387,\n \"samples\": [\n \"@worksfess ni yg nanya pasti gen Z. kalo lo expect gaji 2 dijit ngaca dulu yaa please :). kalo masih anak kemaren sore struggle dulu nyet.\",\n \"Dari data dapat disimpulkan sebenarnya gen Z punya minat yang cukup besar menjadi PNS maupun korporat swasta. Berbeda dengan milenial cenderung lebih tinggi menjadi PNS. Hal ini karena milenial berorientasi pada gaji dan tunjangan untuk tabungan masa depan. #IDNTimesLife https://t.co/4wKYGFPnFC\",\n \"Khususnya di tengah masa pandemi seperti ini menurut survei banyak dari generasi z yang merasakan dampak dari pandemi ini kepada kesehatan mental mereka. 
Generasi yang lebih tua dianggap lebih tangguh karena dari segi finansial ataupun pola pikir yang sudah lebih dewasa.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"keyword\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"finansial gen z\",\n \"gaji gen z\",\n \"kesehatan mental generasi z\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cleanning_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5339,\n \"samples\": [\n \"Gaji dan budaya tidak sesuai Gen Z lebih pilih resign Bener gak yah Terus apa dong yang harus dilakukan Swipe Yuk genZ millenials kerja resign faktor didyouknow fact fakta\",\n \"Gamau gen z tapi apa nuntut pengalaman dan entry level job opportunity maks tahun emg gajelas maunya apa Mau yg profesional bisa dijinakin dgn gaji rendah tpi gamau nerima gen z dan gamau ngajarin trs jahat ke younger coworker tpi nyalahin mental melempem\",\n \"Mental health Apa yang ada di pikiran kalian jika mendengar kata itu Untuk para Generasi Z mungkin sudah tidak asing lagi mendengar kata itu Sudah banyak orang yang menyinggung tentang mental health ini Namun kesadaran akan kesehatan mental di Indonesia masih sangat rendah\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 4
}
],
"source": [
"# cleansing data\n",
"import re\n",
"import string\n",
"\n",
"def clean_text(text):\n",
"    \"\"\"Normalize one raw tweet into plain alphabetic words.\n",
"\n",
"    Removes mentions, the retweet marker, hyperlinks, hashtag signs, digits and\n",
"    every other character that is not an ASCII letter, then collapses the\n",
"    remaining whitespace.\n",
"\n",
"    Args:\n",
"        text: Raw tweet text. Non-string values (e.g. NaN) yield ''.\n",
"\n",
"    Returns:\n",
"        str: Text made only of A-Z, a-z and single spaces, with no leading or\n",
"        trailing space.\n",
"    \"\"\"\n",
"    if not isinstance(text, str):\n",
"        return ''\n",
"\n",
"    text = re.sub(r'@[A-Za-z0-9_]+', '', text)  # delete mention; handles may contain '_'\n",
"    text = re.sub(r'\\bRT\\s+', '', text)  # delete RT only as a standalone word, not inside e.g. 'SMART'\n",
"    text = re.sub(r'https?://\\S+', '', text)  # delete hyperlink\n",
"    text = re.sub(r'\\s+', ' ', text)  # newlines/tabs become one space so words are not glued together\n",
"    text = re.sub(r'[^A-Za-z ]+', '', text)  # delete '#', digits, punctuation, ellipsis, emoji\n",
"    text = re.sub(r' {2,}', ' ', text)  # collapse gaps left by the removed tokens\n",
"    return text.strip()\n",
"\n",
"# Store the cleaned tweet text next to the raw text and preview the result.\n",
"df['cleanning_text'] = df['full_text'].map(clean_text)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "EpoJ46YFpbQB"
},
"source": [
"## case folding"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "3d82FLCHpbQC",
"outputId": "33e71be8-9854-403d-8b9a-191250ef65c1",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 466
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" created_at \\\n",
"0 Mon Dec 30 07:08:54 +0000 2024 \n",
"1 Mon Dec 30 02:09:58 +0000 2024 \n",
"2 Sun Dec 29 02:45:40 +0000 2024 \n",
"3 Sat Dec 28 11:10:45 +0000 2024 \n",
"4 Sat Dec 28 07:30:00 +0000 2024 \n",
"\n",
" full_text keyword \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... finansial gen z \n",
"1 4 Langkah Sederhana Investasi untuk Gen Z Capa... finansial gen z \n",
"2 4 Langkah Mudah Mulai Investasi Buat Gen Z Men... finansial gen z \n",
"3 Assalamu alaikum Sahabat Syariah! Tantangan so... finansial gen z \n",
"4 Finansial yang sehat itu bukan cuma soal punya... finansial gen z \n",
"\n",
" cleanning_text \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 Langkah Sederhana Investasi untuk Gen Z Capai ... \n",
"2 Langkah Mudah Mulai Investasi Buat Gen Z Menuj... \n",
"3 Assalamu alaikum Sahabat Syariah Tantangan sos... \n",
"4 Finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" case_folding \n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen z capai ... \n",
"2 langkah mudah mulai investasi buat gen z menuj... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... "
],
"text/html": [
"\n",
" <div id=\"df-d1619afa-9401-4b8c-8241-f9748d5bc63f\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>created_at</th>\n",
" <th>full_text</th>\n",
" <th>keyword</th>\n",
" <th>cleanning_text</th>\n",
" <th>case_folding</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>4 Langkah Sederhana Investasi untuk Gen Z Capa...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Sederhana Investasi untuk Gen Z Capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen z capai ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>4 Langkah Mudah Mulai Investasi Buat Gen Z Men...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Mudah Mulai Investasi Buat Gen Z Menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen z menuj...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>Assalamu alaikum Sahabat Syariah! Tantangan so...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Assalamu alaikum Sahabat Syariah Tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d1619afa-9401-4b8c-8241-f9748d5bc63f')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-d1619afa-9401-4b8c-8241-f9748d5bc63f button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-d1619afa-9401-4b8c-8241-f9748d5bc63f');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-eff5452d-89bc-44e8-9e1d-bea5779cf26f\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-eff5452d-89bc-44e8-9e1d-bea5779cf26f')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-eff5452d-89bc-44e8-9e1d-bea5779cf26f button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5387,\n \"fields\": [\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5229,\n \"samples\": [\n \"Sun Jul 09 21:01:42 +0000 2023\",\n \"Wed May 22 16:50:31 +0000 2024\",\n \"Sun Oct 11 09:39:07 +0000 2020\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5387,\n \"samples\": [\n \"@worksfess ni yg nanya pasti gen Z. kalo lo expect gaji 2 dijit ngaca dulu yaa please :). kalo masih anak kemaren sore struggle dulu nyet.\",\n \"Dari data dapat disimpulkan sebenarnya gen Z punya minat yang cukup besar menjadi PNS maupun korporat swasta. Berbeda dengan milenial cenderung lebih tinggi menjadi PNS. Hal ini karena milenial berorientasi pada gaji dan tunjangan untuk tabungan masa depan. #IDNTimesLife https://t.co/4wKYGFPnFC\",\n \"Khususnya di tengah masa pandemi seperti ini menurut survei banyak dari generasi z yang merasakan dampak dari pandemi ini kepada kesehatan mental mereka. 
Generasi yang lebih tua dianggap lebih tangguh karena dari segi finansial ataupun pola pikir yang sudah lebih dewasa.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"keyword\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"finansial gen z\",\n \"gaji gen z\",\n \"kesehatan mental generasi z\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cleanning_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5339,\n \"samples\": [\n \"Gaji dan budaya tidak sesuai Gen Z lebih pilih resign Bener gak yah Terus apa dong yang harus dilakukan Swipe Yuk genZ millenials kerja resign faktor didyouknow fact fakta\",\n \"Gamau gen z tapi apa nuntut pengalaman dan entry level job opportunity maks tahun emg gajelas maunya apa Mau yg profesional bisa dijinakin dgn gaji rendah tpi gamau nerima gen z dan gamau ngajarin trs jahat ke younger coworker tpi nyalahin mental melempem\",\n \"Mental health Apa yang ada di pikiran kalian jika mendengar kata itu Untuk para Generasi Z mungkin sudah tidak asing lagi mendengar kata itu Sudah banyak orang yang menyinggung tentang mental health ini Namun kesadaran akan kesehatan mental di Indonesia masih sangat rendah\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"case_folding\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5336,\n \"samples\": [\n \"sebenernya gen z tuh kerja apa aja dijabanin asal wajar dan umr lu kalo bukan perusahaan yang bener udah kerjaan overload gaji autan terus lu bilang bersyukur lu sama aja menyuburkan perbudakan koncol\",\n \"diluar konteks ini lagian siapa lagi yang berani minta gaji tinggi selain gen z ya betah banget sama umr yang berapa lama ga naik naik itu padahal semuanya udah inflasi\",\n \"ternyata bener jg gen z klo kerja gaji kecil tu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": 
\"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 5
}
],
"source": [
"# Case folding\n",
"def case_folding(text):\n",
"    \"\"\"Return a lower-cased copy of `text`.\"\"\"\n",
"    return text.lower()\n",
"\n",
"# Lower-case the cleaned tweets into a new 'case_folding' column, then preview the frame\n",
"df['case_folding'] = df['cleanning_text'].map(case_folding)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2fzDtpQ6pbQC"
},
"source": [
"## Convert slang words"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "goccyreFpbQC",
"outputId": "603b754d-f71f-4e36-cf4d-5d72d19f1c88",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 625
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" created_at \\\n",
"0 Mon Dec 30 07:08:54 +0000 2024 \n",
"1 Mon Dec 30 02:09:58 +0000 2024 \n",
"2 Sun Dec 29 02:45:40 +0000 2024 \n",
"3 Sat Dec 28 11:10:45 +0000 2024 \n",
"4 Sat Dec 28 07:30:00 +0000 2024 \n",
"\n",
" full_text keyword \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... finansial gen z \n",
"1 4 Langkah Sederhana Investasi untuk Gen Z Capa... finansial gen z \n",
"2 4 Langkah Mudah Mulai Investasi Buat Gen Z Men... finansial gen z \n",
"3 Assalamu alaikum Sahabat Syariah! Tantangan so... finansial gen z \n",
"4 Finansial yang sehat itu bukan cuma soal punya... finansial gen z \n",
"\n",
" cleanning_text \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 Langkah Sederhana Investasi untuk Gen Z Capai ... \n",
"2 Langkah Mudah Mulai Investasi Buat Gen Z Menuj... \n",
"3 Assalamu alaikum Sahabat Syariah Tantangan sos... \n",
"4 Finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" case_folding \\\n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen z capai ... \n",
"2 langkah mudah mulai investasi buat gen z menuj... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" convert_slang_word \n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen saja cap... \n",
"2 langkah mudah mulai investasi buat gen saja me... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... "
],
"text/html": [
"\n",
" <div id=\"df-44e127d7-0eb2-4807-ab0b-e55df497fe10\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>created_at</th>\n",
" <th>full_text</th>\n",
" <th>keyword</th>\n",
" <th>cleanning_text</th>\n",
" <th>case_folding</th>\n",
" <th>convert_slang_word</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>4 Langkah Sederhana Investasi untuk Gen Z Capa...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Sederhana Investasi untuk Gen Z Capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen z capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen saja cap...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>4 Langkah Mudah Mulai Investasi Buat Gen Z Men...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Mudah Mulai Investasi Buat Gen Z Menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen z menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen saja me...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>Assalamu alaikum Sahabat Syariah! Tantangan so...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Assalamu alaikum Sahabat Syariah Tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-44e127d7-0eb2-4807-ab0b-e55df497fe10')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-44e127d7-0eb2-4807-ab0b-e55df497fe10 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-44e127d7-0eb2-4807-ab0b-e55df497fe10');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-6e86fa91-0508-4578-a004-048923830ef4\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-6e86fa91-0508-4578-a004-048923830ef4')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-6e86fa91-0508-4578-a004-048923830ef4 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5387,\n \"fields\": [\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5229,\n \"samples\": [\n \"Sun Jul 09 21:01:42 +0000 2023\",\n \"Wed May 22 16:50:31 +0000 2024\",\n \"Sun Oct 11 09:39:07 +0000 2020\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5387,\n \"samples\": [\n \"@worksfess ni yg nanya pasti gen Z. kalo lo expect gaji 2 dijit ngaca dulu yaa please :). kalo masih anak kemaren sore struggle dulu nyet.\",\n \"Dari data dapat disimpulkan sebenarnya gen Z punya minat yang cukup besar menjadi PNS maupun korporat swasta. Berbeda dengan milenial cenderung lebih tinggi menjadi PNS. Hal ini karena milenial berorientasi pada gaji dan tunjangan untuk tabungan masa depan. #IDNTimesLife https://t.co/4wKYGFPnFC\",\n \"Khususnya di tengah masa pandemi seperti ini menurut survei banyak dari generasi z yang merasakan dampak dari pandemi ini kepada kesehatan mental mereka. 
Generasi yang lebih tua dianggap lebih tangguh karena dari segi finansial ataupun pola pikir yang sudah lebih dewasa.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"keyword\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"finansial gen z\",\n \"gaji gen z\",\n \"kesehatan mental generasi z\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cleanning_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5339,\n \"samples\": [\n \"Gaji dan budaya tidak sesuai Gen Z lebih pilih resign Bener gak yah Terus apa dong yang harus dilakukan Swipe Yuk genZ millenials kerja resign faktor didyouknow fact fakta\",\n \"Gamau gen z tapi apa nuntut pengalaman dan entry level job opportunity maks tahun emg gajelas maunya apa Mau yg profesional bisa dijinakin dgn gaji rendah tpi gamau nerima gen z dan gamau ngajarin trs jahat ke younger coworker tpi nyalahin mental melempem\",\n \"Mental health Apa yang ada di pikiran kalian jika mendengar kata itu Untuk para Generasi Z mungkin sudah tidak asing lagi mendengar kata itu Sudah banyak orang yang menyinggung tentang mental health ini Namun kesadaran akan kesehatan mental di Indonesia masih sangat rendah\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"case_folding\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5336,\n \"samples\": [\n \"sebenernya gen z tuh kerja apa aja dijabanin asal wajar dan umr lu kalo bukan perusahaan yang bener udah kerjaan overload gaji autan terus lu bilang bersyukur lu sama aja menyuburkan perbudakan koncol\",\n \"diluar konteks ini lagian siapa lagi yang berani minta gaji tinggi selain gen z ya betah banget sama umr yang berapa lama ga naik naik itu padahal semuanya udah inflasi\",\n \"ternyata bener jg gen z klo kerja gaji kecil tu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": 
\"\"\n }\n },\n {\n \"column\": \"convert_slang_word\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5333,\n \"samples\": [\n \"asmara jika orang ini semuanya dilantik jadi kabinet berapa anggaran gaji tukim honor spj ops bgm nasib milenial genz amp alfa yang akan dhadapkn dengan over demografis coba anggaran itu dialhkan untuk program pembsdm subsidibeasiswa demi indonesia agar tidak cemas\",\n \"essai penugasan raja brawijaya nama bintang anda hartono putra prodi ilmuadministrasibisnis mengatasi dan mengenal gangguan perubahan mental atau kesehatan jiwa dalam remaja generasi saja pada saat pandemi penulis bintang anda hartono\",\n \"ternyata benar juga gen saja kalo kerja gaji kecil itu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 6
}
],
"source": [
"# Convert slang words: load the colloquial Indonesian lexicon (kamus-alay)\n",
"slang_words = pd.read_csv(\n",
"    'https://raw.githubusercontent.com/nasalsabila/kamus-alay/refs/heads/master/colloquial-indonesian-lexicon.csv'\n",
")\n",
"# Lookup table from slang token to formal form; if a slang token occurs in several rows, the last row wins\n",
"slang_words_dict = {\n",
"    slang: formal\n",
"    for slang, formal in zip(slang_words['slang'], slang_words['formal'])\n",
"}\n",
"\n",
"# Function that converts slang tokens to their formal equivalents\n",
"def convert_slang_word(text, slang_dict=None, protected_words=('z',)):\n",
"    \"\"\"Replace each whitespace-separated slang token in `text` with its formal form.\n",
"\n",
"    Tokens listed in `protected_words` are never rewritten. The default protects 'z':\n",
"    the lexicon maps it to 'saja', which turned the study's key phrase 'gen z' /\n",
"    'generasi z' into 'gen saja' / 'generasi saja' in the converted column.\n",
"\n",
"    Args:\n",
"        text: string to normalize (the notebook passes the lower-cased 'case_folding' text).\n",
"        slang_dict: mapping from slang token to formal form; when None, the\n",
"            module-level `slang_words_dict` is used.\n",
"        protected_words: iterable of tokens that must be kept exactly as written.\n",
"\n",
"    Returns:\n",
"        The converted text, with tokens joined by single spaces.\n",
"    \"\"\"\n",
"    mapping = slang_words_dict if slang_dict is None else slang_dict\n",
"    protected = set(protected_words)\n",
"    return ' '.join(\n",
"        word if word in protected else mapping.get(word, word)\n",
"        for word in text.split()\n",
"    )\n",
"\n",
"# Apply the conversion to the 'case_folding' column and preview the frame\n",
"df['convert_slang_word'] = df['case_folding'].map(convert_slang_word)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "InaswBd6pbQD"
},
"source": [
"## Stop word removal"
]
},
{
"cell_type": "code",
"source": [
"# Pinned to the release this notebook was last run with; %pip installs into the running kernel's environment\n",
"%pip install Sastrawi==1.0.1"
],
"metadata": {
"id": "oifhA8okEExE",
"outputId": "16c4daf7-9ea7-4d4a-f12e-6c5a7ca186eb",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting Sastrawi\n",
" Downloading Sastrawi-1.0.1-py2.py3-none-any.whl.metadata (909 bytes)\n",
"Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.7/209.7 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: Sastrawi\n",
"Successfully installed Sastrawi-1.0.1\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "O7t8MApApbQD",
"outputId": "0d313294-1bf6-4329-c5ff-20ea3057f7b5",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 642
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" created_at \\\n",
"0 Mon Dec 30 07:08:54 +0000 2024 \n",
"1 Mon Dec 30 02:09:58 +0000 2024 \n",
"2 Sun Dec 29 02:45:40 +0000 2024 \n",
"3 Sat Dec 28 11:10:45 +0000 2024 \n",
"4 Sat Dec 28 07:30:00 +0000 2024 \n",
"\n",
" full_text keyword \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... finansial gen z \n",
"1 4 Langkah Sederhana Investasi untuk Gen Z Capa... finansial gen z \n",
"2 4 Langkah Mudah Mulai Investasi Buat Gen Z Men... finansial gen z \n",
"3 Assalamu alaikum Sahabat Syariah! Tantangan so... finansial gen z \n",
"4 Finansial yang sehat itu bukan cuma soal punya... finansial gen z \n",
"\n",
" cleanning_text \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 Langkah Sederhana Investasi untuk Gen Z Capai ... \n",
"2 Langkah Mudah Mulai Investasi Buat Gen Z Menuj... \n",
"3 Assalamu alaikum Sahabat Syariah Tantangan sos... \n",
"4 Finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" case_folding \\\n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen z capai ... \n",
"2 langkah mudah mulai investasi buat gen z menuj... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" convert_slang_word \\\n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen saja cap... \n",
"2 langkah mudah mulai investasi buat gen saja me... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" filtering \n",
"0 kemudahan berbelanja maupun mengakses produk p... \n",
"1 langkah sederhana investasi gen capai stabilit... \n",
"2 langkah mudah mulai investasi buat gen menuju ... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial sehat bukan cuma soal punya banyak u... "
],
"text/html": [
"\n",
" <div id=\"df-3b749406-4923-457c-9b05-5be3e0e12d1e\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>created_at</th>\n",
" <th>full_text</th>\n",
" <th>keyword</th>\n",
" <th>cleanning_text</th>\n",
" <th>case_folding</th>\n",
" <th>convert_slang_word</th>\n",
" <th>filtering</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan berbelanja maupun mengakses produk p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>4 Langkah Sederhana Investasi untuk Gen Z Capa...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Sederhana Investasi untuk Gen Z Capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen z capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen saja cap...</td>\n",
" <td>langkah sederhana investasi gen capai stabilit...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>4 Langkah Mudah Mulai Investasi Buat Gen Z Men...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Mudah Mulai Investasi Buat Gen Z Menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen z menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen saja me...</td>\n",
" <td>langkah mudah mulai investasi buat gen menuju ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>Assalamu alaikum Sahabat Syariah! Tantangan so...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Assalamu alaikum Sahabat Syariah Tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial sehat bukan cuma soal punya banyak u...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3b749406-4923-457c-9b05-5be3e0e12d1e')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-3b749406-4923-457c-9b05-5be3e0e12d1e button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-3b749406-4923-457c-9b05-5be3e0e12d1e');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-38ae3508-5376-4308-988a-ec006dedfb46\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-38ae3508-5376-4308-988a-ec006dedfb46')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-38ae3508-5376-4308-988a-ec006dedfb46 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5387,\n \"fields\": [\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5229,\n \"samples\": [\n \"Sun Jul 09 21:01:42 +0000 2023\",\n \"Wed May 22 16:50:31 +0000 2024\",\n \"Sun Oct 11 09:39:07 +0000 2020\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5387,\n \"samples\": [\n \"@worksfess ni yg nanya pasti gen Z. kalo lo expect gaji 2 dijit ngaca dulu yaa please :). kalo masih anak kemaren sore struggle dulu nyet.\",\n \"Dari data dapat disimpulkan sebenarnya gen Z punya minat yang cukup besar menjadi PNS maupun korporat swasta. Berbeda dengan milenial cenderung lebih tinggi menjadi PNS. Hal ini karena milenial berorientasi pada gaji dan tunjangan untuk tabungan masa depan. #IDNTimesLife https://t.co/4wKYGFPnFC\",\n \"Khususnya di tengah masa pandemi seperti ini menurut survei banyak dari generasi z yang merasakan dampak dari pandemi ini kepada kesehatan mental mereka. 
Generasi yang lebih tua dianggap lebih tangguh karena dari segi finansial ataupun pola pikir yang sudah lebih dewasa.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"keyword\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"finansial gen z\",\n \"gaji gen z\",\n \"kesehatan mental generasi z\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cleanning_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5339,\n \"samples\": [\n \"Gaji dan budaya tidak sesuai Gen Z lebih pilih resign Bener gak yah Terus apa dong yang harus dilakukan Swipe Yuk genZ millenials kerja resign faktor didyouknow fact fakta\",\n \"Gamau gen z tapi apa nuntut pengalaman dan entry level job opportunity maks tahun emg gajelas maunya apa Mau yg profesional bisa dijinakin dgn gaji rendah tpi gamau nerima gen z dan gamau ngajarin trs jahat ke younger coworker tpi nyalahin mental melempem\",\n \"Mental health Apa yang ada di pikiran kalian jika mendengar kata itu Untuk para Generasi Z mungkin sudah tidak asing lagi mendengar kata itu Sudah banyak orang yang menyinggung tentang mental health ini Namun kesadaran akan kesehatan mental di Indonesia masih sangat rendah\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"case_folding\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5336,\n \"samples\": [\n \"sebenernya gen z tuh kerja apa aja dijabanin asal wajar dan umr lu kalo bukan perusahaan yang bener udah kerjaan overload gaji autan terus lu bilang bersyukur lu sama aja menyuburkan perbudakan koncol\",\n \"diluar konteks ini lagian siapa lagi yang berani minta gaji tinggi selain gen z ya betah banget sama umr yang berapa lama ga naik naik itu padahal semuanya udah inflasi\",\n \"ternyata bener jg gen z klo kerja gaji kecil tu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": 
\"\"\n }\n },\n {\n \"column\": \"convert_slang_word\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5333,\n \"samples\": [\n \"asmara jika orang ini semuanya dilantik jadi kabinet berapa anggaran gaji tukim honor spj ops bgm nasib milenial genz amp alfa yang akan dhadapkn dengan over demografis coba anggaran itu dialhkan untuk program pembsdm subsidibeasiswa demi indonesia agar tidak cemas\",\n \"essai penugasan raja brawijaya nama bintang anda hartono putra prodi ilmuadministrasibisnis mengatasi dan mengenal gangguan perubahan mental atau kesehatan jiwa dalam remaja generasi saja pada saat pandemi penulis bintang anda hartono\",\n \"ternyata benar juga gen saja kalo kerja gaji kecil itu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"filtering\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5323,\n \"samples\": [\n \"boomers pensiun gen kali sih mempertanyakan gen suka kaburan judging tempat kerjanya toxic menuntut worklife balance padahal enggak balance selama menerima gaji indak suka angkat telepon padahal sih pemberi kerjanya enggak terbiasa chatting\",\n \"pentingnya kesehatan mental terutama generasi thread\",\n \"disadari memiliki pengaruh cukup signifikan kondisi mental health dimiliki beberapa survei menyebutkan generasi memiliki kesadaran lebih rendah mematuhi protokol kesehatandua sangat penting dimiliki\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"# Stop word removal\n",
"# NOTE: an earlier variant of this cell used NLTK's stopwords.words('indonesian');\n",
"# the Sastrawi stop-word list below is the one in use.\n",
"\n",
"from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory\n",
"\n",
"# Build the stop-word set once. The list is the same for every row, so creating\n",
"# the factory inside filtering() repeated that work on every call made by .apply().\n",
"SASTRAWI_STOP_WORDS = frozenset(StopWordRemoverFactory().get_stop_words())\n",
"\n",
"def filtering(text, stop_words=SASTRAWI_STOP_WORDS):\n",
"    \"\"\"Remove stop words from a whitespace-separated string.\n",
"\n",
"    Args:\n",
"        text: String to filter; it is split on whitespace with str.split().\n",
"        stop_words: Collection of words to drop. Defaults to the Sastrawi\n",
"            stop-word list built once above.\n",
"\n",
"    Returns:\n",
"        The remaining words, in their original order, joined by single spaces.\n",
"    \"\"\"\n",
"    return ' '.join(word for word in text.split() if word not in stop_words)\n",
"\n",
"df['filtering'] = df['convert_slang_word'].apply(filtering)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2zX-C_01pbQD"
},
"source": [
"## tokenizing"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"id": "ZSynjnyCpbQE",
"outputId": "7b8a2e25-7ad3-444c-9cad-592e56263363",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Unzipping tokenizers/punkt.zip.\n",
"[nltk_data] Downloading package punkt_tab to /root/nltk_data...\n",
"[nltk_data] Unzipping tokenizers/punkt_tab.zip.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 9
}
],
"source": [
"import nltk\n",
"nltk.download('punkt')\n",
"nltk.download('punkt_tab')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "0jT4u7sfpbQE",
"outputId": "42daddde-1a79-47c2-a3e3-53c5190e12e1",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 642
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" created_at \\\n",
"0 Mon Dec 30 07:08:54 +0000 2024 \n",
"1 Mon Dec 30 02:09:58 +0000 2024 \n",
"2 Sun Dec 29 02:45:40 +0000 2024 \n",
"3 Sat Dec 28 11:10:45 +0000 2024 \n",
"4 Sat Dec 28 07:30:00 +0000 2024 \n",
"\n",
" full_text keyword \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... finansial gen z \n",
"1 4 Langkah Sederhana Investasi untuk Gen Z Capa... finansial gen z \n",
"2 4 Langkah Mudah Mulai Investasi Buat Gen Z Men... finansial gen z \n",
"3 Assalamu alaikum Sahabat Syariah! Tantangan so... finansial gen z \n",
"4 Finansial yang sehat itu bukan cuma soal punya... finansial gen z \n",
"\n",
" cleanning_text \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 Langkah Sederhana Investasi untuk Gen Z Capai ... \n",
"2 Langkah Mudah Mulai Investasi Buat Gen Z Menuj... \n",
"3 Assalamu alaikum Sahabat Syariah Tantangan sos... \n",
"4 Finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" case_folding \\\n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen z capai ... \n",
"2 langkah mudah mulai investasi buat gen z menuj... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" convert_slang_word \\\n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen saja cap... \n",
"2 langkah mudah mulai investasi buat gen saja me... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" filtering \\\n",
"0 kemudahan berbelanja maupun mengakses produk p... \n",
"1 langkah sederhana investasi gen capai stabilit... \n",
"2 langkah mudah mulai investasi buat gen menuju ... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial sehat bukan cuma soal punya banyak u... \n",
"\n",
" tokenizing \n",
"0 [kemudahan, berbelanja, maupun, mengakses, pro... \n",
"1 [langkah, sederhana, investasi, gen, capai, st... \n",
"2 [langkah, mudah, mulai, investasi, buat, gen, ... \n",
"3 [assalamu, alaikum, sahabat, syariah, tantanga... \n",
"4 [finansial, sehat, bukan, cuma, soal, punya, b... "
],
"text/html": [
"\n",
" <div id=\"df-13e4f51b-9ca3-4a78-bc78-009ec2f8c38c\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>created_at</th>\n",
" <th>full_text</th>\n",
" <th>keyword</th>\n",
" <th>cleanning_text</th>\n",
" <th>case_folding</th>\n",
" <th>convert_slang_word</th>\n",
" <th>filtering</th>\n",
" <th>tokenizing</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan berbelanja maupun mengakses produk p...</td>\n",
" <td>[kemudahan, berbelanja, maupun, mengakses, pro...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>4 Langkah Sederhana Investasi untuk Gen Z Capa...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Sederhana Investasi untuk Gen Z Capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen z capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen saja cap...</td>\n",
" <td>langkah sederhana investasi gen capai stabilit...</td>\n",
" <td>[langkah, sederhana, investasi, gen, capai, st...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>4 Langkah Mudah Mulai Investasi Buat Gen Z Men...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Mudah Mulai Investasi Buat Gen Z Menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen z menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen saja me...</td>\n",
" <td>langkah mudah mulai investasi buat gen menuju ...</td>\n",
" <td>[langkah, mudah, mulai, investasi, buat, gen, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>Assalamu alaikum Sahabat Syariah! Tantangan so...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Assalamu alaikum Sahabat Syariah Tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>[assalamu, alaikum, sahabat, syariah, tantanga...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial sehat bukan cuma soal punya banyak u...</td>\n",
" <td>[finansial, sehat, bukan, cuma, soal, punya, b...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-13e4f51b-9ca3-4a78-bc78-009ec2f8c38c')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-13e4f51b-9ca3-4a78-bc78-009ec2f8c38c button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-13e4f51b-9ca3-4a78-bc78-009ec2f8c38c');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-dea4f2d6-dc79-43e0-b519-1051bafe6653\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-dea4f2d6-dc79-43e0-b519-1051bafe6653')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-dea4f2d6-dc79-43e0-b519-1051bafe6653 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5387,\n \"fields\": [\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5229,\n \"samples\": [\n \"Sun Jul 09 21:01:42 +0000 2023\",\n \"Wed May 22 16:50:31 +0000 2024\",\n \"Sun Oct 11 09:39:07 +0000 2020\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5387,\n \"samples\": [\n \"@worksfess ni yg nanya pasti gen Z. kalo lo expect gaji 2 dijit ngaca dulu yaa please :). kalo masih anak kemaren sore struggle dulu nyet.\",\n \"Dari data dapat disimpulkan sebenarnya gen Z punya minat yang cukup besar menjadi PNS maupun korporat swasta. Berbeda dengan milenial cenderung lebih tinggi menjadi PNS. Hal ini karena milenial berorientasi pada gaji dan tunjangan untuk tabungan masa depan. #IDNTimesLife https://t.co/4wKYGFPnFC\",\n \"Khususnya di tengah masa pandemi seperti ini menurut survei banyak dari generasi z yang merasakan dampak dari pandemi ini kepada kesehatan mental mereka. 
Generasi yang lebih tua dianggap lebih tangguh karena dari segi finansial ataupun pola pikir yang sudah lebih dewasa.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"keyword\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"finansial gen z\",\n \"gaji gen z\",\n \"kesehatan mental generasi z\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cleanning_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5339,\n \"samples\": [\n \"Gaji dan budaya tidak sesuai Gen Z lebih pilih resign Bener gak yah Terus apa dong yang harus dilakukan Swipe Yuk genZ millenials kerja resign faktor didyouknow fact fakta\",\n \"Gamau gen z tapi apa nuntut pengalaman dan entry level job opportunity maks tahun emg gajelas maunya apa Mau yg profesional bisa dijinakin dgn gaji rendah tpi gamau nerima gen z dan gamau ngajarin trs jahat ke younger coworker tpi nyalahin mental melempem\",\n \"Mental health Apa yang ada di pikiran kalian jika mendengar kata itu Untuk para Generasi Z mungkin sudah tidak asing lagi mendengar kata itu Sudah banyak orang yang menyinggung tentang mental health ini Namun kesadaran akan kesehatan mental di Indonesia masih sangat rendah\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"case_folding\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5336,\n \"samples\": [\n \"sebenernya gen z tuh kerja apa aja dijabanin asal wajar dan umr lu kalo bukan perusahaan yang bener udah kerjaan overload gaji autan terus lu bilang bersyukur lu sama aja menyuburkan perbudakan koncol\",\n \"diluar konteks ini lagian siapa lagi yang berani minta gaji tinggi selain gen z ya betah banget sama umr yang berapa lama ga naik naik itu padahal semuanya udah inflasi\",\n \"ternyata bener jg gen z klo kerja gaji kecil tu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": 
\"\"\n }\n },\n {\n \"column\": \"convert_slang_word\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5333,\n \"samples\": [\n \"asmara jika orang ini semuanya dilantik jadi kabinet berapa anggaran gaji tukim honor spj ops bgm nasib milenial genz amp alfa yang akan dhadapkn dengan over demografis coba anggaran itu dialhkan untuk program pembsdm subsidibeasiswa demi indonesia agar tidak cemas\",\n \"essai penugasan raja brawijaya nama bintang anda hartono putra prodi ilmuadministrasibisnis mengatasi dan mengenal gangguan perubahan mental atau kesehatan jiwa dalam remaja generasi saja pada saat pandemi penulis bintang anda hartono\",\n \"ternyata benar juga gen saja kalo kerja gaji kecil itu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"filtering\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5323,\n \"samples\": [\n \"boomers pensiun gen kali sih mempertanyakan gen suka kaburan judging tempat kerjanya toxic menuntut worklife balance padahal enggak balance selama menerima gaji indak suka angkat telepon padahal sih pemberi kerjanya enggak terbiasa chatting\",\n \"pentingnya kesehatan mental terutama generasi thread\",\n \"disadari memiliki pengaruh cukup signifikan kondisi mental health dimiliki beberapa survei menyebutkan generasi memiliki kesadaran lebih rendah mematuhi protokol kesehatandua sangat penting dimiliki\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tokenizing\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"# tokenizing\n",
"from nltk.tokenize import word_tokenize\n",
"\n",
"def tokenizing(text):\n",
"    \"\"\"Return the list of tokens that NLTK's word_tokenize produces for `text`.\"\"\"\n",
"    return word_tokenize(text)\n",
"\n",
"# Tokenize the stop-word-filtered text of every row into a new 'tokenizing' column.\n",
"df['tokenizing'] = df['filtering'].apply(tokenizing)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rhdJgP7BpbQE"
},
"source": [
"## stemming"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"id": "OzGIEhUDpbQF",
"outputId": "950963f8-06ba-46c2-f7ae-5a5bc9f65555",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 677
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" created_at \\\n",
"0 Mon Dec 30 07:08:54 +0000 2024 \n",
"1 Mon Dec 30 02:09:58 +0000 2024 \n",
"2 Sun Dec 29 02:45:40 +0000 2024 \n",
"3 Sat Dec 28 11:10:45 +0000 2024 \n",
"4 Sat Dec 28 07:30:00 +0000 2024 \n",
"\n",
" full_text keyword \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... finansial gen z \n",
"1 4 Langkah Sederhana Investasi untuk Gen Z Capa... finansial gen z \n",
"2 4 Langkah Mudah Mulai Investasi Buat Gen Z Men... finansial gen z \n",
"3 Assalamu alaikum Sahabat Syariah! Tantangan so... finansial gen z \n",
"4 Finansial yang sehat itu bukan cuma soal punya... finansial gen z \n",
"\n",
" cleanning_text \\\n",
"0 Kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 Langkah Sederhana Investasi untuk Gen Z Capai ... \n",
"2 Langkah Mudah Mulai Investasi Buat Gen Z Menuj... \n",
"3 Assalamu alaikum Sahabat Syariah Tantangan sos... \n",
"4 Finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" case_folding \\\n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen z capai ... \n",
"2 langkah mudah mulai investasi buat gen z menuj... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" convert_slang_word \\\n",
"0 kemudahan dalam berbelanja maupun mengakses pr... \n",
"1 langkah sederhana investasi untuk gen saja cap... \n",
"2 langkah mudah mulai investasi buat gen saja me... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial yang sehat itu bukan cuma soal punya... \n",
"\n",
" filtering \\\n",
"0 kemudahan berbelanja maupun mengakses produk p... \n",
"1 langkah sederhana investasi gen capai stabilit... \n",
"2 langkah mudah mulai investasi buat gen menuju ... \n",
"3 assalamu alaikum sahabat syariah tantangan sos... \n",
"4 finansial sehat bukan cuma soal punya banyak u... \n",
"\n",
" tokenizing \\\n",
"0 [kemudahan, berbelanja, maupun, mengakses, pro... \n",
"1 [langkah, sederhana, investasi, gen, capai, st... \n",
"2 [langkah, mudah, mulai, investasi, buat, gen, ... \n",
"3 [assalamu, alaikum, sahabat, syariah, tantanga... \n",
"4 [finansial, sehat, bukan, cuma, soal, punya, b... \n",
"\n",
" stemming \n",
"0 mudah belanja maupun akses produk pinjam kini ... \n",
"1 langkah sederhana investasi gen capai stabilit... \n",
"2 langkah mudah mulai investasi buat gen tuju fi... \n",
"3 assalamu alaikum sahabat syariah tantang sosia... \n",
"4 finansial sehat bukan cuma soal punya banyak u... "
],
"text/html": [
"\n",
" <div id=\"df-9617e45e-8dfd-47ad-b3bc-25e09286bcc0\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>created_at</th>\n",
" <th>full_text</th>\n",
" <th>keyword</th>\n",
" <th>cleanning_text</th>\n",
" <th>case_folding</th>\n",
" <th>convert_slang_word</th>\n",
" <th>filtering</th>\n",
" <th>tokenizing</th>\n",
" <th>stemming</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan dalam berbelanja maupun mengakses pr...</td>\n",
" <td>kemudahan berbelanja maupun mengakses produk p...</td>\n",
" <td>[kemudahan, berbelanja, maupun, mengakses, pro...</td>\n",
" <td>mudah belanja maupun akses produk pinjam kini ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>4 Langkah Sederhana Investasi untuk Gen Z Capa...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Sederhana Investasi untuk Gen Z Capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen z capai ...</td>\n",
" <td>langkah sederhana investasi untuk gen saja cap...</td>\n",
" <td>langkah sederhana investasi gen capai stabilit...</td>\n",
" <td>[langkah, sederhana, investasi, gen, capai, st...</td>\n",
" <td>langkah sederhana investasi gen capai stabilit...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>4 Langkah Mudah Mulai Investasi Buat Gen Z Men...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Langkah Mudah Mulai Investasi Buat Gen Z Menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen z menuj...</td>\n",
" <td>langkah mudah mulai investasi buat gen saja me...</td>\n",
" <td>langkah mudah mulai investasi buat gen menuju ...</td>\n",
" <td>[langkah, mudah, mulai, investasi, buat, gen, ...</td>\n",
" <td>langkah mudah mulai investasi buat gen tuju fi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>Assalamu alaikum Sahabat Syariah! Tantangan so...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Assalamu alaikum Sahabat Syariah Tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>assalamu alaikum sahabat syariah tantangan sos...</td>\n",
" <td>[assalamu, alaikum, sahabat, syariah, tantanga...</td>\n",
" <td>assalamu alaikum sahabat syariah tantang sosia...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial gen z</td>\n",
" <td>Finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial yang sehat itu bukan cuma soal punya...</td>\n",
" <td>finansial sehat bukan cuma soal punya banyak u...</td>\n",
" <td>[finansial, sehat, bukan, cuma, soal, punya, b...</td>\n",
" <td>finansial sehat bukan cuma soal punya banyak u...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9617e45e-8dfd-47ad-b3bc-25e09286bcc0')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-9617e45e-8dfd-47ad-b3bc-25e09286bcc0 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-9617e45e-8dfd-47ad-b3bc-25e09286bcc0');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-60fdfdfb-fb48-48a9-838e-a1a1002340bf\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-60fdfdfb-fb48-48a9-838e-a1a1002340bf')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-60fdfdfb-fb48-48a9-838e-a1a1002340bf button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5387,\n \"fields\": [\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5229,\n \"samples\": [\n \"Sun Jul 09 21:01:42 +0000 2023\",\n \"Wed May 22 16:50:31 +0000 2024\",\n \"Sun Oct 11 09:39:07 +0000 2020\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5387,\n \"samples\": [\n \"@worksfess ni yg nanya pasti gen Z. kalo lo expect gaji 2 dijit ngaca dulu yaa please :). kalo masih anak kemaren sore struggle dulu nyet.\",\n \"Dari data dapat disimpulkan sebenarnya gen Z punya minat yang cukup besar menjadi PNS maupun korporat swasta. Berbeda dengan milenial cenderung lebih tinggi menjadi PNS. Hal ini karena milenial berorientasi pada gaji dan tunjangan untuk tabungan masa depan. #IDNTimesLife https://t.co/4wKYGFPnFC\",\n \"Khususnya di tengah masa pandemi seperti ini menurut survei banyak dari generasi z yang merasakan dampak dari pandemi ini kepada kesehatan mental mereka. 
Generasi yang lebih tua dianggap lebih tangguh karena dari segi finansial ataupun pola pikir yang sudah lebih dewasa.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"keyword\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"finansial gen z\",\n \"gaji gen z\",\n \"kesehatan mental generasi z\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cleanning_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5339,\n \"samples\": [\n \"Gaji dan budaya tidak sesuai Gen Z lebih pilih resign Bener gak yah Terus apa dong yang harus dilakukan Swipe Yuk genZ millenials kerja resign faktor didyouknow fact fakta\",\n \"Gamau gen z tapi apa nuntut pengalaman dan entry level job opportunity maks tahun emg gajelas maunya apa Mau yg profesional bisa dijinakin dgn gaji rendah tpi gamau nerima gen z dan gamau ngajarin trs jahat ke younger coworker tpi nyalahin mental melempem\",\n \"Mental health Apa yang ada di pikiran kalian jika mendengar kata itu Untuk para Generasi Z mungkin sudah tidak asing lagi mendengar kata itu Sudah banyak orang yang menyinggung tentang mental health ini Namun kesadaran akan kesehatan mental di Indonesia masih sangat rendah\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"case_folding\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5336,\n \"samples\": [\n \"sebenernya gen z tuh kerja apa aja dijabanin asal wajar dan umr lu kalo bukan perusahaan yang bener udah kerjaan overload gaji autan terus lu bilang bersyukur lu sama aja menyuburkan perbudakan koncol\",\n \"diluar konteks ini lagian siapa lagi yang berani minta gaji tinggi selain gen z ya betah banget sama umr yang berapa lama ga naik naik itu padahal semuanya udah inflasi\",\n \"ternyata bener jg gen z klo kerja gaji kecil tu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": 
\"\"\n }\n },\n {\n \"column\": \"convert_slang_word\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5333,\n \"samples\": [\n \"asmara jika orang ini semuanya dilantik jadi kabinet berapa anggaran gaji tukim honor spj ops bgm nasib milenial genz amp alfa yang akan dhadapkn dengan over demografis coba anggaran itu dialhkan untuk program pembsdm subsidibeasiswa demi indonesia agar tidak cemas\",\n \"essai penugasan raja brawijaya nama bintang anda hartono putra prodi ilmuadministrasibisnis mengatasi dan mengenal gangguan perubahan mental atau kesehatan jiwa dalam remaja generasi saja pada saat pandemi penulis bintang anda hartono\",\n \"ternyata benar juga gen saja kalo kerja gaji kecil itu mikir hehe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"filtering\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5323,\n \"samples\": [\n \"boomers pensiun gen kali sih mempertanyakan gen suka kaburan judging tempat kerjanya toxic menuntut worklife balance padahal enggak balance selama menerima gaji indak suka angkat telepon padahal sih pemberi kerjanya enggak terbiasa chatting\",\n \"pentingnya kesehatan mental terutama generasi thread\",\n \"disadari memiliki pengaruh cukup signifikan kondisi mental health dimiliki beberapa survei menyebutkan generasi memiliki kesadaran lebih rendah mematuhi protokol kesehatandua sangat penting dimiliki\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tokenizing\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"stemming\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5323,\n \"samples\": [\n \"boomers pensiun gen kali sih tanya gen suka kabur judging tempat kerja toxic tuntut worklife balance padahal enggak balance lama terima gaji indak suka angkat telepon padahal sih beri kerja enggak biasa chatting\",\n \"penting 
sehat mental utama generasi thread\",\n \"sadar milik pengaruh cukup signifikan kondisi mental health milik beberapa survei sebut generasi milik sadar lebih rendah patuh protokol kesehatandua sangat penting milik\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 11
}
],
"source": [
"# stemming\n",
"from Sastrawi.Stemmer.StemmerFactory import StemmerFactory\n",
"\n",
"# create stemmer\n",
"factory = StemmerFactory()\n",
"stemmer = factory.create_stemmer()\n",
"\n",
"def stem_text(tokens):\n",
" text = ' '.join(tokens) # Join the tokens back into a single string\n",
" return stemmer.stem(text)\n",
"\n",
"df['stemming'] = df['tokenizing'].apply(stem_text)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"id": "BfG2wbo2pbQF",
"outputId": "fe5f3922-2e2a-42ea-8d27-48dd4d985560",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 5387 entries, 0 to 5386\n",
"Data columns (total 9 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 created_at 5387 non-null object\n",
" 1 full_text 5387 non-null object\n",
" 2 keyword 5387 non-null object\n",
" 3 cleanning_text 5387 non-null object\n",
" 4 case_folding 5387 non-null object\n",
" 5 convert_slang_word 5387 non-null object\n",
" 6 filtering 5387 non-null object\n",
" 7 tokenizing 5387 non-null object\n",
" 8 stemming 5387 non-null object\n",
"dtypes: object(9)\n",
"memory usage: 378.9+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "lrFlWml7pbQG"
},
"outputs": [],
"source": [
"df.drop(df.columns[[1, 3, 4, 5, 6, 7]], axis=1, inplace=True)\n",
"\n",
"# Rename column 'stemming' to 'full_text'\n",
"df.rename(columns={'stemming': 'full_text'}, inplace=True)\n",
"\n",
"df.to_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-clean.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kDCcxR4vpbQG"
},
"source": [
"# Pelabelan Data"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "d30hAZZ7pbQG",
"outputId": "fb627f89-198e-48ec-8ace-2f2d9fb65737",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 293
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" created_at keyword \\\n",
"0 Mon Dec 30 07:08:54 +0000 2024 finansial gen z \n",
"1 Mon Dec 30 02:09:58 +0000 2024 finansial gen z \n",
"2 Sun Dec 29 02:45:40 +0000 2024 finansial gen z \n",
"3 Sat Dec 28 11:10:45 +0000 2024 finansial gen z \n",
"4 Sat Dec 28 07:30:00 +0000 2024 finansial gen z \n",
"\n",
" full_text \n",
"0 mudah belanja maupun akses produk pinjam kini ... \n",
"1 langkah sederhana investasi gen capai stabilit... \n",
"2 langkah mudah mulai investasi buat gen tuju fi... \n",
"3 assalamu alaikum sahabat syariah tantang sosia... \n",
"4 finansial sehat bukan cuma soal punya banyak u... "
],
"text/html": [
"\n",
" <div id=\"df-50603f99-07b3-41f4-877c-5567785ac3be\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>created_at</th>\n",
" <th>keyword</th>\n",
" <th>full_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mon Dec 30 07:08:54 +0000 2024</td>\n",
" <td>finansial gen z</td>\n",
" <td>mudah belanja maupun akses produk pinjam kini ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mon Dec 30 02:09:58 +0000 2024</td>\n",
" <td>finansial gen z</td>\n",
" <td>langkah sederhana investasi gen capai stabilit...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sun Dec 29 02:45:40 +0000 2024</td>\n",
" <td>finansial gen z</td>\n",
" <td>langkah mudah mulai investasi buat gen tuju fi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Sat Dec 28 11:10:45 +0000 2024</td>\n",
" <td>finansial gen z</td>\n",
" <td>assalamu alaikum sahabat syariah tantang sosia...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sat Dec 28 07:30:00 +0000 2024</td>\n",
" <td>finansial gen z</td>\n",
" <td>finansial sehat bukan cuma soal punya banyak u...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-50603f99-07b3-41f4-877c-5567785ac3be')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-50603f99-07b3-41f4-877c-5567785ac3be button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-50603f99-07b3-41f4-877c-5567785ac3be');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-45e17e67-531c-466d-a0d0-5bc50897cbb8\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-45e17e67-531c-466d-a0d0-5bc50897cbb8')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-45e17e67-531c-466d-a0d0-5bc50897cbb8 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5387,\n \"fields\": [\n {\n \"column\": \"created_at\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5229,\n \"samples\": [\n \"Sun Jul 09 21:01:42 +0000 2023\",\n \"Wed May 22 16:50:31 +0000 2024\",\n \"Sun Oct 11 09:39:07 +0000 2020\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"keyword\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"finansial gen z\",\n \"gaji gen z\",\n \"kesehatan mental generasi z\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"full_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5323,\n \"samples\": [\n \"boomers pensiun gen kali sih tanya gen suka kabur judging tempat kerja toxic tuntut worklife balance padahal enggak balance lama terima gaji indak suka angkat telepon padahal sih beri kerja enggak biasa chatting\",\n \"penting sehat mental utama generasi thread\",\n \"sadar milik pengaruh cukup signifikan kondisi mental health milik beberapa survei sebut generasi milik sadar lebih rendah patuh protokol kesehatandua sangat penting milik\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 18
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-clean.csv')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"id": "uQyIeS0kpbQH",
"outputId": "2325a6d0-ef0b-43a4-90f7-de0981b6153a",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 5387 entries, 0 to 5386\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 created_at 5387 non-null object\n",
" 1 keyword 5387 non-null object\n",
" 2 full_text 5387 non-null object\n",
"dtypes: object(3)\n",
"memory usage: 126.4+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"id": "t4aXN3ZppbQH"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"#unduh kamus inset lexicon positif dan negatif\n",
"positive_url = \"https://raw.githubusercontent.com/fajri91/InSet/master/positive.tsv\"\n",
"negative_url = \"https://raw.githubusercontent.com/fajri91/InSet/master/negative.tsv\"\n",
"\n",
"positive_lexicon = set(pd.read_csv(positive_url, sep='\\t', header=None)[0])\n",
"negative_lexicon = set(pd.read_csv(negative_url, sep='\\t', header=None)[0])\n",
"\n",
"#fungsi menghitung skor sentimen\n",
"def determine_sentiment(text):\n",
" if isinstance(text, str):\n",
" positive_count = sum(1 for word in text.split() if word in positive_lexicon)\n",
" negative_count = sum(1 for word in text.split() if word in negative_lexicon)\n",
" sentiment_score = positive_count - negative_count\n",
" if sentiment_score > 0:\n",
" sentiment = 'Positif'\n",
" elif sentiment_score < 0:\n",
" sentiment = 'Negatif'\n",
" else:\n",
" sentiment = 'Netral'\n",
" return sentiment_score, sentiment\n",
" return 0, \"netral\"\n",
"\n",
"#menerapkan perhitungan ke datasets\n",
"df[['score', 'label']] = df['full_text'].apply(lambda x: pd.Series(determine_sentiment(x)))\n",
"\n",
"df.to_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-label.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"id": "uPqiuk8wpbQI",
"outputId": "52fc2264-16bf-4659-b233-da3cdbb6c64f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 209
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"label\n",
"Negatif 2955\n",
"Positif 1662\n",
"Netral 770\n",
"Name: count, dtype: int64"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <th>label</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Negatif</th>\n",
" <td>2955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Positif</th>\n",
" <td>1662</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Netral</th>\n",
" <td>770</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div><br><label><b>dtype:</b> int64</label>"
]
},
"metadata": {},
"execution_count": 21
}
],
"source": [
"# Menghitung jumlah label\n",
"df['label'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "uXn4fFoSpbQI"
},
"source": [
"# Ekstraksi Fitur"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"id": "oGfljS6zpbQI",
"outputId": "df9c00cb-13d3-42dc-a8aa-0c3786976e5f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 209
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"label\n",
"Negatif 2955\n",
"Positif 1662\n",
"Netral 770\n",
"Name: count, dtype: int64"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <th>label</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Negatif</th>\n",
" <td>2955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Positif</th>\n",
" <td>1662</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Netral</th>\n",
" <td>770</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div><br><label><b>dtype:</b> int64</label>"
]
},
"metadata": {},
"execution_count": 22
}
],
"source": [
"import pandas as pd\n",
"#Membaca dataset yang sudah diberi label\n",
"df = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-label.csv')\n",
"df['label'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"id": "3kwG5OI_pbQR",
"outputId": "4423ae3a-f959-469a-916b-de7ee5344767",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[0. 0. 0. ... 0. 0. 0.]]\n"
]
}
],
"source": [
"import pandas as pd\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"\n",
"labeled_csv_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-label.csv'\n",
"tfidf_csv_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-tfidf.csv'\n",
"\n",
"# Reload the labelled dataset; its `full_text` column is the text to vectorise.\n",
"df = pd.read_csv(labeled_csv_path)\n",
"\n",
"# Fit a default TF-IDF vectoriser on the text and transform it in one step.\n",
"tfidf_vectorizer = TfidfVectorizer()\n",
"x = tfidf_vectorizer.fit_transform(df['full_text'])\n",
"\n",
"# Convert the result to a dense array and preview its first row.\n",
"tfidf = x.toarray()\n",
"print(tfidf[:1])\n",
"\n",
"# Persist the dense matrix as CSV without the index column.\n",
"tfidf_df = pd.DataFrame(tfidf)\n",
"tfidf_df.to_csv(tfidf_csv_path, index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "TgDVRkE7pbQR"
},
"source": [
"# Data Balancing"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "IeNHm1SzpbQS",
"outputId": "62469c77-ef1f-4a83-9d28-1991992c1825",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" 0 1 2 3 4 5 6 7 8 9 ... 11394 11395 11396 \\\n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"\n",
" 11397 11398 11399 11400 11401 11402 label \n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 Positif \n",
"1 0.0 0.0 0.0 0.0 0.0 0.0 Positif \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 Positif \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 Positif \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 Positif \n",
"\n",
"[5 rows x 11404 columns]\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"tfidf_csv_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-tfidf.csv'\n",
"labeled_csv_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-label.csv'\n",
"combined_csv_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-balance.csv'\n",
"\n",
"tfidf_data = pd.read_csv(tfidf_csv_path)\n",
"labels_data = pd.read_csv(labeled_csv_path)\n",
"\n",
"# Discard rows that have no label.\n",
"labels_data = labels_data.dropna(subset=['label'])\n",
"\n",
"# Attach the label column to the TF-IDF features. concat(axis=1) matches rows on\n",
"# the index, so join='inner' also removes the feature rows whose label was just\n",
"# dropped; the default outer join would bring them back with a NaN label.\n",
"data = pd.concat([tfidf_data, labels_data['label']], axis=1, join='inner')\n",
"data.to_csv(combined_csv_path, index=False)\n",
"print(data.head())"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"id": "N9P8jdgUpbQS",
"outputId": "b968e82a-0c9b-4e4d-cf53-c82ea86e590a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 209
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"label\n",
"Negatif 2955\n",
"Positif 1662\n",
"Netral 770\n",
"Name: count, dtype: int64"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <th>label</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Negatif</th>\n",
" <td>2955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Positif</th>\n",
" <td>1662</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Netral</th>\n",
" <td>770</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div><br><label><b>dtype:</b> int64</label>"
]
},
"metadata": {},
"execution_count": 26
}
],
"source": [
"# Reload the merged TF-IDF + label file and check ITS class distribution\n",
"# (the previous version counted the labels of the older `df` frame instead).\n",
"df_combined = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-balance.csv')\n",
"df_combined['label'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "HoW2DWLcpbQT",
"outputId": "658d3b98-b33a-4695-8308-12aaef02abb2",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Jumlah kelas sebelum SMOTE:\n",
"label\n",
"Negatif 2955\n",
"Positif 1662\n",
"Netral 770\n",
"Name: count, dtype: int64\n",
"\n",
"Jumlah kelas setelah SMOTE:\n",
"label\n",
"Positif 2955\n",
"Negatif 2955\n",
"Netral 2955\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"import pandas as pd\n",
"from imblearn.over_sampling import SMOTE\n",
"\n",
"# Split the merged dataset into the label vector and the feature matrix.\n",
"y = df_combined['label']\n",
"X = df_combined.drop(columns=['label'])\n",
"\n",
"# Oversample with SMOTE using a fixed seed.\n",
"# NOTE(review): resampling is done here, before the train/test split performed in\n",
"# the modeling cells, so synthetic rows can land in the test folds - confirm this is intended.\n",
"smote = SMOTE(random_state=42)\n",
"X_resampled, y_resampled = smote.fit_resample(X, y)\n",
"\n",
"# Report the class counts before and after resampling.\n",
"print('Jumlah kelas sebelum SMOTE:')\n",
"print(y.value_counts())\n",
"print('\\nJumlah kelas setelah SMOTE:')\n",
"print(y_resampled.value_counts())\n",
"\n",
"# Write the resampled features together with their labels to CSV.\n",
"features_balanced = pd.DataFrame(X_resampled, columns=X.columns)\n",
"labels_balanced = pd.DataFrame(y_resampled, columns=['label'])\n",
"balanced_df = pd.concat([features_balanced, labels_balanced], axis=1)\n",
"balanced_df.to_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-balanced.csv', index=False)"
]
},
{
"cell_type": "markdown",
"source": [
"# Modeling"
],
"metadata": {
"id": "3c0_ju9YyBlw"
}
},
{
"cell_type": "markdown",
"source": [
"## Support Vector Machine"
],
"metadata": {
"id": "T-suo_4bzVYB"
}
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split, cross_val_score\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import classification_report, confusion_matrix\n",
"\n",
"# Load the balanced dataset written by the SMOTE cell.\n",
"balanced_csv_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-balanced.csv'\n",
"df_balanced = pd.read_csv(balanced_csv_path)\n",
"\n",
"# `label` is the target; every other column is a feature.\n",
"y = df_balanced['label']\n",
"X = df_balanced.drop(columns=['label'])\n",
"\n",
"# Stratified 80/20 train/test split with a fixed seed.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=42, stratify=y\n",
")\n",
"\n",
"# Fit a linear-kernel SVM on the training split (the kernel can be swapped if needed).\n",
"svm_model = SVC(kernel='linear', random_state=42)\n",
"svm_model.fit(X_train, y_train)\n",
"\n",
"# Predict the held-out split and print the evaluation.\n",
"y_pred = svm_model.predict(X_test)\n",
"print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\\n')\n",
"print('\\nClassification Report:', classification_report(y_test, y_pred), sep='\\n')\n",
"\n",
"# 5-fold cross-validation of the same estimator on the whole balanced dataset.\n",
"cv_scores = cross_val_score(svm_model, X, y, cv=5)\n",
"\n",
"print('\\nCross-Validation Scores:')\n",
"print(cv_scores)\n",
"print(f'Mean Cross-Validation Score: {cv_scores.mean():.4f}')\n",
"print(f'Standard Deviation of Cross-Validation Scores: {cv_scores.std():.4f}')"
],
"metadata": {
"id": "noJ-pc-5yG7W",
"outputId": "7bdd9de6-0571-457b-ffe2-9ec1cd56c351",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 28,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Confusion Matrix:\n",
"[[487 85 19]\n",
" [ 16 561 14]\n",
" [ 15 35 541]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Negatif 0.94 0.82 0.88 591\n",
" Netral 0.82 0.95 0.88 591\n",
" Positif 0.94 0.92 0.93 591\n",
"\n",
" accuracy 0.90 1773\n",
" macro avg 0.90 0.90 0.90 1773\n",
"weighted avg 0.90 0.90 0.90 1773\n",
"\n",
"\n",
"Cross-Validation Scores:\n",
"[0.79526227 0.83981951 0.83417936 0.93175409 0.94585448]\n",
"Mean Cross-Validation Score: 0.8694\n",
"Standard Deviation of Cross-Validation Scores: 0.0589\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Confusion matrix of the latest predictions. The label order is passed explicitly\n",
"# (sorted) so the heatmap ticks can show the class names.\n",
"class_labels = sorted(set(y_test) | set(y_pred))\n",
"conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)\n",
"\n",
"fig, (ax_cm, ax_cv) = plt.subplots(1, 2, figsize=(12, 6))\n",
"\n",
"# Left panel: confusion matrix heatmap.\n",
"sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,\n",
"            xticklabels=class_labels, yticklabels=class_labels, ax=ax_cm)\n",
"ax_cm.set_title('Confusion Matrix')\n",
"ax_cm.set_xlabel('Predicted')\n",
"ax_cm.set_ylabel('True')\n",
"\n",
"# Right panel: spread of the cross-validation scores. Passing the scores as `x`\n",
"# draws one horizontal box, so the 'Score' label really describes the x axis\n",
"# (the former orient='w' is not a valid orientation value).\n",
"sns.boxplot(x=cv_scores, ax=ax_cv)\n",
"ax_cv.set_title('Cross-Validation Scores')\n",
"ax_cv.set_xlabel('Score')\n",
"\n",
"fig.tight_layout()\n",
"plt.show()"
],
"metadata": {
"id": "SSMxyyBSzc6S",
"outputId": "f26150c0-82a2-4c5e-ebd6-0509ad7144d2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 349
}
},
"execution_count": 29,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1200x600 with 2 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"import joblib\n",
"from sklearn.metrics import classification_report\n",
"\n",
"svm_model_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/HASIL-RISET/model-svm.pkl'\n",
"svm_results_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/HASIL-RISET/evaluation_results_SVM.csv'\n",
"\n",
"# Classification report as a nested dict so it fits into a single table cell.\n",
"class_report = classification_report(y_test, y_pred, output_dict=True)\n",
"\n",
"# Serialise the fitted SVM to Drive.\n",
"with open(svm_model_path, 'wb') as file:\n",
"    joblib.dump(svm_model, file)\n",
"\n",
"# One-row table: every metric is wrapped in a list so it becomes one column.\n",
"results = {\n",
"    'Confusion Matrix': [conf_matrix.flatten()],  # flattened to 1-D for storage\n",
"    'Classification Report': [class_report],\n",
"    'Cross-Validation Scores': [cv_scores.tolist()],\n",
"    'Mean CV Score': [cv_scores.mean()],\n",
"    'Std Dev CV Score': [cv_scores.std()]\n",
"}\n",
"results_df = pd.DataFrame(results)\n",
"results_df.to_csv(svm_results_path, index=False)"
],
"metadata": {
"id": "AEy8VwJezgbZ"
},
"execution_count": 30,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Naive Bayes"
],
"metadata": {
"id": "Z-GsSbYN0V90"
}
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split, cross_val_score\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.metrics import classification_report, confusion_matrix\n",
"\n",
"# Load the balanced dataset written by the SMOTE cell.\n",
"balanced_csv_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-balanced.csv'\n",
"df_balanced = pd.read_csv(balanced_csv_path)\n",
"\n",
"# `label` is the target; every other column is a feature.\n",
"y = df_balanced['label']\n",
"X = df_balanced.drop(columns=['label'])\n",
"\n",
"# Stratified 80/20 train/test split with a fixed seed.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=42, stratify=y\n",
")\n",
"\n",
"# Fit a Gaussian Naive Bayes classifier on the training split.\n",
"nb_model = GaussianNB()\n",
"nb_model.fit(X_train, y_train)\n",
"\n",
"# Predict the held-out split and print the evaluation.\n",
"y_pred = nb_model.predict(X_test)\n",
"print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\\n')\n",
"print('\\nClassification Report:', classification_report(y_test, y_pred), sep='\\n')\n",
"\n",
"# 5-fold cross-validation of the same estimator on the whole balanced dataset.\n",
"cv_scores = cross_val_score(nb_model, X, y, cv=5)\n",
"\n",
"print('\\nCross-Validation Scores:')\n",
"print(cv_scores)\n",
"print(f'Mean Cross-Validation Score: {cv_scores.mean():.4f}')\n",
"print(f'Standard Deviation of Cross-Validation Scores: {cv_scores.std():.4f}')"
],
"metadata": {
"id": "rK3bzFsp0cM8",
"outputId": "4f68a956-53e9-459c-ebfb-28df22028f65",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 31,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Confusion Matrix:\n",
"[[226 181 184]\n",
" [ 3 588 0]\n",
" [ 52 84 455]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Negatif 0.80 0.38 0.52 591\n",
" Netral 0.69 0.99 0.81 591\n",
" Positif 0.71 0.77 0.74 591\n",
"\n",
" accuracy 0.72 1773\n",
" macro avg 0.74 0.72 0.69 1773\n",
"weighted avg 0.74 0.72 0.69 1773\n",
"\n",
"\n",
"Cross-Validation Scores:\n",
"[0.68640722 0.69204738 0.69543147 0.74619289 0.75521715]\n",
"Mean Cross-Validation Score: 0.7151\n",
"Standard Deviation of Cross-Validation Scores: 0.0294\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Confusion matrix of the latest predictions. The label order is passed explicitly\n",
"# (sorted) so the heatmap ticks can show the class names.\n",
"class_labels = sorted(set(y_test) | set(y_pred))\n",
"conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)\n",
"\n",
"fig, (ax_cm, ax_cv) = plt.subplots(1, 2, figsize=(12, 6))\n",
"\n",
"# Left panel: confusion matrix heatmap.\n",
"sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,\n",
"            xticklabels=class_labels, yticklabels=class_labels, ax=ax_cm)\n",
"ax_cm.set_title('Confusion Matrix')\n",
"ax_cm.set_xlabel('Predicted')\n",
"ax_cm.set_ylabel('True')\n",
"\n",
"# Right panel: spread of the cross-validation scores. Passing the scores as `x`\n",
"# draws one horizontal box, so the 'Score' label really describes the x axis\n",
"# (the former orient='w' is not a valid orientation value).\n",
"sns.boxplot(x=cv_scores, ax=ax_cv)\n",
"ax_cv.set_title('Cross-Validation Scores')\n",
"ax_cv.set_xlabel('Score')\n",
"ax_cv.set_ylabel('CV Fold')\n",
"\n",
"fig.tight_layout()\n",
"plt.show()"
],
"metadata": {
"id": "QNnCek9g0pYW",
"outputId": "eb6d5a26-6c72-4fd8-f182-5c351e4c96ce",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 349
}
},
"execution_count": 32,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1200x600 with 2 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"import joblib\n",
"\n",
"nb_model_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/HASIL-RISET/model-nb.pkl'\n",
"nb_results_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/HASIL-RISET/evaluation_results_nb.csv'\n",
"\n",
"# Classification report as a nested dict so it fits into a single table cell.\n",
"class_report = classification_report(y_test, y_pred, output_dict=True)\n",
"\n",
"# Serialise the fitted Naive Bayes model to Drive.\n",
"with open(nb_model_path, 'wb') as file:\n",
"    joblib.dump(nb_model, file)\n",
"\n",
"# One-row table: every metric is wrapped in a list so it becomes one column.\n",
"results = {\n",
"    'Confusion Matrix': [conf_matrix.flatten()],  # flattened to 1-D for storage\n",
"    'Classification Report': [class_report],\n",
"    'Cross-Validation Scores': [cv_scores.tolist()],\n",
"    'Mean CV Score': [cv_scores.mean()],\n",
"    'Std Dev CV Score': [cv_scores.std()]\n",
"}\n",
"results_df = pd.DataFrame(results)\n",
"results_df.to_csv(nb_results_path, index=False)"
],
"metadata": {
"id": "JZ8f3lZL094X"
},
"execution_count": 33,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## K-Nearest Neighbor"
],
"metadata": {
"id": "D5wJEcy61Yw-"
}
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt  # imported here so the cell does not rely on an earlier plotting cell\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.metrics import classification_report, confusion_matrix\n",
"\n",
"# Load the balanced dataset written by the SMOTE cell.\n",
"df_balanced = pd.read_csv('/content/drive/MyDrive/Tugas Akhir/datasets penelitian/data-analisis/datasets-balanced.csv')\n",
"\n",
"# `label` is the target; every other column is a feature.\n",
"X = df_balanced.drop(columns=['label'])\n",
"y = df_balanced['label']\n",
"\n",
"# Stratified 80/20 train/test split with a fixed seed.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"\n",
"# Grid-search K = 1..10 with 5-fold cross-validation on the training split.\n",
"param_grid = {'n_neighbors': range(1, 11)}\n",
"knn = KNeighborsClassifier()\n",
"grid_search = GridSearchCV(knn, param_grid, cv=5)\n",
"grid_search.fit(X_train, y_train)\n",
"\n",
"# Mean cross-validated score for every K that was tried.\n",
"results = grid_search.cv_results_\n",
"k_values = results['param_n_neighbors'].data\n",
"mean_test_scores = results['mean_test_score']\n",
"\n",
"# Line chart of the mean score against K.\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(k_values, mean_test_scores, marker='o', linestyle='-', color='b', label='Mean Test Score')\n",
"plt.title('Visualisasi Kinerja Model KNN pada Berbagai Nilai K')\n",
"plt.xlabel('Nilai K')\n",
"plt.ylabel('Rata-rata Akurasi (Cross-validation)')\n",
"plt.grid(True)\n",
"plt.show()\n",
"\n",
"optimal_k = grid_search.best_params_['n_neighbors']\n",
"\n",
"# K = 1 and K = 2 are not accepted: when either one wins, search again over the\n",
"# odd values 3, 5, 7, 9 only. Any other K is used as found.\n",
"if optimal_k in (1, 2):\n",
"    valid_k_values = list(range(3, 11, 2))\n",
"    param_grid = {'n_neighbors': valid_k_values}\n",
"    grid_search = GridSearchCV(knn, param_grid, cv=5)\n",
"    grid_search.fit(X_train, y_train)\n",
"    optimal_k = grid_search.best_params_['n_neighbors']\n",
"print(f\"Optimal K = {optimal_k}\")\n",
"\n",
"# Fit the final KNN model with the selected K.\n",
"knn_model = KNeighborsClassifier(n_neighbors=optimal_k)\n",
"knn_model.fit(X_train, y_train)\n",
"\n",
"# Predict the held-out split.\n",
"y_pred = knn_model.predict(X_test)\n",
"\n",
"# Print the evaluation.\n",
"print(\"Confusion Matrix:\")\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(\"\\nClassification Report:\")\n",
"print(classification_report(y_test, y_pred))\n",
"\n",
"# 5-fold cross-validation of the final estimator on the whole balanced dataset.\n",
"cv_scores = cross_val_score(knn_model, X, y, cv=5)\n",
"\n",
"print(\"\\nCross-Validation Scores:\")\n",
"print(cv_scores)\n",
"print(f\"Mean Cross-Validation Score: {cv_scores.mean():.4f}\")\n",
"print(f\"Standard Deviation of Cross-Validation Scores: {cv_scores.std():.4f}\")"
],
"metadata": {
"id": "_r-z131V1idv",
"outputId": "7cffc5b5-8a4a-407e-902d-c9dc800c079d",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 823
}
},
"execution_count": 34,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Optimal K = 3\n",
"Confusion Matrix:\n",
"[[ 96 370 125]\n",
" [ 0 582 9]\n",
" [ 8 119 464]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Negatif 0.92 0.16 0.28 591\n",
" Netral 0.54 0.98 0.70 591\n",
" Positif 0.78 0.79 0.78 591\n",
"\n",
" accuracy 0.64 1773\n",
" macro avg 0.75 0.64 0.59 1773\n",
"weighted avg 0.75 0.64 0.59 1773\n",
"\n",
"\n",
"Cross-Validation Scores:\n",
"[0.56739989 0.58657642 0.58319233 0.72758037 0.76875353]\n",
"Mean Cross-Validation Score: 0.6467\n",
"Standard Deviation of Cross-Validation Scores: 0.0841\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Confusion matrix of the latest predictions. The label order is passed explicitly\n",
"# (sorted) so the heatmap ticks can show the class names.\n",
"class_labels = sorted(set(y_test) | set(y_pred))\n",
"conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)\n",
"\n",
"fig, (ax_cm, ax_cv) = plt.subplots(1, 2, figsize=(12, 6))\n",
"\n",
"# Left panel: confusion matrix heatmap.\n",
"sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,\n",
"            xticklabels=class_labels, yticklabels=class_labels, ax=ax_cm)\n",
"ax_cm.set_title('Confusion Matrix')\n",
"ax_cm.set_xlabel('Predicted')\n",
"ax_cm.set_ylabel('True')\n",
"\n",
"# Right panel: spread of the cross-validation scores. Passing the scores as `x`\n",
"# draws one horizontal box, so the 'Score' label really describes the x axis\n",
"# (the former orient='w' is not a valid orientation value).\n",
"sns.boxplot(x=cv_scores, ax=ax_cv)\n",
"ax_cv.set_title('Cross-Validation Scores')\n",
"ax_cv.set_xlabel('Score')\n",
"\n",
"fig.tight_layout()\n",
"plt.show()"
],
"metadata": {
"id": "gfxD10kx149Y",
"outputId": "3a06d921-e8d7-4847-b9f6-d0555569fd47",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 349
}
},
"execution_count": 35,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1200x600 with 2 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"import joblib\n",
"\n",
"knn_model_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/HASIL-RISET/model-knn.pkl'\n",
"knn_results_path = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/HASIL-RISET/evaluation_results_knn-new.csv'\n",
"\n",
"# Classification report as a nested dict so it fits into a single table cell.\n",
"class_report = classification_report(y_test, y_pred, output_dict=True)\n",
"\n",
"# Serialise the fitted KNN model to Drive.\n",
"with open(knn_model_path, 'wb') as file:\n",
"    joblib.dump(knn_model, file)\n",
"\n",
"# One-row table: every metric is wrapped in a list so it becomes one column.\n",
"results_knn = {\n",
"    'Confusion Matrix': [conf_matrix.flatten()],  # flattened to 1-D for storage\n",
"    'Classification Report': [class_report],\n",
"    'Cross-Validation Scores': [cv_scores.tolist()],\n",
"    'Mean CV Score': [cv_scores.mean()],\n",
"    'Std Dev CV Score': [cv_scores.std()]\n",
"}\n",
"results_df_knn = pd.DataFrame(results_knn)\n",
"results_df_knn.to_csv(knn_results_path, index=False)"
],
"metadata": {
"id": "-AWKeEfP1_Vn"
},
"execution_count": 36,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ezzOKpOLpbQU"
},
"source": [
"# Ekspor Data untuk Dashboard"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "smzFSiD_pbQU"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from nltk.tokenize import word_tokenize\n",
"from collections import Counter\n",
"\n",
"df = pd.read_csv('REAL-DATA/datasets-clean.csv')\n",
"\n",
"def create_word_count_table(df, text_column):\n",
"    \"\"\"Count how often each token occurs in a text column.\n",
"\n",
"    Args:\n",
"        df: DataFrame that holds the text.\n",
"        text_column: Name of the column to tokenize.\n",
"\n",
"    Returns:\n",
"        DataFrame with one row per distinct token and the columns\n",
"        `word` and `count`.\n",
"    \"\"\"\n",
"    # Missing values become empty strings so word_tokenize always receives text.\n",
"    # The caller's frame is left untouched (no helper column is added to it).\n",
"    texts = df[text_column].fillna('').astype(str)\n",
"\n",
"    # Tokenize every text and tally all tokens in a single pass.\n",
"    word_counts = Counter(token for text in texts for token in word_tokenize(text))\n",
"\n",
"    return pd.DataFrame(list(word_counts.items()), columns=['word', 'count'])\n",
"\n",
"word_tokenize_df = create_word_count_table(df, 'full_text')\n",
"word_tokenize_df.to_csv('word_count_result.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sNdj6ku9pbQV"
},
"outputs": [],
"source": [
"df = pd.read_csv('word_count_result.csv')\n",
"\n",
"# Number of missing values per column (printed, because only the last\n",
"# expression of a cell is displayed automatically).\n",
"print(df.isnull().sum())\n",
"\n",
"# Rows that contain at least one missing value.\n",
"print(df[df.isnull().any(axis=1)])\n",
"\n",
"# Drop the incomplete rows; reassigning keeps the cell safe to re-run.\n",
"df = df.dropna()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "B313m4ExpbQV"
},
"outputs": [],
"source": [
"df = pd.read_csv('word_count_result.csv')\n",
"\n",
"# Load the positive and negative lexicons (tab-separated, read without a header row).\n",
"positive_lexicon = pd.read_csv('InSet/positive.tsv', sep='\\t', header=None)\n",
"negative_lexicon = pd.read_csv('InSet/negative.tsv', sep='\\t', header=None)\n",
"\n",
"positive_lexicon.columns = ['kata', 'polaritas']\n",
"negative_lexicon.columns = ['kata', 'polaritas']\n",
"\n",
"# Force the polarity column to numeric; values that cannot be parsed become NaN.\n",
"positive_lexicon['polaritas'] = pd.to_numeric(positive_lexicon['polaritas'], errors='coerce')\n",
"negative_lexicon['polaritas'] = pd.to_numeric(negative_lexicon['polaritas'], errors='coerce')\n",
"\n",
"# Merge both lexicons and discard entries without a numeric polarity, so a NaN\n",
"# can never leak into a score.\n",
"lexicon = pd.concat([positive_lexicon, negative_lexicon]).dropna(subset=['polaritas'])\n",
"\n",
"# Dictionary for fast word -> polarity lookup.\n",
"lexicon_dict = dict(zip(lexicon['kata'], lexicon['polaritas']))\n",
"\n",
"def label(tweet, lexicon_dict):\n",
"    \"\"\"Score a text against the lexicon and map the total to a sentiment label.\n",
"\n",
"    Args:\n",
"        tweet: Text to score; it is split on whitespace.\n",
"        lexicon_dict: Mapping of word -> polarity. Unknown words count as 0.\n",
"\n",
"    Returns:\n",
"        Tuple (label, score); label is 'positif' for a score above 0,\n",
"        'negatif' for a score below 0 and 'netral' otherwise.\n",
"    \"\"\"\n",
"    sentiment_score = sum(lexicon_dict.get(word, 0) for word in tweet.split())\n",
"\n",
"    if sentiment_score > 0:\n",
"        return 'positif', sentiment_score\n",
"    if sentiment_score < 0:\n",
"        return 'negatif', sentiment_score\n",
"    return 'netral', sentiment_score\n",
"\n",
"# Missing words become empty strings so label() always receives text\n",
"# (plain reassignment instead of a chained in-place fillna).\n",
"df['word'] = df['word'].fillna('').astype(str)\n",
"\n",
"# Expand the (label, score) tuples into two separate columns. The former\n",
"# df['label', 'score'] = ... addressed ONE column named by a tuple, so no\n",
"# 'label' column was available to the cells that read the saved CSV.\n",
"scored = pd.DataFrame(\n",
"    [label(word, lexicon_dict) for word in df['word']],\n",
"    columns=['label', 'score'],\n",
"    index=df.index,\n",
")\n",
"df['label'] = scored['label']\n",
"df['score'] = scored['score']\n",
"df.to_csv('word_count_labeled.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uAEqGDqZpbQW"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv('word_count_labeled.csv')\n",
"\n",
"# Number of words per sentiment label (printed, because only the last\n",
"# expression of a cell is displayed automatically).\n",
"print(df['label'].value_counts())\n",
"\n",
"# Most frequent word of each label: idxmax returns, per label, the index of the\n",
"# row with the highest count, and .loc fetches those rows.\n",
"df.loc[df.groupby('label')['count'].idxmax()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "30i0VbsvpbQW"
},
"outputs": [],
"source": [
"# import pandas as pd\n",
"# from wordcloud import WordCloud, get_single_color_func\n",
"# import matplotlib.pyplot as plt\n",
"\n",
"# df = pd.read_csv('word_count_labeled.csv')\n",
"\n",
"# # Menampilkan jumlah label\n",
"# # print(df['label'].value_counts())\n",
"\n",
"# # Fungsi untuk membuat dan menampilkan Word Cloud dengan warna berdasarkan label\n",
"# def plot_word_cloud(label, color):\n",
"# words = df[df['label'] == label].set_index('word')['count'].to_dict()\n",
"# wordcloud = WordCloud(width=800, height=400, background_color='white', color_func=get_single_color_func(color)).generate_from_frequencies(words)\n",
"\n",
"# plt.figure(figsize=(10, 5))\n",
"# plt.imshow(wordcloud, interpolation='bilinear')\n",
"# plt.title(f'Word Cloud for {label} words')\n",
"# plt.axis('off')\n",
"# plt.show()\n",
"\n",
"# # Menampilkan Word Cloud untuk setiap label dengan warna yang sesuai\n",
"# label_colors = {\n",
"# 'positif': 'green',\n",
"# 'negatif': 'red',\n",
"# 'netral': 'gray'\n",
"# }\n",
"\n",
"# for label in df['label'].unique():\n",
"# plot_word_cloud(label, label_colors[label])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8TdWKd4ypbQW",
"outputId": "9f78aa8a-71aa-4083-875f-a1171a31a630"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": []
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Drive folder where the modeling cells store their evaluation CSVs.\n",
"RESULTS_DIR = '/content/drive/MyDrive/Tugas Akhir/datasets penelitian/HASIL-RISET'\n",
"\n",
"# Read exactly the files written by the SVM, Naive Bayes and KNN cells\n",
"# (only the KNN file carries the '-new' suffix).\n",
"eval_svm = pd.read_csv(f'{RESULTS_DIR}/evaluation_results_SVM.csv')\n",
"eval_nb = pd.read_csv(f'{RESULTS_DIR}/evaluation_results_nb.csv')\n",
"eval_knn = pd.read_csv(f'{RESULTS_DIR}/evaluation_results_knn-new.csv')\n",
"\n",
"# Tag every table with the model it belongs to.\n",
"eval_svm['model'] = 'svm'\n",
"eval_nb['model'] = 'nb'\n",
"eval_knn['model'] = 'knn'\n",
"\n",
"# Reorder the columns so that 'model' comes first.\n",
"svm = eval_svm[['model'] + [col for col in eval_svm.columns if col != 'model']]\n",
"nb = eval_nb[['model'] + [col for col in eval_nb.columns if col != 'model']]\n",
"knn = eval_knn[['model'] + [col for col in eval_knn.columns if col != 'model']]\n",
"\n",
"# Stack the three one-row tables into a single frame.\n",
"combined_df = pd.concat([svm, nb, knn], axis=0, ignore_index=True)\n",
"\n",
"# Preview the result.\n",
"print(combined_df.head())\n",
"\n",
"combined_df.to_csv(f'{RESULTS_DIR}/evaluation_results_combine.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
},
"colab": {
"provenance": [],
"toc_visible": true,
"gpuType": "T4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}