TIF_E41200170/Undang_Undang_KES.ipynb

910 lines
58 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Crawl Data Twitter > 1500 Tweets\n",
"The crawling process was done using Tweet-Harvest. Written by Helmi Satria on October 18th."
],
"metadata": {
"id": "xEWpayxURuUn"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"id": "4UIL1x21P9rQ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ced4d1b2-e272-49fe-9fa2-1f75e9a59357"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.0.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
"Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n",
"Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]\n",
"Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]\n",
"Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease\n",
"Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]\n",
"Hit:6 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease\n",
"Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease\n",
"Get:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease [24.3 kB]\n",
"Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n",
"Get:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]\n",
"Get:11 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,084 kB]\n",
"Get:12 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy/main amd64 Packages [47.6 kB]\n",
"Get:13 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,377 kB]\n",
"Get:14 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [2,129 kB]\n",
"Get:15 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [1,858 kB]\n",
"Fetched 6,907 kB in 5s (1,378 kB/s)\n",
"Reading package lists... Done\n",
"Reading package lists... Done\n",
"Building dependency tree... Done\n",
"Reading state information... Done\n",
"ca-certificates is already the newest version (20230311ubuntu0.22.04.1).\n",
"curl is already the newest version (7.81.0-1ubuntu1.16).\n",
"gnupg is already the newest version (2.2.27-3ubuntu2.1).\n",
"gnupg set to manually installed.\n",
"0 upgraded, 0 newly installed, 0 to remove and 51 not upgraded.\n",
"deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main\n",
"Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease\n",
"Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n",
"Get:3 https://deb.nodesource.com/node_20.x nodistro InRelease [12.1 kB]\n",
"Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease\n",
"Hit:5 http://security.ubuntu.com/ubuntu jammy-security InRelease\n",
"Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease\n",
"Get:7 https://deb.nodesource.com/node_20.x nodistro/main amd64 Packages [7,495 B]\n",
"Hit:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n",
"Hit:9 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease\n",
"Hit:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease\n",
"Hit:11 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n",
"Hit:12 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n",
"Fetched 19.6 kB in 1s (16.1 kB/s)\n",
"Reading package lists... Done\n",
"Reading package lists... Done\n",
"Building dependency tree... Done\n",
"Reading state information... Done\n",
"The following NEW packages will be installed:\n",
" nodejs\n",
"0 upgraded, 1 newly installed, 0 to remove and 51 not upgraded.\n",
"Need to get 31.6 MB of archives.\n",
"After this operation, 196 MB of additional disk space will be used.\n",
"Get:1 https://deb.nodesource.com/node_20.x nodistro/main amd64 nodejs amd64 20.14.0-1nodesource1 [31.6 MB]\n",
"Fetched 31.6 MB in 1s (59.9 MB/s)\n",
"debconf: unable to initialize frontend: Dialog\n",
"debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)\n",
"debconf: falling back to frontend: Readline\n",
"debconf: unable to initialize frontend: Readline\n",
"debconf: (This frontend requires a controlling tty.)\n",
"debconf: falling back to frontend: Teletype\n",
"dpkg-preconfigure: unable to re-open stdin: \n",
"Selecting previously unselected package nodejs.\n",
"(Reading database ... 121918 files and directories currently installed.)\n",
"Preparing to unpack .../nodejs_20.14.0-1nodesource1_amd64.deb ...\n",
"Unpacking nodejs (20.14.0-1nodesource1) ...\n",
"Setting up nodejs (20.14.0-1nodesource1) ...\n",
"Processing triggers for man-db (2.10.2-1) ...\n",
"v20.14.0\n"
]
}
],
"source": [
"#@title Install Requirement\n",
"!pip install pandas\n",
"import pandas as pd\n",
"# Install Node.js (because tweet-harvest built using Node.js)\n",
"!sudo apt-get update\n",
"!sudo apt-get install -y ca-certificates curl gnupg\n",
"!sudo mkdir -p /etc/apt/keyrings\n",
"!curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg\n",
"\n",
"!NODE_MAJOR=20 && echo \"deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main\" | sudo tee /etc/apt/sources.list.d/nodesource.list\n",
"\n",
"!sudo apt-get update\n",
"!sudo apt-get install nodejs -y\n",
"\n",
"!node -v"
]
},
{
"cell_type": "code",
"source": [
"#@title Crawling Data\n",
"\n",
"twitter_auth_token = '226ba6333b7918979c2c5a16d991aee92a3a62c7'\n",
"filename = 'kdrt.csv'\n",
"keyWord = 'perselingkuhan'\n",
"dateSearch = 'until:2021-01-21 since:2020-03-22'\n",
"search_keyword = f\"{keyWord} {dateSearch}\"\n",
"limit = 1000\n",
"\n",
"!npx --yes tweet-harvest@2.6.0 -o \"{filename}\" -s \"{search_keyword}\" -l {limit} --token {twitter_auth_token}"
],
"metadata": {
"id": "LYDR51dJlVlX",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e333b5d2-42a0-4c61-9a31-eba4c5fabd19"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u00
1b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K\u001b[1mnpm\u001b[22m \u001b[33mwarn\u001b[39m \u001b[94mdeprecated\u001b[39m rimraf@3.0.2: Rimraf versions prior to v4 are no longer supported\n",
"\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K\u001b[1mnpm\u001b[22m \u001b[33mwarn\u001b[39m \u001b[94mdeprecated\u001b[39m inflight@1.0.6: This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.\n",
"\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K\u001b[1mnpm\u001b[22m \u001b[33mwarn\u001b[39m \u001b[94mdeprecated\u001b[39m glob@7.2.3: Glob versions prior to v9 are no longer supported\n",
"\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u00
1b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K⠧\u001b[1G\u001b[0K⠇\u001b[1G\u001b[0K⠏\u001b[1G\u001b[0K⠋\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K⠹\u001b[1G\u001b[0K⠸\u001b[1G\u001b[0K⠼\u001b[1G\u001b[0K⠴\u001b[1G\u001b[0K⠦\u001b[1G\u001b[0K\u001b[1m\u001b[32mTweet Harvest [v2.6.0]\u001b[39m\u001b[22m\n",
"\u001b[1m\u001b[32m\u001b[39m\u001b[22m\n",
"\u001b[34mResearch by \u001b[39m\u001b[1m\u001b[34mHelmi Satria\u001b[39m\u001b[22m\u001b[34m\u001b[39m\n",
"\u001b[34mUse it for Educational Purposes only!\u001b[39m\n",
"\u001b[34m\u001b[39m\n",
"\u001b[33mThis script uses Chromium Browser to crawl data from Twitter with \u001b[1myour Twitter auth token\u001b[22m.\u001b[39m\n",
"\u001b[33mPlease enter your Twitter auth token when prompted.\u001b[39m\n",
"\u001b[33m\u001b[39m\n",
"\u001b[31m\u001b[1mNote:\u001b[22m\u001b[39m Keep your access token secret! Don't share it with anyone else.\n",
"\u001b[31m\u001b[1mNote:\u001b[22m\u001b[39m This script only runs on your local device.\n",
"\n",
"\u001b[34m\u001b[39m\n",
"\u001b[34mOpening twitter search page...\u001b[39m\n",
"\u001b[34m\u001b[39m\n",
"\u001b[90m[v2.6.0]\u001b[39m Invalid twitter auth token. Please check your auth token\n",
"\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K\u001b[1mnpm\u001b[22m \u001b[96mnotice\u001b[39m\n",
"\u001b[1mnpm\u001b[22m \u001b[96mnotice\u001b[39m New \u001b[33mminor\u001b[39m version of npm available! \u001b[33m10.7.0\u001b[39m -> \u001b[34m10.8.1\u001b[39m\n",
"\u001b[1mnpm\u001b[22m \u001b[96mnotice\u001b[39m Changelog: \u001b[34mhttps://github.com/npm/cli/releases/tag/v10.8.1\u001b[39m\n",
"\u001b[1mnpm\u001b[22m \u001b[96mnotice\u001b[39m To update run: \u001b[4mnpm install -g npm@10.8.1\u001b[24m\n",
"\u001b[1mnpm\u001b[22m \u001b[96mnotice\u001b[39m\n",
"\u001b[1G\u001b[0K⠙\u001b[1G\u001b[0K"
]
}
]
},
{
"cell_type": "code",
"source": [
"#@title Cek Crawling\n",
"filename = 'kdrt.csv'\n",
"filePath = f\"tweets-data/{filename}\"\n",
"data = pd.read_csv(filePath)\n",
"print(f\"Jumlah tweet dalam dataframe adalah {len(data)}.\")\n",
"data.head(5)"
],
"metadata": {
"id": "JjY7CF83qTtF",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 356
},
"outputId": "2542c2b4-2ddc-46c8-bc63-50379dacc48c"
},
"execution_count": 3,
"outputs": [
{
"output_type": "error",
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'tweets-data/kdrt.csv'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-97c56697db95>\u001b[0m in \u001b[0;36m<cell line: 4>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'kdrt.csv'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mfilePath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"tweets-data/{filename}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilePath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Jumlah tweet dalam dataframe adalah {len(data)}.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 910\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 911\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 912\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 913\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 914\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 575\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 576\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 577\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 578\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1405\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1406\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandles\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mIOHandles\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1407\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1408\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1409\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1660\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"b\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1661\u001b[0;31m self.handles = get_handle(\n\u001b[0m\u001b[1;32m 1662\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1663\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 857\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 858\u001b[0m \u001b[0;31m# Encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 859\u001b[0;31m handle = open(\n\u001b[0m\u001b[1;32m 860\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'tweets-data/kdrt.csv'"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eRfDl54waHC4",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "3599ad01-6f93-47ca-f32c-69e85d245969"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.0.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
"Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Jumlah tweet dalam dataframe adalah 1108.\n",
"Jumlah tweet dalam dataframe adalah 56.\n",
"Jumlah tweet dalam dataframe adalah 2157.\n",
"Jumlah Total adalah 3010.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Unnamed: 0.5 Unnamed: 0.3 Unnamed: 0.2 Unnamed: 0.1 Unnamed: 0 \\\n",
"0 0 0.0 0.0 0.0 0.0 \n",
"1 1 1.0 1.0 1.0 1.0 \n",
"2 2 2.0 2.0 2.0 2.0 \n",
"3 3 3.0 3.0 3.0 3.0 \n",
"4 4 4.0 4.0 4.0 4.0 \n",
"... ... ... ... ... ... \n",
"3005 3005 NaN NaN NaN NaN \n",
"3006 3006 NaN NaN NaN NaN \n",
"3007 3007 NaN NaN NaN NaN \n",
"3008 3008 NaN NaN NaN NaN \n",
"3009 3009 NaN NaN NaN NaN \n",
"\n",
" conversation_id_str created_at favorite_count \\\n",
"0 1580076775752994816 Wed Oct 12 06:03:54 +0000 2022 40485 \n",
"1 1612732602531602432 Tue Jan 10 08:46:30 +0000 2023 238 \n",
"2 1593769984022310913 Sat Nov 19 00:55:49 +0000 2022 37 \n",
"3 1613113017025331201 Wed Jan 11 09:58:08 +0000 2023 74 \n",
"4 1608084597035929603 Wed Dec 28 12:56:59 +0000 2022 288 \n",
"... ... ... ... \n",
"3005 1610520173974519808 Wed Jan 04 06:51:47 +0000 2023 40 \n",
"3006 1616113161366822912 Thu Jan 19 16:39:38 +0000 2023 4 \n",
"3007 1615107283838726144 Mon Jan 16 22:02:38 +0000 2023 23 \n",
"3008 1613303489140248578 Wed Jan 11 23:52:58 +0000 2023 20 \n",
"3009 1614130490969030658 Sat Jan 14 05:21:12 +0000 2023 2 \n",
"\n",
" full_text id_str \\\n",
"0 Tabok2an anjing2an pukul2an dalam rumah tangg... 1580076775752994816 \n",
"1 Pengacara Hotman Paris Hutapea mengaku ditelep... 1612732602531602432 \n",
"2 STOP Kekerasan dalam Rumah Tangga Maafkan suam... 1593769984022310913 \n",
"3 Kita sering melihat kekerasan dalam rumah tang... 1613113017025331201 \n",
"4 Ya kalau suaminya maksa-maksa istrinya hamil n... 1608084597035929603 \n",
"... ... ... \n",
"3005 @convomfs Iya trauma gara2 ngeliat anak sebaya... 1610529407118225409 \n",
"3006 Sepanjang tahun 2022 PA Lumajang mengabulkan 2... 1616113161366822912 \n",
"3007 Blak-blakan! Hotman Paris Beberkan Pesan Venna... 1615107283838726144 \n",
"3008 @tanyarlfes Males ngasih tau males Mau red fla... 1613323110341804032 \n",
"3009 Perceraian orangtua akibat perselingkuhan memb... 1614130490969030658 \n",
"\n",
" ... in_reply_to_screen_name lang location \\\n",
"0 ... NaN in Indonesia \n",
"1 ... NaN in NaN \n",
"2 ... NaN in NaN \n",
"3 ... NaN in ciputat, Tangerang Selatan \n",
"4 ... NaN in Greater Jakarta, Indonesia \n",
"... ... ... ... ... \n",
"3005 ... convomfs in Hogwarts \n",
"3006 ... NaN in Jakarta \n",
"3007 ... NaN in Pulo Gadung, Indonesia \n",
"3008 ... tanyarlfes in Balikpapan,Indonesia \n",
"3009 ... NaN in Palmerah, Jakarta \n",
"\n",
" quote_count reply_count retweet_count \\\n",
"0 862 727 5638 \n",
"1 0 15 29 \n",
"2 4 23 4 \n",
"3 1 1 20 \n",
"4 1 4 102 \n",
"... ... ... ... \n",
"3005 0 1 0 \n",
"3006 0 0 1 \n",
"3007 1 3 2 \n",
"3008 0 0 0 \n",
"3009 0 0 0 \n",
"\n",
" tweet_url user_id_str \\\n",
"0 https://twitter.com/Hujandisenja/status/158007... 36906324 \n",
"1 https://twitter.com/IDNTimes/status/1612732602... 388190238 \n",
"2 https://twitter.com/tokoghoib/status/159376998... 1285403043425447936 \n",
"3 https://twitter.com/BincangSyariah/status/1613... 4836542132 \n",
"4 https://twitter.com/RodriChen/status/160808459... 963890288 \n",
"... ... ... \n",
"3005 https://twitter.com/oversizedmoon_/status/1610... 1597022213445943297 \n",
"3006 https://twitter.com/kompascom/status/161611316... 23343960 \n",
"3007 https://twitter.com/tvOneNews/status/161510728... 55507370 \n",
"3008 https://twitter.com/rcyourbae/status/161332311... 238097101 \n",
"3009 https://twitter.com/hariankompas/status/161413... 255866913 \n",
"\n",
" username Unnamed: 0.4 \n",
"0 Hujandisenja 0.0 \n",
"1 IDNTimes 1.0 \n",
"2 tokoghoib 2.0 \n",
"3 BincangSyariah 3.0 \n",
"4 RodriChen 4.0 \n",
"... ... ... \n",
"3005 oversizedmoon_ 2152.0 \n",
"3006 kompascom 2153.0 \n",
"3007 tvOneNews 2154.0 \n",
"3008 rcyourbae 2155.0 \n",
"3009 hariankompas 2156.0 \n",
"\n",
"[3010 rows x 21 columns]"
],
"text/html": [
"\n",
" <div id=\"df-97bc159a-4c74-4b99-b67d-40d7943d13d5\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0.5</th>\n",
" <th>Unnamed: 0.3</th>\n",
" <th>Unnamed: 0.2</th>\n",
" <th>Unnamed: 0.1</th>\n",
" <th>Unnamed: 0</th>\n",
" <th>conversation_id_str</th>\n",
" <th>created_at</th>\n",
" <th>favorite_count</th>\n",
" <th>full_text</th>\n",
" <th>id_str</th>\n",
" <th>...</th>\n",
" <th>in_reply_to_screen_name</th>\n",
" <th>lang</th>\n",
" <th>location</th>\n",
" <th>quote_count</th>\n",
" <th>reply_count</th>\n",
" <th>retweet_count</th>\n",
" <th>tweet_url</th>\n",
" <th>user_id_str</th>\n",
" <th>username</th>\n",
" <th>Unnamed: 0.4</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1580076775752994816</td>\n",
" <td>Wed Oct 12 06:03:54 +0000 2022</td>\n",
" <td>40485</td>\n",
" <td>Tabok2an anjing2an pukul2an dalam rumah tangg...</td>\n",
" <td>1580076775752994816</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Indonesia</td>\n",
" <td>862</td>\n",
" <td>727</td>\n",
" <td>5638</td>\n",
" <td>https://twitter.com/Hujandisenja/status/158007...</td>\n",
" <td>36906324</td>\n",
" <td>Hujandisenja</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1612732602531602432</td>\n",
" <td>Tue Jan 10 08:46:30 +0000 2023</td>\n",
" <td>238</td>\n",
" <td>Pengacara Hotman Paris Hutapea mengaku ditelep...</td>\n",
" <td>1612732602531602432</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>29</td>\n",
" <td>https://twitter.com/IDNTimes/status/1612732602...</td>\n",
" <td>388190238</td>\n",
" <td>IDNTimes</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>1593769984022310913</td>\n",
" <td>Sat Nov 19 00:55:49 +0000 2022</td>\n",
" <td>37</td>\n",
" <td>STOP Kekerasan dalam Rumah Tangga Maafkan suam...</td>\n",
" <td>1593769984022310913</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>23</td>\n",
" <td>4</td>\n",
" <td>https://twitter.com/tokoghoib/status/159376998...</td>\n",
" <td>1285403043425447936</td>\n",
" <td>tokoghoib</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>1613113017025331201</td>\n",
" <td>Wed Jan 11 09:58:08 +0000 2023</td>\n",
" <td>74</td>\n",
" <td>Kita sering melihat kekerasan dalam rumah tang...</td>\n",
" <td>1613113017025331201</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>ciputat, Tangerang Selatan</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>20</td>\n",
" <td>https://twitter.com/BincangSyariah/status/1613...</td>\n",
" <td>4836542132</td>\n",
" <td>BincangSyariah</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>1608084597035929603</td>\n",
" <td>Wed Dec 28 12:56:59 +0000 2022</td>\n",
" <td>288</td>\n",
" <td>Ya kalau suaminya maksa-maksa istrinya hamil n...</td>\n",
" <td>1608084597035929603</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Greater Jakarta, Indonesia</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>102</td>\n",
" <td>https://twitter.com/RodriChen/status/160808459...</td>\n",
" <td>963890288</td>\n",
" <td>RodriChen</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3005</th>\n",
" <td>3005</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1610520173974519808</td>\n",
" <td>Wed Jan 04 06:51:47 +0000 2023</td>\n",
" <td>40</td>\n",
" <td>@convomfs Iya trauma gara2 ngeliat anak sebaya...</td>\n",
" <td>1610529407118225409</td>\n",
" <td>...</td>\n",
" <td>convomfs</td>\n",
" <td>in</td>\n",
" <td>Hogwarts</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>https://twitter.com/oversizedmoon_/status/1610...</td>\n",
" <td>1597022213445943297</td>\n",
" <td>oversizedmoon_</td>\n",
" <td>2152.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3006</th>\n",
" <td>3006</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1616113161366822912</td>\n",
" <td>Thu Jan 19 16:39:38 +0000 2023</td>\n",
" <td>4</td>\n",
" <td>Sepanjang tahun 2022 PA Lumajang mengabulkan 2...</td>\n",
" <td>1616113161366822912</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Jakarta</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>https://twitter.com/kompascom/status/161611316...</td>\n",
" <td>23343960</td>\n",
" <td>kompascom</td>\n",
" <td>2153.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3007</th>\n",
" <td>3007</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1615107283838726144</td>\n",
" <td>Mon Jan 16 22:02:38 +0000 2023</td>\n",
" <td>23</td>\n",
" <td>Blak-blakan! Hotman Paris Beberkan Pesan Venna...</td>\n",
" <td>1615107283838726144</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Pulo Gadung, Indonesia</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>https://twitter.com/tvOneNews/status/161510728...</td>\n",
" <td>55507370</td>\n",
" <td>tvOneNews</td>\n",
" <td>2154.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3008</th>\n",
" <td>3008</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1613303489140248578</td>\n",
" <td>Wed Jan 11 23:52:58 +0000 2023</td>\n",
" <td>20</td>\n",
" <td>@tanyarlfes Males ngasih tau males Mau red fla...</td>\n",
" <td>1613323110341804032</td>\n",
" <td>...</td>\n",
" <td>tanyarlfes</td>\n",
" <td>in</td>\n",
" <td>Balikpapan,Indonesia</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>https://twitter.com/rcyourbae/status/161332311...</td>\n",
" <td>238097101</td>\n",
" <td>rcyourbae</td>\n",
" <td>2155.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3009</th>\n",
" <td>3009</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1614130490969030658</td>\n",
" <td>Sat Jan 14 05:21:12 +0000 2023</td>\n",
" <td>2</td>\n",
" <td>Perceraian orangtua akibat perselingkuhan memb...</td>\n",
" <td>1614130490969030658</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>in</td>\n",
" <td>Palmerah, Jakarta</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>https://twitter.com/hariankompas/status/161413...</td>\n",
" <td>255866913</td>\n",
" <td>hariankompas</td>\n",
" <td>2156.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3010 rows × 21 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-97bc159a-4c74-4b99-b67d-40d7943d13d5')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-97bc159a-4c74-4b99-b67d-40d7943d13d5 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-97bc159a-4c74-4b99-b67d-40d7943d13d5');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-5b9217cb-3051-4e6c-9ede-1761f346f44b\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-5b9217cb-3051-4e6c-9ede-1761f346f44b')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-5b9217cb-3051-4e6c-9ede-1761f346f44b button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "xxx"
}
},
"metadata": {},
"execution_count": 5
}
],
"source": [
"#@title Merge Crawling Data\n",
"# Merge the crawled CSV files into a single dataset and report how many tweets were collected.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install pandas\n",
"import pandas as pd\n",
"\n",
"listFile = ['1108.csv','perempuan.csv','2157.csv']\n",
"result = None\n",
"for fileName in listFile:\n",
"    filePath = f\"tweets-data/{fileName}\"\n",
"    data = pd.read_csv(filePath)\n",
"    # With no `on=` given, merge joins on every column the two frames share;\n",
"    # how=\"outer\" also keeps the rows that appear in only one of them.\n",
"    result = data if result is None else pd.merge(result, data, how=\"outer\")\n",
"    print(f\"Jumlah tweet dalam dataframe adalah {len(data)}.\")\n",
"\n",
"namaSave = '/content/dataset.csv'\n",
"# index=False: otherwise the row index is written as an extra column and comes\n",
"# back as a spurious `Unnamed: 0` column when the file is read again below.\n",
"result.to_csv(namaSave, index=False)\n",
"print(f\"Jumlah Total adalah {len(result)}.\")\n",
"# Re-read the saved file so the displayed table reflects what is actually on disk.\n",
"xxx = pd.read_csv(namaSave)\n",
"xxx"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}