{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "overview",
   "metadata": {},
   "source": [
    "# Synthetic mood / study / sleep dataset with pseudo-labels\n",
    "\n",
    "This notebook builds a reproducible synthetic dataset and derives a rule-based pseudo-label for every row.\n",
    "\n",
    "1. Sample a sleep duration, a mood that depends on it, and a study duration that depends on the mood.\n",
    "2. Save the unlabeled rows to CSV.\n",
    "3. Score mood, sleep and study duration, sum the three scores and bucket the total into `Ringan`, `Sedang` or `Intensif`.\n",
    "4. Save the labeled rows to a second CSV."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED = 42\n",
    "N_SAMPLES = 2000\n",
    "\n",
    "# Output file names embed the sample count so they stay accurate if N_SAMPLES changes.\n",
    "UNLABELED_CSV = f'data_sintetis_{N_SAMPLES}_tanpa_label.csv'\n",
    "LABELED_CSV = f'data_sintetis_{N_SAMPLES}_dengan_pseudo_label.csv'\n",
    "\n",
    "# (inclusive lower bound of sleep hours, mood choices, probabilities).\n",
    "# Tiers are checked top-down; the last one catches everything below 5 hours.\n",
    "MOOD_BY_SLEEP_TIER = [\n",
    "    (7, ['Bagus', 'Lumayan', 'Biasa Saja'], [0.5, 0.3, 0.2]),\n",
    "    (5, ['Lumayan', 'Biasa Saja', 'Cukup Jenuh'], [0.4, 0.4, 0.2]),\n",
    "    (float('-inf'), ['Biasa Saja', 'Cukup Jenuh', 'Jenuh'], [0.3, 0.4, 0.3]),\n",
    "]\n",
    "\n",
    "# (mean, standard deviation) of the study duration drawn for each mood.\n",
    "STUDY_DURATION_BY_MOOD = {\n",
    "    'Bagus': (95, 20),\n",
    "    'Lumayan': (75, 20),\n",
    "    'Biasa Saja': (55, 15),\n",
    "    'Cukup Jenuh': (35, 10),\n",
    "    'Jenuh': (25, 10),\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "generate-header",
   "metadata": {},
   "source": [
    "## Generate the raw samples\n",
    "\n",
    "Each row is drawn in a fixed order: sleep duration, then mood (conditional on sleep), then study duration (conditional on mood). The draw order and the legacy global seed are kept stable so that re-running the notebook reproduces the same CSV."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90ab8483-90bf-43b3-83d2-56d0978b1a33",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_dataset(n_samples, seed):\n",
    "    \"\"\"Build the synthetic, unlabeled dataset.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n_samples : int\n",
    "        Number of rows to generate; rows are numbered 1..n_samples.\n",
    "    seed : int\n",
    "        Seed passed to ``np.random.seed`` before any value is drawn.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Columns ``no``, ``mood``, ``durasi_belajar`` and ``durasi_tidur``.\n",
    "    \"\"\"\n",
    "    # The legacy global seed is used deliberately: switching to a Generator\n",
    "    # would change the random stream and therefore the generated data.\n",
    "    np.random.seed(seed)\n",
    "\n",
    "    rows = []\n",
    "    for i in range(1, n_samples + 1):\n",
    "        # Sleep is drawn first and clipped to the range 3..9.\n",
    "        sleep = np.clip(np.random.normal(6.5, 1.5), 3, 9)\n",
    "\n",
    "        # The mood distribution depends on the (unrounded) sleep tier.\n",
    "        for lower_bound, moods, probabilities in MOOD_BY_SLEEP_TIER:\n",
    "            if sleep >= lower_bound:\n",
    "                mood = np.random.choice(moods, p=probabilities)\n",
    "                break\n",
    "\n",
    "        # Study duration depends on the mood; int() truncates after clipping to 10..180.\n",
    "        mean, std = STUDY_DURATION_BY_MOOD[mood]\n",
    "        duration = int(np.clip(np.random.normal(mean, std), 10, 180))\n",
    "\n",
    "        rows.append([i, mood, duration, round(sleep, 1)])\n",
    "\n",
    "    return pd.DataFrame(\n",
    "        rows,\n",
    "        columns=['no', 'mood', 'durasi_belajar', 'durasi_tidur'],\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "generate-run",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = generate_dataset(N_SAMPLES, SEED)\n",
    "\n",
    "# Invariants guaranteed by the generator; fail early if a later edit breaks them.\n",
    "assert len(df) == N_SAMPLES\n",
    "assert df['no'].is_unique\n",
    "assert df['durasi_belajar'].between(10, 180).all()\n",
    "assert df['durasi_tidur'].between(3, 9).all()\n",
    "assert df['mood'].isin(list(STUDY_DURATION_BY_MOOD)).all()\n",
    "\n",
    "# Save the unlabeled data to CSV.\n",
    "df.to_csv(UNLABELED_CSV, index=False)\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "scoring-header",
   "metadata": {},
   "source": [
    "## Score each row and derive the pseudo-label\n",
    "\n",
    "| Component | Rule | Score |\n",
    "|---|---|---|\n",
    "| Mood | `Bagus` / `Lumayan` / `Biasa Saja` / `Cukup Jenuh` / `Jenuh` | 2 / 1.5 / 1 / 0.5 / 0 |\n",
    "| Sleep (`durasi_tidur`) | `> 7` / `>= 5` / otherwise | 2 / 1 / 0 |\n",
    "| Study (`durasi_belajar`) | `> 60` / `> 30` / otherwise | 2 / 1 / 0 |\n",
    "\n",
    "The three scores are summed. A total `<= 3` is labeled `Ringan`, a total `<= 4.5` is labeled `Sedang`, and anything higher is labeled `Intensif`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "scoring-defs",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mood-to-score mapping.\n",
    "mood_scores = {\n",
    "    'Bagus': 2,\n",
    "    'Lumayan': 1.5,\n",
    "    'Biasa Saja': 1,\n",
    "    'Cukup Jenuh': 0.5,\n",
    "    'Jenuh': 0\n",
    "}\n",
    "\n",
    "\n",
    "def sleep_score(hours):\n",
    "    \"\"\"Return 2 when ``hours`` is above 7, 1 when it is at least 5, otherwise 0.\"\"\"\n",
    "    # NOTE(review): the generator treats sleep >= 7 as its top tier, while this\n",
    "    # score needs strictly more than 7. Left unchanged; confirm which is intended.\n",
    "    if hours > 7:\n",
    "        return 2\n",
    "    if hours >= 5:\n",
    "        return 1\n",
    "    return 0\n",
    "\n",
    "\n",
    "def duration_score(minutes):\n",
    "    \"\"\"Return 2 when ``minutes`` is above 60, 1 when it is above 30, otherwise 0.\"\"\"\n",
    "    if minutes > 60:\n",
    "        return 2\n",
    "    if minutes > 30:\n",
    "        return 1\n",
    "    return 0\n",
    "\n",
    "\n",
    "def categorize(score):\n",
    "    \"\"\"Map a total score to 'Ringan' (<= 3), 'Sedang' (<= 4.5) or 'Intensif'.\"\"\"\n",
    "    if score <= 3:\n",
    "        return 'Ringan'\n",
    "    if score <= 4.5:\n",
    "        return 'Sedang'\n",
    "    return 'Intensif'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "scoring-apply",
   "metadata": {},
   "outputs": [],
   "source": [
    "# assign() returns a new frame, so the unlabeled df is left untouched.\n",
    "df_labeled = df.assign(\n",
    "    mood_score=df['mood'].map(mood_scores),\n",
    "    sleep_score=df['durasi_tidur'].apply(sleep_score),\n",
    "    duration_score=df['durasi_belajar'].apply(duration_score),\n",
    ")\n",
    "\n",
    "# A mood missing from mood_scores maps to NaN, and NaN fails both comparisons\n",
    "# in categorize(), which would silently yield 'Intensif'. Fail loudly instead.\n",
    "assert df_labeled['mood_score'].notna().all(), 'mood value missing from mood_scores'\n",
    "\n",
    "# Total score.\n",
    "df_labeled['total_score'] = (\n",
    "    df_labeled['mood_score']\n",
    "    + df_labeled['sleep_score']\n",
    "    + df_labeled['duration_score']\n",
    ")\n",
    "\n",
    "# Pseudo-label derived from the total score.\n",
    "df_labeled['label'] = df_labeled['total_score'].apply(categorize)\n",
    "\n",
    "assert df_labeled['label'].isin(['Ringan', 'Sedang', 'Intensif']).all()\n",
    "\n",
    "df_labeled.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "save-labeled",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the labeled data to CSV, then show how the labels are distributed.\n",
    "df_labeled.to_csv(LABELED_CSV, index=False)\n",
    "\n",
    "df_labeled['label'].value_counts()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.12",
   "language": "python",
   "name": "python312"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}