152 lines
4.1 KiB
Plaintext
152 lines
4.1 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "90ab8483-90bf-43b3-83d2-56d0978b1a33",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"\n",
|
"np.random.seed(42)\n",
"\n",
"n_samples = 2000\n",
"\n",
"# Mood choices and their probabilities per sleep tier, stored as\n",
"# (minimum hours of sleep, moods, probabilities). Tiers are checked from\n",
"# top to bottom and the first match wins; the last one is the catch-all.\n",
"mood_by_sleep = [\n",
"    (7, ['Bagus', 'Lumayan', 'Biasa Saja'], [0.5, 0.3, 0.2]),\n",
"    (5, ['Lumayan', 'Biasa Saja', 'Cukup Jenuh'], [0.4, 0.4, 0.2]),\n",
"    (float('-inf'), ['Biasa Saja', 'Cukup Jenuh', 'Jenuh'], [0.3, 0.4, 0.3]),\n",
"]\n",
"\n",
"# (mean, standard deviation) of the study duration drawn for each mood.\n",
"duration_by_mood = {\n",
"    'Bagus': (95, 20),\n",
"    'Lumayan': (75, 20),\n",
"    'Biasa Saja': (55, 15),\n",
"    'Cukup Jenuh': (35, 10),\n",
"    'Jenuh': (25, 10),\n",
"}\n",
"\n",
"data = []\n",
"\n",
"# The order of the random draws (sleep, mood, duration) is kept fixed so\n",
"# the seeded output stays reproducible.\n",
"for i in range(1, n_samples + 1):\n",
"    # Sleep duration: normal(6.5, 1.5) limited to the range [3, 9].\n",
"    sleep = np.clip(np.random.normal(6.5, 1.5), 3, 9)\n",
"\n",
"    # Pick the first tier whose lower bound this sleep value reaches.\n",
"    for min_sleep, moods, probs in mood_by_sleep:\n",
"        if sleep >= min_sleep:\n",
"            break\n",
"    mood = np.random.choice(moods, p=probs)\n",
"\n",
"    # Study duration depends on the mood; it is limited to [10, 180] and\n",
"    # then truncated to an int.\n",
"    mean, std = duration_by_mood[mood]\n",
"    duration = int(np.clip(np.random.normal(mean, std), 10, 180))\n",
"\n",
"    data.append([i, mood, duration, round(sleep, 1)])\n",
|
||
"\n",
|
"# Build the DataFrame\n",
"df = pd.DataFrame(data, columns=[\n",
"    'no',\n",
"    'mood',\n",
"    'durasi_belajar',\n",
"    'durasi_tidur'\n",
"])\n",
"\n",
"# Save to CSV\n",
"df.to_csv('data_sintetis_2000_tanpa_label.csv', index=False)\n",
"\n",
"# A bare expression is only rendered when it is the last line of a cell,\n",
"# so the preview is shown explicitly here.\n",
"display(df.head())\n",
|
||
"\n",
|
"# 1. Mood score mapping\n",
"mood_scores = {\n",
"    'Bagus': 2,\n",
"    'Lumayan': 1.5,\n",
"    'Biasa Saja': 1,\n",
"    'Cukup Jenuh': 0.5,\n",
"    'Jenuh': 0\n",
"}\n",
"\n",
"# assign() returns a new frame, so df itself is left untouched.\n",
"df_labeled = df.assign(mood_score=df['mood'].map(mood_scores))\n",
|
||
"\n",
|
"# 2. Sleep duration score\n",
"def sleep_score(hours: float) -> int:\n",
"    '''Score a sleep duration given in hours.\n",
"\n",
"    Returns 2 when hours is above 7, 1 when it is from 5 up to and\n",
"    including 7, and 0 when it is below 5.\n",
"    '''\n",
"    if hours > 7:\n",
"        return 2\n",
"    if hours >= 5:\n",
"        return 1\n",
"    return 0\n",
|
||
"\n",
|
"# Score the sleep duration of every row.\n",
"df_labeled['sleep_score'] = df_labeled['durasi_tidur'].map(sleep_score)\n",
|
||
"\n",
|
"# 3. Study duration score\n",
"def duration_score(minutes: float) -> int:\n",
"    '''Score a study duration given in minutes.\n",
"\n",
"    Returns 2 when minutes is above 60, 1 when it is above 30 up to and\n",
"    including 60, and 0 when it is 30 or less.\n",
"    '''\n",
"    if minutes > 60:\n",
"        return 2\n",
"    if minutes > 30:\n",
"        return 1\n",
"    return 0\n",
|
||
"\n",
|
"# Score the study duration of every row.\n",
"df_labeled['duration_score'] = df_labeled['durasi_belajar'].map(duration_score)\n",
|
||
"\n",
|
"# 4. Total score: element-wise sum of the three component scores.\n",
"score_columns = ['mood_score', 'sleep_score', 'duration_score']\n",
"df_labeled['total_score'] = sum(df_labeled[column] for column in score_columns)\n",
|
||
"\n",
|
"# 5. Build the pseudo label\n",
"def categorize(score: float) -> str:\n",
"    '''Map a total score to its pseudo label.\n",
"\n",
"    Returns 'Ringan' for a score of 3 or less, 'Sedang' for a score\n",
"    above 3 up to and including 4.5, and 'Intensif' for anything higher.\n",
"    '''\n",
"    if score <= 3:\n",
"        return 'Ringan'\n",
"    if score <= 4.5:\n",
"        return 'Sedang'\n",
"    return 'Intensif'\n",
|
||
"\n",
|
"# Label every row from its total score.\n",
"df_labeled['label'] = df_labeled['total_score'].apply(categorize)\n",
"\n",
"df_labeled.to_csv('data_sintetis_2000_dengan_pseudo_label.csv', index=False)\n",
"\n",
"# Kept as the final expression of the cell so the notebook renders the preview.\n",
"df_labeled.head()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3.12",
|
||
"language": "python",
|
||
"name": "python312"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|