TIF_E4121149_ADITIYA_GILANG/modeling.ipynb

212 lines
7.0 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best Parameters: {'svc__C': 100, 'svc__gamma': 0.01}\n",
"Best Score: 93.94%\n",
"Test Accuracy: 93.33%\n"
]
},
{
"data": {
"text/plain": [
"['svmreat_rbf.pkl']"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import joblib\n",
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.svm import SVC\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"df = pd.read_csv(\"train.csv\")\n",
"\n",
"df[\"PM10_PM25_ratio\"] = df[\"pm10\"] / (df[\"pm25\"] + 1)\n",
"df[\"PM10_SO2_ratio\"] = df[\"pm10\"] / (df[\"so2\"] + 1)\n",
"df[\"PM2.5_SO2_ratio\"] = df[\"pm25\"] / (df[\"so2\"] + 1)\n",
"df[\"CO_NO2_ratio\"] = df[\"co\"] / (df[\"no2\"] + 1)\n",
"df[\"CO_SO2_ratio\"] = df[\"co\"] / (df[\"so2\"] + 1)\n",
"df[\"CO_O3_ratio\"] = df[\"co\"] / (df[\"o3\"] + 1)\n",
"df[\"SO2_NO2_ratio\"] = df[\"so2\"] / (df[\"no2\"] + 1)\n",
"df[\"SO2_O3_ratio\"] = df[\"so2\"] / (df[\"o3\"] + 1)\n",
"df[\"NO2_O3_ratio\"] = df[\"no2\"] / (df[\"o3\"] + 1)\n",
"df[\"HC_CO_ratio\"] = df[\"hc\"] / (df[\"co\"] + 1)\n",
"df[\"HC_NO2_ratio\"] = df[\"hc\"] / (df[\"no2\"] + 1)\n",
"df[\"HC_SO2_ratio\"] = df[\"hc\"] / (df[\"so2\"] + 1)\n",
"df[\"HC_O3_ratio\"] = df[\"hc\"] / (df[\"o3\"] + 1)\n",
"df[\"total_pollution\"] = df[[\"pm10\", \"pm25\", \"co\", \"no2\", \"so2\", \"o3\", \"hc\"]].sum(axis=1)\n",
"\n",
"X = df[[\"pm10\", \"pm25\", \"co\", \"no2\", \"so2\", \"o3\", \"hc\",\n",
" \"PM10_PM25_ratio\", \"PM10_SO2_ratio\", \"PM2.5_SO2_ratio\",\n",
" \"CO_NO2_ratio\", \"CO_SO2_ratio\", \"CO_O3_ratio\",\n",
" \"SO2_NO2_ratio\", \"SO2_O3_ratio\", \"NO2_O3_ratio\",\n",
" \"HC_CO_ratio\", \"HC_NO2_ratio\", \"HC_SO2_ratio\", \"HC_O3_ratio\",\n",
" \"total_pollution\"]]\n",
"y = df[\"kategori\"]\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"pipeline = Pipeline([\n",
" ('scaler', StandardScaler()),\n",
" ('svc', SVC(kernel='rbf'))\n",
"])\n",
"\n",
"param_grid = {\n",
" 'svc__C': [0.1, 1, 10, 100],\n",
" 'svc__gamma': [1, 0.1, 0.01, 0.001]\n",
"}\n",
"\n",
"grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring='accuracy')\n",
"grid_search.fit(X_train, y_train)\n",
"\n",
"best_model = grid_search.best_estimator_\n",
"\n",
"print(\"Best Parameters:\", grid_search.best_params_)\n",
"print(f\"Best Score: {round(grid_search.best_score_ * 100, 2)}%\")\n",
"print(f\"Test Accuracy: {round(best_model.score(X_test, y_test) * 100, 2)}%\")\n",
"\n",
"joblib.dump(best_model, \"svmreat_rbf.pkl\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 98.67%\n",
"✅ Model saved as svm_multi_param.pkl\n"
]
}
],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.svm import SVC\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"import joblib\n",
"\n",
"df = pd.read_csv(\"train_svm.csv\")\n",
"\n",
"kategori_cols = ['pm25_kat', 'pm10_kat', 'co_kat', 'hc_kat', 'o3_kat', 'no2_kat', 'so2_kat']\n",
"\n",
"label_encoder = LabelEncoder()\n",
"\n",
"for col in kategori_cols:\n",
" df[col] = label_encoder.fit_transform(df[col])\n",
"\n",
"df['kategori'] = label_encoder.fit_transform(df['kategori'])\n",
"\n",
"X = df[kategori_cols]\n",
"y = df['kategori']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"pipeline = Pipeline([\n",
" ('scaler', StandardScaler()),\n",
" ('svc', SVC(kernel='rbf', C=10, gamma=0.1))\n",
"])\n",
"\n",
"pipeline.fit(X_train, y_train)\n",
"accuracy = pipeline.score(X_test, y_test)\n",
"print(f\"Accuracy: {round(accuracy * 100, 2)}%\")\n",
"joblib.dump(pipeline, 'svm_multi_param.pkl')\n",
"print(\"✅ Model saved as svm_multi_param.pkl\")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ Prediksi Kategori Kualitas Udara: SANGAT TIDAK SEHAT\n"
]
}
],
"source": [
"import joblib\n",
"import pandas as pd\n",
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Load model\n",
"model = joblib.load('svm_multi_param.pkl')\n",
"\n",
"# Encoder untuk menyamakan mapping label (HARUS SAMA DENGAN SAAT TRAINING)\n",
"label_encoder = LabelEncoder()\n",
"label_encoder.fit(['BAIK', 'SEDANG', 'TIDAK SEHAT', 'SANGAT TIDAK SEHAT', 'BERBAHAYA'])\n",
"\n",
"# Data input baru (harus disusun urut sama seperti training)\n",
"input_kategori = ['SEDANG', 'SEDANG', 'BAIK', 'BAIK', 'BAIK', 'BAIK', 'BAIK']\n",
"\n",
"# Encode kategori ke angka\n",
"encoded_input = [label_encoder.transform([kat])[0] for kat in input_kategori]\n",
"\n",
"# Ubah ke DataFrame\n",
"input_df = pd.DataFrame([encoded_input], columns=['pm25_kat', 'pm10_kat', 'co_kat', 'hc_kat', 'o3_kat', 'no2_kat', 'so2_kat'])\n",
"\n",
"# Prediksi\n",
"predicted_label_encoded = model.predict(input_df)[0]\n",
"predicted_label = label_encoder.inverse_transform([predicted_label_encoded])[0]\n",
"\n",
"print(\"✅ Prediksi Kategori Kualitas Udara:\", predicted_label)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}