212 lines
7.0 KiB
Plaintext
212 lines
7.0 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Best Parameters: {'svc__C': 100, 'svc__gamma': 0.01}\n",
|
|
"Best Score: 93.94%\n",
|
|
"Test Accuracy: 93.33%\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['svmreat_rbf.pkl']"
|
|
]
|
|
},
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"ename": "",
|
|
"evalue": "",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
|
|
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
|
|
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
|
|
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import joblib\n",
|
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
"from sklearn.svm import SVC\n",
|
|
"from sklearn.pipeline import Pipeline\n",
|
|
"\n",
|
|
"df = pd.read_csv(\"train.csv\")\n",
"\n",
"# Raw pollutant readings that are used directly as features.\n",
"base_pollutants = [\"pm10\", \"pm25\", \"co\", \"no2\", \"so2\", \"o3\", \"hc\"]\n",
"\n",
"# (new column, numerator column, denominator column) for each engineered ratio.\n",
"ratio_specs = [\n",
"    (\"PM10_PM25_ratio\", \"pm10\", \"pm25\"),\n",
"    (\"PM10_SO2_ratio\", \"pm10\", \"so2\"),\n",
"    (\"PM2.5_SO2_ratio\", \"pm25\", \"so2\"),\n",
"    (\"CO_NO2_ratio\", \"co\", \"no2\"),\n",
"    (\"CO_SO2_ratio\", \"co\", \"so2\"),\n",
"    (\"CO_O3_ratio\", \"co\", \"o3\"),\n",
"    (\"SO2_NO2_ratio\", \"so2\", \"no2\"),\n",
"    (\"SO2_O3_ratio\", \"so2\", \"o3\"),\n",
"    (\"NO2_O3_ratio\", \"no2\", \"o3\"),\n",
"    (\"HC_CO_ratio\", \"hc\", \"co\"),\n",
"    (\"HC_NO2_ratio\", \"hc\", \"no2\"),\n",
"    (\"HC_SO2_ratio\", \"hc\", \"so2\"),\n",
"    (\"HC_O3_ratio\", \"hc\", \"o3\"),\n",
"]\n",
"\n",
"# The +1 offset in the denominator is presumably there to avoid division by zero.\n",
"for ratio_name, numerator, denominator in ratio_specs:\n",
"    df[ratio_name] = df[numerator] / (df[denominator] + 1)\n",
"\n",
"df[\"total_pollution\"] = df[base_pollutants].sum(axis=1)\n",
"\n",
"# Feature matrix: raw readings, then the ratios in creation order, then the total.\n",
"feature_columns = (\n",
"    base_pollutants\n",
"    + [ratio_name for ratio_name, _, _ in ratio_specs]\n",
"    + [\"total_pollution\"]\n",
")\n",
"X = df[feature_columns]\n",
"y = df[\"kategori\"]\n",
"\n",
"# Hold out 20% of the rows for the final accuracy check (fixed seed).\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=42\n",
")\n",
"\n",
"# Scaling lives inside the pipeline so each CV fold fits its own scaler.\n",
"pipeline = Pipeline(steps=[\n",
"    ('scaler', StandardScaler()),\n",
"    ('svc', SVC(kernel='rbf')),\n",
"])\n",
"\n",
"# Hyper-parameter grid, addressed through the pipeline step name 'svc'.\n",
"param_grid = {\n",
"    'svc__C': [0.1, 1, 10, 100],\n",
"    'svc__gamma': [1, 0.1, 0.01, 0.001],\n",
"}\n",
"\n",
"grid_search = GridSearchCV(\n",
"    estimator=pipeline,\n",
"    param_grid=param_grid,\n",
"    cv=3,\n",
"    scoring='accuracy',\n",
")\n",
"grid_search.fit(X_train, y_train)\n",
"\n",
"best_model = grid_search.best_estimator_\n",
"\n",
"# Report the cross-validated score and the held-out accuracy as percentages.\n",
"best_cv_pct = round(grid_search.best_score_ * 100, 2)\n",
"test_accuracy_pct = round(best_model.score(X_test, y_test) * 100, 2)\n",
"\n",
"print(\"Best Parameters:\", grid_search.best_params_)\n",
"print(f\"Best Score: {best_cv_pct}%\")\n",
"print(f\"Test Accuracy: {test_accuracy_pct}%\")\n",
"\n",
"# Persist the refit best pipeline; the returned file list is the cell's output.\n",
"joblib.dump(best_model, \"svmreat_rbf.pkl\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Accuracy: 98.67%\n",
|
|
"✅ Model saved as svm_multi_param.pkl\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|
"from sklearn.svm import SVC\n",
|
|
"from sklearn.pipeline import Pipeline\n",
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
"import joblib\n",
|
|
"\n",
|
|
"df = pd.read_csv(\"train_svm.csv\")\n",
"\n",
"kategori_cols = ['pm25_kat', 'pm10_kat', 'co_kat', 'hc_kat', 'o3_kat', 'no2_kat', 'so2_kat']\n",
"\n",
"# Fit one LabelEncoder per feature column and keep each of them.\n",
"# A LabelEncoder assigns integer codes from the sorted set of values it was fitted\n",
"# on, so the code for a given category can differ between columns. Re-using a single\n",
"# encoder object (refit on every column) throws those mappings away, which makes it\n",
"# impossible to encode new inputs the same way at prediction time.\n",
"feature_encoders = {}\n",
"for col in kategori_cols:\n",
"    feature_encoders[col] = LabelEncoder()\n",
"    df[col] = feature_encoders[col].fit_transform(df[col])\n",
"\n",
"# Separate encoder for the target so predictions can be decoded back to labels.\n",
"label_encoder = LabelEncoder()\n",
"df['kategori'] = label_encoder.fit_transform(df['kategori'])\n",
"\n",
"X = df[kategori_cols]\n",
"y = df['kategori']\n",
"\n",
"# Hold out 20% of the rows for the accuracy check (fixed seed).\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"pipeline = Pipeline([\n",
"    ('scaler', StandardScaler()),\n",
"    ('svc', SVC(kernel='rbf', C=10, gamma=0.1))\n",
"])\n",
"\n",
"pipeline.fit(X_train, y_train)\n",
"accuracy = pipeline.score(X_test, y_test)\n",
"print(f\"Accuracy: {round(accuracy * 100, 2)}%\")\n",
"\n",
"# The model file keeps its original name and content (the fitted Pipeline).\n",
"joblib.dump(pipeline, 'svm_multi_param.pkl')\n",
"# Store the fitted encoders in a companion file so that inference code can apply\n",
"# exactly the same category -> integer mapping that the model was trained on.\n",
"joblib.dump(\n",
"    {'features': feature_encoders, 'target': label_encoder},\n",
"    'svm_multi_param_encoders.pkl'\n",
")\n",
"print(\"✅ Model saved as svm_multi_param.pkl\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"✅ Prediksi Kategori Kualitas Udara: SANGAT TIDAK SEHAT\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import joblib\n",
|
|
"import pandas as pd\n",
|
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|
"\n",
|
|
"# Load the trained pipeline\n",
"model = joblib.load('svm_multi_param.pkl')\n",
"\n",
"# Feature columns, in the same order the model was trained on\n",
"feature_cols = ['pm25_kat', 'pm10_kat', 'co_kat', 'hc_kat', 'o3_kat', 'no2_kat', 'so2_kat']\n",
"\n",
"# The label mapping MUST be the same as during training. Training fits a LabelEncoder\n",
"# separately on each column, so the integer code of a category depends on which\n",
"# categories that column actually contained. If the fitted encoders were saved next to\n",
"# the model as svm_multi_param_encoders.pkl (a dict with 'features' and 'target'\n",
"# keys), reuse them. Otherwise fall back to refitting on a fixed label list, which\n",
"# only matches training when every column held exactly these five labels.\n",
"try:\n",
"    saved_encoders = joblib.load('svm_multi_param_encoders.pkl')\n",
"except FileNotFoundError:\n",
"    print(\"⚠️ svm_multi_param_encoders.pkl not found; using a fixed label list (mapping may differ from training)\")\n",
"    label_encoder = LabelEncoder()\n",
"    label_encoder.fit(['BAIK', 'SEDANG', 'TIDAK SEHAT', 'SANGAT TIDAK SEHAT', 'BERBAHAYA'])\n",
"    feature_encoders = {col: label_encoder for col in feature_cols}\n",
"else:\n",
"    feature_encoders = saved_encoders['features']\n",
"    label_encoder = saved_encoders['target']\n",
"\n",
"# New input data (must be listed in the same order as feature_cols)\n",
"input_kategori = ['SEDANG', 'SEDANG', 'BAIK', 'BAIK', 'BAIK', 'BAIK', 'BAIK']\n",
"\n",
"# Encode each category with the encoder that belongs to its own column\n",
"encoded_input = [\n",
"    feature_encoders[col].transform([kat])[0]\n",
"    for col, kat in zip(feature_cols, input_kategori)\n",
"]\n",
"\n",
"# Wrap in a single-row DataFrame with the training column names\n",
"input_df = pd.DataFrame([encoded_input], columns=feature_cols)\n",
"\n",
"# Predict, then decode the integer class back to its category label\n",
"predicted_label_encoded = model.predict(input_df)[0]\n",
"predicted_label = label_encoder.inverse_transform([predicted_label_encoded])[0]\n",
"\n",
"print(\"✅ Prediksi Kategori Kualitas Udara:\", predicted_label)\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.9"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|