kedua

2025-06-03 18:17:01 +07:00 · 2025-06-03 18:17:01 +07:00 · ea2277a92b
parent 2adf68f3d2
commit ea2277a92b
4 changed files with 114 additions and 65 deletions
--- a/BE/pycache/decision_tree_model.cpython-310.pyc
+++ b/BE/pycache/decision_tree_model.cpython-310.pyc
--- a/BE/pycache/main.cpython-310.pyc
+++ b/BE/pycache/main.cpython-310.pyc
--- a/BE/decision_tree_model.py
+++ b/BE/decision_tree_model.py
@ -1,81 +1,130 @@
 import pandas as pd
-import math
+import numpy as np
 from math import log2
 import os
-# Hitung entropy dari target
+target_column = 'TARGET'
 model_cache = {}  # Menyimpan tree dan atribut input per file CSV
-def calculate_entropy(target_series):
+def calculate_entropy(subset):
-    values = target_series.value_counts(normalize=True)
+    if len(subset) == 0:
-    return -sum(p * math.log2(p) for p in values if p > 0)
+        return 0
    counts = subset[target_column].value_counts()
    probabilities = counts / len(subset)
    entropy = -sum(p * log2(p) for p in probabilities)
    return entropy
-# Cari split terbaik berdasarkan gain tertinggi dari mean/median
+def count_classes(subset):
    return subset[target_column].value_counts().to_dict()
-def find_best_split(data, target):
+def get_majority_class(counts):
-    total_data = len(data)
+    return max(counts.items(), key=lambda x: x[1])[0]
    entropy_total = calculate_entropy(data[target])
    all_columns = data.columns.drop(target)
    numeric_columns = data[all_columns].select_dtypes(include=['number']).columns
    best = None
-    for column in numeric_columns:
+def build_tree_structure(data):
-        for method in ['mean', 'median']:
+    entropy = calculate_entropy(data)
-            threshold = data[column].mean() if method == 'mean' else data[column].median()
+    counts = count_classes(data)
            gt_split = data[data[column] > threshold]
            le_split = data[data[column] <= threshold]
-            if len(gt_split) == 0 or len(le_split) == 0:
+    if entropy == 0 or len(data) < 2:
-                continue
+        return {
            "type": "leaf",
            "prediction": get_majority_class(counts),
            "class_counts": counts
        }
-            entropy_gt = calculate_entropy(gt_split[target])
+    gains = {}
-            entropy_le = calculate_entropy(le_split[target])
+    thresholds = {}
-            gain = entropy_total - (len(gt_split) / total_data) * entropy_gt - (len(le_split) / total_data) * entropy_le
+    for column in data.columns:
        if column == target_column or not np.issubdtype(data[column].dtype, np.number):
            continue
        threshold = data[column].mean()
        le_subset = data[data[column] <= threshold]
        gt_subset = data[data[column] > threshold]
-            if best is None or gain > best['gain']:
+        le_entropy = calculate_entropy(le_subset)
-                best = {
+        gt_entropy = calculate_entropy(gt_subset)
                    'column': column,
                    'method': method,
                    'threshold': threshold,
                    'gain': gain,
                    'gt_split': gt_split,
                    'le_split': le_split
                }
    return best
-# Bangun decision tree secara rekursif
+        le_weight = len(le_subset) / len(data)
        gt_weight = len(gt_subset) / len(data)
-def build_tree(data, target, depth=0, max_depth=12):
+        weighted_entropy = le_weight * le_entropy + gt_weight * gt_entropy
-    counts = data[target].value_counts().to_dict()
+        gain = entropy - weighted_entropy
    samples = len(data)
    is_pure_leaf = len(counts) == 1
    is_single_sample = samples == 1
-    if samples == 0 or is_pure_leaf or is_single_sample or depth >= max_depth:
+        gains[column] = gain
-        if samples == 0:
+        thresholds[column] = threshold
            return {'type': 'leaf', 'class': 'Unknown'}
        chosen = max(counts, key=counts.get)
        return {'type': 'leaf', 'class': chosen}
-    split = find_best_split(data, target)
+    if not gains:
-    if split is None or split['gain'] <= 0:
+        return {
-        chosen = max(counts, key=counts.get)
+            "type": "leaf",
-        return {'type': 'leaf', 'class': chosen}
+            "prediction": get_majority_class(counts),
            "class_counts": counts
        }
    best_attribute = max(gains, key=gains.get)
    threshold = thresholds[best_attribute]
    le_data = data[data[best_attribute] <= threshold]
    gt_data = data[data[best_attribute] > threshold]
    return {
-        'type': 'node',
+        "type": "node",
-        'column': split['column'],
+        "attribute": best_attribute,
-        'method': split['method'],
+        "threshold": threshold,
-        'threshold': split['threshold'],
+        "class_counts": counts,
-        'left': build_tree(split['le_split'], target, depth + 1, max_depth),
+        "left": build_tree_structure(le_data),
-        'right': build_tree(split['gt_split'], target, depth + 1, max_depth)
+        "right": build_tree_structure(gt_data)
    }
-# Prediksi menggunakan pohon keputusan
+def predict(tree, input_data):
-
+    if tree["type"] == "leaf":
-def predict_tree(tree, input_data):
+        return tree["prediction"]
-    if tree['type'] == 'leaf':
+    attr = tree["attribute"]
-        return tree['class']
+    threshold = tree["threshold"]
-
+    if input_data[attr] <= threshold:
-    value = input_data[tree['column']]
+        return predict(tree["left"], input_data)
    if value <= tree['threshold']:
        return predict_tree(tree['left'], input_data)
    else:
-        return predict_tree(tree['right'], input_data)
+        return predict(tree["right"], input_data)
 def tree_input_attributes(tree, attributes=None):
    """Collect the attribute names referenced by decision nodes of ``tree``.

    Args:
        tree: Nested dict with a "type" key; entries whose type is "node"
            also carry "attribute", "left" and "right".
        attributes: Optional set to accumulate into. When given it is
            mutated in place; when omitted a fresh set is used.

    Returns:
        A sorted list of every name in the accumulated set.
    """
    collected = set() if attributes is None else attributes

    # Explicit stack instead of recursion; the right child is pushed first so
    # the left subtree is still visited before the right one.
    pending = [tree]
    while pending:
        current = pending.pop()
        if current["type"] != "node":
            continue
        collected.add(current["attribute"])
        pending.append(current["right"])
        pending.append(current["left"])

    return sorted(collected)
 def load_model_from_csv(csv_filename):
    """Return the cached model for ``csv_filename``, building it on first use.

    The CSV is looked up inside the relative "data" directory, a tree is built
    from it with ``build_tree_structure``, and the result is stored in
    ``model_cache`` keyed by the file name.

    Args:
        csv_filename: Name of the CSV file inside the "data" directory.

    Returns:
        A dict with "tree" (the built tree) and "input_fields" (the sorted
        attribute names the tree splits on).

    Raises:
        FileNotFoundError: If the CSV file does not exist under "data".
    """
    # Cache hit: hand back the stored entry without touching the disk.
    try:
        return model_cache[csv_filename]
    except KeyError:
        pass

    csv_path = os.path.join("data", csv_filename)
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"CSV file '{csv_filename}' tidak ditemukan.")

    decision_tree = build_tree_structure(pd.read_csv(csv_path))
    entry = {
        "tree": decision_tree,
        "input_fields": tree_input_attributes(decision_tree),
    }
    model_cache[csv_filename] = entry
    return entry
 def get_required_fields(csv_filename):
    """Return the "input_fields" entry of the model loaded for ``csv_filename``.

    The model comes from ``load_model_from_csv``, so any exception it raises
    propagates to the caller.
    """
    return load_model_from_csv(csv_filename)["input_fields"]
 def predict_from_input_dict(csv_filename, input_dict):
    """Run ``predict`` on ``input_dict`` using the tree built for ``csv_filename``.

    Args:
        csv_filename: Name of the CSV file whose model should be used.
        input_dict: Mapping passed unchanged to ``predict`` as the input data.

    Returns:
        Whatever ``predict`` returns for the loaded tree and ``input_dict``.
    """
    decision_tree = load_model_from_csv(csv_filename)["tree"]
    return predict(decision_tree, input_dict)
 def get_available_csv_files():
    """List the entries of the relative "data" directory ending in ".csv".

    Returns:
        The matching names, in the order ``os.listdir`` yields them.
    """
    entries = os.listdir("data")
    return list(filter(lambda entry: entry.endswith(".csv"), entries))
 def get_feature_columns_from_csv(csv_filename):
    """Return the numeric, non-target column names of a CSV under "data".

    Args:
        csv_filename: Name of the CSV file inside the "data" directory.

    Returns:
        Column names, in file order, whose dtype is a NumPy numeric subtype
        and which are not equal to ``target_column``.
    """
    frame = pd.read_csv(os.path.join("data", csv_filename))

    feature_columns = []
    for name in frame.columns:
        # The target is checked first so its dtype is never inspected.
        if name == target_column:
            continue
        if np.issubdtype(frame[name].dtype, np.number):
            feature_columns.append(name)
    return feature_columns
--- a/frontendd/resources/views/layouts/home.blade.php
+++ b/frontendd/resources/views/layouts/home.blade.php
@ -24,8 +24,8 @@
            <div class="sidebar-wrapper active">
                <div class="sidebar-header">
                    <div class="d-flex justify-content-between">
-                        <div class="logo flex justify-center items-center"">
+                        <div class="d-flex justify-content-center w-100">
-                            <img src="{{ asset('assets/images/logo/logo_aplikasi.png') }}" alt="Logo" style="width: 100px; height: auto;" />
+                            <img src="{{ asset('assets/images/logo/logo_aplikasi.png') }}" alt="Logo" style="width: 125px; height: auto; " />
                        </div>
                        <div class="toggler">