104 lines
4.4 KiB
Python
104 lines
4.4 KiB
Python
import pandas as pd
|
|
import json
|
|
|
|
class DataProcessor:
    """Generate synthetic student-grade data and hold subject-package metadata.

    Each "paket" (package) is a fixed combination of four subjects. The class
    stores descriptive information about the six packages, can build a random
    sample dataset labelled with the best-scoring package per student, and can
    export the package metadata as JSON.
    """

    # Grade columns, in the order the random grades are drawn for each student.
    # The order matters: callers that seed the global ``random`` module get the
    # same sequence of draws as before.
    _GRADE_COLUMNS = (
        'nilai_informatika',
        'nilai_fisika',
        'nilai_kimia',
        'nilai_biologi',
        'nilai_big_lanjut',
        'nilai_ekonomi',
        'nilai_mat_lanjut',
        'nilai_sej_lanjut',
        'nilai_sosiologi',
        'nilai_geografi',
    )

    # Grade columns averaged to score each package number. Insertion order
    # (1..6) is significant: ties are resolved in favour of the first package.
    _PAKET_COLUMNS = {
        1: ('nilai_informatika', 'nilai_fisika', 'nilai_kimia', 'nilai_biologi'),
        2: ('nilai_informatika', 'nilai_big_lanjut', 'nilai_kimia', 'nilai_biologi'),
        3: ('nilai_informatika', 'nilai_ekonomi', 'nilai_big_lanjut', 'nilai_mat_lanjut'),
        4: ('nilai_informatika', 'nilai_ekonomi', 'nilai_mat_lanjut', 'nilai_sej_lanjut'),
        5: ('nilai_informatika', 'nilai_ekonomi', 'nilai_sosiologi', 'nilai_sej_lanjut'),
        6: ('nilai_informatika', 'nilai_ekonomi', 'nilai_sosiologi', 'nilai_geografi'),
    }

    def __init__(self):
        """Initialise the per-instance package metadata table.

        ``paket_info`` maps a package number (1-6) to a dict with the keys
        ``nama`` (display name), ``mapel`` (list of subject names),
        ``kelas_tersedia``, ``kapasitas_per_kelas`` and ``jumlah_siswa``.
        """
        self.paket_info = {
            1: {
                'nama': 'Paket Informatika-Fisika-Kimia-Biologi',
                'mapel': ['Informatika', 'Fisika', 'Kimia', 'Biologi'],
                'kelas_tersedia': 2,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50,
            },
            2: {
                'nama': 'Paket Informatika-BIG Lanjut-Kimia-Biologi',
                'mapel': ['Informatika', 'BIG Lanjut', 'Kimia', 'Biologi'],
                'kelas_tersedia': 1,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50,
            },
            3: {
                'nama': 'Paket Informatika-Ekonomi-BIG Lanjut-MAT Lanjut',
                'mapel': ['Informatika', 'Ekonomi', 'BIG Lanjut', 'MAT Lanjut'],
                'kelas_tersedia': 1,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50,
            },
            4: {
                'nama': 'Paket Informatika-Ekonomi-MAT Lanjut-SEJ Lanjut',
                'mapel': ['Informatika', 'Ekonomi', 'MAT Lanjut', 'SEJ Lanjut'],
                'kelas_tersedia': 3,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50,
            },
            5: {
                'nama': 'Paket Informatika-Ekonomi-Sosiologi-SEJ Lanjut',
                'mapel': ['Informatika', 'Ekonomi', 'Sosiologi', 'SEJ Lanjut'],
                'kelas_tersedia': 1,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50,
            },
            6: {
                'nama': 'Paket Informatika-Ekonomi-Sosiologi-Geografi',
                'mapel': ['Informatika', 'Ekonomi', 'Sosiologi', 'Geografi'],
                'kelas_tersedia': 1,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50,
            },
        }

    def create_sample_dataset(self, output_path, num_samples=500, *, seed=None):
        """Create a random sample dataset for training and write it as CSV.

        Every row holds a generated student name (``Siswa_<n>``), ten integer
        grades drawn uniformly from 60-100 inclusive, and ``paket_jurusan``,
        the package number chosen by ``_determine_best_paket`` for that row.

        Args:
            output_path: Destination passed to ``DataFrame.to_csv``.
            num_samples: Number of student rows to generate. Zero (or a
                negative value) yields an empty dataset that still carries
                the full column header.
            seed: Optional seed for a private random generator, making the
                output reproducible. When ``None`` the global ``random``
                module is used, exactly as before.

        Returns:
            The generated ``pandas.DataFrame`` (also written to
            ``output_path`` without the index column).
        """
        import random

        # Fall back to the module-level generator so callers that already
        # call random.seed() themselves keep getting the same data.
        rng = random if seed is None else random.Random(seed)

        rows = []
        for i in range(num_samples):
            row = {'nama_siswa': f'Siswa_{i+1}'}
            for column in self._GRADE_COLUMNS:
                row[column] = rng.randint(60, 100)

            # Label the row with the package whose subjects average highest.
            row['paket_jurusan'] = self._determine_best_paket(row)
            rows.append(row)

        # Passing the columns explicitly keeps the schema (and the CSV header)
        # intact even when no rows were generated.
        columns = ['nama_siswa', *self._GRADE_COLUMNS, 'paket_jurusan']
        df = pd.DataFrame(rows, columns=columns)
        df.to_csv(output_path, index=False)
        return df

    def _determine_best_paket(self, nilai):
        """Return the package number whose subjects have the highest mean grade.

        Args:
            nilai: Mapping that contains a numeric value for every
                ``nilai_*`` grade column; other keys are ignored.

        Returns:
            The package number (1-6) with the highest average over its four
            subjects. On a tie the lowest-numbered package wins.

        Raises:
            KeyError: If a required grade column is missing from ``nilai``.
        """
        scores = {
            paket: sum(nilai[column] for column in columns) / 4
            for paket, columns in self._PAKET_COLUMNS.items()
        }
        # max() returns the first maximal key in insertion order (1..6).
        return max(scores, key=scores.get)

    def get_paket_info(self, paket_num):
        """Return the metadata dict for ``paket_num``, or ``{}`` if unknown.

        The returned dict is the stored object itself, not a copy.
        """
        return self.paket_info.get(paket_num, {})

    def save_paket_info(self, path):
        """Write the package metadata to ``path`` as indented JSON.

        JSON object keys are always strings, so the integer package numbers
        are written as ``"1"`` .. ``"6"``.
        """
        # Explicit encoding so the result does not depend on the platform's
        # default locale encoding.
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.paket_info, f, indent=2)