E41222753_NinikYuniarsih_Ju.../models/data_processor.py

104 lines
4.4 KiB
Python

import pandas as pd
import json
class DataProcessor:
    """Hold the subject-package ("paket") catalogue and build sample datasets.

    ``paket_info`` maps a package number (1-6) to a dict with the keys
    ``nama`` (display name), ``mapel`` (list of subject names),
    ``kelas_tersedia``, ``kapasitas_per_kelas`` and ``jumlah_siswa``.

    ``paket_info`` is the single source of truth for which subjects belong
    to which package: both the generated grade columns and the package
    scoring are derived from its ``mapel`` lists.
    """

    # Inclusive bounds of the randomly generated grades.
    _NILAI_MIN = 60
    _NILAI_MAX = 100

    def __init__(self):
        """Populate ``self.paket_info`` with the six built-in packages."""
        self.paket_info = {
            1: {
                'nama': 'Paket Informatika-Fisika-Kimia-Biologi',
                'mapel': ['Informatika', 'Fisika', 'Kimia', 'Biologi'],
                'kelas_tersedia': 2,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50
            },
            2: {
                'nama': 'Paket Informatika-BIG Lanjut-Kimia-Biologi',
                'mapel': ['Informatika', 'BIG Lanjut', 'Kimia', 'Biologi'],
                'kelas_tersedia': 1,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50
            },
            3: {
                'nama': 'Paket Informatika-Ekonomi-BIG Lanjut-MAT Lanjut',
                'mapel': ['Informatika', 'Ekonomi', 'BIG Lanjut', 'MAT Lanjut'],
                'kelas_tersedia': 1,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50
            },
            4: {
                'nama': 'Paket Informatika-Ekonomi-MAT Lanjut-SEJ Lanjut',
                'mapel': ['Informatika', 'Ekonomi', 'MAT Lanjut', 'SEJ Lanjut'],
                'kelas_tersedia': 3,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50
            },
            5: {
                'nama': 'Paket Informatika-Ekonomi-Sosiologi-SEJ Lanjut',
                'mapel': ['Informatika', 'Ekonomi', 'Sosiologi', 'SEJ Lanjut'],
                'kelas_tersedia': 1,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50
            },
            6: {
                'nama': 'Paket Informatika-Ekonomi-Sosiologi-Geografi',
                'mapel': ['Informatika', 'Ekonomi', 'Sosiologi', 'Geografi'],
                'kelas_tersedia': 1,
                'kapasitas_per_kelas': 36,
                'jumlah_siswa': 50
            }
        }

    @staticmethod
    def _nilai_key(mapel):
        """Return the grade column name for a subject.

        The subject name is lower-cased and its spaces become underscores,
        e.g. ``'BIG Lanjut'`` -> ``'nilai_big_lanjut'``.
        """
        return 'nilai_' + mapel.lower().replace(' ', '_')

    def _grade_columns(self):
        """Return the grade column names of every subject in ``paket_info``.

        Columns appear in first-seen order while walking the packages and
        their ``mapel`` lists; a subject shared by several packages is
        listed once.
        """
        # dict.fromkeys de-duplicates while keeping insertion order.
        return list(dict.fromkeys(
            self._nilai_key(mapel)
            for info in self.paket_info.values()
            for mapel in info['mapel']
        ))

    def create_sample_dataset(self, output_path, num_samples=500, seed=None):
        """Create a sample dataset for training and write it as CSV.

        Each row holds a generated student name (``Siswa_<n>``), one random
        integer grade per subject (60-100 inclusive) and ``paket_jurusan``,
        the package chosen by :meth:`_determine_best_paket` for those grades.

        Args:
            output_path: Destination handed to ``DataFrame.to_csv``.
            num_samples: Number of rows to generate.
            seed: Optional seed. When given, a private ``random.Random(seed)``
                is used so the dataset is reproducible. When ``None`` the
                module-level ``random`` generator is used, so callers that
                seed ``random`` globally keep their behaviour.

        Returns:
            The generated ``pandas.DataFrame`` (also written to
            ``output_path`` without the index column).
        """
        import random

        rng = random if seed is None else random.Random(seed)
        grade_keys = self._grade_columns()
        # Explicit column list keeps the CSV header stable even when
        # num_samples is 0 and there are no rows to infer it from.
        columns = ['nama_siswa', *grade_keys, 'paket_jurusan']

        data = []
        for i in range(num_samples):
            nilai = {'nama_siswa': f'Siswa_{i+1}'}
            for key in grade_keys:
                nilai[key] = rng.randint(self._NILAI_MIN, self._NILAI_MAX)
            # Label the row with the package whose subjects average highest.
            nilai['paket_jurusan'] = self._determine_best_paket(nilai)
            data.append(nilai)

        df = pd.DataFrame(data, columns=columns)
        df.to_csv(output_path, index=False)
        return df

    def _determine_best_paket(self, nilai):
        """Return the number of the package with the highest mean grade.

        For every package in ``paket_info`` the grades of its ``mapel``
        subjects are averaged; the package with the largest average wins.
        On a tie the package that appears first in ``paket_info`` is
        returned.

        Args:
            nilai: Mapping that contains a ``nilai_<subject>`` entry for
                every subject of every package.

        Raises:
            KeyError: If ``nilai`` lacks a grade for a required subject.
        """
        scores = {
            paket: sum(nilai[self._nilai_key(mapel)] for mapel in info['mapel'])
            / len(info['mapel'])
            for paket, info in self.paket_info.items()
        }
        return max(scores, key=scores.get)

    def get_paket_info(self, paket_num):
        """Return the info dict of package ``paket_num``.

        An empty dict is returned for an unknown package number. For a known
        package the stored dict itself is returned, not a copy.
        """
        return self.paket_info.get(paket_num, {})

    def save_paket_info(self, path):
        """Write ``paket_info`` to ``path`` as indented JSON.

        The integer package numbers become string keys in the file because
        JSON object keys are always strings.
        """
        # Explicit encoding so the output does not depend on the OS locale.
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.paket_info, f, indent=2)