NIM_E31221299/app/Http/Controllers/SentimentController.php

876 lines
32 KiB
PHP

<?php
namespace App\Http\Controllers;
use Illuminate\Http\Request;
use Symfony\Component\Process\Process;
use Illuminate\Support\Facades\Log;
use App\Models\KpiMetric;
use App\Models\VersionKpiMetric;
use Illuminate\Support\Facades\DB;
use Carbon\Carbon;
class SentimentController extends Controller
{
/**
 * Render the page containing the CSV upload form used to start a training run.
 *
 * Returns the result of the `view()` helper for the `sentiment.upload` template.
 */
public function showUploadForm()
{
    $uploadPage = view('sentiment.upload');

    return $uploadPage;
}
/**
 * Handle a CSV upload and run one complete training cycle on it.
 *
 * Flow:
 *  1. Validate the `csv_file` upload (csv/txt).
 *  2. Create a new run folder `storage/app/data_processed/run_{weekday}_{Ymd}_{NN}`
 *     and move the upload into it as `labeling.csv`.
 *  3. Run `python/sentiment_service.py train` synchronously (no process timeout).
 *  4. Copy the produced artefacts to the public results folder and store the KPI rows.
 *
 * On success the user is redirected to the `sentiment.report` route for the new run;
 * on a failed process or any thrown error the user is redirected back with an error.
 *
 * @param Request $request Request carrying the `csv_file` upload.
 */
public function handleUploadAndTrain(Request $request)
{
    // Diagnostic logging of the incoming upload (kept before validation on purpose,
    // so a rejected upload still leaves a trace in the log).
    Log::info('Request content-type: ' . $request->header('Content-Type'));
    Log::info('Apakah file ada? ' . ($request->hasFile('csv_file') ? 'YA' : 'TIDAK'));
    $uploaded = $request->file('csv_file');
    Log::info('CSV file debug:', [
        'type' => gettype($uploaded),
        'class' => is_object($uploaded) ? get_class($uploaded) : null,
        'valid' => is_object($uploaded) && $uploaded->isValid(),
        'original_name' => is_object($uploaded) ? $uploaded->getClientOriginalName() : null,
    ]);
    Log::info('File upload status: ' . json_encode($uploaded));

    $request->validate([
        'csv_file' => 'required|file|mimes:csv,txt',
    ]);

    // Run folders are named run_{weekday}_{Ymd}_{NN}; the weekday is the localized
    // day name (e.g. "Senin"), the date looks like "20250612".
    $hari = now()->translatedFormat('l');
    $tanggal = now()->format('Ymd');
    $baseDir = storage_path('app/data_processed');
    $prefix = "run_{$hari}_{$tanggal}_";

    // Collect the numeric suffixes already used today; a non-numeric suffix counts as 0.
    $usedNumbers = array_map(
        static function (string $dir) use ($prefix): int {
            $suffix = substr(basename($dir), strlen($prefix));

            return is_numeric($suffix) ? (int) $suffix : 0;
        },
        glob($baseDir . '/' . $prefix . '*', GLOB_ONLYDIR) ?: []
    );
    $nextNumber = $usedNumbers === [] ? 1 : max($usedNumbers) + 1;
    $runSuffix = str_pad((string) $nextNumber, 2, '0', STR_PAD_LEFT);
    $runName = $prefix . $runSuffix;
    $runDir = $baseDir . '/' . $runName;
    if (!file_exists($runDir)) {
        mkdir($runDir, 0755, true);
    }

    // Store the upload inside the run folder.
    $csvPath = $runDir . '/labeling.csv';
    $request->file('csv_file')->move($runDir, 'labeling.csv');

    // Number of data rows: non-empty lines minus the header line, never negative.
    $csvLines = file($csvPath, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    $dataSize = $csvLines === false ? 0 : max(0, count($csvLines) - 1);
    unset($csvLines);

    // Training may take long; lift PHP's execution time limit when possible.
    if (function_exists('set_time_limit')) {
        set_time_limit(0);
    }

    // Log the existence of the script that is actually executed below.
    $scriptPath = base_path('python/sentiment_service.py');
    Log::info('Cek apakah file script ada: ' . $scriptPath);
    Log::info('Apakah file_exists? ' . (file_exists($scriptPath) ? 'YA' : 'TIDAK'));

    // Use the platform's directory separator: identical to the former hard-coded
    // backslash conversion on Windows, and a no-op elsewhere.
    $toNative = static fn ($path): string => str_replace('/', DIRECTORY_SEPARATOR, (string) $path);
    $datasetPath = $toNative(realpath($csvPath));
    $outputDir = $toNative(realpath($runDir));
    // A virtualenv keeps its interpreter in Scripts/ on Windows and in bin/ elsewhere.
    $pythonPath = $toNative(base_path(
        PHP_OS_FAMILY === 'Windows' ? 'venv/Scripts/python.exe' : 'venv/bin/python'
    ));
    Log::info("Script path: " . $scriptPath);
    Log::info("Python path: " . $pythonPath);

    $process = new Process([
        $pythonPath,
        $scriptPath,
        'train',
        '--data', $datasetPath,
        '--output-dir', $outputDir,
    ]);
    $process->setTimeout(null);
    Log::info("Final CMD: " . $process->getCommandLine());

    try {
        // Measure the wall-clock duration of the training process.
        $start = microtime(true);
        $process->run();
        $durationSec = microtime(true) - $start;
        Log::info("STDOUT: " . $process->getOutput());
        Log::info("STDERR: " . $process->getErrorOutput());

        if (!$process->isSuccessful()) {
            $err = $process->getErrorOutput();
            $out = $process->getOutput();

            return back()->withErrors("Training gagal!\nOutput:\n{$out}\nError:\n{$err}");
        }

        // Publish the artefacts, then verify the target folder for easier debugging.
        $this->copyResultsToPublic($runDir);
        if (!is_dir(storage_path("app/public/results/{$runName}"))) {
            Log::error("Folder public/results tidak ditemukan setelah copy untuk run {$runName}");
        } else {
            Log::info("Folder public/results tersedia untuk run {$runName}");
        }

        // storeKpiMetrics() declares an int duration; round explicitly instead of
        // relying on an implicit (deprecated, lossy) float-to-int conversion.
        $this->storeKpiMetrics($runDir, $runName, $dataSize, (int) round($durationSec));

        return redirect()->route('sentiment.report', ['id' => $runName])
            ->with('message', 'Training selesai. Lihat hasil di halaman report.');
    } catch (\Throwable $e) {
        return back()->withErrors('Exception saat training: ' . $e->getMessage());
    }
}
/**
 * Copy the artefacts of a training run to `storage/app/public/results/{runId}`.
 *
 * The run id is the basename of $outDir (e.g. "run_Senin_20250612_01"). Word-cloud
 * images are copied into a `wordcloud` sub-folder; every other artefact is copied
 * only when it exists in $outDir.
 *
 * @param string $outDir Folder the training script wrote its output to.
 */
protected function copyResultsToPublic(string $outDir)
{
    $runId = basename($outDir);
    $publicPath = storage_path("app/public/results/{$runId}");
    if (!file_exists($publicPath)) {
        mkdir($publicPath, 0755, true);
    }

    // Word-cloud images live in their own sub-folder.
    $wcDir = $outDir . '/wordcloud';
    $pubWcDir = $publicPath . '/wordcloud';
    if (is_dir($wcDir)) {
        if (!file_exists($pubWcDir)) {
            mkdir($pubWcDir, 0755, true);
        }
        // glob() returns false on error; treat that as "nothing to copy".
        foreach (glob($wcDir . '/wordcloud_*.png') ?: [] as $file) {
            copy($file, $pubWcDir . '/' . basename($file));
        }
    }

    // Remaining artefacts. `evaluation_split.png` is included because showReport()
    // looks for it in the public folder; it is skipped like any other file when absent.
    $others = [
        'distribution.png', 'distribution.csv',
        'tfidf_all.csv',
        'confusion_matrix.csv', 'evaluation_metrics.csv',
        'evaluation_full.csv', 'top_features_per_class.csv',
        'top_features.png', 'evaluation_full.png', 'evaluation_split.png', 'ringkasan.txt',
        'mnb_final_model.joblib', 'tfidf_vectorizer.joblib',
    ];
    foreach ($others as $fname) {
        $src = $outDir . '/' . $fname;
        if (file_exists($src)) {
            copy($src, $publicPath . '/' . $fname);
        }
    }
}
/**
 * Parse the evaluation artefacts in $outDir and upsert one `KpiMetric` row for the run.
 *
 * Two layouts of `evaluation_metrics.csv` are recognised (header names are matched
 * case-insensitively):
 *  - summary layout with `metric` and `value` columns;
 *  - per-class layout with `precision`, `recall` and `f1-score`/`f1_score` columns,
 *    from which macro averages are computed. Accuracy is then read from the
 *    "Akurasi akhir split:" line of `ringkasan.txt`, interpreted as a percentage.
 *
 * When the CSV is missing, too short, has no valid rows or an unknown header, only
 * the run timestamp, data size and training duration are stored and the per-version
 * metrics are NOT computed. Otherwise `distribution.csv` (if present) supplies the
 * class distribution and storeVersionMetrics() is called at the end.
 *
 * @param string $outDir           Folder containing the training output.
 * @param string $runName          Run identifier used as `run_id`.
 * @param int    $dataSize         Number of data rows in the training CSV.
 * @param int    $trainingDuration Training duration as measured by the caller.
 */
protected function storeKpiMetrics(string $outDir, string $runName, int $dataSize, int $trainingDuration)
{
    $evalCsv = $outDir . '/evaluation_metrics.csv';
    if (!file_exists($evalCsv)) {
        Log::warning("KPI: file evaluation_metrics.csv tidak ditemukan untuk run {$runName}");
        $this->saveMinimalKpi($runName, $dataSize, $trainingDuration);
        return;
    }

    // file() returns false when the file cannot be read; treat that as an empty file.
    $rows = array_map('str_getcsv', file($evalCsv) ?: []);
    if (count($rows) < 2) {
        Log::warning("KPI: evaluation_metrics.csv format tidak sesuai untuk run {$runName}");
        $this->saveMinimalKpi($runName, $dataSize, $trainingDuration);
        return;
    }

    $header = $rows[0];
    Log::info("KPI CSV header for run {$runName}: " . implode(', ', $header));

    // Rows are keyed by the normalised (trimmed, lower-case) header so that the
    // lookups below agree with the case-insensitive format detection.
    $lowerHdr = array_map(static fn ($h): string => strtolower(trim((string) $h)), $header);
    $dataRows = [];
    foreach (array_slice($rows, 1) as $r) {
        if (count($r) === count($lowerHdr)) {
            $dataRows[] = array_combine($lowerHdr, $r);
        }
    }
    if (empty($dataRows)) {
        Log::warning("KPI: tidak ada baris metrik valid di evaluation_metrics.csv untuk run {$runName}");
        $this->saveMinimalKpi($runName, $dataSize, $trainingDuration);
        return;
    }

    $accuracy = null;
    $precision = null;
    $recall = null;
    $f1 = null;
    $perClassMetrics = [];     // stored as-is in the `per_class_metrics` attribute
    $classDistribution = null; // filled from distribution.csv below

    $hasF1Column = in_array('f1-score', $lowerHdr, true) || in_array('f1_score', $lowerHdr, true);

    if (in_array('metric', $lowerHdr, true) && in_array('value', $lowerHdr, true)) {
        // Summary layout: one metric per row.
        foreach ($dataRows as $row) {
            $m = strtolower(trim((string) $row['metric']));
            $v = (float) $row['value'];
            if ($m === 'accuracy') {
                $accuracy = $v;
            } elseif ($m === 'precision') {
                $precision = $v;
            } elseif ($m === 'recall') {
                $recall = $v;
            } elseif (in_array($m, ['f1', 'f1-score', 'f1_score'], true)) {
                $f1 = $v;
            }
        }
    } elseif (in_array('precision', $lowerHdr, true) && in_array('recall', $lowerHdr, true) && $hasF1Column) {
        // Per-class layout: one class per row.
        // NOTE(review): every row is averaged; if the CSV also contains aggregate rows
        // (e.g. "macro avg"), they are included as well — confirm against the Python output.
        $sumP = $sumR = $sumF1 = 0;
        $count = 0;
        foreach ($dataRows as $row) {
            $labelKey = $row['label'] ?? $row['class'] ?? null;
            $p = (float) $row['precision'];
            $r = (float) $row['recall'];
            $fv = (float) ($row['f1-score'] ?? $row['f1_score'] ?? 0);

            $classMetrics = [
                'precision' => $p,
                'recall' => $r,
                'f1_score' => $fv,
            ];
            if ($labelKey !== null) {
                $perClassMetrics[$labelKey] = $classMetrics;
            } else {
                // No label column: fall back to a numeric index.
                $perClassMetrics[] = $classMetrics;
            }

            $sumP += $p;
            $sumR += $r;
            $sumF1 += $fv;
            $count++;
        }

        // Macro averages over all rows.
        $precision = $count ? $sumP / $count : null;
        $recall = $count ? $sumR / $count : null;
        $f1 = $count ? $sumF1 / $count : null;

        // This layout carries no accuracy; take it from ringkasan.txt when available.
        $summaryTxt = $outDir . '/ringkasan.txt';
        if (file_exists($summaryTxt)) {
            $lines = file($summaryTxt, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ?: [];
            foreach ($lines as $line) {
                if (stripos($line, 'Akurasi akhir split:') !== false) {
                    if (preg_match('/Akurasi akhir split:\s*([\d,.]+)/i', $line, $matches)) {
                        // Accept a decimal comma, then convert the percentage to a fraction.
                        $percentStr = str_replace(',', '.', $matches[1]);
                        $accuracy = floatval($percentStr) / 100;
                        Log::info("KPI: Akurasi diambil dari ringkasan.txt untuk run {$runName} = {$accuracy}");
                    }
                    break; // only the first matching line is considered
                }
            }
        }
    } else {
        Log::warning("KPI: format header evaluation_metrics.csv tidak dikenali untuk run {$runName}");
        $this->saveMinimalKpi($runName, $dataSize, $trainingDuration);
        return;
    }

    // Class distribution from distribution.csv, if present.
    $distCsv = $outDir . '/distribution.csv';
    if (file_exists($distCsv)) {
        $distRows = array_map('str_getcsv', file($distCsv) ?: []);
        if (count($distRows) >= 2) {
            $hdr = array_map(static fn ($h): string => strtolower(trim((string) $h)), $distRows[0]);
            $distArr = [];
            foreach (array_slice($distRows, 1) as $r) {
                if (count($r) !== count($hdr)) {
                    continue;
                }
                $comb = array_combine($hdr, $r);
                $vals = array_values($comb);

                // Preferred columns are label/class and count/frequency; with other
                // headers fall back to the first column as label and the second as count.
                $labelKey = $comb['label'] ?? $comb['class'] ?? $vals[0] ?? null;
                if (isset($comb['count'])) {
                    $countVal = (int) $comb['count'];
                } elseif (isset($comb['frequency'])) {
                    $countVal = (int) $comb['frequency'];
                } else {
                    $countVal = isset($vals[1]) ? (int) $vals[1] : null;
                }

                if ($labelKey !== null && $countVal !== null) {
                    $distArr[$labelKey] = $countVal;
                }
            }
            $classDistribution = $distArr;
        }
    }

    $attrs = [
        'run_timestamp' => now(),
        'accuracy' => $accuracy,
        'precision' => $precision,
        'recall' => $recall,
        'f1_score' => $f1,
        'data_size' => $dataSize,
        'training_duration' => $trainingDuration,
        'class_distribution' => $classDistribution,
        'per_class_metrics' => $perClassMetrics,
    ];
    try {
        KpiMetric::updateOrCreate(
            ['run_id' => $runName],
            $attrs
        );
        Log::info("KPI: berhasil menyimpan metrik untuk run {$runName}: " .
            "accuracy={$accuracy}, precision={$precision}, recall={$recall}, f1={$f1}, data_size={$dataSize}, training_duration={$trainingDuration}");
    } catch (\Throwable $e) {
        // A failed save is logged only; the per-version metrics are still attempted.
        Log::error("KPI: gagal menyimpan ke DB untuk run {$runName}: " . $e->getMessage());
    }

    Log::info("KPI: memanggil storeVersionMetrics untuk run {$runName}, outDir={$outDir}");
    $this->storeVersionMetrics($outDir, $runName);
}

/**
 * Upsert only the fields that are known without a usable evaluation CSV:
 * run timestamp, data size and training duration.
 */
private function saveMinimalKpi(string $runName, int $dataSize, int $trainingDuration): void
{
    KpiMetric::updateOrCreate(
        ['run_id' => $runName],
        [
            'run_timestamp' => now(),
            'data_size' => $dataSize,
            'training_duration' => $trainingDuration,
        ]
    );
}
/**
 * Tally predicted sentiment labels per version from `df_full_predictions.csv`
 * and upsert one `VersionKpiMetric` row per (run, version).
 *
 * The CSV must have `version` and `predicted_label` columns (matched
 * case-insensitively). A label containing "pos" counts as positive, one containing
 * "neg" as negative, anything else as neutral. Rows whose column count differs
 * from the header are ignored. Missing or unreadable input is logged and skipped.
 *
 * @param string $outDir  Folder containing the training output.
 * @param string $runName Run identifier used as `run_id`.
 */
protected function storeVersionMetrics(string $outDir, string $runName)
{
    $entries = array_diff(scandir($outDir), ['.', '..']);
    Log::info("Version KPI: isi folder {$outDir}: " . json_encode($entries));

    $predictionsFile = $outDir . '/df_full_predictions.csv';
    if (!file_exists($predictionsFile)) {
        Log::warning("Version KPI: file df_full_predictions.csv tidak ditemukan untuk run {$runName}");
        return;
    }

    $fh = fopen($predictionsFile, 'r');
    if ($fh === false) {
        Log::warning("Version KPI: gagal membuka df_full_predictions.csv untuk run {$runName}");
        return;
    }

    $columns = fgetcsv($fh);
    if (!$columns) {
        Log::warning("Version KPI: header CSV kosong untuk run {$runName}");
        fclose($fh);
        return;
    }

    $normalized = array_map('strtolower', $columns);
    $versionCol = array_search('version', $normalized);
    $labelCol = array_search('predicted_label', $normalized);
    if ($versionCol === false || $labelCol === false) {
        Log::warning("Version KPI: kolom 'version' atau 'predicted_label' tidak ditemukan di header: " . json_encode($columns));
        fclose($fh);
        return;
    }

    // Count labels for every version encountered in the file.
    $columnCount = count($columns);
    $tally = [];
    for ($record = fgetcsv($fh); $record !== false; $record = fgetcsv($fh)) {
        if (count($record) !== $columnCount) {
            continue;
        }

        $version = trim($record[$versionCol]);
        $predicted = strtolower(trim($record[$labelCol]));
        $tally[$version] ??= ['pos' => 0, 'neg' => 0, 'neu' => 0, 'total' => 0];

        if (str_contains($predicted, 'pos')) {
            $bucket = 'pos';
        } elseif (str_contains($predicted, 'neg')) {
            $bucket = 'neg';
        } else {
            $bucket = 'neu';
        }

        $tally[$version]['total']++;
        $tally[$version][$bucket]++;
    }
    fclose($fh);

    // Persist one row per version; a failed save is logged and does not stop the loop.
    foreach ($tally as $version => $counts) {
        $total = $counts['total'];
        $percentage = static fn (int $part) => $total ? round($part * 100 / $total, 2) : null;

        $data = [
            'count_positive' => $counts['pos'],
            'count_negative' => $counts['neg'],
            'count_neutral' => $counts['neu'],
            'total' => $total,
            'pct_positive' => $percentage($counts['pos']),
            'pct_negative' => $percentage($counts['neg']),
            'pct_neutral' => $percentage($counts['neu']),
        ];

        try {
            VersionKpiMetric::updateOrCreate(
                ['run_id' => $runName, 'version' => $version],
                $data
            );
            Log::info("Version KPI: tersimpan {$version} run {$runName}: " . json_encode($data));
        } catch (\Throwable $e) {
            Log::error("Version KPI: gagal simpan {$version} run {$runName}: " . $e->getMessage());
        }
    }
}
/**
 * Show the report page for one training run.
 *
 * Without an `id` query parameter the request is redirected to the most recently
 * modified `run_*` folder under `storage/app/public/results`. With an `id`, the
 * CSV artefacts of that run are loaded (each key of `$results` is only set when
 * its file exists) together with the URLs of the generated images.
 *
 * @param Request $request Request optionally carrying the run `id`.
 */
public function showReport(Request $request)
{
    $resultsDir = storage_path("app/public/results");
    $runId = $request->get('id');

    if (!$runId) {
        $folders = glob($resultsDir . '/run_*', GLOB_ONLYDIR);
        if (empty($folders)) {
            return back()->withErrors('Belum ada hasil training yang tersedia.');
        }
        // Newest folder first.
        usort($folders, function ($a, $b) {
            return filemtime($b) <=> filemtime($a);
        });
        $latestFolder = basename($folders[0]);
        return redirect()->route('sentiment.report', ['id' => $latestFolder]);
    }

    // The id comes from the query string and becomes part of a filesystem path:
    // accept only a plain folder name (no separators, no "."/"..", no NUL byte).
    $isPlainName = is_string($runId)
        && $runId !== '.'
        && $runId !== '..'
        && strpbrk($runId, "/\\\0") === false;
    if (!$isPlainName) {
        return back()->withErrors('Hasil training tidak ditemukan.');
    }

    $dir = $resultsDir . '/' . $runId;
    if (!is_dir($dir)) {
        return back()->withErrors('Hasil training tidak ditemukan.');
    }

    // Public URL of an image of this run, or null when the file does not exist.
    $imageUrl = static fn (string $file): ?string => file_exists($dir . '/' . $file)
        ? asset("storage/results/{$runId}/{$file}")
        : null;

    $results = [];

    // result key => [csv file, max number of data lines to read (null = all)]
    $tables = [
        'distribution' => ['distribution.csv', null],
        'tfidf' => ['tfidf_all.csv', 50],
        'confusion' => ['confusion_matrix.csv', null],
        'evaluation' => ['evaluation_metrics.csv', null],
        'top_features' => ['top_features_per_class.csv', null],
        'evaluation_full' => ['evaluation_full.csv', null],
    ];
    foreach ($tables as $key => [$file, $limit]) {
        $rows = $this->readCsvAssoc($dir . '/' . $file, $limit);
        if ($rows !== null) {
            $results[$key] = $rows;
        }
    }

    $results['distribution_img'] = $imageUrl('distribution.png');
    $results['evaluation_split_img'] = $imageUrl('evaluation_split.png');
    $results['top_features_img'] = $imageUrl('top_features.png');
    $results['evaluation_full_img'] = $imageUrl('evaluation_full.png');

    // Word clouds: one entry per wordcloud_*.png, labelled with the file name sans extension.
    $wcDir = $dir . '/wordcloud';
    $wcUrls = [];
    if (is_dir($wcDir)) {
        foreach (glob($wcDir . '/wordcloud_*.png') ?: [] as $file) {
            $name = basename($file);
            $wcUrls[] = [
                'label' => pathinfo($name, PATHINFO_FILENAME),
                'url' => asset("storage/results/{$runId}/wordcloud/{$name}"),
            ];
        }
    }
    $results['wordclouds'] = $wcUrls;

    // All available runs, newest first, for the run selector.
    $allFolders = collect(glob($resultsDir . '/run_*', GLOB_ONLYDIR))
        ->sortByDesc(fn ($f) => filemtime($f))
        ->map(fn ($f) => basename($f))
        ->values()
        ->toArray();

    return view('sentiment.report', [
        'results' => $results,
        'runId' => $runId,
        'availableRuns' => $allFolders,
    ]);
}

/**
 * Read a CSV file into a list of rows keyed by the header line.
 *
 * Lines whose column count differs from the header are dropped. When $limit is
 * given, at most that many data lines are read from the file (dropped lines count
 * towards the limit), so a large file is not loaded completely.
 *
 * @param string   $path  CSV file to read.
 * @param int|null $limit Maximum number of data lines to read, null for all.
 * @return array|null Rows as associative arrays, or null when the file is missing
 *                    or cannot be opened.
 */
private function readCsvAssoc(string $path, ?int $limit = null): ?array
{
    if (!file_exists($path)) {
        return null;
    }
    $handle = fopen($path, 'r');
    if ($handle === false) {
        return null;
    }

    try {
        $header = fgetcsv($handle);
        if (!is_array($header)) {
            return []; // empty file: no header, no rows
        }

        $rows = [];
        $read = 0;
        while (($limit === null || $read < $limit) && ($line = fgetcsv($handle)) !== false) {
            $read++;
            if (count($line) === count($header)) {
                $rows[] = array_combine($header, $line);
            }
        }

        return $rows;
    } finally {
        fclose($handle);
    }
}
/**
 * Show the inference form.
 *
 * Passes the id of the run with the newest `run_timestamp` (null when there is no
 * run yet) and the ids of all runs, newest first, to the `sentiment.infer_form` view.
 */
public function showInferForm()
{
    // Most recent run, used as the pre-selected model.
    $newestRun = KpiMetric::orderByDesc('run_timestamp')->first();

    // Every run id, for the optional drop-down.
    $runIds = KpiMetric::orderByDesc('run_timestamp')
        ->pluck('run_id')
        ->toArray();

    $viewData = [
        'latestRunId' => $newestRun?->run_id,
        'allRunIds' => $runIds,
    ];

    return view('sentiment.infer_form', $viewData);
}
/**
 * Run inference on one text with the model and vectorizer of a given run.
 *
 * The Python script configured under `services.python` is called with the `infer`
 * sub-command and must print JSON of the form `{"status": "success", "results": [...]}`.
 * The first result is passed to the `sentiment.infer_result` view; every failure
 * redirects back with an error message.
 *
 * @param Request $request Request carrying `text` and `run_id`.
 */
public function doInfer(Request $request)
{
    $request->validate([
        'text' => 'required|string',
        'run_id' => 'required|string'
    ]);

    $runId = $request->input('run_id');

    // run_id is user input that becomes part of the model path handed to Python:
    // accept only a plain folder name (no separators, no "."/"..", no NUL byte).
    if ($runId === '.' || $runId === '..' || strpbrk($runId, "/\\\0") !== false) {
        return back()->withErrors('Model belum tersedia. Jalankan training terlebih dahulu.');
    }

    $dir = storage_path("app/public/results/{$runId}");
    $modelPath = $dir . '/mnb_final_model.joblib';
    $vectPath = $dir . '/tfidf_vectorizer.joblib';
    if (!file_exists($modelPath) || !file_exists($vectPath)) {
        return back()->withErrors('Model belum tersedia. Jalankan training terlebih dahulu.');
    }

    // Substitute invalid UTF-8 instead of letting json_encode() return false.
    $texts_json = json_encode(
        [$request->input('text')],
        JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE
    );
    $python = config('services.python.path');
    $script = config('services.python.script');

    // Array form: each argument is passed to the process separately, not via a shell string.
    $process = new Process([
        $python,
        $script,
        'infer',
        '--model', $modelPath,
        '--vectorizer', $vectPath,
        '--texts', $texts_json
    ]);
    $process->setTimeout(60);

    try {
        $process->run();
        if (!$process->isSuccessful()) {
            return back()->withErrors('Inference gagal: ' . $process->getErrorOutput());
        }

        $out = json_decode($process->getOutput(), true);
        if (!is_array($out) || !isset($out['status']) || $out['status'] !== 'success') {
            return back()->withErrors('Response tidak valid dari Python.');
        }

        $result = $out['results'][0] ?? null;
        return view('sentiment.infer_result', compact('result'));
    } catch (\Throwable $e) {
        return back()->withErrors('Error saat inference: ' . $e->getMessage());
    }
}
/**
 * Show the KPI dashboard: metric series of all runs plus textual insights.
 *
 * Insights computed here:
 *  1. class distribution of the last run (dominant class vs. balanced);
 *  2. Pearson correlation between data size and accuracy over all runs;
 *  3. slope of a least-squares line through the accuracy series (more than 3 runs);
 *  4. one message per version of the last run based on its sentiment percentages.
 *
 * View variables `posPct`, `negPct` and `neutralPct` always hold the per-version
 * percentage collections; the class-distribution percentages of the last run are
 * passed separately as `classPct` (null when there is no distribution data).
 *
 * @param Request $request Unused; kept for the route signature.
 */
public function showDashboard(Request $request)
{
    // All KPI rows, oldest first.
    $kpis = KpiMetric::orderBy('run_timestamp')->get();

    // Chart series; missing metric values are plotted as 0.
    $labels = $kpis->map(fn ($k) => $k->run_timestamp->format('Y-m-d H:i'))->toArray();
    $accuracyData = $kpis->map(fn ($k) => $k->accuracy ?? 0)->toArray();
    $precisionData = $kpis->map(fn ($k) => $k->precision ?? 0)->toArray();
    $recallData = $kpis->map(fn ($k) => $k->recall ?? 0)->toArray();
    $f1Data = $kpis->map(fn ($k) => $k->f1_score ?? 0)->toArray();

    // General statistics.
    $totalRuns = $kpis->count();
    $avgAccuracy = $totalRuns > 0 ? $kpis->avg('accuracy') : null;
    $avgF1 = $totalRuns > 0 ? $kpis->avg('f1_score') : null;

    // Class distribution of the last run, with lower-cased labels as keys.
    $lastRun = $kpis->last();
    $lastClassDist = collect($lastRun?->class_distribution ?? [])
        ->mapWithKeys(fn ($value, $key) => [strtolower((string) $key) => $value])
        ->toArray();

    // Per-version metrics of the last run.
    $lastRunId = $lastRun?->run_id;
    $versionMetrics = $lastRunId
        ? VersionKpiMetric::where('run_id', $lastRunId)->get()
        : collect();
    $posCount = $versionMetrics->pluck('count_positive');
    $negCount = $versionMetrics->pluck('count_negative');
    $neutralCount = $versionMetrics->pluck('count_neutral');
    $posPct = $versionMetrics->pluck('pct_positive');
    $negPct = $versionMetrics->pluck('pct_negative');
    $neutralPct = $versionMetrics->pluck('pct_neutral');
    $totalMentions = $versionMetrics->pluck('total');

    // 1) Class-distribution insight. Separate variable names keep the per-version
    //    collections above intact.
    $gapThreshold = 15; // percentage points the top class must lead by to "dominate"
    $totalCount = array_sum($lastClassDist);
    $classPct = null;
    if ($totalCount <= 0) {
        $classMessage = "Tidak ada data distribusi kelas untuk run terakhir.";
    } else {
        $classPosPct = ($lastClassDist['positif'] ?? 0) / $totalCount * 100;
        $classNegPct = ($lastClassDist['negatif'] ?? 0) / $totalCount * 100;
        $classNeuPct = ($lastClassDist['netral'] ?? 0) / $totalCount * 100;

        // Rank the three shares; taking the top two by position also works when
        // values tie (a tie yields a gap of 0 and therefore the "balanced" message).
        $ranked = [
            'positif' => $classPosPct,
            'negatif' => $classNegPct,
            'netral' => $classNeuPct,
        ];
        arsort($ranked);
        $dominant = array_key_first($ranked);
        [$maxVal, $secondMax] = array_values($ranked);

        if (($maxVal - $secondMax) > $gapThreshold) {
            $classMessage = match ($dominant) {
                'positif' => "Distribusi sentimen menunjukkan dominasi sentimen positif sebesar " . round($classPosPct, 1) . "%, yang mengindikasikan persepsi publik yang cenderung mendukung atau puas terhadap topik.",
                'negatif' => "Sentimen negatif mendominasi dengan proporsi " . round($classNegPct, 1) . "%, yang bisa mencerminkan kekhawatiran atau kritik publik terhadap topik.",
                default => "Mayoritas sentimen tergolong netral (" . round($classNeuPct, 1) . "%), menunjukkan bahwa publik cenderung memberikan respons yang informatif atau tidak emosional.",
            };
        } else {
            $classMessage = "Distribusi sentimen relatif seimbang: Positif " . round($classPosPct, 1) . "%, Negatif " . round($classNegPct, 1) . "%, dan Netral " . round($classNeuPct, 1) . "%. Hal ini menunjukkan keragaman opini publik terhadap topik yang dianalisis.";
        }

        $classPct = [
            'positif' => round($classPosPct, 1),
            'negatif' => round($classNegPct, 1),
            'netral' => round($classNeuPct, 1),
        ];
    }

    // 2) Pearson correlation between data size and accuracy (null values count as 0).
    $sizes = $kpis->pluck('data_size')->map(fn ($v) => (float) ($v ?? 0))->toArray();
    $accs = $kpis->pluck('accuracy')->map(fn ($v) => (float) ($v ?? 0))->toArray();
    $n = count($sizes);
    $corr = 0;
    if ($n > 1) {
        $meanX = array_sum($sizes) / $n;
        $meanY = array_sum($accs) / $n;
        $cov = 0;
        $varX = 0;
        $varY = 0;
        for ($i = 0; $i < $n; $i++) {
            $dx = $sizes[$i] - $meanX;
            $dy = $accs[$i] - $meanY;
            $cov += $dx * $dy;
            $varX += $dx * $dx;
            $varY += $dy * $dy;
        }
        // The lower bound avoids a division by zero when one series is constant.
        $corr = $cov / sqrt(max($varX * $varY, 1e-9));
    }
    if ($corr > 0.3) {
        $scatterMessage = "Terdapat korelasi positif sedang/kuat antara ukuran data dan akurasi (r=" . round($corr, 2) . "). Menunjukkan akurasi cenderung naik jika data_size bertambah.";
    } elseif ($corr < -0.3) {
        $scatterMessage = "Terdapat korelasi negatif antara ukuran data dan akurasi (r=" . round($corr, 2) . "), perlu diteliti mengapa akurasi menurun saat data_size bertambah.";
    } else {
        $scatterMessage = "Korelasi antara data_size dan akurasi lemah (r=" . round($corr, 2) . "), tidak jelas pola kenaikan akurasi dengan ukuran data.";
    }

    // 3) Accuracy trend: least-squares slope over run index 1..m.
    $y = $accuracyData;
    $m = count($y);
    $trendMessage = "Tidak cukup data untuk analisis tren.";
    if ($m > 3) {
        $x = range(1, $m);
        $meanX = array_sum($x) / $m;
        $meanY = array_sum($y) / $m;
        $cov = 0;
        $varX = 0;
        for ($i = 0; $i < $m; $i++) {
            $dx = $x[$i] - $meanX;
            $dy = $y[$i] - $meanY;
            $cov += $dx * $dy;
            $varX += $dx * $dx;
        }
        $slope = $cov / max($varX, 1e-9);
        if ($slope > 0.001) {
            $trendMessage = "Tren akurasi meningkat seiring run (slope=" . round($slope, 4) . ").";
        } elseif ($slope < -0.001) {
            $trendMessage = "Tren akurasi menurun seiring run (slope=" . round($slope, 4) . ").";
        } else {
            $trendMessage = "Tren akurasi relatif datar (slope=" . round($slope, 4) . ").";
        }
    }

    // 4) Per-version insight for the last run.
    $versionMessages = [];
    foreach ($versionMetrics as $vm) {
        $vp = $vm->pct_positive ?? 0;
        $vn = $vm->pct_negative ?? 0;
        $vneu = $vm->pct_neutral ?? 0;
        if ($vn > $vp && $vn > $vneu) {
            $versionMessages[$vm->version] = "Versi {$vm->version} cenderung negatif: {$vn}% negatif dari total {$vm->total}.";
        } elseif ($vp > $vn && $vp > $vneu) {
            $versionMessages[$vm->version] = "Versi {$vm->version} dominan positif: {$vp}% positif.";
        } else {
            $versionMessages[$vm->version] = "Versi {$vm->version} mayoritas netral atau seimbang: Positif {$vp}%, Negatif {$vn}%, Netral {$vneu}%.";
        }
    }

    return view('sentiment.dashboard', compact(
        'kpis',
        'labels', 'accuracyData', 'precisionData', 'recallData', 'f1Data',
        'totalRuns', 'avgAccuracy', 'avgF1',
        'lastRun', 'lastClassDist', 'versionMetrics',
        'posCount', 'negCount', 'neutralCount',
        'posPct', 'negPct', 'neutralPct', 'totalMentions',
        'classMessage', 'classPct',
        'scatterMessage',
        'trendMessage',
        'versionMessages'
    ));
}
}