NIM_E31221299/app/Console/Commands/ScrapTweets.php

149 lines
5.4 KiB
PHP

<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use Illuminate\Support\Str;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
use League\Csv\Reader;
use Illuminate\Support\Facades\File;
/**
 * Artisan command that runs the `tweet-harvest` CLI for a search keyword and
 * stores the resulting CSV in public/tweets-data.
 *
 * Exit codes: 0 on success, 1 on invalid input, a missing binary, or a
 * missing/unreadable output file. A failing tweet-harvest process raises a
 * ProcessFailedException.
 */
class ScrapTweets extends Command
{
    /**
     * Command name and options. The continuation lines are part of the string
     * literal and are intentionally left unindented.
     *
     * @var string
     */
    protected $signature = 'scrap:tweets
{--keyword= : Kata kunci pencarian tweet (tanpa lang:id)}
{--limit=100 : Jumlah maksimal tweet}
{--output= : Nama file output (opsional, tanpa path)}
{--token= : Bearer token Twitter (jika kosong, ambil dari config)}';

    /**
     * Short description of the command.
     *
     * @var string
     */
    protected $description = 'Scrape tweets via tweet-harvest dan simpan ke public/tweets-data';

    /**
     * Validate the options, run tweet-harvest and report how many tweets were saved.
     *
     * @return int 0 on success, 1 on any handled failure
     *
     * @throws ProcessFailedException when the tweet-harvest process exits unsuccessfully
     */
    public function handle(): int
    {
        set_time_limit(0);

        // 1. Read and validate the input. The option is cast to string because
        //    it is null when --keyword is omitted and trim(null) is deprecated.
        $cleanKeyword = $this->stripLanguageFilter((string) $this->option('keyword'));
        if ($cleanKeyword === '') {
            // Also covers a keyword that consisted only of "lang:id".
            $this->error('❌ Keyword wajib diisi. Gunakan opsi --keyword=');
            return 1;
        }

        $limit = (int) $this->option('limit');
        if ($limit < 1) {
            // Non-numeric input casts to 0, so it is rejected here as well.
            $this->error('❌ Limit harus berupa bilangan bulat lebih dari 0.');
            return 1;
        }

        $token = $this->option('token') ?: config('services.twitter.token');
        if (! is_string($token) || trim($token) === '') {
            // Fail fast instead of launching the scraper without credentials.
            $this->error('❌ Token Twitter kosong. Gunakan opsi --token= atau atur services.twitter.token');
            return 1;
        }

        // 2. Build the search query and the output file name.
        $searchQuery = $cleanKeyword . ' lang:id';
        $outputFilename = $this->resolveOutputFilename(
            (string) $this->option('output'),
            Str::slug($cleanKeyword)
        );

        // 3. Final directory: public/tweets-data.
        $outputDir = public_path('tweets-data');
        if (! is_dir($outputDir)) {
            mkdir($outputDir, 0755, true);
        }
        $outputPath = $outputDir . DIRECTORY_SEPARATOR . $outputFilename;

        $this->info("🔍 Mulai scraping: \"{$searchQuery}\" dengan limit {$limit}");
        $this->info("💾 Output diharapkan: public/tweets-data/{$outputFilename}");

        // 4. Locate the tweet-harvest binary installed under node_modules/.bin.
        $isWin = strncasecmp(PHP_OS, 'WIN', 3) === 0;
        $binName = $isWin ? 'tweet-harvest.cmd' : 'tweet-harvest';
        $binary = base_path('node_modules')
            . DIRECTORY_SEPARATOR . '.bin'
            . DIRECTORY_SEPARATOR . $binName;
        if (! file_exists($binary)) {
            $this->error("❌ Binary tweet-harvest tidak ditemukan: {$binary}");
            $this->error(" Jalankan: npm install tweet-harvest di root project");
            return 1;
        }

        // 5. Run the process with the output directory as working directory so
        //    that only a bare file name has to be passed to -o.
        $workingDir = $outputDir;
        $process = new Process([
            $binary,
            '-o', $outputFilename,
            '-s', $searchQuery,
            '--tab', 'LATEST',
            '-l', (string) $limit,
            '--token', $token,
        ], $workingDir);

        // On Windows the default Node.js install directory is prepended to PATH.
        $currentPath = getenv('PATH') ?: '';
        $envPath = $isWin
            ? 'C:\\Program Files\\nodejs;' . $currentPath
            : $currentPath;
        $process->setEnv([
            'PATH' => $envPath,
            'TEMP' => sys_get_temp_dir(),
            'TMP' => sys_get_temp_dir(),
        ]);
        $process->setTimeout(900);

        // Stream the child's output to the console for debugging.
        $this->info("➡️ Menjalankan tweet-harvest di folder: {$workingDir}");
        $process->run(function ($type, $buffer) {
            $this->line($buffer);
        });

        if (! $process->isSuccessful()) {
            $this->error("❌ Proses tweet-harvest gagal.");
            $this->error($process->getErrorOutput());
            throw new ProcessFailedException($process);
        }

        // 6. Make sure the CSV ended up at the expected location.
        if (! $this->ensureOutputFile($outputDir, $outputFilename, $outputPath)) {
            return 1;
        }

        // 7. Count the CSV records (header row excluded).
        try {
            $csv = Reader::createFromPath($outputPath, 'r');
            $csv->setHeaderOffset(0);
            $count = iterator_count($csv->getRecords());
        } catch (\Exception $e) {
            $this->error("❌ Gagal membaca CSV: " . $e->getMessage());
            return 1;
        }

        $this->info("✅ Scraping selesai! Total tweet: {$count}");
        return 0;
    }

    /**
     * Remove any standalone "lang:id" filter from the keyword.
     *
     * The filter is replaced by a single space so that the words around it are
     * not glued together ("foo lang:id bar" becomes "foo bar"), and the word
     * boundaries keep longer tokens that merely contain "lang:id" intact.
     *
     * @param string $keyword raw keyword as typed by the user
     *
     * @return string trimmed keyword without the language filter
     */
    private function stripLanguageFilter(string $keyword): string
    {
        $withoutFilter = preg_replace('/\s*\blang:id\b\s*/i', ' ', $keyword);

        // preg_replace() returns null on a PCRE error; fall back to the raw keyword.
        return trim($withoutFilter ?? $keyword);
    }

    /**
     * Decide the CSV file name, never allowing a directory component.
     *
     * @param string $requested value of --output (may be empty)
     * @param string $slugName  slug of the cleaned keyword (may be empty)
     *
     * @return string bare file name to create inside public/tweets-data
     */
    private function resolveOutputFilename(string $requested, string $slugName): string
    {
        // Backslashes are normalised first because basename() only treats them
        // as separators on Windows.
        $fileName = basename(str_replace('\\', '/', $requested));

        if ($fileName !== '' && $fileName !== '.' && $fileName !== '..') {
            return $fileName;
        }

        // A keyword without sluggable characters would otherwise yield ".csv".
        return ($slugName !== '' ? $slugName : 'tweets') . '.csv';
    }

    /**
     * Check that the output CSV exists, recovering it from the nested
     * "tweets-data/<name>.old.csv" fallback location when necessary.
     *
     * @param string $outputDir      absolute path of public/tweets-data
     * @param string $outputFilename bare output file name
     * @param string $outputPath     absolute path where the CSV is expected
     *
     * @return bool true when the CSV is available at $outputPath
     */
    private function ensureOutputFile(string $outputDir, string $outputFilename, string $outputPath): bool
    {
        if (file_exists($outputPath)) {
            $this->info("✅ File ditemukan di: {$outputPath}");
            return true;
        }

        // Look for a leftover file (e.g. .old.csv) inside a nested tweets-data directory.
        $nestedDir = $outputDir . DIRECTORY_SEPARATOR . 'tweets-data';
        $fallbackOld = $nestedDir . DIRECTORY_SEPARATOR . pathinfo($outputFilename, PATHINFO_FILENAME) . '.old.csv';

        if (! file_exists($fallbackOld)) {
            $this->error("❌ File output tidak ditemukan di: {$outputPath}");
            $this->error(" Juga tidak ditemukan fallback di: {$fallbackOld}");
            return false;
        }

        $this->warn("⚠️ File tidak ditemukan di lokasi utama, tapi ditemukan file lama di: {$fallbackOld}");

        try {
            // File::move() reports failure through its boolean result, so the
            // return value must be checked in addition to catching exceptions.
            if (! File::move($fallbackOld, $outputPath)) {
                $this->error("❌ Gagal memindahkan file fallback: " . $fallbackOld);
                return false;
            }
        } catch (\Exception $e) {
            $this->error("❌ Gagal memindahkan file fallback: " . $e->getMessage());
            return false;
        }

        $this->info("➡️ File .old.csv dipindahkan ke: {$outputPath}");

        // Deliberate best-effort cleanup: the nested directory is removed only
        // if it is empty, and a failure here must not fail the command.
        @rmdir($nestedDir);

        return true;
    }
}