<?php

namespace App\Console\Commands;

use Illuminate\Console\Command;
use Illuminate\Support\Str;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
use League\Csv\Reader;
use Illuminate\Support\Facades\File;

/**
 * Artisan command `scrap:tweets`.
 *
 * Runs the `tweet-harvest` executable found in node_modules/.bin with a search
 * query that always ends in `lang:id`, expects the resulting CSV inside
 * public/tweets-data, and reports how many records that CSV contains.
 */
class ScrapTweets extends Command
{
    /**
     * Command name and options as parsed by the console.
     *
     * @var string
     */
    protected $signature = 'scrap:tweets
        {--keyword= : Kata kunci pencarian tweet (tanpa lang:id)}
        {--limit=100 : Jumlah maksimal tweet}
        {--output= : Nama file output (opsional, tanpa path)}
        {--token= : Bearer token Twitter (jika kosong, ambil dari config)}';

    /**
     * Short description shown in the command list.
     *
     * @var string
     */
    protected $description = 'Scrape tweets via tweet-harvest dan simpan ke public/tweets-data';

    /**
     * Execute the command.
     *
     * @return int 0 on success, 1 on any validation or runtime failure.
     *
     * @throws ProcessFailedException When tweet-harvest exits with a non-zero status.
     */
    public function handle(): int
    {
        set_time_limit(0);

        // 1. Read and validate the options. The casts keep trim() from
        //    receiving null when an option is omitted.
        $keyword = trim((string) $this->option('keyword'));
        $limit = (int) $this->option('limit');
        $token = trim((string) ($this->option('token') ?: config('services.twitter.token')));

        // Remove a user-supplied "lang:id" filter (it is re-appended below).
        // Only a stand-alone token is removed, and it is replaced by a single
        // space so the words on either side are not glued together.
        $cleanKeyword = trim((string) preg_replace('/\s*(?<!\S)lang:id(?!\S)\s*/i', ' ', $keyword));

        if ($cleanKeyword === '') {
            $this->error('❌ Keyword wajib diisi. Gunakan opsi --keyword=');

            return 1;
        }

        if ($limit < 1) {
            $this->error('❌ Limit harus berupa bilangan bulat positif. Gunakan opsi --limit=');

            return 1;
        }

        if ($token === '') {
            $this->error('❌ Bearer token Twitter wajib diisi. Gunakan opsi --token= atau isi config services.twitter.token');

            return 1;
        }

        // 2. Build the search query and the output file name.
        $searchQuery = $cleanKeyword . ' lang:id';
        $outputFilename = $this->resolveOutputFilename($cleanKeyword);

        // 3. Final directory: public/tweets-data (created on demand).
        $outputDir = public_path('tweets-data');
        if (! is_dir($outputDir) && ! mkdir($outputDir, 0755, true) && ! is_dir($outputDir)) {
            $this->error("❌ Gagal membuat direktori output: {$outputDir}");

            return 1;
        }
        $outputPath = $outputDir . DIRECTORY_SEPARATOR . $outputFilename;

        $this->info("🔍 Mulai scraping: \"{$searchQuery}\" dengan limit {$limit}");
        $this->info("💾 Output diharapkan: public/tweets-data/{$outputFilename}");

        // 4. Locate the tweet-harvest executable installed by npm.
        $isWin = strncasecmp(PHP_OS, 'WIN', 3) === 0;
        $binary = base_path('node_modules')
            . DIRECTORY_SEPARATOR . '.bin'
            . DIRECTORY_SEPARATOR . ($isWin ? 'tweet-harvest.cmd' : 'tweet-harvest');

        if (! file_exists($binary)) {
            $this->error("❌ Binary tweet-harvest tidak ditemukan: {$binary}");
            $this->error(" Jalankan: npm install tweet-harvest di root project");

            return 1;
        }

        // 5. Run the tool with public/tweets-data as its working directory and
        //    stream its output to the console for debugging.
        $process = $this->buildProcess($binary, $outputFilename, $searchQuery, $limit, $token, $outputDir, $isWin);

        $this->info("➡️ Menjalankan tweet-harvest di folder: {$outputDir}");
        $process->run(function ($type, $buffer) {
            $this->line($buffer);
        });

        if (! $process->isSuccessful()) {
            $this->error("❌ Proses tweet-harvest gagal.");
            $this->error($process->getErrorOutput());

            throw new ProcessFailedException($process);
        }

        // 6. Verify the file is where we expect it, otherwise try the nested
        //    fallback locations.
        if (file_exists($outputPath)) {
            $this->info("✅ File ditemukan di: {$outputPath}");
        } elseif (! $this->recoverOutputFromNestedDir($outputDir, $outputFilename, $outputPath)) {
            return 1;
        }

        // 7. Count the CSV records (the first row is treated as the header).
        try {
            $csv = Reader::createFromPath($outputPath, 'r');
            $csv->setHeaderOffset(0);
            $count = iterator_count($csv->getRecords());
        } catch (\Exception $e) {
            $this->error("❌ Gagal membaca CSV: " . $e->getMessage());

            return 1;
        }

        $this->info("✅ Scraping selesai! Total tweet: {$count}");

        return 0;
    }

    /**
     * Decide the output file name: the --output option reduced to its base
     * name (so it cannot point outside the output directory), or a slug of
     * the keyword with a ".csv" suffix when the option is absent or unusable.
     *
     * @param string $cleanKeyword Keyword with the "lang:id" filter removed.
     *
     * @return string File name without any directory component.
     */
    private function resolveOutputFilename(string $cleanKeyword): string
    {
        $requested = trim((string) $this->option('output'));

        if ($requested !== '') {
            // Normalise backslashes first so both separator styles are stripped.
            $name = basename(str_replace('\\', '/', $requested));

            if ($name !== '' && $name !== '.' && $name !== '..') {
                return $name;
            }
        }

        // Str::slug() can return an empty string; fall back to a fixed name
        // rather than producing ".csv".
        $slug = Str::slug($cleanKeyword);

        return ($slug !== '' ? $slug : 'tweets') . '.csv';
    }

    /**
     * Create the configured (not yet started) tweet-harvest process.
     *
     * @param string $binary         Absolute path of the tweet-harvest executable.
     * @param string $outputFilename File name passed to `-o` (no directory part).
     * @param string $searchQuery    Query passed to `-s`.
     * @param int    $limit          Maximum number of tweets passed to `-l`.
     * @param string $token          Token passed to `--token`.
     * @param string $workingDir     Working directory for the process.
     * @param bool   $isWin          Whether PHP runs on Windows.
     *
     * @return Process
     */
    private function buildProcess(
        string $binary,
        string $outputFilename,
        string $searchQuery,
        int $limit,
        string $token,
        string $workingDir,
        bool $isWin
    ): Process {
        $process = new Process([
            $binary,
            '-o', $outputFilename,
            '-s', $searchQuery,
            '--tab', 'LATEST',
            // Command-line arguments are strings; cast explicitly.
            '-l', (string) $limit,
            '--token', $token,
        ], $workingDir);

        // Make sure PATH reaches the child process; on Windows the default
        // NodeJS install directory is prepended as well.
        $currentPath = getenv('PATH') ?: '';
        $envPath = $isWin
            ? 'C:\\Program Files\\nodejs;' . $currentPath
            : $currentPath;

        $process->setEnv([
            'PATH' => $envPath,
            'TEMP' => sys_get_temp_dir(),
            'TMP' => sys_get_temp_dir(),
        ]);
        $process->setTimeout(900);

        return $process;
    }

    /**
     * Look for the CSV inside a nested `tweets-data` folder of the output
     * directory and move it to the expected path.
     *
     * The freshly named file is preferred; `<name>.old.csv` is used only when
     * the fresh one is missing.
     * NOTE(review): this presumes tweet-harvest may write into a `tweets-data`
     * sub-folder of its working directory - confirm against the tool's docs.
     *
     * @param string $outputDir      Directory the process ran in.
     * @param string $outputFilename Expected file name.
     * @param string $outputPath     Expected final path of the CSV.
     *
     * @return bool True when a file was moved to $outputPath, false otherwise
     *              (an error has already been printed in that case).
     */
    private function recoverOutputFromNestedDir(string $outputDir, string $outputFilename, string $outputPath): bool
    {
        $nestedDir = $outputDir . DIRECTORY_SEPARATOR . 'tweets-data';
        $fallbackNew = $nestedDir . DIRECTORY_SEPARATOR . $outputFilename;
        $fallbackOld = $nestedDir . DIRECTORY_SEPARATOR . pathinfo($outputFilename, PATHINFO_FILENAME) . '.old.csv';

        if (file_exists($fallbackNew)) {
            $source = $fallbackNew;
            $movedMessage = "➡️ File dipindahkan ke: {$outputPath}";
            $this->warn("⚠️ File tidak ditemukan di lokasi utama, tapi ditemukan di: {$fallbackNew}");
        } elseif (file_exists($fallbackOld)) {
            $source = $fallbackOld;
            $movedMessage = "➡️ File .old.csv dipindahkan ke: {$outputPath}";
            $this->warn("⚠️ File tidak ditemukan di lokasi utama, tapi ditemukan file lama di: {$fallbackOld}");
        } else {
            $this->error("❌ File output tidak ditemukan di: {$outputPath}");
            $this->error(" Juga tidak ditemukan fallback di: {$fallbackOld}");

            return false;
        }

        try {
            if (! File::move($source, $outputPath)) {
                $this->error("❌ Gagal memindahkan file fallback: {$source}");

                return false;
            }
        } catch (\Exception $e) {
            $this->error("❌ Gagal memindahkan file fallback: " . $e->getMessage());

            return false;
        }

        $this->info($movedMessage);

        // Best-effort cleanup: rmdir() only succeeds on an empty directory and
        // a failure here must not fail the command, hence the suppression.
        @rmdir($nestedDir);

        return true;
    }
}