import os
import sys
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# --- BROWSER CONFIGURATION ---
chrome_driver_path = "D:\\lecturertask\\chromedriver.exe"
chrome_options = Options()
user_agent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
              "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
chrome_options.add_argument(f"user-agent={user_agent}")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1920,1080")  # Chrome expects "width,height"
service = Service(chrome_driver_path)
browser = webdriver.Chrome(service=service, options=chrome_options)

# --- READ AFFILIATION ID FROM THE COMMAND LINE ---
if len(sys.argv) < 2:
    raise ValueError("An affiliation ID must be supplied as an argument.")
affiliation_id = sys.argv[1]
print(f"[INFO] Using affiliation ID: {affiliation_id}")

# --- LOGIN ---
# Open the SINTA login page and poll the URL until the user has logged in
# manually (the URL contains "profile" after a successful login).
browser.get("https://sinta.kemdikbud.go.id/logins")
browser.implicitly_wait(10)

login_wait_timeout = 50  # seconds
elapsed = 0
while elapsed < login_wait_timeout:
    if "profile" in browser.current_url:
        print("\n[SUCCESS] Logged in, continuing with community-service scraping...")
        break
    print(f"[WAIT] Waiting for login... ({login_wait_timeout - elapsed} seconds left)", end="\r")
    time.sleep(2)
    elapsed += 2
else:
    print(f"\n[WARNING] Still not logged in after {login_wait_timeout} seconds, scraping anyway...")

# --- SCRAPE COMMUNITY-SERVICE (PENGABDIAN) RECORDS ---
browser.get(f"https://sinta.kemdikbud.go.id/affiliations/profile/{affiliation_id}/?view=services")
time.sleep(3)

all_services = []
page = 1
while True:
    print(f"\nScraping page {page}...")
    try:
        service_items = WebDriverWait(browser, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.ar-list-item.mb-5"))
        )
    except TimeoutException:
        print("No data on this page, stopping.")
        break

    for item in service_items:
        try:
            title = item.find_element(By.CSS_SELECTOR, "div.ar-title").text.strip()
            # The first meta link is the activity leader; links pointing to
            # author profiles list the personnel.
            leader_elements = item.find_elements(By.CSS_SELECTOR, "div.ar-meta a")
            leader = leader_elements[0].text.strip() if leader_elements else "No leader"
            personnel_elements = item.find_elements(By.CSS_SELECTOR, "div.ar-meta a[href*='authors/profile']")
            personnel = "; ".join(p.text.strip() for p in personnel_elements) if personnel_elements else "No personnel"
            service_type = item.find_element(By.CSS_SELECTOR, "a.ar-pub").text.strip()
            year = item.find_element(By.CSS_SELECTOR, "a.ar-year").text.strip().replace("📅 ", "")
            fund = item.find_element(By.CSS_SELECTOR, "a.ar-quartile").text.strip().replace("📊 ", "")
            all_services.append([title, leader, personnel, service_type, year, fund])
            print(title)
        except Exception as e:
            print(f"Failed to parse an item: {e}")

    # Click the pagination link whose text is the next page number, if any
    try:
        next_button = browser.find_element(By.LINK_TEXT, str(page + 1))
        next_button.click()
        WebDriverWait(browser, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.ar-list-item.mb-5"))
        )
        page += 1
        time.sleep(2)
    except NoSuchElementException:
        print("No next page, scraping finished.")
        break

# --- SAVE TO EXCEL ---
# Number the output file so earlier runs are never overwritten.
folder_path = "D:\\lecturertask"
base_filename = "data_pengabdian"
file_ext = ".xlsx"
existing_files = [f for f in os.listdir(folder_path)
                  if f.startswith(base_filename) and f.endswith(file_ext)]
max_num = 0
for filename in existing_files:
    try:
        num = int(filename.replace(base_filename, "").replace(file_ext, ""))
        max_num = max(max_num, num)
    except ValueError:
        pass  # Ignore files without a numeric suffix

next_num = max_num + 1
new_filename = f"{base_filename}{next_num}{file_ext}"
save_path = os.path.join(folder_path, new_filename)

df = pd.DataFrame(
    all_services,
    columns=["Title", "Leader", "Personnel", "Service Type", "Year", "Funding"],
)
df.to_excel(save_path, index=False, engine="openpyxl")
print(f"\nScraping finished! {len(all_services)} records saved to '{save_path}'")

browser.quit()
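
# Usage sketch (the script filename below is illustrative, not from the
# source): pass the SINTA affiliation ID as the single positional argument,
# log in manually in the Chrome window that opens, and scraping takes over
# once the profile page loads (or after the 50-second wait expires).
#
#     python scrape_pengabdian.py <affiliation_id>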