"""Scrape research ("penelitian") listings from a SINTA affiliation profile.

Flow: launch Chrome with options that reduce automation fingerprinting, let
the user log in manually, then page through the affiliation's research list,
collecting title / leader / personnel / type / year / funding rows, and save
them to an Excel file.
"""

import random
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

CHROME_DRIVER_PATH = "D:\\lecturertask\\chromedriver.exe"

LOGIN_URL = "https://sinta.kemdikbud.go.id/logins"
RESEARCH_URL = "https://sinta.kemdikbud.go.id/affiliations/profile/447/?view=researches"
OUTPUT_FILE = "data_penelitian.xlsx"


def _build_browser():
    """Configure and launch Chrome with anti-automation-detection options.

    Returns:
        A ready ``webdriver.Chrome`` instance.
    """
    options = Options()
    # FIX: the original UA said "seperti Gecko" ("seperti" is Indonesian for
    # "like") — servers expect the literal token "like Gecko"; an unusual UA
    # is an easy bot tell.
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
    options.add_argument(f"user-agent={user_agent}")
    # Hide the "Chrome is being controlled by automated test software" banner
    # and the automation extension.
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    # FIX: Chrome expects "width,height"; the original "1920x1080" is ignored.
    options.add_argument("--window-size=1920,1080")
    return webdriver.Chrome(service=Service(CHROME_DRIVER_PATH), options=options)


def _first_text(item, selector, default, drop=""):
    """Stripped text of the first element matching *selector* under *item*.

    Args:
        item: Parent WebElement to search within.
        selector: CSS selector string.
        default: Value returned when no element matches.
        drop: Optional substring (e.g. an emoji prefix) removed from the text.
    """
    found = item.find_elements(By.CSS_SELECTOR, selector)
    if not found:
        return default
    return found[0].text.strip().replace(drop, "")


def _parse_item(item):
    """Extract one research entry as a 6-field row.

    Fields: [title, leader, personnel, research type, year, funding].
    Missing optional fields fall back to the Indonesian placeholder strings.
    Raises if the mandatory title element is absent (caller logs and skips).
    """
    title = item.find_element(By.CSS_SELECTOR, "div.ar-title").text.strip()
    leaders = item.find_elements(By.CSS_SELECTOR, "div.ar-meta a")
    leader = leaders[0].text.strip() if leaders else "Tidak Ada Leader"
    personil = item.find_elements(
        By.CSS_SELECTOR, "div.ar-meta a[href*='authors/profile']"
    )
    personil_list = (
        "; ".join(p.text.strip() for p in personil)
        if personil
        else "Tidak Ada Personil"
    )
    research_type = _first_text(item, "a.ar-pub", "Tidak Diketahui")
    year = _first_text(item, "a.ar-year", "Tidak Ada Tahun", drop="📅 ")
    fund = _first_text(item, "a.ar-quartile", "Tidak Ada Dana", drop="📊 ")
    return [title, leader, personil_list, research_type, year, fund]


def main():
    """Drive the scrape: manual login, paginate, collect rows, export Excel."""
    browser = _build_browser()
    try:
        browser.get(LOGIN_URL)
        # SINTA requires an authenticated session; the user logs in by hand.
        input("Login secara manual, lalu tekan ENTER untuk melanjutkan...")
        browser.get(RESEARCH_URL)
        time.sleep(random.uniform(2, 6))  # randomized delay to look human

        all_research = []
        page = 1
        action = ActionChains(browser)
        while True:
            print(f"📄 Scraping halaman {page}...")
            items = browser.find_elements(By.CSS_SELECTOR, "div.ar-list-item.mb-5")
            if not items:
                print("❌ Tidak ada data di halaman ini, berhenti.")
                break
            for item in items:
                try:
                    # Scroll each entry into view with a human-like pause.
                    action.move_to_element(item).perform()
                    time.sleep(random.uniform(0.5, 2))
                    all_research.append(_parse_item(item))
                except Exception as e:
                    # Best-effort: skip a malformed entry, keep scraping.
                    print(f"⚠️ Error mengambil data: {e}")
            try:
                # Pagination links are labeled with the page number.
                # FIX: wait for clickability (not mere presence) before
                # clicking; catch WebDriverException instead of a bare
                # except so Ctrl-C still interrupts the scrape.
                next_button = WebDriverWait(browser, random.uniform(3, 7)).until(
                    EC.element_to_be_clickable((By.LINK_TEXT, str(page + 1)))
                )
                next_button.click()
                time.sleep(random.uniform(3, 7))
                page += 1
            except WebDriverException:
                print("✅ Tidak ada halaman berikutnya, selesai scraping.")
                break

        df = pd.DataFrame(
            all_research,
            columns=["Judul", "Leader", "Personil", "Tipe Penelitian", "Tahun", "Dana"],
        )
        df.to_excel(OUTPUT_FILE, index=False, engine="openpyxl")
        print(
            f"🎉 Scraping selesai! {len(all_research)} data disimpan di 'data_penelitian.xlsx'"
        )
    finally:
        # FIX: original only quit on the happy path, leaking the browser
        # process if anything above raised.
        browser.quit()


if __name__ == "__main__":
    main()