"""Scrape IPR (patent) records from a SINTA affiliation profile into Excel.

Flow: open the SINTA login page, wait for the user to complete the login
manually in the visible browser window, then walk the paginated IPR list
of affiliation 447 and write one row per record to ``data_iprs.xlsx``.
"""

import random
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Local chromedriver binary and scrape targets.
CHROME_DRIVER_PATH = "D:\\lecturertask\\chromedriver.exe"
LOGIN_URL = "https://sinta.kemdikbud.go.id/logins"
IPRS_URL = "https://sinta.kemdikbud.go.id/affiliations/profile/447/?view=iprs"
OUTPUT_FILE = "data_iprs.xlsx"


def _build_browser():
    """Start a visible Chrome session (manual login needs a real window).

    Returns:
        A configured ``webdriver.Chrome`` instance.
    """
    options = Options()
    # FIX: the original passed "--headful", which is not a real Chrome
    # switch — a visible window is already the default, so it is dropped.
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    # FIX: Chrome expects a comma separator here, not "1920x1080".
    options.add_argument("--window-size=1920,1080")
    return webdriver.Chrome(service=Service(CHROME_DRIVER_PATH), options=options)


def _extract_record(item):
    """Extract one IPR row from a result card.

    Args:
        item: a ``div.ar-list-item.mb-5`` WebElement.

    Returns:
        ``[title, inventor, publication, year, application_number,
        patent_type]`` or ``None`` when any field is missing or stale.
    """
    try:
        return [
            item.find_element(By.CSS_SELECTOR, "div.ar-title a").text.strip(),
            item.find_elements(By.CSS_SELECTOR, "div.ar-meta a")[0].text.strip(),
            item.find_elements(By.CSS_SELECTOR, "a.ar-pub")[0].text.strip(),
            item.find_elements(By.CSS_SELECTOR, "a.ar-year")[0].text.strip(),
            item.find_elements(By.CSS_SELECTOR, "a.ar-cited")[0].text.strip(),
            item.find_elements(By.CSS_SELECTOR, "a.ar-quartile")[0].text.strip(),
        ]
    # Narrowed from the original's blanket ``except Exception``: a missing
    # element raises NoSuchElementException, the ``[0]`` indexing raises
    # IndexError, and a DOM refresh mid-read raises StaleElementReference.
    except (NoSuchElementException, StaleElementReferenceException, IndexError) as e:
        print(f"⚠️ Error mengambil data: {e}")
        return None


def _scrape_all(browser):
    """Walk the paginated IPR list, returning a list of row lists.

    Pagination relies on a link whose visible text is the next page
    number; when that link never appears the scrape is considered done.
    """
    rows = []
    page = 1
    while True:
        print(f"📄 Scraping halaman {page}...")
        items = browser.find_elements(By.CSS_SELECTOR, "div.ar-list-item.mb-5")
        if not items:
            print("❌ Tidak ada data di halaman ini, berhenti.")
            break
        for item in items:
            record = _extract_record(item)
            if record is not None:
                rows.append(record)
                print(f"✅ Data berhasil diambil: {record[0]}")
        try:
            next_button = WebDriverWait(browser, random.uniform(3, 7)).until(
                EC.presence_of_element_located((By.LINK_TEXT, str(page + 1)))
            )
            next_button.click()
        # FIX: was a bare ``except:`` that also swallowed KeyboardInterrupt.
        # WebDriverException covers both the wait timeout (TimeoutException
        # is a subclass) and a failed click.
        except WebDriverException:
            print("✅ Tidak ada halaman berikutnya, selesai scraping.")
            break
        # Randomized delay between pages to avoid hammering the server.
        time.sleep(random.uniform(3, 7))
        page += 1
    return rows


def main():
    """Drive the manual login, scrape every page, and save the Excel file."""
    browser = _build_browser()
    try:
        browser.get(LOGIN_URL)
        input("Login secara manual, lalu tekan ENTER untuk melanjutkan...")
        browser.get(IPRS_URL)
        time.sleep(random.uniform(2, 6))

        rows = _scrape_all(browser)

        df = pd.DataFrame(
            rows,
            columns=[
                "Judul",
                "Inventor",
                "Publikasi",
                "Tahun",
                "Nomor Permohonan",
                "Jenis Paten",
            ],
        )
        df.to_excel(OUTPUT_FILE, index=False, engine="openpyxl")
        print(f"🎉 Scraping selesai! {len(rows)} data disimpan di 'data_iprs.xlsx'")
    finally:
        # FIX: the original only quit on full success — any mid-scrape
        # exception leaked the Chrome process.
        browser.quit()


if __name__ == "__main__":
    main()