from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import random
import time

chrome_driver_path = "D:\\lecturertask\\chromedriver.exe"
|
|
chrome_options = Options()
|
|
chrome_options.add_argument("--headful")
|
|
chrome_options.add_argument("--disable-gpu")
|
|
chrome_options.add_argument("--no-sandbox")
|
|
chrome_options.add_argument("--window-size=1920x1080")
|
|
|
|
service = Service(chrome_driver_path)
browser = webdriver.Chrome(service=service, options=chrome_options)

# Log in manually; the script continues once ENTER is pressed
browser.get("https://sinta.kemdikbud.go.id/logins")
input("Log in manually, then press ENTER to continue...")

# Open the affiliation's IPR listing and pause a random interval to mimic human browsing
browser.get("https://sinta.kemdikbud.go.id/affiliations/profile/447/?view=iprs")
time.sleep(random.uniform(2, 6))

all_iprs = []
page = 1

while True:
    print(f"📄 Scraping page {page}...")

    # Each IPR record is rendered as an "ar-list-item" card
    service_items = browser.find_elements(By.CSS_SELECTOR, "div.ar-list-item.mb-5")
    if not service_items:
        print("❌ No data on this page, stopping.")
        break

    for item in service_items:
        try:
            # Extract the individual fields from each record card
            title = item.find_element(By.CSS_SELECTOR, "div.ar-title a").text.strip()
            inventor = item.find_elements(By.CSS_SELECTOR, "div.ar-meta a")[0].text.strip()
            publication = item.find_elements(By.CSS_SELECTOR, "a.ar-pub")[0].text.strip()
            year = item.find_elements(By.CSS_SELECTOR, "a.ar-year")[0].text.strip()
            application_number = item.find_elements(By.CSS_SELECTOR, "a.ar-cited")[0].text.strip()
            patent_type = item.find_elements(By.CSS_SELECTOR, "a.ar-quartile")[0].text.strip()

            all_iprs.append([title, inventor, publication, year, application_number, patent_type])
            print(f"✅ Collected: {title}")
        except Exception as e:
            print(f"⚠️ Error extracting record: {e}")

    # Go to the next page by clicking the pagination link labelled with the next page number
    try:
        next_button = WebDriverWait(browser, random.uniform(3, 7)).until(
            EC.element_to_be_clickable((By.LINK_TEXT, str(page + 1)))
        )
        next_button.click()
        time.sleep(random.uniform(3, 7))
        page += 1
    except Exception:
        print("✅ No next page, scraping finished.")
        break

# Save the collected records to an Excel file
df = pd.DataFrame(all_iprs, columns=["Title", "Inventor", "Publication", "Year", "Application Number", "Patent Type"])
df.to_excel("data_iprs.xlsx", index=False, engine="openpyxl")

print(f"🎉 Scraping finished! {len(all_iprs)} records saved to 'data_iprs.xlsx'")

browser.quit()