Selenium/BeautifulSoup中用于抓取电子商务网站的下一页迭代

# Launch Chrome in incognito mode and open the Lazada home page.
option = webdriver.ChromeOptions()
# The flag must be exactly "--incognito"; the pasted text had an em dash and
# spaces, which is not the switch this script means to pass.
option.add_argument('--incognito')
driver = webdriver.Chrome(executable_path='chromedriver', chrome_options=option)
driver.get('https://www.lazada.com.my/')
driver.maximize_window()

# Select the first category card on the home page.
element = driver.find_elements_by_class_name('card-categories-li-content')[0]
webdriver.ActionChains(driver).move_to_element(element).click(element).perform()

t = 10  # timeout, in seconds, for the explicit wait below
try:
    # NOTE(review): this element ID looks auto-generated; confirm that it is
    # stable between sessions, otherwise this wait always times out.
    WebDriverWait(driver, t).until(
        EC.visibility_of_element_located(
            (By.ID, "a2o4k.searchlistcategory.0.i0.460b6883jV3Y0q")
        )
    )
except TimeoutException:
    # The listing did not show up in time: reload and select the category again.
    print('Page Refresh!')
    driver.refresh()
    element = driver.find_elements_by_class_name('card-categories-li-content')[0]
    webdriver.ActionChains(driver).move_to_element(element).click(element).perform()
print('Page Load!')


def _text_or_default(node, default="-1"):
    """Return ``node.text``, or ``default`` when the lookup found nothing."""
    return node.text if node is not None else default


def getData(np):
    """Scrape the product cards on the current page, then click "next page".

    Each product is printed and passed to ``writerows`` as
    ``[title, selling_price, original_price, freeShipping, discount, url]``.
    Optional fields that are absent from a card are stored as ``"-1"``.

    Args:
        np: Not read by this function; kept only so that existing
            ``getData(np)`` calls keep working.

    Returns:
        True if a next-page control was found and clicked, False if the page
        has no such control.

    Raises:
        AttributeError: if a product card has no title or selling price.
    """
    soup = bs(driver.page_source, "lxml")
    for p in soup.findAll("div", class_='c2prKC'):
        # Title and selling price are mandatory; a missing one raises.
        title = p.find(class_='c16H9d').text
        selling_price = p.find(class_='c13VH6').text
        original_price = _text_or_default(p.find("del", class_='c13VH6'))
        discount = _text_or_default(p.find("span", class_='c1hkC1'))

        free_shipping_badge = p.find(
            "i",
            class_='ic-dynamic-badge ic-dynamic-badge-freeShipping ic-dynamic-group-2',
        )
        freeShipping = 1 if free_shipping_badge is not None else 0

        # Pass the tag name and class filter as separate arguments; wrapping
        # them in one tuple made find() treat both as tag names.
        link = p.find("a")
        href = link.get("href") if link is not None else None
        if p.find("div", class_='c16H9d') is not None and href:
            url = "https:" + href
        else:
            url = "-1"

        print("- -" * 30)
        toSave = [title, selling_price, original_price, freeShipping, discount, url]
        print(toSave)
        writerows(toSave, filename)

    # Advance once per page, after every product has been saved, and only if
    # a next-page control exists (indexing an empty result would raise).
    nextpage_elements = driver.find_elements_by_class_name('ant-pagination-next')
    if not nextpage_elements:
        return False
    next_button = nextpage_elements[0]
    webdriver.ActionChains(driver).move_to_element(next_button).click(next_button).perform()
    return True


# The argument is never read by getData, so pass None rather than a name that
# is not defined anywhere in this script.
getData(None)

1条回答

网友

1楼 · 发布于 2024-05-04 12:01:07

问题可能是驱动程序试图在元素正确加载之前单击按钮

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Start Chrome using the driver binary at PATH and the prepared options.
# NOTE(review): webdriver, PATH and option are not defined in this snippet;
# presumably they come from the question's setup code — confirm.
driver = webdriver.Chrome(PATH, chrome_options=option)

# Set the implicit wait right after the driver is created.
# NOTE(review): per the Selenium documentation, implicitly_wait controls how
# long element lookups keep retrying (up to 5 seconds here); it is not a
# page-load wait. The same documentation cautions against combining implicit
# and explicit waits — confirm this call is still wanted alongside the
# WebDriverWait used below.

driver.implicitly_wait(5)

url = "https://www.lazada.com.ph/catalog/?q=phone&_keyori=ss&from=input&spm=a2o4l.home.search.go.239e359dTYxZXo"
driver.get(url)

# XPath of the "next page" item. It compares the class attributes as exact
# strings, including the trailing/leading spaces inside the quotes.
next_page_path = "//ul[@class='ant-pagination ']//li[@class=' ant-pagination-next']"

# Wait up to 5 seconds for the next-page element to become clickable,
# then click it.
# Any exception raised inside the try body (for example the wait timing out)
# is printed instead of propagating.

try:
    next_page = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.XPATH, next_page_path)))
    next_page.click()

except Exception as e:
    print(e)

编辑1

已更改代码，使驱动程序等待元素变为可单击。您可以将此代码放入 while 循环中以连续翻页；如果找不到“下一页”按钮或按钮无法单击，则跳出循环。

编辑1

相关问题更多 >

编程相关推荐

热门问题

热门文章