Prices missing from web scrape

Published 2024-10-04 01:36:35


I got some help earlier, but I still need a bit more.

I am able to pull some information with the script below, but the prices are missing. The site asks for a postal code (say B3K 1X2); once I enter it on the site, I can see the product prices. I keep that page open and open a new page, which should then give me the correct product pricing. When I run the code I get the output below; the "None" should be the price. I increased the wait to 60 seconds to allow the page to load all items. Am I missing something?

SKU 025/BR258L40PER100(RH), Brand Cambro, Unit of Measure EA None

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

import requests
from bs4 import BeautifulSoup
import time

driver = webdriver.Chrome(executable_path=r"D:\chromedriver\chromedriver.exe")
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.russellhendrix.com/category/185/cooking-equipment?pagesize=600")

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.entity-product-price-wrap.grid-item-price-wrap"))).click()

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input#gv-postcalcode"))).send_keys("B3K 1X2")

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a.gv-red-btn.gv-set-postal"))).click()

wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "a.gv-red-btn.gv-set-postal")))

time.sleep(60)  # delays start of scrape for 60 seconds so the page can finish loading

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
}

baseurl = 'https://www.russellhendrix.com'
url = 'https://www.russellhendrix.com/category/185/cooking-equipment'

r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.content, 'lxml')

productlist = soup.find_all('div', class_='entity-product-image-wrap')

productlinks = []

for item in productlist:
    for link in item.find_all('a', href=True):
        productlinks.append(baseurl + link['href'])

for link in productlinks:
    r = requests.get(link, headers=headers)

    soup = BeautifulSoup(r.content, 'lxml')

    skunumber = soup.find('table', class_='product-details-table').text
    pricing = soup.find('div', class_='regPriceValue')

    print(skunumber, pricing)
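For what it's worth, one likely reason the price comes back as None is that `requests.get` starts a brand-new session that never saw the postal code entered in the Selenium browser, so the price element is simply absent from the HTML it receives; parsing `driver.page_source` instead keeps the scrape inside the browser session. A minimal sketch of that idea (the helper name and the sample HTML fragment are mine for illustration; the class names come from the code above):

```python
from bs4 import BeautifulSoup

def extract_product_info(html):
    """Parse the SKU details table and the price out of a rendered product page.

    Guards against missing elements so a product with no price yields None
    instead of raising AttributeError on .text.
    """
    soup = BeautifulSoup(html, "html.parser")
    sku_table = soup.find("table", class_="product-details-table")
    price_div = soup.find("div", class_="regPriceValue")
    sku = sku_table.get_text(" ", strip=True) if sku_table else None
    price = price_div.get_text(strip=True) if price_div else None
    return sku, price

# In the Selenium flow, feed in the browser-rendered HTML (which has the
# postal-code cookie applied) instead of a fresh requests.get response:
#   driver.get(link)
#   sku, price = extract_product_info(driver.page_source)

# Quick check on a made-up snippet:
sample = ('<table class="product-details-table"><tr><td>SKU 123</td></tr></table>'
          '<div class="regPriceValue">$45.99</div>')
print(extract_product_info(sample))
```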


Individual product page: https://www.russellhendrix.com/product/15388/cambro-camrack-base-rack-40p-172107a-br258l40per100rh



1 Answer

Posted 2024-10-04 01:36:35

As I can see from manually opening several of the product links, the same way you do with

for link in productlinks:

there are several products that simply have no price.
No element matches the ('div', class_='gv-price') locator, so soup.find('div', class_='gv-price') returns None because the element cannot be found.
UPD
Here is the Selenium code that enters the "B3K 1X2" postal code so product prices are shown:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(executable_path = driver_path)
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get(url)

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.entity-product-price-wrap.grid-item-price-wrap"))).click()

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input#gv-postcalcode"))).send_keys("B3K 1X2")

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a.gv-red-btn.gv-set-postal"))).click()

wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "a.gv-red-btn.gv-set-postal")))

From here you can carry on with your scraping.
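To keep the postal-code state, the link collection can also run on the browser-rendered HTML (`driver.page_source`) rather than a fresh `requests` fetch. A sketch of that step, with the listing-page selectors from the question; the function name and the sample fragment are illustrative only:

```python
from bs4 import BeautifulSoup

def collect_product_links(html, baseurl="https://www.russellhendrix.com"):
    """Pull absolute product URLs out of a rendered category page."""
    soup = BeautifulSoup(html, "html.parser")
    links = []
    for item in soup.find_all("div", class_="entity-product-image-wrap"):
        for a in item.find_all("a", href=True):
            links.append(baseurl + a["href"])
    return links

# After the postal-code steps above, parse the page the browser already has:
#   productlinks = collect_product_links(driver.page_source)

# Hypothetical fragment for illustration:
sample = ('<div class="entity-product-image-wrap">'
          '<a href="/product/15388/cambro-camrack">x</a></div>')
print(collect_product_links(sample))
```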
