抓取在页面源代码中不可见的元素

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time

# Page whose therapist dropdown we want to inspect.
url = "https://harleytherapy.com/therapists?page=1"

# Run Chrome without a visible window.
options = webdriver.ChromeOptions()
options.add_argument('headless')

# With the "none" page-load strategy, driver.get() does not wait for the page
# to finish loading, which is why a pause follows the navigation below.
capa = DesiredCapabilities.CHROME
capa["pageLoadStrategy"] = "none"

driver = webdriver.Chrome(chrome_options=options, desired_capabilities=capa)
driver.set_window_size(1440, 900)
driver.get(url)
time.sleep(15)  # fixed pause to give the page time to render

# Snapshot the DOM as it is right now and parse it.
plain_text = driver.page_source
soup = BeautifulSoup(plain_text, 'html')

# Look the menu up by id; find() yields None when the element is not part of
# the captured page source.
therapist_menu_id = "downshift-7-menu"
print(soup.find(id=therapist_menu_id))

1条回答

网友

1楼 · 发布于 2024-09-30 06:21:10

ID 为 downshift-7-menu 的元素只有在打开治疗师下拉菜单之后才会被加载。您可以先把该下拉菜单滚动到视图中，然后单击它，从而触发加载。另外，还应该考虑用显式等待（explicit wait）来代替 sleep。

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Explicit wait helper: polls for up to 15 seconds instead of sleeping blindly.
wait = WebDriverWait(driver, 15)

# scroll the dropdown into view to load it
side_menu = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'inner-a377b5')))
last_height = driver.execute_script("return arguments[0].scrollHeight", side_menu)
while True:
    # Jump to the bottom of the side menu, then stop once its scroll height
    # no longer changes between iterations.
    driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", side_menu)
    new_height = driver.execute_script("return arguments[0].scrollHeight", side_menu)
    if new_height == last_height:
        break
    last_height = new_height

# open the menu
wait.until(EC.visibility_of_element_located((By.ID, 'downshift-7-input'))).click()

# wait for the option to load
therapist_menu_id = 'downshift-7-menu'
wait.until(EC.presence_of_element_located((By.ID, therapist_menu_id)))

# Re-parse the page now that the menu exists: any soup object built before the
# click is a snapshot of the old DOM and would still not contain the menu.
soup = BeautifulSoup(driver.page_source, 'html.parser')
print(soup.find(id=therapist_menu_id))

相关问题更多 >

编程相关推荐

热门问题

热门文章