在python中从类外访问“html\u soup”

2024-06-28 15:33:06 发布

您现在位置:Python中文网/ 问答频道 /正文

我对使用类这个东西很陌生

我试图调用html\u soup变量,以便在类外使用它

我将类定义为:

from bs4 import BeautifulSoup
from requests import get
from urllib.request import Request, urlopen

from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium import webdriver

class DataScrape():
    def __init__(self, website):
        self.website = website
        self.driver = webdriver.Chrome(
           executable_path=r"C:/Users/littl/Downloads/"
        + "chromedriver_win32/chromedriver.exe")
        # self.options = Options()
        # self.options.add_argument("--headless")# Runs Chrome in headless mode.
        # self.options.add_argument('--no-sandbox')# # Bypass OS security model
        # self.options.add_argument('start-maximized')
        # self.options.add_argument('disable-infobars')
        # self.options.add_argument("--disable-extensions")
        # self.driver = webdriver.Chrome(chrome_options=self.options,
        # executable_path=
        # r'C:/Users/littl/Downloads/chromedriver_win32/chromedriver.exe')

    def bs4Scrape(self, urlToScrape):
        req = Request(urlToScrape, headers={'User-Agent': 'Mozilla/5.0'})
        web_byte = urlopen(req).read()
        webpage = web_byte.decode('utf-8')
        self.html_soup = BeautifulSoup(webpage, 'html5lib')

    def seleniumNavigate(self):     
        self.driver.get(self.website)     

imdbScrape = DataScrape("https://www.imdb.com/")
imdbScrape.seleniumNavigate()
wait = WebDriverWait(imdbScrape.driver, 10)
wait.until(ec.element_to_be_clickable((By.XPATH,
                            '//*[@id="suggestion-search"]'))).send_keys("the"
                            + " fifth element")

firstSelection = wait.until(ec.element_to_be_clickable((By.XPATH,
                                 '//*[@id="react-autowhatever-1--'
                                 + 'item-0"]/span')))
firstSelection.location_once_scrolled_into_view
firstSelection.click()
imdbUrlToScrape = imdbScrape.driver.current_url
imdbScrape.driver.close()
print(imdbScrape.html_soup)

我试着这样称呼它:

  imdbScrape.bs4Scrape(imdbUrlToScrape).html_soup

但在编译时,它说:

NoneType' object has no attribute 'html_soup'

如何编写代码,以便在类外访问html

我猜我必须用另一种方式来定义变量,但我不知道该怎么做

任何帮助都将不胜感激


Tags: fromimportselfaddhtmldriverseleniumwebsite