一个用于获取 GoodReads.com 图书 URL 的简单 Python 脚本运行时出现回溯（Traceback）错误

"""Look up book titles on goodreads.com and record each title's first result URL.

Titles are read one per line from a text file; for every title the script runs
a GoodReads search in Chrome (via Selenium) and appends ``Title, URL`` to a CSV
file. Titles with no search result are recorded with the URL ``"N/A"``.
"""
import csv
import json
import time

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
# NOTE: the Chrome import below rebinds ``Options``; the Firefox one is kept
# only because other parts of the file may rely on the import side effect.
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
from pyvirtualdisplay import Display
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common import keys

CSV_PATH = '/home/iii/AudioBookReviews/WebScraping/GoodReadsBooksNew.csv'
TITLES_PATH = '/home/iii/AudioBookReviews/WebScraping/BookTitles.txt'
CHROMEDRIVER_PATH = "/home/iii/AudioBookReviews/WebScraping/chromedriver"
SEARCH_URL = 'https://www.goodreads.com/search?q='

url = 'https://www.goodreads.com/'


class Book:
    """A book title paired with the GoodReads URL found for it."""

    def __init__(self, title, url):
        self.title = title
        self.url = url

    def __iter__(self):
        # Iterating a Book yields its CSV row cells: title, then url.
        return iter([self.title, self.url])


def create_csv_file():
    """Create (or truncate) the output CSV and write the header row."""
    header = ['Title', 'URL']
    # newline='' is what the csv module asks for so it controls line endings.
    with open(CSV_PATH, 'w+', encoding='utf-8', newline='') as csv_file:
        wr = csv.writer(csv_file, delimiter=',')
        wr.writerow(header)


def read_from_txt_file():
    """Return the lines of the titles file with the trailing newline removed."""
    # The context manager closes the handle; the original left it open.
    with open(TITLES_PATH, encoding='utf-8') as titles_file:
        return [line.rstrip('\n') for line in titles_file]


def init_selenium():
    """Start Chrome, store it in the module-level ``driver`` and open GoodReads."""
    global driver
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    # NOTE(review): the original also built a second, unused Options object
    # carrying '--headless', so Chrome never actually ran headless. If headless
    # mode is wanted, add the argument to ``chrome_options`` here.
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=chrome_options)
    driver.get(url)
    time.sleep(5)
    driver.get(SEARCH_URL)


def search_for_title(title):
    """Submit ``title`` to the GoodReads search form and print the first hit's link."""
    driver.get(SEARCH_URL)
    # find_element(By..., ...) works on Selenium 3 and 4; the find_element_by_*
    # helpers were removed in Selenium 4.
    search_field = driver.find_element(By.NAME, 'q')
    search_field.clear()
    search_field.send_keys(title)
    search_field.send_keys(keys.Keys.RETURN)
    try:
        first_result = driver.find_element(
            By.XPATH,
            '/html/body/div[2]/div[3]/div[1]/div[2]/div[2]/table/tbody/tr[1]/td[2]/a')
    except NoSuchElementException:
        # No result row: let scrape_url() record "N/A" instead of aborting
        # the whole run on the first title without results.
        return
    print(first_result.get_attribute('href'))


def scrape_url():
    """Return the href of the first ``a.bookTitle`` link, or ``"N/A"`` on failure."""
    try:
        return driver.find_element(
            By.CSS_SELECTOR, 'a.bookTitle').get_attribute('href')
    except WebDriverException:
        # Base class of Selenium errors: stays best-effort for lookup failures
        # without also trapping KeyboardInterrupt as the bare except did.
        return "N/A"


def write_into_csv_file(vendor):
    """Append ``vendor`` (any iterable of cell values, e.g. a Book) as one CSV row."""
    with open(CSV_PATH, 'a', encoding='utf-8', newline='') as csv_file:
        wr = csv.writer(csv_file, delimiter=',')
        wr.writerow(list(vendor))


create_csv_file()
titles = read_from_txt_file()
init_selenium()
try:
    for title in titles:
        search_for_title(title)
        # Separate name so the module-level base ``url`` is not overwritten.
        book_url = scrape_url()
        book = Book(title, book_url)
        write_into_csv_file(book)
finally:
    # Always shut the browser and chromedriver process down.
    driver.quit()

1条回答

网友

1楼 · 发布于 2024-10-03 04:32:40

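我认为您是在用 Python 2.7 运行这个脚本。脚本中的 open(..., encoding='utf-8') 传入了 encoding 关键字参数，而 Python 2.7 的内置 open 函数不接受该参数，因此会抛出 TypeError。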

python 2.7中的open函数具有以下签名

open(name[, mode[, buffering]])

而 Python 3 中 open 函数的签名如下，其中包含 encoding 参数：

open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)

相关问题更多 >

编程相关推荐

热门问题

热门文章