刮擦爬网：ModuleNotFoundError:没有名为“<project\u name>”的模块

. ├── geckodriver.log ├── ReviewsScraper │ ├── __init__.py │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── items.cpython-38.pyc │ │ ├── pipelines.cpython-38.pyc │ │ └── settings.cpython-38.pyc │ ├── settings.py │ └── spiders │ ├── hotels_spider.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── hotels_spider.cpython-38.pyc │ │ ├── __init__.cpython-38.pyc │ │ └── reviews_spider.cpython-38.pyc │ └── reviews_spider.py └── scrapy.cfg

from scrapy import Spider, Request from requests import get from bs4 import BeautifulSoup from urllib.request import urljoin from ..items import Hotel from re import compile from os import system import time class HotelsSpider(Spider): name = "hotels" city = '' def __init__(self, city): self.city = city def start_requests(self): urls = [get("https://www.booking.com/searchresults.ar.html", params={'ss': self.city}).url] for url in urls: yield Request(url, self.parse) def parse(self, response): def getHotelName(hotelDiv): try: name = hotelDiv.find('h3', {'class':"sr-hotel__title"}).find('span', {'class':"sr-hotel__name"}).get_text() except AttributeError: name = '' return name def getHotelLink(hotelDiv): ptrn = compile(r'#hotelTmpl') try: link = hotelDiv.find('h3', {'class':"sr-hotel__title"}).find('a', {'class':["hotel_name_link", "url"]}).attrs['href'] link = ptrn.sub("#tab-reviews", link) except AttributeError: link = '' return link def getHotelStars(hotelDiv): try: stars = hotelDiv.find('span', {'class':"sr-hotel__title-badges"}).find('i', {'class':["bk-icon-wrapper", "bk-icon-stars", "star_track"]}).attrs['title'] except AttributeError: try: stars = len(hotelDiv.find('span', {'class':"sr-hotel__title-badges"}).find('span', {'class':["bh-quality-bars", "bh-quality-bars--medium"]}).find_all('svg', {'class':["bk-icon", "-iconset-square_rating"]})) except AttributeError: stars = '' return stars def getNbrOfReviews(hotelDiv): try: nbr = hotelDiv.find('div', {'class':"bui-review-score__content"}).find('div', {'class':"bui-review-score__text"}).get_text() except AttributeError: nbr = '' return nbr def getRating(hotelDiv): try: ratingLabel = hotelDiv.find('div', {'class':"bui-review-score__content"}).find('div', {'class':"bui-review-score__title"}).get_text() ratingScore = hotelDiv.find('div', {'class':"bui-review-score__badge"}).get_text() except AttributeError: ratingLabel = '' ratingScore = '' return (ratingLabel, ratingScore) def getHotelPages(soupObject): try: lis = soupObject.find('nav', {'class':"bui-pagination__nav"}).find('li', {'class':"bui-pagination__pages"}).find('ul', {'class':"bui-pagination__list"}).find_all('li', {'class':["bui-pagination__item", "sr_pagination_item"]}) except AttributeError: lis = [] if lis != []: links = [] for li in lis: try: links.append(urljoin("https://www.booking.com/", li.a.attrs['href'])) except AttributeError: continue return links soup = BeautifulSoup(response.body, "html.parser") hotelsList = soup.find('div', {'id':"hotellist_inner"}).find_all('div', {'class':["sr_item", "sr_item_new", "sr_item_default", "sr_property_block", "sr_flex_layout", "sr_item_no_dates"]}) for hotelDiv in hotelsList: hotel = Hotel() hotel['name'] = getHotelName(hotelDiv) hotel['stars'] = getHotelStars(hotelDiv) hotel['nbr_of_reviews'] = getNbrOfReviews(hotelDiv) hotel['rating_score'] = getRating(hotelDiv)[1] hotel['rating_label'] = getRating(hotelDiv)[0] if hotel['name'] != '': hotel['reviews_filename'] = "{}.csv".format(hotel['name']) else: t = time.time() hotel['reviews_filename'] = "empty_file_{}.csv".format(t) yield hotel if getHotelLink(hotelDiv) != '': system("scrapy crawl reviews -a hotel_link='{0}' -s filename='{1}' --loglevel=ERROR".format(urljoin("https://www.booking.com/", getHotelLink(hotelDiv)), hotel['reviews_filename'])) yield from response.follow_all(getHotelPages(soup), callback=self.parse)

Traceback (most recent call last): File "/usr/local/bin/scrapy", line 8, in <module> sys.exit(execute()) File "/usr/local/lib64/python3.8/site-packages/scrapy/cmdline.py", line 112, in execute settings = get_project_settings() File "/usr/local/lib64/python3.8/site-packages/scrapy/utils/project.py", line 69, in get_project_settings settings.setmodule(settings_module_path, priority='project') File "/usr/local/lib64/python3.8/site-packages/scrapy/settings/__init__.py", line 287, in setmodule module = import_module(module) File "/usr/lib64/python3.8/importlib/__init__.py", line 127, in import_module return _bootstrap._gcd_import(name[level:], package, level) File "<frozen importlib._bootstrap>", line 1014, in _gcd_import File "<frozen importlib._bootstrap>", line 991, in _find_and_load File "<frozen importlib._bootstrap>", line 961, in _find_and_load_unlocked File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed File "<frozen importlib._bootstrap>", line 1014, in _gcd_import File "<frozen importlib._bootstrap>", line 991, in _find_and_load File "<frozen importlib._bootstrap>", line 973, in _find_and_load_unlocked ModuleNotFoundError: No module named 'ReviewsScraper'

1条回答

网友

1楼 · 发布于 2024-10-02 20:42:25

错误是由于以下行引起的：

system("scrapy crawl reviews -a hotel_link='{0}' -s filename='{1}'  loglevel=ERROR".format(urljoin("https://www.booking.com/", getHotelLink(hotelDiv)), hotel['reviews_filename']))

虽然我还不知道真正的问题是什么，但我重新格式化了代码，不再通过第一个蜘蛛调用第二个蜘蛛，现在一切都很好

无论如何，谢谢你：）

相关问题更多 >

编程相关推荐

热门问题

热门文章