Scrapy error in settings that won't let me do anything with Scrapy

Posted on 2024-09-26 22:42:36


I accidentally changed something in my Scrapy settings (I was trying to debug a spider I was creating by writing a runner.py file), and now I can't do anything with Scrapy at all. This is the error I get after running any Scrapy-related command at the command prompt:

(scrape_virtual_workspace) C:\Users\Sophocles PC>cd projects

(scrape_virtual_workspace) C:\Users\Sophocles PC\projects>scrapy
Traceback (most recent call last):
  File "C:\Anaconda\envs\scrape_virtual_workspace\Scripts\scrapy-script.py", line 10, in <module>
    sys.exit(execute())
  File "C:\Anaconda\envs\scrape_virtual_workspace\lib\site-packages\scrapy\cmdline.py", line 117, in execute
    check_deprecated_settings(settings)
  File "C:\Anaconda\envs\scrape_virtual_workspace\lib\site-packages\scrapy\settings\deprecated.py", line 22, in check_deprecated_settings
    deprecated = [x for x in DEPRECATED_SETTINGS if settings.get(x[0], None) is not None]
  File "C:\Anaconda\envs\scrape_virtual_workspace\lib\site-packages\scrapy\settings\deprecated.py", line 22, in <listcomp>
    deprecated = [x for x in DEPRECATED_SETTINGS if settings.get(x[0], None) is not None]
AttributeError: 'NoneType' object has no attribute 'get'

(scrape_virtual_workspace) C:\Users\Sophocles PC\projects>
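For context, by runner.py I mean the usual kind of debug script that starts a spider from Python instead of the command line. A minimal sketch of that (with a placeholder spider name, not my exact file) is:

# runner.py - minimal debug runner sketch (placeholder spider name, not my exact file)
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())  # load the project's settings.py
process.crawl("myspider")  # placeholder: the real spider name goes here
process.start()  # blocks until the crawl finishes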

I tried to correct this by adjusting the settings, but I can't seem to figure out what's wrong.

Can anyone help me?

Below you can see deprecated.py:

import warnings
from scrapy.exceptions import ScrapyDeprecationWarning

DEPRECATED_SETTINGS = [
    ('TRACK_REFS', 'no longer needed (trackref is always enabled)'),
    ('RESPONSE_CLASSES', 'no longer supported'),
    ('DEFAULT_RESPONSE_ENCODING', 'no longer supported'),
    ('BOT_VERSION', 'no longer used (user agent defaults to Scrapy now)'),
    ('ENCODING_ALIASES', 'no longer needed (encoding discovery uses w3lib now)'),
    ('STATS_ENABLED', 'no longer supported (change STATS_CLASS instead)'),
    ('SQLITE_DB', 'no longer supported'),
    ('SELECTORS_BACKEND', 'use SCRAPY_SELECTORS_BACKEND environment variable instead'),
    ('AUTOTHROTTLE_MIN_DOWNLOAD_DELAY', 'use DOWNLOAD_DELAY instead'),
    ('AUTOTHROTTLE_MAX_CONCURRENCY', 'use CONCURRENT_REQUESTS_PER_DOMAIN instead'),
    ('AUTOTHROTTLE_MAX_CONCURRENCY', 'use CONCURRENT_REQUESTS_PER_DOMAIN instead'),
    ('REDIRECT_MAX_METAREFRESH_DELAY', 'use METAREFRESH_MAXDELAY instead'),
    ('LOG_UNSERIALIZABLE_REQUESTS', 'use SCHEDULER_DEBUG instead'),
]


def check_deprecated_settings(settings):
    deprecated = [x for x in DEPRECATED_SETTINGS if settings[x[0]] is not None]
    if deprecated:
        msg = "You are using the following settings which are deprecated or obsolete"
        msg += " (ask scrapy-users@googlegroups.com for alternatives):"
        msg = msg + "\n    " + "\n    ".join("%s: %s" % x for x in deprecated)
        warnings.warn(msg, ScrapyDeprecationWarning)
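If I read the traceback right, the failing line only blows up when the settings object itself is None, since a real Settings object does have a .get() method. The same AttributeError can be reproduced on its own like this (my own check, not part of the Scrapy source):

# Reproducing the error outside Scrapy: calling .get() on None
settings = None
settings.get('TRACK_REFS', None)
# -> AttributeError: 'NoneType' object has no attribute 'get'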

Below you can see settings.py:

# -*- coding: utf-8 -*-

# Scrapy settings for zalando project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://doc.scrapy.org/en/latest/topics/settings.html
#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'zalando'

SPIDER_MODULES = ['zalando.spiders']
NEWSPIDER_MODULE = 'zalando.spiders'


# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'zalando.middlewares.ZalandoSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'zalando.middlewares.NetAPorterMaleDownloaderMiddleware': 543,
#}


# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
#    'zalando.pipelines.ZalandoPipeline': 300,
#}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)

# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

Below you can see the scrapy-script.py file:

# -*- coding: utf-8 -*-
import re
import sys

from scrapy.cmdline import execute

if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
    sys.exit(execute())

And here is cmdline.py:

from __future__ import print_function
import sys, os
import optparse
import cProfile
import inspect
import pkg_resources

import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.commands import ScrapyCommand
from scrapy.exceptions import UsageError
from scrapy.utils.misc import walk_modules
from scrapy.utils.project import inside_project, get_project_settings
from scrapy.utils.python import garbage_collect
from scrapy.settings.deprecated import check_deprecated_settings

def _iter_command_classes(module_name):
    # TODO: add `name` attribute to commands and and merge this function with
    # scrapy.utils.spider.iter_spider_classes
    for module in walk_modules(module_name):
        for obj in vars(module).values():
            if inspect.isclass(obj) and \
                    issubclass(obj, ScrapyCommand) and \
                    obj.__module__ == module.__name__ and \
                    not obj == ScrapyCommand:
                yield obj

def _get_commands_from_module(module, inproject):
    d = {}
    for cmd in _iter_command_classes(module):
        if inproject or not cmd.requires_project:
            cmdname = cmd.__module__.split('.')[-1]
            d[cmdname] = cmd()
    return d

def _get_commands_from_entry_points(inproject, group='scrapy.commands'):
    cmds = {}
    for entry_point in pkg_resources.iter_entry_points(group):
        obj = entry_point.load()
        if inspect.isclass(obj):
            cmds[entry_point.name] = obj()
        else:
            raise Exception("Invalid entry point %s" % entry_point.name)
    return cmds

def _get_commands_dict(settings, inproject):
    cmds = _get_commands_from_module('scrapy.commands', inproject)
    cmds.update(_get_commands_from_entry_points(inproject))
    cmds_module = settings['COMMANDS_MODULE']
    if cmds_module:
        cmds.update(_get_commands_from_module(cmds_module, inproject))
    return cmds

def _pop_command_name(argv):
    i = 0
    for arg in argv[1:]:
        if not arg.startswith('-'):
            del argv[i]
            return arg
        i += 1

def _print_header(settings, inproject):
    if inproject:
        print("Scrapy %s - project: %s\n" % (scrapy.__version__, \
            settings['BOT_NAME']))
    else:
        print("Scrapy %s - no active project\n" % scrapy.__version__)

def _print_commands(settings, inproject):
    _print_header(settings, inproject)
    print("Usage:")
    print("  scrapy <command> [options] [args]\n")
    print("Available commands:")
    cmds = _get_commands_dict(settings, inproject)
    for cmdname, cmdclass in sorted(cmds.items()):
        print("  %-13s %s" % (cmdname, cmdclass.short_desc()))
    if not inproject:
        print()
        print("  [ more ]      More commands available when run from project directory")
    print()
    print('Use "scrapy <command> -h" to see more info about a command')

def _print_unknown_command(settings, cmdname, inproject):
    _print_header(settings, inproject)
    print("Unknown command: %s\n" % cmdname)
    print('Use "scrapy" to see available commands')

def _run_print_help(parser, func, *a, **kw):
    try:
        func(*a, **kw)
    except UsageError as e:
        if str(e):
            parser.error(str(e))
        if e.print_help:
            parser.print_help()
        sys.exit(2)

def execute(argv=None, settings=None):
    if argv is None:
        argv = sys.argv

    # --- backwards compatibility for scrapy.conf.settings singleton ---
    if settings is None and 'scrapy.conf' in sys.modules:
        from scrapy import conf
        if hasattr(conf, 'settings'):
            settings = conf.settings
    # ------------------------------------------------------------------

    if settings is None:
        settings = get_project_settings()
        # set EDITOR from environment if available
        try:
            editor = os.environ['EDITOR']
        except KeyError: pass
        else:
            settings['EDITOR'] = editor
    check_deprecated_settings(settings)

    # --- backwards compatibility for scrapy.conf.settings singleton ---
    import warnings
    from scrapy.exceptions import ScrapyDeprecationWarning
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ScrapyDeprecationWarning)
        from scrapy import conf
        conf.settings = settings
    # ------------------------------------------------------------------

    inproject = inside_project()
    cmds = _get_commands_dict(settings, inproject)
    cmdname = _pop_command_name(argv)
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
        conflict_handler='resolve')
    if not cmdname:
        _print_commands(settings, inproject)
        sys.exit(0)
    elif cmdname not in cmds:
        _print_unknown_command(settings, cmdname, inproject)
        sys.exit(2)

    cmd = cmds[cmdname]
    parser.usage = "scrapy %s %s" % (cmdname, cmd.syntax())
    parser.description = cmd.long_desc()
    settings.setdict(cmd.default_settings, priority='command')
    cmd.settings = settings
    cmd.add_options(parser)
    opts, args = parser.parse_args(args=argv[1:])
    _run_print_help(parser, cmd.process_options, args, opts)

    cmd.crawler_process = CrawlerProcess(settings)
    _run_print_help(parser, _run_command, cmd, args, opts)
    sys.exit(cmd.exitcode)

def _run_command(cmd, args, opts):
    if opts.profile:
        _run_command_profiled(cmd, args, opts)
    else:
        cmd.run(args, opts)

def _run_command_profiled(cmd, args, opts):
    if opts.profile:
        sys.stderr.write("scrapy: writing cProfile stats to %r\n" % opts.profile)
    loc = locals()
    p = cProfile.Profile()
    p.runctx('cmd.run(args, opts)', globals(), loc)
    if opts.profile:
        p.dump_stats(opts.profile)

if __name__ == '__main__':
    try:
        execute()
    finally:
        # Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect()
        # on exit: http://doc.pypy.org/en/latest/cpython_differences.html?highlight=gc.collect#differences-related-to-garbage-collection-strategies
        garbage_collect()
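Since execute() only calls check_deprecated_settings(settings) after falling back to get_project_settings(), one thing I can try is checking what get_project_settings() actually returns inside this environment (my own debugging idea, run from the project directory):

# check_settings.py - quick check of what get_project_settings() returns here
from scrapy.utils.project import get_project_settings

settings = get_project_settings()
print(type(settings))            # expected: <class 'scrapy.settings.Settings'>
print(settings.get('BOT_NAME'))  # expected: 'zalando' when run inside the project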


