Scrapy不会以我自己的模式显示每个链接的数据

#!/usr/bin/python from twisted.internet import reactor import scrapy from scrapy.crawler import CrawlerRunner from scrapy.utils.log import configure_logging from scrapy.selector import Selector from lxml import html class GivenSpider(scrapy.Spider): name = "dmoz" allowed_domains = ["dmoz.org"] start_urls = [ "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/", "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/" ] def parse(self, response): # filename = response.url.split("/")[-2] + '.html' select = Selector(response) urls=GivenSpider() u=urls.start_urls titles = select.xpath('//a[@class="listinglink"]/text()').extract() print ' [*] Start crawling at %s ' % u for title in titles: print '\t %s' % title #configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'}) runner = CrawlerRunner() d = runner.crawl(GivenSpider) d.addBoth(lambda _: reactor.stop()) reactor.run()

[*] Start crawling at ['http://www.dmoz.org/Computers/Programming/Languages/Python/Books/', 'http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/'] eff-bot's Daily Python URL O'Reilly Python Center Python Developer's Guide Social Bug [*] Start crawling at ['http://www.dmoz.org/Computers/Programming/Languages/Python/Books/', 'http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/'] Core Python Programming Data Structures and Algorithms with Object-Oriented Design Patterns in Python Dive Into Python 3 Foundations of Python Network Programming Free Python books FreeTechBooks: Python Scripting Language How to Think Like a Computer Scientist: Learning with Python An Introduction to Python Learn to Program Using Python Making Use of Python Practical Python Pro Python System Administration Programming in Python 3 (Second Edition) Python 2.1 Bible Python 3 Object Oriented Programming Python Language Reference Manual Python Programming Patterns Python Programming with the Java Class Libraries: A Tutorial for Building Web and Enterprise Applications with Jython Python: Visual QuickStart Guide Sams Teach Yourself Python in 24 Hours Text Processing in Python XML Processing with Python

[*] Start crawling at http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/ eff-bot's Daily Python URL O'Reilly Python Center Python Developer's Guide Social Bug [*] Start crawling at http://www.dmoz.org/Computers/Programming/Languages/Python/Books/ Core Python Programming Data Structures and Algorithms with Object-Oriented Design Patterns in Python Dive Into Python 3 Foundations of Python Network Programming Free Python books FreeTechBooks: Python Scripting Language How to Think Like a Computer Scientist: Learning with Python An Introduction to Python Learn to Program Using Python Making Use of Python Practical Python Pro Python System Administration Programming in Python 3 (Second Edition) Python 2.1 Bible Python 3 Object Oriented Programming Python Language Reference Manual Python Programming Patterns Python Programming with the Java Class Libraries: A Tutorial for Building Web and Enterprise Applications with Jython Python: Visual QuickStart Guide Sams Teach Yourself Python in 24 Hours Text Processing in Python XML Processing with Python

1条回答

网友

1楼 · 发布于 2024-05-17 02:34:45

此信息位于response：

print ' [*] Start crawling at %s ' % response.url

检查documentation中的示例。你知道吗

相关问题更多 >

编程相关推荐

热门问题

热门文章