我正在尝试爬取包含特定关键字的页面,然后保存包含该关键字的页面,或者至少保存指向该页面的URL。我试过用下面这段代码,但没有效果。这有可能实现吗?有什么思路吗?
from scrapy.contrib.spiders import CrawlSpider,Rule
import scrapy
import requests
from scrapy.http import Request
from scrapy import signals, Spider
from scrapy.xlib.pydispatch import dispatcher
from scrapy.selector import Selector
import scrapy
from FinalSpider.items import Page # Defined in items.py
class FinalSpider(CrawlSpider):
    """Follow every link whose anchor text is exactly "100.00" and save
    each linked page to disk as an HTML file, also emitting a Page item
    carrying the page's URL.

    NOTE(review): 'url' / 'url.com' look like placeholders from the
    original question — replace them with the real domain before running.
    """
    name = "FinalSpider"
    allowed_domains = ['url']
    # Scrapy requires absolute start URLs with a scheme; the original bare
    # 'url.com/=%d' entries would be rejected at startup.
    start_urls = ['http://url.com/=%d' % n for n in range(0, 20)]

    def parse(self, response):
        """Extract hrefs of <a> elements whose text is "100.00" and follow them."""
        for link in response.xpath('//a[text()="100.00"]/@href').extract():
            # urljoin resolves relative hrefs against the current page; the
            # original passed raw hrefs to Request, which fails for
            # relative links.
            yield Request(url=response.urljoin(link), callback=self.parse_link)

    def parse_link(self, response):
        """Save the fetched page body to a file and yield a Page item.

        The original used response.url.split("/")[2], which for an absolute
        URL is always the hostname — so every crawled page overwrote the
        same '<host>.html' file. Build a unique, filesystem-safe name from
        the full URL instead.
        """
        filename = response.url.replace(':', '_').replace('/', '_') + '.html'
        with open(filename, 'wb') as f:
            f.write(response.body)
        # Page is imported from FinalSpider.items but was never used in the
        # original; yielding it records the matching URL through the item
        # pipeline, which is what the question asks for.
        yield Page(url=response.url)
这是我的 items.py 代码:
import scrapy


class Page(scrapy.Item):
    """Item carrying the URL of a page that matched the crawl criteria."""

    # Populated by the spider with response.url for each saved page.
    url = scrapy.Field()
目前没有回答
相关问题 更多 >
编程相关推荐