我无法爬取这个网站吗?公司名称:
我尝试了一段非常简单的代码,想看看能否从网站上抓取到一些内容,但无论怎么尝试,都一无所获。
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import Selector
from scrapy.log import *
from vacatures.settings import *
from vacatures.items import *
from scrapy.http import Request
class VacaturesSpider(CrawlSpider):
    """Exploratory spider that prints heading text from an itbanen.nl listing.

    The spider requests the single URL in ``start_urls`` and, for each
    response, prints the results of two XPath queries. It yields no items
    and no follow-up requests; it only exists to inspect what the selectors
    return.
    """

    name = 'vacatures_spider'
    allowed_domains = ['www.itbanen.nl']
    start_urls = ['http://www.itbanen.nl/vacature/zoeken/overzicht/wijzigingsdatum/query//distance/30/output/html/items_per_page/15/page/1/ignore_ids']

    # NOTE(review): Scrapy's documentation warns against overriding ``parse``
    # on a CrawlSpider because the base class uses it to apply ``rules``.
    # No ``rules`` are declared in this class, so the override is kept to
    # preserve the existing entry point -- confirm before adding any rules
    # (the documented hook for start URLs is ``parse_start_url``).
    def parse(self, response):
        """Log the response URL and print the text matched by two XPaths.

        Args:
            response: the response object handed over by Scrapy.

        Returns:
            None. Results are only written to stdout / the spider log.
        """
        self.log('Nieuwe pagina! %s' % response.url)
        sel = Selector(response)

        # Text nodes of the <h2> headings nested in the result list markup.
        test = sel.xpath('//div[@id="resultlist"]/div[@class="resultlist"]/h2/text()').extract()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(test)

        # Kept as a selector list (not extracted) so each match can be
        # extracted and printed individually below.
        links = sel.xpath('//div[@class="container"]/h2/text()')
        print(links)
        for link in links:
            link_item = link.extract()
            print(link_item)
            # TODO: follow the detail page once the selectors return data:
            #   yield Request(complete_url(link_item), callback=self.parse_category)
我用了scrapy shell并尝试了一下
所以我想您的请求生成应该类似于:
^{pr2}$相关问题 更多 >
编程相关推荐