Scrapy 爬虫：使用 XPath 提取图片 URL

# -*- coding: utf-8 -*-
"""Scrapy spider that queries the LKQ Pick Your Part vehicle inventory.

The search keyword is read from standard input when the module is loaded
and substituted into the inventory URL used as the spider's start URL.
"""
from __future__ import unicode_literals

import scrapy
from scrapy.shell import inspect_response
from scrapy.utils.response import open_in_browser

# Prompted once at import time; the value becomes the ``filter`` query
# parameter of the inventory request below.
keyword = raw_input('Keyword: ')

url = (
    'http://www.lkqpickyourpart.com/DesktopModules/pyp_vehicleInventory/'
    'getVehicleInventory.aspx?store=224&page=0&filter=%s&sp=&cl=&'
    'carbuyYardCode=1224&pageSize=1000&language=en-US'
) % (keyword,)


class Cars(scrapy.Item):
    """Item holding the fields collected for one inventory entry."""

    Make = scrapy.Field()
    Model = scrapy.Field()
    Year = scrapy.Field()
    Entered_Yard = scrapy.Field()
    Section = scrapy.Field()
    Color = scrapy.Field()
    Picture = scrapy.Field()


class LkqSpider(scrapy.Spider):
    """Spider that requests the inventory URL and yields ``Cars`` items."""

    name = "lkq"
    allowed_domains = ["lkqpickyourpart.com"]
    start_urls = (url,)

    def parse(self, response):
        """Yield one ``Cars`` item per group of four extracted cell texts.

        Three flat lists are extracted from the response. ``info`` is
        walked in steps of four (make, model, year, entered-yard), and for
        every group two entries are consumed from ``section_color`` and one
        from ``picture``.
        """
        # NOTE(review): this is the expression the question is about; it
        # presumably does not select image URLs -- confirm before relying
        # on the "Picture" field.
        picture = response.xpath('//href=/text()').extract()
        section_color = response.xpath(
            '//div[@class="pypvi_notes"]/p/text()').extract()
        # NOTE(review): the predicate is a bare string literal rather than
        # an @class comparison -- confirm this matches the intended cells.
        info = response.xpath('//td["pypvi_make"]/text()').extract()

        for start in range(0, len(info), 4):
            item = Cars()
            item["Make"] = info[start]
            item["Model"] = info[start + 1]
            item["Year"] = info[start + 2]
            item["Entered_Yard"] = info[start + 3]
            # Section and color texts are taken from the front of the same
            # list, section first, with their label prefixes removed.
            item["Section"] = section_color.pop(0).replace(
                "Section:", "").strip()
            item["Color"] = section_color.pop(0).replace(
                "Color:", "").strip()
            item["Picture"] = picture.pop(0).strip()
            yield item

1条回答

网友

1楼 · 发布于 2024-06-26 01:33:46

我真的不明白您为什么要使用类似于 '//href=/text()' 的 xpath，我建议您先阅读一些 xpath 教程，这里有一个非常好的教程。

如果你想得到所有的图片网址，我想这是你想要的

# Select the src attribute of every <img> element in the response;
# .extract() returns the matches as a list.
pictures = response.xpath('//img/@src').extract()

现在，picture.pop(0).strip() 只会取出第一个 url 并对其执行 strip；请记住，.extract() 返回的是一个列表，所以 pictures 现在包含了所有的图片链接，只需从中选择您需要的链接即可。

相关问题更多 >

编程相关推荐

热门问题

热门文章