如何在scrapy的itemloader中传递列表?

2024-10-06 10:31:09 发布

您现在位置:Python中文网/ 问答频道 /正文

我很难获得图像的网址。如何在ItemLoader中获取image_url?如果我直接yield一个包含image_url的字典(不使用ItemLoader),效果会很好。

def parse_属性(self, response):
    """Build a ``PropertiesItem`` from a single property detail page.

    All fields are populated through an ``ItemLoader`` bound to *response*:
    the page URL, the text fields selected by XPath, the list of image URLs
    found on the page, and the raw price text.

    Args:
        response: The response object for the property detail page.

    Returns:
        The item produced by ``ItemLoader.load_item()``.
    """
    loader = ItemLoader(item=PropertiesItem(), response=response)
    loader.add_value('url', response.url)

    # The XPath has to be one unbroken string literal: splitting it across
    # lines is a syntax error and would also put whitespace into the class
    # name being matched.
    loader.add_xpath(
        'title',
        '//div[@class="property-title"]/h1/text()',
        MapCompose(str.strip, str.title),
    )

    # (field name, XPath) pairs whose extracted text only needs stripping.
    stripped_fields = (
        ('offering', '//span[@class="property-badge"]/text()'),
        ('area', '//ul[@class="property-main-features"]/li[contains(text(), "Area")]/span/text()'),
        ('rooms', '//ul[@class="property-main-features"]/li[contains(text(), "Rooms")]/span/text()'),
        ('bed_room', '//ul[@class="property-main-features"]/li[contains(text(), "Bedroom")]/span/text()'),
        ('bath_room', '//ul[@class="property-main-features"]/li[contains(text(), "Bathroom")]/span/text()'),
        ('ground_floor', '//*/strong[contains(text(), "Ground Floor")]/following-sibling::text()'),
        ('first_floor', '//*/strong[contains(text(), "1st Floor")]/following-sibling::text()'),
        ('top_floor', '//*/strong[contains(text(), "Top Floor")]/following-sibling::text()'),
        ('facing', '//div[@class="additional-details"]/p[contains(text(), "Facing")]/text()'),
        ('location', '//div[@class="additional-details"]/p[contains(text(), "Location")]/text()'),
        ('building_age', '//div[@class="additional-details"]/p[contains(text(), "Building Age")]/span/text()'),
        ('parking', '//div[@class="additional-details"]/p[contains(text(), "Parking")]/text()'),
        ('floors', '//div[@class="additional-details"]/p[contains(text(), "floors")]/text()'),
        ('area1', '//div[@class="additional-details"]/p[contains(text(), "Area")]/text()'),
    )
    for field_name, xpath in stripped_fields:
        loader.add_xpath(field_name, xpath, MapCompose(str.strip))

    # TODO: the 'description' field is not populated yet.

    # Prefix each image src with the site origin, drop the query string, and
    # keep only the URLs that contain 'original'.
    sources = response.xpath('//div[@class="col-md-12"]/descendant::img/@src').extract()
    absolute_urls = ["https://www.epropertynepal.com" + src for src in sources]
    without_query = [link.split('?')[0] for link in absolute_urls]
    image_url = [link for link in without_query if 'original' in link]

    # Pass the local list straight to the loader; add_value accepts a list.
    # NOTE(review): assumes PropertiesItem declares an 'image_url' field — confirm.
    loader.add_value('image_url', image_url)

    loader.add_xpath('price', '//div[@class="property-pricing"]/div[1]/text()')

    return loader.load_item()

Tags: text image div add url response property description
1条回答
网友
1楼 · 发布于 2024-10-06 10:31:09
# Drop the query string from every URL yielded by `image`.
images=[i.split('?')[0] for i in image]
# Keep only the URLs that contain 'original'.
image_url=[x for x in images if 'original' in x]

# Pass the whole list to the loader in a single add_value call.
# NOTE(review): the field here is 'images' while the question uses 'image_url' — confirm which field the item declares.
l.add_value('images', image_url)

这样就可以正常工作了。

相关问题 更多 >