如何使用“if”处理两个或更多xpath？

import datetime

import scrapy


class ProductsSpider(scrapy.Spider):
    """Crawl an Amazon Brazil listing and yield one record per product page."""

    name = 'products'
    allowed_domains = ['www.amazon.com.br']
    start_urls = [
        'https://www.amazon.com.br/s?i=computers&bbn=16339926011&rh=n%3A16364756011&fs=true&qid=1615634908&ref=sr_pg_1',
    ]

    def parse(self, response):
        """Follow every product link on a listing page, then the next page.

        The badge text ("selo") is only available on the listing page, so it
        is forwarded to ``parse_details`` through the request ``meta``.
        """
        for produto in response.xpath("//div[@class='a-section a-spacing-medium']"):
            href = produto.xpath(".//h2/a/@href").get()
            if href is None:
                # Joining a missing href resolves to the base URL (normally
                # the listing page itself), which would then be scraped as if
                # it were a product page. Skip cards without a link instead.
                continue
            selo = produto.xpath(".//span[@class='a-badge-text']/text()").get()
            yield response.follow(
                url=response.urljoin(href),
                callback=self.parse_details,
                meta={'selo': selo},
            )

        # Check the raw href *before* joining: urljoin() of a missing href
        # still returns a non-empty URL, so testing the joined value can
        # never detect the last page.
        next_href = response.xpath("//li[@class='a-last']/a/@href").get()
        if next_href:
            yield scrapy.Request(url=response.urljoin(next_href), callback=self.parse)

    def parse_details(self, response):
        """Yield a dict with the scraped fields of a product detail page.

        ``vlr_atual`` is taken from the ``priceblock_ourprice`` span and falls
        back to ``priceblock_saleprice`` when the first one is absent.
        """
        selo = response.request.meta['selo']
        for produto in response.xpath("//div[@id='dp']"):
            vlr_atual = produto.xpath(".//span[@id='priceblock_ourprice']/text()").get()
            if vlr_atual is None:
                vlr_atual = produto.xpath(".//span[@id='priceblock_saleprice']/text()").get()
            yield {
                'data': datetime.datetime.now().strftime("%Y%m%d"),
                'selo': selo,
                'nome': produto.xpath("normalize-space(.//span[@id='productTitle']/text())").get(),
                'vlr_atual': vlr_atual,
                'estoque': produto.xpath("normalize-space(.//select[@name='quantity']/option[last()]/text())").get(),
                'ean': produto.xpath("normalize-space(.//table[@id='productDetails_techSpec_section_1']//tr[last()]/td/text())").get(),
            }

3条回答

网友

1楼 · 编辑于 2024-10-03 19:21:18

您可以使用 or 运算符在两个值之间进行选择，并优先选择第一个值

>>> a="www.example.com"
>>> b="www.example2.com"
>>> a or b
'www.example.com'
>>> a=None
>>> a or b
'www.example2.com'
>>>

其工作原理是：如果 a 的真值（truth value）为真，则 a or b 返回 a，否则返回 b（注意：空字符串和 None 一样被视为假）

所以你可以

product.xpath (".//span[@id='priceblock_ourprice']/text()").get() or product.xpath (".//span[@id='priceblock_saleprice']/text()").get()

编辑

您还可以将其封装到自己的函数中，如下所示

def get_vlr_atual(product, default=None):
    """Return the first price text found on *product*, or *default*.

    The candidate XPaths are tried in order of preference; the first one
    whose ``.get()`` result is not ``None`` wins. To support more price
    locations, append their XPaths to the tuple below.

    Args:
        product: any object exposing ``xpath(query).get()``.
        default: value returned when none of the XPaths matches.
    """
    xpaths = (
        ".//span[@id='priceblock_ourprice']/text()",
        ".//span[@id='priceblock_saleprice']/text()",
    )
    for path in xpaths:
        result = product.xpath(path).get()
        if result is not None:
            return result
    return default

这与以前基本相同，但可以轻松地扩展到任意多个xpath，如果所有这些都失败，只需返回一些方便的默认值

使用起来也同样简单：

...
{...
  'vlr_atual': get_vlr_atual(product),
 ...
 }
...

网友

2楼 · 编辑于 2024-10-03 19:21:18

我强烈建议您使用Item Loaders。这样您就可以在同一个位置集中处理所选字段：获取第一个非空值、连接多个结果等。首先在items.py中定义ProductItem，并为vlr_atual字段指定TakeFirst输出处理器：

class ProductItem(scrapy.Item):
    """Item holding the fields collected for one product."""

    # NOTE(review): TakeFirst is not imported in this snippet; it presumably
    # comes from the item-loader processors module -- confirm in items.py.
    data = scrapy.Field()
    selo = scrapy.Field()
    # Several XPaths may feed this field; the output processor is TakeFirst.
    vlr_atual = scrapy.Field(output_processor=TakeFirst())

接下来在你的爬虫（spider）中使用它：

from scrapy.loader import ItemLoader
....

# Build one ProductItem per product container through an ItemLoader.
for produto in response.xpath("//div[@id='dp']"):
    # XPaths passed to add_xpath are evaluated relative to `produto`.
    l = ItemLoader(item=ProductItem(), selector=produto)
    l.add_value('data', datetime.datetime.now().strftime("%Y%m%d"))
    # Both XPaths feed the same field, preferred one first; the field's
    # TakeFirst output processor is what picks a single value from them.
    l.add_xpath("vlr_atual", ".//span[@id='priceblock_ourprice']/text()")
    l.add_xpath("vlr_atual", ".//span[@id='priceblock_saleprice']/text()")
    ...
    # NOTE(review): the loaded item is discarded here; inside a spider
    # callback this presumably needs to be `yield l.load_item()` -- confirm.
    l.load_item()

网友

3楼 · 编辑于 2024-10-03 19:21:18

简单一点怎么样：

    def parse_details(self, response):
        """Yield one dict per ``//div[@id='dp']`` node found in *response*.

        The badge text is read from the request ``meta`` under ``'selo'``.
        ``vlr_atual`` is the ``priceblock_ourprice`` text when present,
        otherwise the ``priceblock_saleprice`` text.
        """
        badge = response.request.meta['selo']
        for block in response.xpath("//div[@id='dp']"):

            def first(query, node=block):
                # First match of `query` relative to this product node.
                return node.xpath(query).get()

            # Both candidates are looked up; the regular price wins if present.
            regular = first(".//span[@id='priceblock_ourprice']/text()")
            promo = first(".//span[@id='priceblock_saleprice']/text()")

            record = {'data': datetime.datetime.now().strftime("%Y%m%d")}
            record['selo'] = badge
            record['nome'] = first("normalize-space(.//span[@id='productTitle']/text())")
            record['vlr_atual'] = promo if regular is None else regular
            record['estoque'] = first("normalize-space(.//select[@name='quantity']/option[last()]/text())")
            record['ean'] = first("normalize-space(.//table[@id='productDetails_techSpec_section_1']//tr[last()]/td/text())")
            yield record

相关问题更多 >

编程相关推荐

热门问题

热门文章