其想法是从主页中删除名称和项目数,并从链接页面删除评级,但结果显示多个重复的结果
import scrapy
from ..items import ReatutorialItem
class ReaspiderSpider(scrapy.Spider):
name = 'REAspider'
page_number = 2
start_urls = ['website/page1']
def parse(self, response):
urls = response.xpath("//li/a[@class='csweblead']/@href").extract()
for url in urls:
url = response.urljoin(url)
yield scrapy.Request(url=url, callback=self.parse)
items = ReatutorialItem()
name = response.css('.BlueBold14-1::text').extract()
price = response.css('.cslistinglead::text').extract()
rating= response.css('.bodytext:nth-child(3)::text').extract()
items ['name'] = name
items ['price'] = price
items ['rating'] = rating
yield items
next_page ='website'+ str(ReaspiderSpider.page_number) +'website'
if ReaspiderSpider.page_number <= 7:
ReaspiderSpider.page_number +=1
yield response.follow (next_page, callback = self.parse)
目前没有回答
相关问题 更多 >
编程相关推荐