从 HTTP 请求响应中提取 JSON

class quoteSpider(scrapy.Spider):
    """Spider that follows the start page with a request to its AJAX endpoint.

    The follow-up request carries a fixed set of browser-like headers, and
    the response to it is handed to ``parse_detail``.
    """

    name = 'Practice'
    start_urls = ['https://scrapingclub.com/exercise/detail_header/']

    def parse(self, response):
        """Schedule the AJAX detail request.

        The start-page ``response`` is not inspected; it only triggers the
        follow-up request.
        """
        # Header set sent with the AJAX request, including the
        # X-Requested-With marker and a fixed Cookie value.
        ajax_headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'es-ES,es;q=0.9,pt;q=0.8',
            'Connection': 'keep-alive',
            'Cookie': '__cfduid=da54d7e9c59cf35860825eabc96d7f1c41612805624; _ga=GA1.2.1229230175.1612805628; _gid=GA1.2.205529574.1613135874',
            'Host': 'scrapingclub.com',
            'Referer': 'https://scrapingclub.com/exercise/detail_header/',
            'sec-ch-ua': '"Chromium";v="88", "Google Chrome";v="88", ";Not A Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
        }
        yield scrapy.Request(
            'https://scrapingclub.com/exercise/ajaxdetail_header/',
            callback=self.parse_detail,
            headers=ajax_headers,
        )

    def parse_detail(self, response):
        """Yield the response object itself, wrapped in a one-key dict.

        Debugging state: the item is instantiated but never filled in.
        Decoding ``response.text`` with ``json.loads`` and copying
        ``title`` / ``description`` / ``price`` into the item's
        ``product_name`` / ``detail`` / ``price`` fields is not enabled yet.
        """
        product = ProductClass()  # created but intentionally left unpopulated
        raw_response = response
        yield {'value': raw_response}

1条回答

网友

1楼 · 发布于 2024-06-28 20:04:53

修改请求头(headers)数据以获得预期的输出

class quoteSpider(scrapy.Spider):
    """Spider that fetches product details from the site's AJAX endpoint.

    After the start page is downloaded, ``parse`` requests the AJAX URL with
    browser-like headers; ``parse_detail`` decodes the JSON body of that
    response and yields the product fields.
    """

    name = 'Practice'

    start_urls = ['https://scrapingclub.com/exercise/detail_header/']

    def parse(self, response):
        """Schedule the AJAX detail request.

        The start-page ``response`` is not inspected; it only triggers the
        follow-up request.
        """
        # NOTE(review): the cookie value is a fixed string, presumably
        # captured from a browser session -- confirm it is still accepted.
        headers = {
            'authority': 'scrapingclub.com',
            'accept': '*/*',
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
            'x-requested-with': 'XMLHttpRequest',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-mode': 'cors',
            'sec-fetch-dest': 'empty',
            'referer': 'https://scrapingclub.com/exercise/detail_header/',
            'accept-language': 'en-US,en;q=0.9',
            'cookie': '__cfduid=d69d9664405f96c6477078a5c1fa78bb41613195439; _ga=GA1.2.523835360.1613195440; _gid=GA1.2.1763722170.1613195440',
        }

        yield scrapy.Request(
            'https://scrapingclub.com/exercise/ajaxdetail_header/',
            callback=self.parse_detail,
            headers=headers,
        )

    def parse_detail(self, response):
        """Decode the JSON body of ``response`` and yield a product dict.

        Yields:
            A dict with keys ``product_name``, ``detail`` and ``price``,
            taken from the payload's ``title``, ``description`` and
            ``price`` entries.

        Raises:
            json.JSONDecodeError: if the response body is not valid JSON.
            KeyError: if any of the three expected entries is missing.
        """
        data = json.loads(response.text)

        product = {
            'product_name': data['title'],
            'detail': data['description'],
            'price': data['price'],
        }
        yield product

相关问题更多 >

编程相关推荐

热门问题

热门文章