使用 BS4 抓取网站 // 按 class 访问元素

from bs4 import BeautifulSoup
import requests
import csv

# Example: fetch a single product page and try to read its price and title.
url = 'https://www.mediamarkt.ch/fr/product/_lg-oled65gx6la-1991479.html'
html_content = requests.get(url).text
soup = BeautifulSoup(html_content, "lxml")

# The price is taken from the <meta property="product:price:amount"> tag.
price = soup.find('meta', property="product:price:amount")

# Three spellings of the same lookup: a <div> whose class is "flix-model-name".
title = soup.find("div", {"class": "flix-model-name"})
title2 = soup.find('div', class_="flix-model-name")
title3 = soup.find("div", attrs={"class": "flix-model-name"})

# find() returns None when nothing matches; subscripting None would raise
# TypeError, so print None instead, just like the title lookups below do.
print(price['content'] if price is not None else None)
print(title)
print(title2)
print(title3)

# Scrape every product page in `urls` and collect one "title,price" line per
# page in `data`. Both names are expected to be defined before this snippet
# (they are not visible here).
for url in urls:
    html_content = requests.get(url).text
    soup = BeautifulSoup(html_content, "lxml")
    try:
        # title = YOUR VERY WELCOMED ANSWER
        # (placeholder from the question: the per-page title lookup goes here)
        prices = soup.find('meta', property="product:price:amount")
        row = (title.text+','+prices['content']+'\n')
    except (AttributeError, TypeError, KeyError):
        # A lookup returned None or the tag has no "content" attribute:
        # skip this page. Unrelated errors are no longer silently swallowed.
        continue
    data.append(row)

# The context manager closes the file even if a write fails.
with open('database.csv', 'w') as file:
    file.writelines(data)

1条回答

网友

1楼 · 发布于 2024-09-28 15:23:28

可以尝试下面使用 Python requests 库的方法——在发送请求方面，它简单、直接、可靠、快速，而且代码量更少。我在谷歌 Chrome 浏览器开发者工具的“网络（Network）”面板中检查之后，从网站本身找到了这个 API URL。

下面的脚本到底在做什么：

首先，它基于 2 个动态参数（产品编号和类别编号）拼接出 API URL，然后执行 GET 请求以获取数据。

获取数据后，脚本使用 json.loads 函数解析 JSON 数据。最后，它逐一遍历两个产品列表（box1_ProductToProduct 和 box2_KategorieTopseller），并打印每个产品的详细信息，如品牌、名称、产品编号和单价。同样，您可以通过查看 API 返回的内容来添加更多字段。

import json
import requests
from urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

def _print_products(items):
    """Print brand, name, net unit price and product number of each item."""
    for item in items:
        print('-' * 100)
        print('Brand : ', item['brand'])
        print('Name : ', item['name'])
        print('Net Unit Price : ', item['netUnitPrice'])
        print('Product Number : ', item['product_nr'])
        print('-' * 100)


def scrap_product_details(product='MMCH1991479', category='680942', timeout=None):
    """Fetch the recommendation data for a product and print both product lists.

    Args:
        product: Product number placed in the ``product`` query parameter.
        category: Category number placed in the ``category`` query parameter.
        timeout: Passed through to ``requests.get``; ``None`` keeps the
            original behaviour of waiting without a limit.

    Raises:
        Whatever ``requests.get`` or ``json.loads`` raise, plus
        ``KeyError``/``IndexError`` if the response does not contain the
        expected ``box1_ProductToProduct`` / ``box2_KategorieTopseller`` keys.
    """
    url = (
        'https://www.mediamarkt.ch/rde_server/res/MMCH/recomm/product_detail'
        '/sid/WACXyEbIf3khlu6FcHlh1B1?product=' + product
        + '&category=' + category
    )  # dynamic URL

    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept because the original relies on it; confirm whether it is needed.
    response = requests.get(url, verify=False, timeout=timeout)
    result = json.loads(response.text)

    # Extract both lists before printing so a malformed response fails
    # before any output is produced.
    box1_product_to_product = result[0]['box1_ProductToProduct']
    box2_kategorie_topseller = result[1]['box2_KategorieTopseller']

    _print_products(box1_product_to_product)
    _print_products(box2_kategorie_topseller)


scrap_product_details()

相关问题更多 >

编程相关推荐

热门问题

热门文章