我正试图为某家公司的每个销售类别收集每种商品的所有产品信息。以下是我正在使用的URL:
https://www.lushusa.com/bath-shower/
我已经提取了每个产品可见的所有数据。我有两个问题:
以下是我目前的代码,以及它生成的字典:
import requests as req
from bs4 import BeautifulSoup
import pandas as pd
import json
#Need to extract:
#Product Name
#Product Category
#Seasonal/Bestseller/Cult Classic
#Vegan/Not
#Sizes/Weights
#Prices (can't get last two from dataanalytics--check dropdown instead)
def listitems_to_strings(lst):
    """Return a list holding the ``str()`` form of every item in *lst*.

    Args:
        lst: Any iterable of objects; each one is converted with ``str()``.

    Returns:
        A new list of strings, in the same order as the input.
    """
    # Iterate directly instead of indexing so any iterable is accepted.
    return [str(item) for item in lst]
def create_dict(scrape_list):
    """Decode the ``data-impressions`` JSON carried by each HTML snippet.

    Args:
        scrape_list: Iterable of HTML strings. Each one is parsed and searched
            for a ``<div class="impressions">`` element.

    Returns:
        A list with one decoded JSON value per snippet that carried a
        ``data-impressions`` attribute, in input order.

    Raises:
        json.JSONDecodeError: If an attribute value is not valid JSON.
    """
    scrape_data = []
    for html_item in scrape_list:
        tag = BeautifulSoup(html_item, "html.parser").find(
            "div", {"class": "impressions"}
        )
        # find() yields None when nothing matches, and the attribute itself
        # may be missing; skip such snippets instead of aborting the scrape.
        payload = tag.get("data-impressions") if tag is not None else None
        if payload is None:
            continue
        scrape_data.append(json.loads(payload))
    return scrape_data
def html_to_dict(url, timeout=30):
    """Fetch *url* and return the decoded ``data-impressions`` payloads.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Passed to
            ``requests.get`` so a stalled connection cannot hang forever.

    Returns:
        The list built by ``create_dict`` from every element matching the
        ``.impressions`` CSS selector on the page.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a 4xx/5xx response status.
    """
    response = req.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    html_soup = BeautifulSoup(response.content, "html.parser")
    scrape = listitems_to_strings(html_soup.select(".impressions"))
    return create_dict(scrape)
# Scrape the bath & shower category page and print the decoded records.
# NOTE: despite its name, `url` holds the list returned by html_to_dict,
# not an address.
url = html_to_dict('https://www.lushusa.com/bath-shower/')
print(url)
结果:
[{'id': '00232', 'name': 'Ocean Salt', 'price': 21.95, 'brand': 'Lush', 'category': 'Face And Body Scrub', 'variant': '4.2 oz.', 'quantity': 1, 'list': '/face/cleansers-scrubs/ocean-salt/9999902128.html', 'dimension11': 'Cult classics badge', 'dimension12': '', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '00157', 'name': 'Sex Bomb', 'price': 7.95, 'brand': 'Lush', 'category': 'Bath Bombs', 'variant': '6.3 oz.', 'quantity': 1, 'list': '/bath/bath-bombs/sex-bomb/9999900157.html', 'dimension11': '', 'dimension12': 'Naked,Self Preserving,Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '07188', 'name': 'Scrubee', 'price': 9.25, 'brand': 'Lush', 'category': 'Body Butter', 'variant': '3.1 oz.', 'quantity': 1, 'list': '/shower/body-butters-conditioners/scrubee/9999907188.html', 'dimension11': 'Cult classics badge', 'dimension12': 'Naked,Melts', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '02223', 'name': 'I Want A Hippopotamus For Christmas', 'price': 5.95, 'brand': 'Lush', 'category': 'Bath Bomb', 'variant': '3.1 oz.', 'quantity': 1, 'list': '/bath/bath-bombs/i-want-a-hippopotamus-for-christmas/9999902223.html', 'dimension11': 'Christmas Badge', 'dimension12': 'Naked,Self Preserving,Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '02218', 'name': 'Snowman Dreaming', 'price': 5.95, 'brand': 'Lush', 'category': 'Bath Bomb', 'variant': '3.1 oz.', 'quantity': 1, 'list': '/bath/bath-bombs/snowman-dreaming/9999902218.html', 'dimension11': 'Christmas Badge', 'dimension12': 'Naked,Self Preserving,Vegan', 'dimension13': 0, 'dimension14': 1, 'dimension15': True}, {'id': '03157', 'name': 'Twilight', 'price': 7.95, 'brand': 'Lush', 'category': 'Bath Bomb', 'variant': '6.3 oz.', 'quantity': 1, 'list': '/bath/bath-bombs/twilight/9999903157.html', 'dimension11': 'Bestseller badge', 'dimension12': 'Naked,Self Preserving,Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': 
'02217', 'name': 'Winter Garden', 'price': 5.95, 'brand': 'Lush', 'category': 'Bath Bomb', 'variant': '3.1 oz.', 'quantity': 1, 'list': '/bath/bath-bombs/winter-garden/9999902217.html', 'dimension11': 'Christmas Badge', 'dimension12': 'Naked,Self Preserving,Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '09208', 'name': 'Deep Sleep', 'price': 8.95, 'brand': 'Lush', 'category': 'Bath Bomb', 'variant': '7 oz.', 'quantity': 1, 'list': '/bath/bath-bombs/deep-sleep/9999909208.html', 'dimension11': '', 'dimension12': 'Naked,Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '05551', 'name': 'Cup O´ Coffee', 'price': 12.95, 'brand': 'Lush', 'category': 'Face And Body Mask', 'variant': '5.2 oz.', 'quantity': 1, 'list': '/face/masks/cup-o%C2%B4-coffee/9999905552.html', 'dimension11': '', 'dimension12': 'Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '02209', 'name': 'Bat Art', 'price': 6.45, 'brand': 'Lush', 'category': 'Bath Bomb', 'variant': '3.8 oz.', 'quantity': 1, 'list': '/bath/bath-bombs/bat-art/9999902209.html', 'dimension11': 'Halloween Badge', 'dimension12': 'Naked,Self Preserving,Vegan', 'dimension13': 2, 'dimension14': 1, 'dimension15': True}, {'id': '00012', 'name': 'Butterball', 'price': 5.25, 'brand': 'Lush', 'category': 'Bath Bomb', 'variant': '3.1 oz.', 'quantity': 1, 'list': '/bath/bath-bombs/butterball/9999900012.html', 'dimension11': '', 'dimension12': 'Naked,Self Preserving,Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '03821', 'name': 'Dream Cream', 'price': 7.95, 'brand': 'Lush', 'category': 'Body Lotion', 'variant': '1.7 oz.', 'quantity': 1, 'list': '/body/body-lotions/dream-cream/9999900031.html', 'dimension11': 'Cult classics badge', 'dimension12': 'Vegan', 'dimension13': 0, 'dimension14': 1, 'dimension15': True}, {'id': '01920', 'name': 'Sleepy', 'price': 12.95, 'brand': 'Lush', 'category': 'Bubble Bar', 'variant': '7 oz.', 'quantity': 1, 'list': 
'/bath/bubble-bars/sleepy/9999901920.html', 'dimension11': '', 'dimension12': 'Naked,Self Preserving,Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}, {'id': '06025', 'name': 'Angels on Bare Skin', 'price': 16.95, 'brand': 'Lush', 'category': 'Face And Body Cleanser', 'variant': '3.5 oz', 'quantity': 1, 'list': '/face/cleansers/angels-on-bare-skin/9999906953.html', 'dimension11': 'Cult classics badge', 'dimension12': 'Vegan', 'dimension13': 1, 'dimension14': 1, 'dimension15': True}]
我希望能够将额外的尺寸/重量附加到它们各自的列表项中
我并不指望有人直接替我解决这个问题,但如果能为我指出正确的方向,我将不胜感激
要一次抓取所有结果,请尝试以下操作(要抓取另一个类别,只需替换cgid):
对于带有下拉列表的数据,您应该查找类为
custom-select form-control select-size selectpicker
的 select 元素,然后检查其中的每个 option 标签
,您会发现其值的形式为“price/size”,例如“49.95美元/21.8盎司”。请注意,在这个值中,我删除了多余的空格和换行符,以获得干净的显示效果
相关问题 更多 >
编程相关推荐