Saving a dictionary to Excel with Python

Posted 2024-10-06 15:16:49


I need your help saving data to Excel. I scraped a few websites and now need to write a dictionary out to an Excel file.

import requests
from bs4 import BeautifulSoup
import pandas as pd

SCRAPINGBEE_API_KEY = "bzzzz"
endpoint = "https://app.scrapingbee.com/api/v1"
pages = [
    'https://www.businesslist.com.ng/category/restaurants/1/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/2/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/3/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/4/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/5/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/6/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/7/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/8/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/9/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/10/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/11/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/12/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/13/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/14/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/15/city:lagos'
    ]

rest = []

# GET_LINKS: collect the detail-page link for each restaurant on the listing pages
for url in pages[:1]:
    params = {
        'api_key': SCRAPINGBEE_API_KEY,
        'url': url,
    }

    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.content, 'html.parser')

    body = soup.find_all('h4')
    for items in body:
        item = items.find('a').get('href')
        item_link = 'https://www.businesslist.com.ng' + item
        rest.append(item_link)

# GET_REST: scrape the detail fields from each restaurant page
for url in rest[:2]:
    params = {
        'api_key': SCRAPINGBEE_API_KEY,
        'url': url,
    }
    info = {}
    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.content, 'html.parser')

    info['Restaurant'] = soup.find('b', {'id': 'company_name'}).text
    info['Location'] = soup.find('div', {'class': 'text location'}).text.split('View Map')[0]
    info['Phone'] = soup.find('div', {'class': 'text phone'}).text[:11]
    info['web'] = soup.find('div', {'class': 'text weblinks'}).text

    df = pd.DataFrame(info)
    df.to_excel('./Lagos.xlsx')

I take the links to parse from the list rest and then fetch the data from each link. I want to save every item from all the links into the dictionary info and then write it to an Excel file. But the code saves only one row to the file instead of all of them. I am missing something.


Tags: text, https, info, com, url, city, www, params
2 Answers

You are saving df under the same name inside the loop, so each iteration overwrites the file and only one record (the last loop's values) ends up in Excel. It is better to create an empty DataFrame outside the loop and write it to the Excel file once, after the loop has finished.

Your modified code is below:

all_info = pd.DataFrame()  # collects one row per restaurant

for url in rest[:2]:
    params = {
        'api_key': SCRAPINGBEE_API_KEY,
        'url': url,
    }
    info = {}
    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.content, 'html.parser')

    info['Restaurant'] = soup.find('b', {'id': 'company_name'}).text
    info['Location'] = soup.find('div', {'class': 'text location'}).text.split('View Map')[0]
    info['Phone'] = soup.find('div', {'class': 'text phone'}).text[:11]
    info['web'] = soup.find('div', {'class': 'text weblinks'}).text

    # Wrap info in a list so the dict becomes a single row; DataFrame.append
    # was removed in pandas 2.0, so accumulate with pd.concat instead.
    all_info = pd.concat([all_info, pd.DataFrame([info])], ignore_index=True)

all_info.to_excel('./Lagos.xlsx')
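A note on the design choice above: pd.DataFrame treats a list of dicts as records, one row per dict, while a single dict of scalar values raises a ValueError unless you pass an index. A minimal sketch (the sample values are made up for illustration):

import pandas as pd

# Each dict in the list becomes one row of the frame.
rows = [
    {'Restaurant': 'A', 'Phone': '080'},
    {'Restaurant': 'B', 'Phone': '081'},
]
print(pd.DataFrame(rows))      # two rows, one per dict
# pd.DataFrame(rows[0])        # ValueError: all scalar values need an index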

Alternatively, build a list that holds all of the data, convert the list to a DataFrame, and then write that to the Excel file:

import requests
from bs4 import BeautifulSoup
import pandas as pd

SCRAPINGBEE_API_KEY = "zzzzzzzzz"
endpoint = "https://app.scrapingbee.com/api/v1"
pages = [
    'https://www.businesslist.com.ng/category/restaurants/1/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/2/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/3/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/4/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/5/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/6/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/7/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/8/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/9/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/10/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/11/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/12/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/13/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/14/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/15/city:lagos'
    ]

rest = []

# GET_LINKS: collect the detail-page link for each restaurant on the listing pages
for url in pages[:1]:
    params = {
        'api_key': SCRAPINGBEE_API_KEY,
        'url': url,
    }

    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.content, 'html.parser')

    body = soup.find_all('h4')
    for items in body:
        item = items.find('a').get('href')
        item_link = 'https://www.businesslist.com.ng' + item
        rest.append(item_link)

# GET_REST: scrape the detail fields from each restaurant page
data = []  # one dict per restaurant
for url in rest[:2]:
    params = {
        'api_key': SCRAPINGBEE_API_KEY,
        'url': url,
    }
    info = {}
    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.content, 'html.parser')

    info['Restaurant'] = soup.find('b', {'id': 'company_name'}).text
    info['Location'] = soup.find('div', {'class': 'text location'}).text.split('View Map')[0]
    info['Phone'] = soup.find('div', {'class': 'text phone'}).text[:11]
    info['web'] = soup.find('div', {'class': 'text weblinks'}).text
    data.append(info)

df = pd.DataFrame(data)

df.to_excel('./Lagos.xlsx')
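One caveat with both answers: soup.find(...) returns None when a field is missing on the page, and .text then raises an AttributeError. A small helper (safe_text is a hypothetical name, not part of the original code) keeps one bad page from killing the whole run:

def safe_text(soup, tag, attrs, default=''):
    """Return the tag's text, or a default when the element is absent."""
    node = soup.find(tag, attrs)
    return node.text if node else default

# Example use inside the loop:
# info['Phone'] = safe_text(soup, 'div', {'class': 'text phone'})[:11]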
