用Python在Excel中保存字典

from scrapingbee import ScrapingBeeClient import requests from bs4 import BeautifulSoup import pandas as pd SCRAPINGBEE_API_KEY = "bzzzz" endpoint = "https://app.scrapingbee.com/api/v1" pages = [ 'https://www.businesslist.com.ng/category/restaurants/1/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/2/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/3/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/4/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/5/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/6/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/7/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/8/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/9/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/10/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/11/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/12/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/13/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/14/city:lagos', 'https://www.businesslist.com.ng/category/restaurants/15/city:lagos' ] rest = [] #GET_LINKS for url in pages[:1]: params = { 'api_key': SCRAPINGBEE_API_KEY, 'url': url} response = requests.get(endpoint, params=params) soup = BeautifulSoup(response.content, 'html.parser') body = soup.find_all('h4') for items in body: item = items.find('a').get('href') item_link = 'https://www.businesslist.com.ng' + item rest.append(item_link) #GET_REST for url in rest[:2]: params = { 'api_key': SCRAPINGBEE_API_KEY, 'url': url} info = {} response = requests.get(endpoint, params=params) soup = BeautifulSoup(response.content, 'html.parser') restaraunt_name = soup.find('b', {'id':'company_name'}).text info.update({'Restaraunt':restaraunt_name}) location = soup.find('div', {'class':'text location'}).text.split('View Map')[0] info.update({'Location':location}) phone = soup.find('div', {'class':'text phone'}).text[:11] info.update({'Phone':phone}) web = soup.find('div', {'class':'text weblinks'}).text info.update({'web':web}) df = pd.DataFrame(info) df.to_excel('./Lagos.xlsx')

2条回答

网友

1楼 · 编辑于 2024-10-06 15:16:49

您正在使用相同的名称将df保存在循环中，它将只创建一个dict（表示excel中的每个循环值）。因此，最好在循环外创建一个空数据框，并在循环执行完成后将其保存到excel文件中

你修改过的代码如下

all_info = pd.DataFrame()

for url in rest[:2]:
    params = {
    'api_key': SCRAPINGBEE_API_KEY,
    'url': url}
    info = {}
    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.content, 'html.parser')

    restaraunt_name = soup.find('b', {'id':'company_name'}).text
    info.update({'Restaraunt':restaraunt_name})
    location = soup.find('div', {'class':'text location'}).text.split('View Map')[0]
    info.update({'Location':location})
    phone = soup.find('div', {'class':'text phone'}).text[:11]
    info.update({'Phone':phone})
    web = soup.find('div', {'class':'text weblinks'}).text
    info.update({'web':web})
    if len(all_info) == 0:
        all_info = pd.DataFrame(info, index=range(len(info)))
    else:
        all_info = all_info.append(pd.DataFrame(info))
all_info.to_excel('./Lagos.xlsx')

网友

2楼 · 编辑于 2024-10-06 15:16:49

如何创建一个包含所有数据的列表，然后将其转换为数据帧，然后将其输出到Excel文件

from scrapingbee import ScrapingBeeClient
import requests
from bs4 import BeautifulSoup
import pandas as pd

SCRAPINGBEE_API_KEY = "zzzzzzzzz"
endpoint = "https://app.scrapingbee.com/api/v1"
pages = [
    'https://www.businesslist.com.ng/category/restaurants/1/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/2/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/3/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/4/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/5/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/6/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/7/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/8/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/9/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/10/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/11/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/12/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/13/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/14/city:lagos',
    'https://www.businesslist.com.ng/category/restaurants/15/city:lagos'
    ]

rest = []

#GET_LINKS
for url in pages[:1]:
    params = {
    'api_key': SCRAPINGBEE_API_KEY,
    'url': url}

    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.content, 'html.parser')

    body = soup.find_all('h4')
    for items in body:
        item = items.find('a').get('href')
        item_link = 'https://www.businesslist.com.ng' + item
        rest.append(item_link)

#GET_REST
data = []
for url in rest[:2]:
    params = {
    'api_key': SCRAPINGBEE_API_KEY,
    'url': url}
    info = {}
    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.content, 'html.parser')

    restaraunt_name = soup.find('b', {'id':'company_name'}).text
    info.update({'Restaraunt':restaraunt_name})
    location = soup.find('div', {'class':'text location'}).text.split('View Map')[0]
    info.update({'Location':location})
    phone = soup.find('div', {'class':'text phone'}).text[:11]
    info.update({'Phone':phone})    
    web = soup.find('div', {'class':'text weblinks'}).text
    info.update({'web':web})
    data.append(info)

df = pd.DataFrame(data)

df.to_excel('./Lagos.xlsx')

相关问题更多 >

编程相关推荐

热门问题

热门文章