Python：将从内部嵌套循环中抓取的网站数据从行转换为列

# Scrape every table row of `soup`, collect the fridge names and their
# temperature readings, and dump the stacked result to "<filename>.xlsx".
# Relies on `pd`, `soup` and `filename` being defined earlier in the script.

# Initialize things before the loop.
df = pd.DataFrame()
# Time based on hour 00:00, 01:00 etc... (one column per hour of the day).
df_time = pd.DataFrame(columns=list(range(24)))

# DataFrame.append was removed in pandas 2.0 and copies the whole frame on
# every call; collect the one-row frames and concatenate once after the loop.
# Seeding each list with the initial frame keeps the original column layout
# and keeps pd.concat from failing when nothing was scraped.
fridge_frames = [df]
temperature_frames = [df_time]

for listing in soup.find_all('tr'):
    listing.attrs = {}
    assetCell = listing.find_all("td", {"class": "assetCell"})
    assetValue = listing.find_all("td", {"class": "assetValue"})

    for data in assetCell:
        fridge_frames.append(pd.DataFrame({
            'Fridge name': [data.get_text()],
        }))

        # Every value cell of the row is recorded once per name cell.
        for value in assetValue:
            temperature_frames.append(pd.DataFrame({
                'Temperature': [value.get_text()],
            }))
        # End of assetValue loop
    # End of assetCell loop

df = pd.concat(fridge_frames)
df_time = pd.concat(temperature_frames)

# Now we need to save the data to Excel.
# Stack both frames row-wise into the frame that gets exported.
frames = [df, df_time]
result = pd.concat(frames)

# Create a pandas Excel writer using XlsxWriter as the engine. The context
# manager saves and closes the workbook on exit (ExcelWriter.save() was
# removed in pandas 2.0).
with pd.ExcelWriter(filename + '.xlsx', engine='xlsxwriter') as writer:
    # Start at row 1 so that row 0 stays free for the custom header below.
    # NOTE(review): header=True makes pandas write its own header on row 1 as
    # well, so the column names appear twice; left unchanged on purpose.
    result.to_excel(writer, sheet_name='SheetName', startrow=1, header=True)

    # Get the XlsxWriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets['SheetName']

    # Write the column headers on the first row; the +1 skips the index column.
    for col_num, value in enumerate(result.columns.values):
        worksheet.write(0, col_num + 1, value)

1条回答

网友

1楼 · 发布于 2024-10-03 15:21:46

经过多次测试，我采用了另一种方法。我没有继续折腾 pandas，而是使用 tabulate 库获取整个表格的数据，然后将整个表结构导出为 csv 文件。

import csv
import datetime ### Import date function to make the files based on date
import io
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate



 if 0 < DAY_INTEGER <= 31:
    # Log in, download the internal page, and export its data table as a
    # psql-style text table to "<filename>.csv".
    # Relies on DAY_INTEGER, USERNAME, PASSWORD, login_post_url, internal_url
    # and filename being defined earlier in the script.

    # Bound the retries and pause between them so that a site that is down
    # does not put the script into an endless, tight reconnect loop.
    MAX_ATTEMPTS = 3
    RETRY_DELAY_SECONDS = 2

    html = None
    authenticated = False
    form_data = {'UserName': USERNAME, 'Password': PASSWORD}

    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            with requests.Session() as sesh:
                login_response = sesh.post(login_post_url, data=form_data)
                response = sesh.get(internal_url)
        except requests.exceptions.ConnectionError:
            print("Whoops! This is embarrasing :( ")
            print("Unable to connect to the address. Looks like the website is down.")
            if attempt < MAX_ATTEMPTS:
                time.sleep(RETRY_DELAY_SECONDS)
        else:
            # A Session object is always truthy, so testing it says nothing
            # about the login; look at the HTTP status of both requests.
            # NOTE(review): many sites answer 200 even for a rejected login;
            # the missing-table check below covers that case.
            authenticated = login_response.ok and response.ok
            html = response.text
            break

    table = None
    if html is not None and authenticated:
        # BeautifulSoup version
        soup = BeautifulSoup(html, 'lxml')
        tables = soup.find_all("table")
        # Index 3 is the fourth table on the page; the earlier ones hold
        # nothing useful. NOTE(review): the original comment said "first two
        # tables" - confirm the index. Guarded because a login page would
        # not contain that many tables.
        if len(tables) > 3:
            table = tables[3]

    if table is not None:
        # Passing literal HTML to read_html is deprecated since pandas 2.1;
        # wrap it in a file-like object instead.
        df = pd.read_html(io.StringIO(str(table)))

        df2 = tabulate(df[0], headers='keys', tablefmt='psql', showindex=False)

        # The context manager closes the file even if the write fails.
        # The content is tabulate's psql layout, not comma-separated values.
        with open(filename + '.csv', 'w', encoding='utf-8') as myFile:
            myFile.write(str(df2))

    elif html is not None:
        # The page was fetched but the login was rejected or the expected
        # table is missing. Connection failures were already reported above.
        print("Oops. Something went wrong :(")
        print("It looks like authentication failed")

相关问题更多 >

编程相关推荐

热门问题

热门文章