从多个Excel文件中提取一张工作表，同时处理缺少工作表的文件

import pandas as pd import os path = r'C:/Users/Thiago/Desktop/Backup/Python/Files test append xlsx' files = os.listdir(path) df = pd.DataFrame() xlsx_files = [path + '\\' + f for f in files if f[-4:] == 'xlsx'] for i in xlsx_files: xlsx = pd.ExcelFile(i) for name in xlsx.sheet_names: data = pd.read_excel(i, header = 1, sheet_name = "CSNSC 2020") data['File'] = i print(i) df = df.append(data) break df = df[['Dt. Ref.','Convênio','Tipo de Atendimento','Venc.']] df.head() df = df.dropna(subset=['Convênio']) df.head() df.to_excel(r'C:/Users/Thiago/Desktop/Backup/Python/Files test append xlsx/out.xlsx')

1条回答

网友

1楼 · 发布于 2024-09-29 21:30:22

我编写了这个简单的函数来处理excel文件，并处理缺少工作表的excel文件。您可以根据自己的用例随意调整它

需要注意的主要问题是try和except来处理错误

模块

import pandas as pd
from pathlib import Path
from xlrd import XLRDError

行动中

concat_excels(src,'Sheet2',trg)
No sheet named <'Sheet2'> in file_0.xlsx, skipping
No sheet named <'Sheet2'> in file_1.xlsx, skipping
No sheet named <'Sheet2'> in file_2.xlsx, skipping
No sheet named <'Sheet2'> in file_3.xlsx, skipping
No sheet named <'Sheet2'> in file_4.xlsx, skipping
No sheet named <'Sheet2'> in file_5.xlsx, skipping
No sheet named <'Sheet2'> in file_6.xlsx, skipping
No sheet named <'Sheet2'> in file_7.xlsx, skipping
No sheet named <'Sheet2'> in file_8.xlsx, skipping
No sheet named <'Sheet2'> in file_9.xlsx, skipping
File Saved to C:\Users\DataNovice\OneDrive\Documents\2020\python\file_io_ops\move_files_test

功能

def concat_excels(source_path, sheet_name, target_path):

    """ 
    A simple script to find excel files in a target 
    location and merge them into a single file.
    You need Python installed along with Pandas.
    pathlib is available in Python 3.4 + 
    error handling added.
    """

    # create list for excel files.
    excel_files = [file for file in Path(source_path).glob("*.xlsx")]

    # create empty list to store each individual dataframe.
    excel_dataframe = []

    # loop through our file to read each file and append it to our list.

    for file in excel_files:
        try:
            df = pd.read_excel(file, sheet_name=sheet_name)
            df.columns = df.columns.str.lower()  # lowercase all columns
            df.columns = (
                df.columns.str.strip()
            )  # remove any trailing or leading white space.
            excel_dataframe.append(df)
        except XLRDError as err:
            print(f"{err} in {file.name}, skipping")

    try:
        final_dataframe = pd.concat(excel_dataframe, axis=1)
        final_dataframe.to_excel(target_path + "\master_file.xlsx", index=False)

        print(f"File Saved to {target_path}")

    except ValueError as err_2:
        print(
            f"No Sheets Matched in any of your excel files, are you sure {sheet_name} is correct?"
        )
    return excel_dataframe

模块

行动中

功能

相关问题更多 >

编程相关推荐

热门问题

热门文章