使用xlrd将xlsx文件导入Python

2024-09-22 16:26:27 发布

您现在位置:Python中文网/ 问答频道 /正文

正在尝试将.xlsx文件导入到Python中。我有谷歌和谷歌。我已经为.csv编写了这段代码并在工作,但它必须用于.xlsx文件。所以我基本上拼凑起来做了一个冰雹玛丽,希望它能起作用。非常感谢您的帮助!

**添加了我的全部代码和一段数据文件。我需要它来导入.xlsx并执行数据。

import xlrd

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')

worksheet = workbook.get_sheet(0, update_pos=True)

header = []
report_card = []

# Count Stmnts for Required Data
valid_count = 0
app_req_count = 0
intr_req_count = 0
oe_intr_req_count = 0
part_img_req_count = 0
upc_req_count = 0
unspsc_req_count = 0
msds_req_count = 0

# Count Stmts for Missing Data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0
missing_mpcc_count = 0
missing_attr_values_count = 0
missing_part_img_count = 0
missing_upc_count = 0
missing_warr_text_count = 0
missing_warr_pdf_count = 0
missing_unspsc_count = 0
missing_msds_count = 0


for row_num, row in enumerate(worksheet):
    if row_num <= 4:
        #  print(row)  # Print out the header
        header.append([row[0], row[2]])
    else:
        hq_line, part_no, part_class, appl_req, appl_count, intr_req, 
        intr_count, oe_intr_req, has_oe_intr, has_attr_editor, 
        has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req, 
        has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc, 
        has_unspsc, attr_count, attrval_count, valid_part = row

    if valid_part == 'YES':
        valid_count += 1

        # Required Parts Count
        if appl_req == 'YES':
            app_req_count += 1
        if intr_req == 'YES':
            intr_req_count += 1
        if oe_intr_req == 'YES':
            oe_intr_req_count += 1
        if part_img_req == 'YES':
            part_img_req_count += 1
        if upc_req == 'YES':
            upc_req_count += 1
        if msds_req == 'YES':
            msds_req_count += 1

        # Missing Data Counts
        if appl_req == 'YES' and appl_count == '0':
            missing_app_count += 1
        if intr_req == 'YES' and intr_count == '0':
            missing_intr_count += 1
        if oe_intr_req == 'YES' and has_oe_intr == '0':
             missing_oe_intr_count += 1
        if has_mpcc == 'NO':
             missing_mpcc_count += 1
        if has_attr_values == 'NO':
            missing_attr_values_count += 1
        if has_part_img == 'NO':
            missing_part_img_count += 1
        if upc_req == 'YES' and has_upc == '0':
            missing_upc_count += 1
        if warr_req == 'YES' and has_warr_txt == 'NO':
            missing_warr_text_count += 1
        if warr_req == 'YES' and has_warr_pdf == 'NO':
            missing_warr_pdf_count += 1
        if has_unspsc == 'NO':
            missing_unspsc_count += 1
        if msds_req == 'YES' and has_msds == 'NO':
            missing_msds_count += 1

# Statements for Required Counts
valid_parts = ('Number of Valid Parts: ', '{:,}'.format(valid_count))
application_required = ('Application Records Required: ', 
'{:,}'.format(app_req_count))
interchange_required = ('Interchange Records Required: ', 
'{:,}'.format(intr_req_count))
oe_interchange_required = ('OE Interchange Records Required: ', 
'{:,}'.format(oe_intr_req_count))
mpcc = ('MPCC Required: ', '{:,}'.format(valid_count))  # Every valid part 
requires a MPCC
attributes = ('Attributes Required: ', '{:,}'.format(valid_count))  # Every 
valid part requires attributes
image_required = ('Image Required: ', '{:,}'.format(part_img_req_count))
upc = ('UPC Requited: ', '{:,}'.format(upc_req_count))
warranties = ('Warranty Text/PDF Required: ', '{:,}'.format(valid_count))  # 
Require warranty text/pdf on all parts
unspsc = ('UNSPSC Code Required: ', '{:,}'.format(valid_count))  # Require 
UNSPSC Codes for all parts
msds = ('MSDS Required: ', '{:,}'.format(msds_req_count))

# Statements for Missing Counts
missing_applications = ('Missing Applications: ', 
'{:,}'.format(missing_app_count))
missing_interchange = ('Missing Interchange: ', 
'{:,}'.format(missing_intr_count))
missing_oe_interchange = ('Missing OE Interchange: ', 
'{:,}'.format(missing_oe_intr_count))
missing_mpcc = ('Missing MPCC: ', '{:,}'.format(missing_mpcc_count))
missing_attributes = ('Missing Attributes: ', 
'{:,}'.format(missing_attr_values_count))
missing_image = ('Missing Image: ', '{:,}'.format(missing_part_img_count))
missing_UPC = ('Missing UPC: ', '{:,}'.format(missing_upc_count))
missing_warranty_text = ('Missing Warranty Text: ', 
'{:,}'.format(missing_warr_text_count))
missing_warranty_pdf = ('Missing Warranty PDF: ', 
'{:,}'.format(missing_warr_pdf_count))
missing_unspsc = ('Missing UNSPSC Code: ', 
'{:,}'.format(missing_unspsc_count))
missing_msds = ('Missing MSDS: ', '{:,}'.format(missing_msds_count))

# CSV Output
report_card.append(valid_parts)
report_card.append(application_required)
report_card.append(interchange_required)
report_card.append(oe_interchange_required)
report_card.append(mpcc)
report_card.append(attributes)
report_card.append(image_required)
report_card.append(upc)
report_card.append(warranties)
report_card.append(unspsc)
report_card.append(msds)
report_card.append(missing_applications)
report_card.append(missing_interchange)
report_card.append(missing_oe_interchange)
report_card.append(missing_mpcc)
report_card.append(missing_attributes)
report_card.append(missing_image)
report_card.append(missing_UPC)
report_card.append(missing_warranty_text)
report_card.append(missing_warranty_pdf)
report_card.append(missing_unspsc)
report_card.append(missing_msds)

for row in header:
    print(row)

for x in report_card:
    print(x)



with open('Report_Card.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(header)
    writer.writerows(report_card)

CSV文件:

Supplier Line:,,Gates Rubber - Denver (GAT),,,,,,,,,,,,,,,,,,,,,,
Summary:,,Parts HQ Abbr,,,,,,,,,,,,,,,,,,,,,,
ACCT No:,,40013586,,,,,,,,,,,,,,,,,,,,,,
RecCount:,,10221,,,,,,,,,,,,,,,,,,,,,,
Applicable Date:,,"June 14, 2017 (Wednesday)",,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,
HQ Line,Part No,Part Class,Appl Req,Appl Count ,Intr Req,Intr Count ,OE Intr Req,Has OE Intr,Has Attr Editor, Has Attr Values,Part IMG Req,Has Part IMG,Has MPCC,Warr Req,Has Warr TXT,Has Warr PDF,MSDS Req,Has MSDS,UPC Req,Has UPC,Has UNSPSC,Attr Count ,AttrVal Count ,Valid Part
GAT,'27210',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES
GAT,'27211',O,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,14,YES
GAT,'27212',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES
GAT,'27213',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES
GAT,'27220',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES
GAT,'27221',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES

Tags: noreportformatifcountcardreqyes
3条回答

因为它是一个Excel工作簿对象,而不仅仅是一个.CSV文件,所以您的book对象是由工作表组成的。因此,首先必须获取要处理的工作表,然后需要通过调用sheet.get_rows()获取行

下面的代码应该可以工作,我在本地测试了它。

import xlrd

book = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')

header = []
report_card = []

# Count Stmnts for Required Data
valid_count = 0
app_req_count = 0
intr_req_count = 0

# Count Stmts for Missing Data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0

sheet = book.sheets()[0]
for row_num, row in enumerate(sheet.get_rows()):
    if row_num <= 4:
        #  print(row)  # Print out the header
        header.append([row[0], row[2]])

print(header)

访问xlrd库的代码需要一些修复,如下所示:

import xlrd

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')
worksheet = workbook.sheet_by_index(0)

header = []
report_card = []

# Count Stmnts for Required Data
valid_count = 0
app_req_count = 0
intr_req_count = 0
oe_intr_req_count = 0
part_img_req_count = 0
upc_req_count = 0
unspsc_req_count = 0
msds_req_count = 0

# Count Stmts for Missing Data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0
missing_mpcc_count = 0
missing_attr_values_count = 0
missing_part_img_count = 0
missing_upc_count = 0
missing_warr_text_count = 0
missing_warr_pdf_count = 0
missing_unspsc_count = 0
missing_msds_count = 0

for row_num in range(worksheet.nrows):
    row = worksheet.row_values(row_num)

    if row_num <= 4:
        #  print(row)  # Print out the header
        header.append([row[0], row[2]])
    else:
        hq_line, part_no, part_class, appl_req, appl_count, intr_req, \
        intr_count, oe_intr_req, has_oe_intr, has_attr_editor, \
        has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req, \
        has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc, \
        has_unspsc, attr_count, attrval_count, valid_part = row

        if valid_part == 'YES':
            valid_count += 1

            # Required Parts Count
            if appl_req == 'YES':
                app_req_count += 1
            if intr_req == 'YES':
                intr_req_count += 1
            if oe_intr_req == 'YES':
                oe_intr_req_count += 1
            if part_img_req == 'YES':
                part_img_req_count += 1
            if upc_req == 'YES':
                upc_req_count += 1
            if msds_req == 'YES':
                msds_req_count += 1

            # Missing Data Counts
            if appl_req == 'YES' and appl_count == '0':
                missing_app_count += 1
            if intr_req == 'YES' and intr_count == '0':
                missing_intr_count += 1
            if oe_intr_req == 'YES' and has_oe_intr == '0':
                 missing_oe_intr_count += 1
            if has_mpcc == 'NO':
                 missing_mpcc_count += 1
            if has_attr_values == 'NO':
                missing_attr_values_count += 1
            if has_part_img == 'NO':
                missing_part_img_count += 1
            if upc_req == 'YES' and has_upc == '0':
                missing_upc_count += 1
            if warr_req == 'YES' and has_warr_txt == 'NO':
                missing_warr_text_count += 1
            if warr_req == 'YES' and has_warr_pdf == 'NO':
                missing_warr_pdf_count += 1
            if has_unspsc == 'NO':
                missing_unspsc_count += 1
            if msds_req == 'YES' and has_msds == 'NO':
                missing_msds_count += 1

# Statements for Required Counts
valid_parts = ('Number of Valid Parts: ', '{:,}'.format(valid_count))
application_required = ('Application Records Required: ', '{:,}'.format(app_req_count))
interchange_required = ('Interchange Records Required: ', '{:,}'.format(intr_req_count))
oe_interchange_required = ('OE Interchange Records Required: ', '{:,}'.format(oe_intr_req_count))
mpcc = ('MPCC Required: ', '{:,}'.format(valid_count))  # Every valid part requires a MPCC
attributes = ('Attributes Required: ', '{:,}'.format(valid_count))  # Every valid part requires attributes
image_required = ('Image Required: ', '{:,}'.format(part_img_req_count))
upc = ('UPC Requited: ', '{:,}'.format(upc_req_count))
warranties = ('Warranty Text/PDF Required: ', '{:,}'.format(valid_count))  # Require warranty text/pdf on all parts
unspsc = ('UNSPSC Code Required: ', '{:,}'.format(valid_count))  # Require UNSPSC Codes for all parts
msds = ('MSDS Required: ', '{:,}'.format(msds_req_count))

# Statements for Missing Counts
missing_applications = ('Missing Applications: ', '{:,}'.format(missing_app_count))
missing_interchange = ('Missing Interchange: ', '{:,}'.format(missing_intr_count))
missing_oe_interchange = ('Missing OE Interchange: ', '{:,}'.format(missing_oe_intr_count))
missing_mpcc = ('Missing MPCC: ', '{:,}'.format(missing_mpcc_count))
missing_attributes = ('Missing Attributes: ', '{:,}'.format(missing_attr_values_count))
missing_image = ('Missing Image: ', '{:,}'.format(missing_part_img_count))
missing_UPC = ('Missing UPC: ', '{:,}'.format(missing_upc_count))
missing_warranty_text = ('Missing Warranty Text: ', '{:,}'.format(missing_warr_text_count))
missing_warranty_pdf = ('Missing Warranty PDF: ', '{:,}'.format(missing_warr_pdf_count))
missing_unspsc = ('Missing UNSPSC Code: ', '{:,}'.format(missing_unspsc_count))
missing_msds = ('Missing MSDS: ', '{:,}'.format(missing_msds_count))

# CSV Output
report_card.append(valid_parts)
report_card.append(application_required)
report_card.append(interchange_required)
report_card.append(oe_interchange_required)
report_card.append(mpcc)
report_card.append(attributes)
report_card.append(image_required)
report_card.append(upc)
report_card.append(warranties)
report_card.append(unspsc)
report_card.append(msds)
report_card.append(missing_applications)
report_card.append(missing_interchange)
report_card.append(missing_oe_interchange)
report_card.append(missing_mpcc)
report_card.append(missing_attributes)
report_card.append(missing_image)
report_card.append(missing_UPC)
report_card.append(missing_warranty_text)
report_card.append(missing_warranty_pdf)
report_card.append(missing_unspsc)
report_card.append(missing_msds)

for row in header:
    print(row)

for x in report_card:
    print(x)

with open('Report_Card.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(header)
    writer.writerows(report_card)        

对于您提供的数据,这将提供一个Report_Card.csv文件,如下所示:

Supplier Line:  Gates Rubber - Denver (GAT)
Summary:    Parts HQ Abbr
ACCT No:    40013586
RecCount:   10221
Applicable Date:    June 14, 2017 (Wednesday)
Number of Valid Parts:  6
Application Records Required:   0
Interchange Records Required:   6
OE Interchange Records Required:    6
MPCC Required:  6
Attributes Required:    6
Image Required:     6
UPC Requited:   6
Warranty Text/PDF Required:     6
UNSPSC Code Required:   6
MSDS Required:  0
Missing Applications:   0
Missing Interchange:    0
Missing OE Interchange:     0
Missing MPCC:   0
Missing Attributes:     0
Missing Image:  0
Missing UPC:    0
Missing Warranty Text:  0
Missing Warranty PDF:   0
Missing UNSPSC Code:    0
Missing MSDS:   0

你的代码肯定需要重新考虑一下。与其有这么多的变量,比如每列一个,不如考虑使用Python^{}这样的工具,它的工作原理就像一个字典。使用列表或字典会使代码更简单。

此脚本允许您将excel数据表转换为字典列表:

import xlrd

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx', on_demand = True)
worksheet = workbook.sheet_by_index(0)
first_row = [] # The row where we stock the name of the column
for col in range(worksheet.ncols):
    first_row.append( worksheet.cell_value(0,col) )
# transform the workbook to a list of dictionaries
data =[]
for row in range(1, worksheet.nrows):
    elm = {}
    for col in range(worksheet.ncols):
        elm[first_row[col]]=worksheet.cell_value(row,col)
    data.append(elm)
print data

您也可以使用Pandas

from pandas import *
xls = ExcelFile('GAT_US_PartReview_2017-06-23.xlsx')
df = xls.parse(xls.sheet_names[0])
print df.to_dict()

相关问题 更多 >