在Ex中保存数据的问题

import xlsxwriter # Create a workbook and add a worksheet. workbook = xlsxwriter.Workbook('zzzzz.xlsx') worksheet = workbook.add_worksheet() date = "17/08/2015" bill = 001 item = "something" customer = "Luis" price = 100 # Start from the first cell. Rows and columns are zero indexed. row = 0 col = 0 # Iterate over the data and write it out row by row. worksheet.write(row, col, date ) worksheet.write(row, col + 1, bill) worksheet.write(row, col + 2, item ) worksheet.write(row, col + 3, customer ) worksheet.write(row, col + 4,price) workbook.close()

>>> Traceback (most recent call last): File "C:\Python27\crearexcel.py", line 28, in <module> workbook.close() File "C:\Python27\lib\site-packages\xlsxwriter\workbook.py", line 296, in close self._store_workbook() File "C:\Python27\lib\site-packages\xlsxwriter\workbook.py", line 520, in _store_workbook xml_files = packager._create_package() File "C:\Python27\lib\site-packages\xlsxwriter\packager.py", line 140, in _create_package self._write_shared_strings_file() File "C:\Python27\lib\site-packages\xlsxwriter\packager.py", line 280, in _write_shared_strings_file sst._assemble_xml_file() File "C:\Python27\lib\site-packages\xlsxwriter\sharedstrings.py", line 53, in _assemble_xml_file self._write_sst_strings() File "C:\Python27\lib\site-packages\xlsxwriter\sharedstrings.py", line 83, in _write_sst_strings self._write_si(string) File "C:\Python27\lib\site-packages\xlsxwriter\sharedstrings.py", line 110, in _write_si self._xml_si_element(string, attributes) File "C:\Python27\lib\site-packages\xlsxwriter\xmlwriter.py", line 122, in _xml_si_element self.fh.write("""<si><t%s>%s</t></si>""" % (attr, string)) File "C:\Python27\lib\codecs.py", line 694, in write return self.writer.write(data) File "C:\Python27\lib\codecs.py", line 357, in write data, consumed = self.encode(object, self.errors) UnicodeDecodeError: 'ascii' codec can't decode byte 0xa0 in position 11: ordinal not in range(128)

# -*- coding: cp1252 -*- # -*- coding: UTF-8 -*- from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter from pdfminer.converter import TextConverter from pdfminer.layout import LAParams from pdfminer.pdfpage import PDFPage from cStringIO import StringIO import os import xlsxwriter # Create a workbook and add a worksheet. workbook = xlsxwriter.Workbook('zzzzz.xlsx') worksheet = workbook.add_worksheet() files = [f for f in os.listdir('.') if os.path.isfile(f)] for f in files: z = 0 e = (len(files) - 1) def convert_pdf_to_txt(path): rsrcmgr = PDFResourceManager() retstr = StringIO() codec = 'utf-8' laparams = LAParams() device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams) fp = file(path, 'rb') interpreter = PDFPageInterpreter(rsrcmgr, device) password = "" maxpages = 0 caching = True pagenos=set() fstr = '' for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password,caching=caching, check_extractable=True): interpreter.process_page(page) str = retstr.getvalue() fstr += str fp.close() device.close() retstr.close() return fstr row = 0 col = 0 while z<e: factura = files[z] #ejemplo 1 string = convert_pdf_to_txt(factura) lines = list(filter(bool,string.split('\n'))) custData = {} for i in range(len(lines)): if 'EMAIL:' in lines[i]: custData['Name'] = lines[i+1] elif 'FACTURA' in lines[i]: custData['BillNumber'] = lines[i+1] elif 'Vencimientos:' in lines[i]: custData['price'] = lines[i+2] elif 'Banco:' in lines[i]: custData['paymentType'] = lines[i+1] #ejemplo 2 txtList = convert_pdf_to_txt(factura).splitlines() nameIdx, billNumIdx, priceIdx, expirDateIdx, paymentIdx = -1, -1, -1, -1, -1 for idx, line in enumerate(txtList): if line == "EMAIL: buendialogistica@gmail.com": nameIdx = idx +2 # in your example it should be +2... if line == "FACTURA": billNumIdx = idx + 1 if "Vencimientos:" in line: priceIdx = idx + 2 expirDateIdx = idx + 1 if "Banco:" in line: paymentIdx = idx + 1 name = txtList[nameIdx] if nameIdx != -1 else '' billNum = txtList[billNumIdx] if billNumIdx != -1 else '' price = txtList[priceIdx] if priceIdx != -1 else '' expirDate = txtList[expirDateIdx] if expirDateIdx != -1 else '' payment = txtList[paymentIdx] if paymentIdx != -1 else '' print expirDate billNum = billNum.replace("Â Â ", "") print billNum custData['Name'] = custData['Name'].replace("Â", "") print custData['Name'] custData['paymentType'] = custData['paymentType'].replace("Â", "") print custData['paymentType'] print price nombre = str(custData['Name']) formadepago = custData['paymentType'] z+=1 columna2 = str(billNum) + ", " + nombre + ", " + formadepago worksheet.write(row, col, expirDate) worksheet.write(row, col + 1, columna2) worksheet.write(row, col + 2, price) row+=1 workbook.close()

2条回答

网友

1楼 · 编辑于 2024-10-06 12:38:08

您可以尝试将脚本的编码设置为utf-8，方法是包括

# coding=utf-8

作为第一行。然后，您可以通过在其前面添加一个小写字母u将字符串编码设置为utf-8：

^{pr2}$

然后甚至可以在文本中使用非ascii字母，比如。在

或者-如前所述-在文本中使用普通空格而不是非中断空格（0xa0）。在

网友

2楼 · 编辑于 2024-10-06 12:38:08

sys.setdefaultencoding('Cp1252')-这是一个令人讨厌的修复所有黑客。它掩盖了其他问题，并使代码变得脆弱和特定于平台。在

"JOSE LUIS FEBRERO LOPEZ"包含Windows-1252编码的非中断空格（0xA0）。在

xlsxwriter要求在使用非ASCII字符时传递Unicode对象。实际上，对所有字符串使用Unicode对象是一个很好的实践。在

通过在字符串前面添加u来创建Unicode对象：

u"JOSE LUIS FEBRERO LOPEZ"

由于Python源代码编码为Windows-1252，因此需要在源文件的顶部添加以下内容：

^{pr2}$

这将告诉Python如何将源文件中的字符串解码为Unicode对象。在

相关问题更多 >

编程相关推荐

热门问题

热门文章