使用Python格式化以制表符分隔的文本文件

class UnicodeWriter:
    """Write CSV rows to stream ``f``, encoding every cell beforehand.

    Each row is rendered into an in-memory buffer by a ``csv.writer`` and
    the rendered text is then copied to ``f``.

    NOTE: every cell is converted with ``str(s).encode(...)`` before it is
    handed to the csv writer. Under Python 3 the csv writer is given bytes
    objects and writes their representation, so cells appear in the output
    as ``b'...'`` text. That behaviour is kept here deliberately.

    Args:
        f: Writable text stream that receives the rendered rows.
        dialect: csv dialect forwarded to ``csv.writer``.
        encoding: Codec used to encode each cell.
        **kwds: Extra formatting options forwarded to ``csv.writer``
            (for example ``delimiter``).
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        self.queue = StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoding = encoding

    def writerow(self, row):
        """Render ``row`` through the csv writer and copy it to the stream."""
        self.writer.writerow([str(s).encode(self.encoding) for s in row])
        data = self.queue.getvalue()
        self.stream.write(data)
        # truncate() does not move the stream position; rewind first so the
        # next row is not written after a run of NUL padding characters.
        self.queue.seek(0)
        self.queue.truncate(0)

# Ask for the location of xmlList.xml and remove every double-quote
# character from the answer.
xmlListPath = input('Enter the filepath of the xmlList.xml file: ').replace('"', '')

# Parse inside a with-block so the file handle is released even when
# parsing raises.
with codecs.open(xmlListPath) as xmlListFile:
    xmlList = etree.parse(xmlListFile)
listRoot = xmlList.getroot()

# The batch ID is the final component (file name) of the xmlList path.
# NOTE(review): `path` is presumably `os.path` -- confirm against the imports.
batchID = path.split(xmlListPath)[1]

# The statistics file is named after the batch, with the 'xmlList.xml'
# part of the name replaced by '.stats.txt'.
statsFile = 'S:/Metadata/ETD/Documentation/Statistics/' + batchID.replace('xmlList.xml', '.stats.txt')
stats = open(statsFile, 'w')

# Tab-delimited writer; the header row is written first.
wtrStats = UnicodeWriter(stats, delimiter='\t')
statsHeader = [
    'Author', 'Degree', 'Department', 'Embargo Start Date',
    'Date Web Available', 'Embargo Code', 'Identifier', 'PURL',
    'Title', 'Comments']
wtrStats.writerow(statsHeader)

b'Author' b'Degree' b'Department' b'Embargo Start Date' b'Date Web Available' b'Embargo Code' b'Identifier' b'PURL' b'Title' b'Comments'
b'Confer, Matthew Phelan' b'Ph.D.' b'Chemical & Biological Engineering' b'01/01/2021' b'01/01/2026' b'4' b'u0015_0000001_0003682' b'http://purl.lib.ua.edu/177826' b'EXPERIMENTAL AND COMPUTATIONAL STUDIES OF MATERIALS DECOMPOSITION' b''

1条回答

网友

1楼 · 发布于 2024-10-02 00:25:09

问题在于：在 Python 3 中，csv 模块的 reader 和 writer 期望处理的是字符串（Unicode 文本）。如果您事先把字符串编码成字节再交给它们，writer 写出的就是这些字节对象的表示形式（repr），也就是带 b'...' 前缀的字符串。

TL;DR：只需以所需的编码打开输出文件，并让 csv.writer 对象直接写入该文件即可——完全不需要 UnicodeWriter 这个中间类。

import csv
...
# Open the output in text mode with the desired encoding. newline="" leaves
# line-ending handling to the csv module, which avoids an extra "\r" per row
# on platforms that use "\r\n" line endings.
stats = open(statsFile, 'w', encoding="utf-8", newline="")
wtrStats = csv.writer(stats, delimiter="\t")
...

相关问题更多 >

编程相关推荐

热门问题

热门文章