<p>可以先对 CSV 文件中的行进行排序,然后逐一比较对应的行,查看是否存在差异,从而解决此类问题。</p>
<p>此方法采用函数式风格执行比较,并且可以比较任意数量的 CSV 文件。</p>
<p>它假定各 CSV 文件包含相同数量的记录,并且列的顺序相同。</p>
<pre><code>import contextlib
import csv
def compare_files(readers):
    """Yield a mismatch message for every differing field across the files.

    The first row of every reader is consumed as its header; the header of
    the first reader supplies the column names.  The remaining rows of each
    reader are sorted, and rows at the same position in the sorted lists are
    handed to ``compare_rows``.  Pairing stops at the shortest file.

    Args:
        readers: A list of row iterators (e.g. ``csv.reader`` objects), one
            per file.  It is traversed twice, so it must be re-iterable.

    Yields:
        The strings produced by ``compare_rows`` for each group of rows.
    """
    # Pull the header off every reader so that only data rows get sorted.
    headers = [next(reader) for reader in readers]
    colnames = headers[0]
    sorted_rows_per_file = [sorted(reader) for reader in readers]
    # After sorting, rows at the same index are taken to be the same record.
    for matching_rows in zip(*sorted_rows_per_file):
        yield from compare_rows(colnames, matching_rows)
def compare_rows(colnames, rows):
    """Yield one message for every column whose values differ across *rows*.

    ``rows`` holds one record per input file.  The first field of each
    record is the planet name, which must be identical in all records; the
    remaining fields are the values being compared.

    Args:
        colnames: Header row as a sequence of column names, including the
            name of the first (planet) column.
        rows: Sequence of records (sequences of strings), one per file.

    Yields:
        A string of the form ``"<planet> mismatch <column> (<v1>/<v2>/...)"``
        for each column whose values are not all equal.

    Raises:
        AssertionError: If the records do not all carry the same planet name.
    """
    # Transpose the records: each item of col_iter is one column's values.
    col_iter = zip(*rows)
    # Be sure we're comparing the same planets.
    planets = set(next(col_iter))
    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped when Python runs with -O.
    if len(planets) != 1:
        raise AssertionError(planets)
    planet = planets.pop()
    # The planet column has already been consumed from col_iter, so skip its
    # header too; otherwise every label would be shifted one column left.
    for colname, vals in zip(colnames[1:], col_iter):
        if len(set(vals)) > 1:
            yield f"{planet} mismatch {colname} ({'/'.join(vals)})"
def main(outfile, *infiles):
    """Compare the CSV files *infiles* and write every mismatch to *outfile*.

    Args:
        outfile: Path of the text report.  It is opened in ``'w'`` mode, so
            an existing file is overwritten; one message is written per line.
        *infiles: Paths of the CSV files to compare.
    """
    # ExitStack keeps all input files open for the duration of the comparison
    # and closes every one of them on exit, even if an error occurs.
    with contextlib.ExitStack() as stack:
        # newline='' leaves newline handling to the csv module, as its
        # documentation requires; without it, line breaks embedded in quoted
        # fields are not interpreted correctly.
        csvs = [
            stack.enter_context(open(fname, newline=''))
            for fname in infiles
        ]
        readers = [csv.reader(f) for f in csvs]
        with open(outfile, 'w') as out:
            for result in compare_files(readers):
                out.write(result + '\n')
# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main(
        'mismatches.txt',  # report file, overwritten on each run
        'planets1.csv',
        'planets2.csv',
    )
</code></pre>