使用python读取csv中的特定列

+-----+-----+-----+-----+-----+-----+-----+-----+
| AAA | bbb | ccc | DDD | eee | FFF | GGG | hhh |
+-----+-----+-----+-----+-----+-----+-----+-----+
| 1   | 2   | 3   | 4   | 50  | 3   | 20  | 4   |
| 2   | 1   | 3   | 5   | 24  | 2   | 23  | 5   |
| 4   | 1   | 3   | 6   | 34  | 1   | 22  | 5   |
| 2   | 1   | 3   | 5   | 24  | 2   | 23  | 5   |
| 2   | 1   | 3   | 5   | 24  | 2   | 23  | 5   |
+-----+-----+-----+-----+-----+-----+-----+-----+

3条回答

网友

1楼 · 编辑于 2024-05-17 06:34:13

我知道这个问题已经有了被采纳的答案，但如果您确实想按列名从 csv 文件中读取特定的列，就应该使用 csv.DictReader（前提是您没有使用 Pandas）。

import csv

try:
    # Python 2: the StringIO module accepts the plain string literal below.
    from StringIO import StringIO
except ImportError:
    # Python 3: the StringIO module no longer exists; io.StringIO replaces it.
    from io import StringIO

# Names of the columns to extract, in the order they appear in each output tuple.
columns = 'AAA,DDD,FFF,GGG'.split(',')


# Sample CSV content; the first line is the header row.
testdata ='''\
AAA,bbb,ccc,DDD,eee,FFF,GGG,hhh
1,2,3,4,50,3,20,4
2,1,3,5,24,2,23,5
4,1,3,6,34,1,22,5
2,1,3,5,24,2,23,5
2,1,3,5,24,2,23,5
'''

# DictReader takes the field names from the first line and yields one mapping
# per data row, so columns can be looked up by name instead of by position.
reader = csv.DictReader(StringIO(testdata))

# Lazy generator of tuples holding only the requested columns. It can be
# consumed once; wrap it in list() to materialize the rows.
desired_cols = (tuple(row[col] for col in columns) for row in reader)

输出：

>>> list(desired_cols)
[('1', '4', '3', '20'),
 ('2', '5', '2', '23'),
 ('4', '6', '1', '22'),
 ('2', '5', '2', '23'),
 ('2', '5', '2', '23')]

网友

2楼 · 编辑于 2024-05-17 06:34:13

import csv

# Header names of the columns to keep from each row.
DESIRED_COLUMNS = ('AAA','DDD','FFF','GGG')

# The with-block closes the file even if reading raises.
# NOTE: on Python 3 the csv documentation recommends open(..., newline='')
# so that newlines embedded in quoted fields are parsed correctly.
with open("myfile.csv") as f:
    reader = csv.reader(f)

    # The first row is the header. Read it exactly once so that a header with
    # no matching column cannot cause a data row to be mistaken for it.
    header_row = next(reader, [])

    # Positional indexes of the columns of interest, in file order.
    headers = [i for i, col in enumerate(header_row) if col in DESIRED_COLUMNS]

    # One tuple per data row, holding only the selected columns.
    results = [tuple(row[i] for i in headers) for row in reader]

print(results)

网友

3楼 · 编辑于 2024-05-17 06:34:13

def read_csv(file, columns, type_name="Row"):
    """Yield the selected columns of each data row of a CSV source.

    The first row of ``file`` is treated as the header and is used to locate
    the requested columns; every following row is yielded with only those
    columns, in the order given by ``columns``.

    Args:
        file: Any object accepted by ``csv.reader`` (an open text file or
            another iterable of lines).
        columns: Sequence of header names to extract.
        type_name: Name of the namedtuple class used for the yielded rows.

    Yields:
        A namedtuple called ``type_name`` with one field per requested column.
        If the column names cannot be used as namedtuple fields (not valid
        identifiers, keywords, duplicates), plain tuples are yielded instead.

    Raises:
        ValueError: If a requested column is not present in the header row.
    """
    try:
        row_type = namedtuple(type_name, columns)
    except ValueError:
        # The names are unusable as namedtuple fields; fall back to plain
        # tuples. (Calling tuple(*values) would be wrong here: tuple() takes
        # a single iterable, not one argument per value.)
        row_type = None

    rows = iter(csv.reader(file))
    # next() with a default works on both Python 2 and 3 and lets an empty
    # input simply produce no rows instead of leaking StopIteration.
    header = next(rows, None)
    if header is None:
        return

    # Index of each requested column within the header row.
    mapping = [header.index(x) for x in columns]
    for row in rows:
        values = tuple(row[i] for i in mapping)
        yield values if row_type is None else row_type(*values)

示例：

>>> import csv
>>> from collections import namedtuple
>>> from StringIO import StringIO
>>> def read_csv(file, columns, type_name="Row"):
...   try:
...     row_type = namedtuple(type_name, columns)
...   except ValueError:
...     row_type = tuple
...   rows = iter(csv.reader(file))
...   header = rows.next()
...   mapping = [header.index(x) for x in columns]
...   for row in rows:
...     row = row_type(*[row[i] for i in mapping])
...     yield row
... 
>>> testdata = """\
... AAA,bbb,ccc,DDD,eee,FFF,GGG,hhh
... 1,2,3,4,50,3,20,4
... 2,1,3,5,24,2,23,5
... 4,1,3,6,34,1,22,5
... 2,1,3,5,24,2,23,5
... 2,1,3,5,24,2,23,5
... """
>>> testfile = StringIO(testdata)
>>> for row in read_csv(testfile, "AAA GGG DDD".split()):
...   print row
... 
Row(AAA='1', GGG='20', DDD='4')
Row(AAA='2', GGG='23', DDD='5')
Row(AAA='4', GGG='22', DDD='6')
Row(AAA='2', GGG='23', DDD='5')
Row(AAA='2', GGG='23', DDD='5')

相关问题更多 >

编程相关推荐

热门问题

热门文章