从多个csv文件读取并合并数据

#!/usr/bin/python
"""Print the rushing statistics stored in NewRush4.csv as an aligned table."""
import csv

# Binary mode is what the Python 2 csv module expects; on Python 3 open the
# file with open("NewRush4.csv", newline="") instead.
# The with-statement closes the file even if a row raises (e.g. a missing column).
with open("NewRush4.csv", "rb") as csv_file:
    for line in csv.DictReader(csv_file, delimiter=","):
        # Values keep the blank that follows each comma in the file, so strip them.
        name = line["Player"].strip()
        yds = line["YDS"].strip()
        car = line["CAR"].strip()
        td = line["TD"].strip()
        fum = line["FUM"].strip()
        ypc = line["YPC"].strip()
        # Name is left-aligned in 20 columns; each statistic is right-aligned in 10.
        print("%-20s%10s%10s%10s%10s%10s" % (name, car, yds, td, fum, ypc))

Player,YDS,TD,CAR,FUM,YPC 49erswag, 14.0, 0, 3, 0, 4.7 A Beast Playa, 23.0, 0, 7, 0, 3.3 A Swanky Guy 2, 154.0, 1, 29, 2, 5.3 ACIDRUST, 4.0, 0, 1, 0, 4.0 Aj dahitman, 1898.0, 19, 227, 2, 8.4 Aldizzl, 45.0, 0, 10, 0, 4.5 Areis21, 58.0, 0, 13, 2, 4.5 at43, 214.0, 1, 48, 1, 4.5 Ayala2012xTCU, 195.0, 0, 57, 1, 3.4 B O R Nx 25, 31.0, 0, 13, 1, 2.4 B r e e z yx60, 13.0, 0, 4, 0, 3.3 ...

Player,YDS,TD,CAR,FUM,YPC a toxic taz, 307.0, 4, 44, 0, 7.0 AbNL Boss, 509.0, 4, 174, 2, 2.9 AFFISHAUL, 190.0, 0, 35, 2, 5.4 AJ DA HITMAN, 1283.0, 19, 228, 6, 5.6 allen5422, 112.0, 2, 18, 0, 6.2 Allxdayxapx, 264.0, 1, 76, 2, 3.5 AlpHaaNike, 51.0, 1, 10, 1, 5.1 Aura Reflexx, 215.0, 1, 40, 0, 5.4 AWAKEN DA BEAST, -5.0, 0, 4, 1, -1.3 AxDub24, -3.0, 0, 2, 1, -1.5 Ayala2012xTCU, 568.0, 4, 173, 1, 3.3 BALLxXHAWKXx, 221.0, 1, 47, 2, 4.7 BANG FIGHTY007, 983.0, 6, 171, 3, 5.7 bang z ro, 29.0, 0, 9, 0, 3.2 BEARDOWN74, 567.0, 6, 104, 2, 5.5 ...

2条回答

网友

1楼 · 编辑于 2024-10-01 09:29:38

您可以尝试使用 Python 的 pandas 库，这似乎正是您需要的工具。读取数据时，可以使用 read_csv，然后创建三个 DataFrame（或者一个包含所有记录的 DataFrame），再对它们做进一步的操作。

例如，要查找重复项，可以使用 duplicated 函数，如 df[df.duplicated('Player')]。您可能还会用到许多描述性统计（descriptive statistics）函数，例如 max。建议查阅相关文档。

下面给出一个简单的示例（基于原帖中第四季和第五季的数据）：

import pandas as pd

if __name__ == '__main__':

    # Load each season's statistics into its own DataFrame.
    df_4 = pd.read_csv('season4.csv')
    df_5 = pd.read_csv('season5.csv')
    # Stack both seasons into one DataFrame; ignore_index=True renumbers
    # the rows consecutively instead of keeping each file's own index.
    df = pd.concat([df_4, df_5], ignore_index=True)
    # Show the first rows (at most 50) of the combined data.
    # A parenthesised single-argument print prints the same text on
    # Python 2 and is also valid on Python 3.
    print(df.head(50))

             Player   YDS  TD  CAR  FUM  YPC
0          49erswag    14   0    3    0  4.7
1     A Beast Playa    23   0    7    0  3.3
2    A Swanky Guy 2   154   1   29    2  5.3
3          ACIDRUST     4   0    1    0  4.0
4       Aj dahitman  1898  19  227    2  8.4
5           Aldizzl    45   0   10    0  4.5
6           Areis21    58   0   13    2  4.5
7              at43   214   1   48    1  4.5
8     Ayala2012xTCU   195   0   57    1  3.4
9       B O R Nx 25    31   0   13    1  2.4
10   B r e e z yx60    13   0    4    0  3.3
11      a toxic taz   307   4   44    0  7.0
12        AbNL Boss   509   4  174    2  2.9
13        AFFISHAUL   190   0   35    2  5.4
14     AJ DA HITMAN  1283  19  228    6  5.6
15        allen5422   112   2   18    0  6.2
16      Allxdayxapx   264   1   76    2  3.5
17       AlpHaaNike    51   1   10    1  5.1
18     Aura Reflexx   215   1   40    0  5.4
19  AWAKEN DA BEAST    -5   0    4    1 -1.3
20          AxDub24    -3   0    2    1 -1.5
21    Ayala2012xTCU   568   4  173    1  3.3
22     BALLxXHAWKXx   221   1   47    2  4.7
23   BANG FIGHTY007   983   6  171    3  5.7
24        bang z ro    29   0    9    0  3.2
25       BEARDOWN74   567   6  104    2  5.5 

    # Show the rows whose 'Player' value already appeared in an earlier row
    # (the first occurrence of each name is not flagged):
    print(df[df.duplicated('Player')])

           Player  YDS  TD  CAR  FUM  YPC
21  Ayala2012xTCU  568   4  173    1  3.3

    # Show the largest value in the 'YDS' column.
    # %s formatting keeps the output text identical on Python 2 and Python 3.
    print('Max YDS: %s' % df['YDS'].max())

Max YDS: 1898.0

希望有帮助。

网友

2楼 · 编辑于 2024-10-01 09:29:38

使用collections.defaultdict：

我不清楚每个字段的具体含义，所以这里只是把每个字段简单求和。请根据需要调整。

from collections import defaultdict
import csv

class PlayerStat(object):
    """Accumulator for one player's statistics.

    The five statistics (yds, car, td, fum, ypc) are stored as floats;
    ``count`` is stored exactly as given. ``+=`` adds another PlayerStat
    field by field, ``count`` included.
    """

    # Names of the float-valued attributes, in constructor-argument order.
    _NUMERIC_FIELDS = ('yds', 'car', 'td', 'fum', 'ypc')

    def __init__(self, yds=0, car=0, td=0, fum=0, ypc=0, count=0):
        """Convert each statistic with float() and keep ``count`` unchanged."""
        raw_values = (yds, car, td, fum, ypc)
        for field, raw in zip(self._NUMERIC_FIELDS, raw_values):
            setattr(self, field, float(raw))
        self.count = count

    def __iadd__(self, other):
        """Add every field of ``other`` onto this object and return ``self``."""
        for field in self._NUMERIC_FIELDS:
            setattr(self, field, getattr(self, field) + getattr(other, field))
        self.count += other.count
        return self

# Season files to merge; each must provide the Player, YDS, CAR, TD, FUM
# and YPC columns read below.
filenames = 'NewRush4.csv', 'NewRush5.csv', 'NewRush6.csv',
# Maps player name -> running PlayerStat total. A name seen for the first
# time starts from PlayerStat() with no arguments.
stats = defaultdict(PlayerStat)
for filename in filenames:
    with open(filename) as f:
        reader = csv.DictReader(f, delimiter=',')
        for row in reader:
            # count=1 marks one source row, so the total's count ends up as
            # the number of rows merged for that player.
            stat = PlayerStat(row['YDS'], row['CAR'], row['TD'], row['FUM'], row['YPC'], count=1)
            # Strip stray whitespace from the name so the same player always
            # lands on the same key, whichever file the row came from.
            stats[row['Player'].strip()] += stat

# Report players in ascending order of their summed yards.
for player in sorted(stats, key=lambda player: stats[player].yds):
    stat = stats[player]
    # A count of 1 means only one row contributed, so nothing was merged
    # for this player; leave them out of the report.
    if stat.count == 1:
        continue
    # A parenthesised single-argument print prints the same text on
    # Python 2 and is also valid on Python 3.
    print('{0:<20}{1.car:>10}{1.yds:>10}{1.td:>10}{1.fum:>10}{1.ypc:>10}'.format(player, stat))

相关问题更多 >

编程相关推荐

热门问题

热门文章