有没有办法读取和更改PyCharm中巨大csv文件的内容？

def reading(csv_input):
    """Filter a web-access CSV report and write the kept rows to a new CSV.

    The input is read in chunks of 100000 rows. A row is kept when its
    ``Action`` value equals 0 and its ``Site Name`` contains none of the
    substrings ``dbk``, ``ptc``, ``wcf`` or ``google``; every other row is
    counted as dropped. Kept rows are written to ``Web Report <csv_input>``
    with the columns ``Index, URL Category, User IP, URL``, and a short
    summary is printed at the end.

    Args:
        csv_input: Path of the CSV file to read. The same string is used to
            build the output file name.

    Returns:
        A DataFrame with every input row, restricted to the report columns.
        NOTE(review): returning the full frame means all chunks stay in
        memory; if callers do not need it, dropping the return value would
        make the function truly streaming.
    """
    columns = ['Line', 'Date', 'Hour', 'User Name', 'User IP', 'Site Name',
               'URL Category', 'Action', 'Action Description']
    ignored_sites = ('dbk', 'ptc', 'wcf', 'google')

    original_length = 0
    row_count = 0
    drop_count = 0
    chunks = []

    # newline='' lets the csv module control the line endings itself.
    with open(f'Web Report {csv_input}', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Index', 'URL Category', 'User IP', 'URL'])

        # With chunksize set, read_csv returns an iterator of DataFrames, so
        # the chunks have to be consumed in a loop.
        for chunk in pd.read_csv(csv_input, chunksize=100000):
            # Same column selection the original DataFrame(...) call asked for.
            chunk = chunk.reindex(columns=columns)
            chunks.append(chunk)
            original_length += len(chunk.index)

            # The row index keeps counting across chunks, so `index` is the
            # row's position in the whole file, not within the chunk.
            rows = zip(chunk.index, chunk['Action'], chunk['Site Name'],
                       chunk['URL Category'], chunk['User IP'])
            for index, action, site_name, category, user_ip in rows:
                if action != 0:
                    drop_count += 1
                    continue
                # A missing site name is read as NaN (a float); it cannot
                # match any ignored substring, so such a row is kept.
                if isinstance(site_name, str) and any(
                        tag in site_name for tag in ignored_sites):
                    drop_count += 1
                    continue
                writer.writerow([index, category, user_ip, site_name])
                row_count += 1

    if chunks:
        df = pd.concat(chunks)
    else:
        df = pd.DataFrame(columns=columns)

    print("Input: " + str(csv_input))
    print("Output: " + str(file.name))
    print("Original Length: " + str(original_length))
    print("Current Length: " + str(row_count))
    print("Drop Count: " + str(drop_count) + "\n")
    return df

1条回答

网友

1楼 · 发布于 2024-09-29 21:37:29

如果您使用csv模块来写入文件，那么也可以用它来逐行读取

import csv

# Stream input.csv to output.csv row by row, keeping only the rows whose
# first column holds an even integer.
# Both files are opened with newline='' as the csv module requires; without
# it an extra '\r' is written on platforms that use '\r\n' line endings.
with open('input.csv', newline='') as infile, \
        open('output.csv', 'w', newline='') as outfile:
    csv_reader = csv.reader(infile)
    csv_writer = csv.writer(outfile)

    # copy headers (an empty input file has none, so nothing is written)
    headers = next(csv_reader, None)
    if headers is not None:
        csv_writer.writerow(headers)

    # process rows
    for row in csv_reader:  # read row by row
        # the reader yields an empty list for a blank line; skip it
        if not row:
            continue
        # keep only rows with even index
        if int(row[0]) % 2 == 0:
            print(' - row  -')
            print(row)
            csv_writer.writerow(row)

如果要让pandas按块（chunksize）读取文件，则需要用for循环逐块处理。
使用pandas写入时，第一块以写入模式创建文件并写出表头，之后的每一块都要以追加（append）模式写入，并且不再写表头。

import pandas as pd

# Stream input.csv to output.csv chunk by chunk, keeping only the rows whose
# index is even. The first write creates the file with headers; every later
# write appends without them.
first = True
# chunksize=1 reads row by row; a larger value is much faster on big files
# and works unchanged because the filter below is a per-row mask.
for df in pd.read_csv('input.csv', chunksize=1):
    # keep only rows with even index (the index keeps counting across chunks)
    even_rows = df[df.index % 2 == 0]
    if even_rows.empty:
        continue
    print(' - row  -')
    print(even_rows)
    if first:
        # create new file with headers
        even_rows.to_csv('output.csv', mode='w')
        first = False
    else:
        # append to existing file without headers
        even_rows.to_csv('output.csv', mode='a', header=False)

最小工作代码

import pandas as pd
import csv

#  - create some data  -

data = {
    'A': range(0, 10),
    'B': range(10, 20),
    'C': range(20, 30),
}  # columns

df = pd.DataFrame(data)
df.to_csv('input.csv', index=False)

#  - read and write with `pandas`  -

first = True
# chunksize=1 reads row by row; a larger value is much faster on big files
# and works unchanged because the filter below is a per-row mask.
for df in pd.read_csv('input.csv', chunksize=1):
    # keep only rows with even index (the index keeps counting across chunks)
    even_rows = df[df.index % 2 == 0]
    if even_rows.empty:
        continue
    print(' - row  -')
    print(even_rows)
    if first:
        # create new file with headers
        even_rows.to_csv('output_pandas.csv', mode='w')
        first = False
    else:
        # append to existing file without headers
        even_rows.to_csv('output_pandas.csv', mode='a', header=False)

#  - read and write with `csv`  -

# Both files are opened with newline='' as the csv module requires; without
# it an extra '\r' is written on platforms that use '\r\n' line endings.
with open('input.csv', newline='') as infile, \
        open('output.csv', 'w', newline='') as outfile:
    csv_reader = csv.reader(infile)
    csv_writer = csv.writer(outfile)

    # copy headers (an empty input file has none, so nothing is written)
    headers = next(csv_reader, None)
    if headers is not None:
        csv_writer.writerow(headers)

    # process rows
    for row in csv_reader:
        # the reader yields an empty list for a blank line; skip it
        if not row:
            continue
        # keep only rows with even index
        if int(row[0]) % 2 == 0:
            print(' - row  -')
            print(row)
            csv_writer.writerow(row)

文档：read_csv()，to_csv()

相关问题更多 >

编程相关推荐

热门问题

热门文章