如何从dataframe中的csv列表创建新的csv

2024-10-02 10:22:16 发布

您现在位置:Python中文网/ 问答频道 /正文

我知道我的代码不太正确,但我正在尝试逐个遍历csv列表,以创建一个新的csv,其中每行列出满足条件的所有csv。所有csv的第一列都是“日期”,我想列出在该日期满足data["entry"] > 3的所有csv的名称,并且日期仍然作为第一列

更新:我试图为每个csv创建一个新列表,列出满足条件的每个日期,并在新csv中把文件名附加到这些日期对应的行

# Collect the names of every entry in the stock_dfs directory.
listdrs = os.listdir('c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/SentdexTutorial/stock_dfs/')

# Prefix each name with the directory so it can be opened directly.
string = 'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/SentdexTutorial/stock_dfs/'
listdrs_path = [ string + x for x in listdrs]
complete_string = ' is complete'
# "<name> is complete" messages, one per entry; not used anywhere in this snippet.
listdrs_confirmation = [ x + complete_string for x in listdrs]
#print (listdrs_path)

# For each file path: load the CSV, keep the rows with Entry > 3, and record
# the file path under every date that remains.
for file_path in listdrs_path:

    # NOTE(review): index_col=0 turns the first CSV column into the index. The
    # question says that column is the date, in which case `data.date` below
    # would no longer be a column -- confirm against the real files.
    data = pd.read_csv(file_path, index_col=0)


    ########################################
    # Function 1.
    # NOTE(review): this helper is redefined on every loop iteration and is
    # never called in this snippet; its `ticker` parameter is unused.
    def get_price_hist(ticker):


        # Put stock price data in dataframe
        data = pd.read_csv(file_path)
        #listdr = os.listdir('Users\17409\AppData\Local\Programs\Python\Python38\Indicators\Sentdex Tutorial\stock_dfs')
        ##print(listdr)
        # Convert date to timestamp and make index
        data.index = data["date"].apply(lambda x: pd.Timestamp(x))
        data.drop("date", axis=1, inplace=True)

        return data
    # Create the new table and append data: keep rows whose Entry exceeds 3,
    # then file this path under each remaining date.
    # NOTE(review): `new_table` is not defined anywhere in this snippet; it
    # presumably needs to be a dict of lists created before the loop.
    data = data[data.Entry > 3]
    for date in data.date:
        new_table[date].append(file_path)


# Build one row per date (sorted), joining the collected paths with commas.
# NOTE(review): `data` here is the last frame left over from the loop;
# `pd.DataFrame` was presumably intended instead of `data.DataFrame`.
new_table_data = data.DataFrame([(k, ','.join(new_table[k])) for k in sorted(new_table.keys())], columns=['date', 'table names'])
print(new_table_data)




Illustration of tables before and after


Tags: csv path in new for data date string
2条回答

我会这样做。您需要根据需要修改以下代码段

import pandas as pd
from glob import glob
from collections import defaultdict

# Sample data: three small tables with identical dates but different entries.
df1 = pd.DataFrame({'date': [1, 2, 3], 'entry': [4, 3, 2]})
df2 = pd.DataFrame({'date': [1, 2, 3], 'entry': [1, 2, 4]})
df3 = pd.DataFrame({'date': [1, 2, 3], 'entry': [3, 1, 5]})

# Persist each sample frame to its own CSV in the working directory.
for csv_name, frame in (('table1.csv', df1), ('table2.csv', df2), ('table3.csv', df3)):
    frame.to_csv(csv_name)

# Every CSV in the working directory is treated as an input table.
tables = glob('*.csv')

# Maps a date to the list of CSV names whose entry passes the threshold on that date.
new_table = defaultdict(list)

for table in tables:
    df = pd.read_csv(table)
    # Keep only the rows whose entry is greater than 2.
    df = df.loc[df['entry'] > 2]
    for date in df['date']:
        new_table[date].append(table)

# One output row per date, in ascending date order, with the matching
# table names joined by commas.
rows = [(key, ','.join(new_table[key])) for key in sorted(new_table)]
new_table_df = pd.DataFrame(rows, columns=['date', 'table names'])
print(new_table_df)

   date            table names
0     1  table3.csv,table1.csv
1     2             table1.csv
2     3  table2.csv,table3.csv

其他代码有一些问题,下面是我能够想出的最终解决方案

    # Record, for the current file, one single-row frame (FileName, Entry) under
    # every date whose Entry exceeds 3. Files without an 'Entry' column, or
    # without a 'date'/'Date' column, contribute nothing.
    if 'Entry' in data:
        # Keep only the rows that satisfy the entry threshold.
        data = data[data.Entry > 3]
        # The date column is spelled 'date' in some files and 'Date' in others;
        # check the lowercase spelling first, matching the original branch order.
        date_col = next((col for col in ('date', 'Date') if col in data), None)
        if date_col is not None:
            # Walk the rows pairwise so that every row contributes its own Entry
            # value. Looking the value up again with a date filter and calling
            # int() on the resulting Series fails when a date occurs more than
            # once in a file, and is deprecated even for a single-row Series.
            for date, entry in zip(data[date_col], data['Entry']):
                new_table.setdefault(date, []).append(
                    pd.DataFrame({'FileName': [file_name], 'Entry': [int(entry)]}))


# sorted(new_table, key=lambda x: x[0])

def find_max(tbl):
    """Return, for each date, the file names that hold the largest Entry.

    Args:
        tbl: Mapping of date -> list of DataFrames, each with ``FileName``
            and ``Entry`` columns.

    Returns:
        A dict keyed by date (inserted in ascending date order) whose values
        are lists of the ``FileName`` values tied for the maximum ``Entry``
        on that date.
    """
    winners = {}
    for day in sorted(tbl):
        # Stack every frame recorded for this date into a single frame.
        combined = pd.concat(tbl[day])
        best = max(combined['Entry'])
        # Keep every file name tied for the top entry, in row order.
        winners[day] = [
            name
            for name, entry in zip(combined['FileName'], combined['Entry'])
            if entry == best
        ]
    return winners


# Reduce the per-date lists of frames in new_table to the file names
# returned by find_max.
new_table_data = find_max(tbl=new_table)

# Earlier attempts, left disabled:
#df = pd.DataFrame(new_table, columns =['date', 'tickers'])

#df.to_csv(input_path, index = False, header = True)
# find_max(new_table)
# new_table_data = pd.DataFrame([(k, ','.join(new_table[k])) for k in sorted(new_table.keys())],
#                               columns=['date', 'table names'])
# Show the resulting {date: [file names]} mapping.
print(new_table_data)

相关问题 更多 >

    热门问题