如何从dataframe中的csv列表创建新的csv

import os
from collections import defaultdict

import pandas as pd

# Directory that holds the per-ticker CSV files (the trailing slash is relied
# on by the plain string concatenation below).
string = 'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/SentdexTutorial/stock_dfs/'

# File names found in the directory.
listdrs = os.listdir(string)
# Same entries, prefixed with the directory so they can be opened directly.
listdrs_path = [string + x for x in listdrs]
complete_string = ' is complete'
listdrs_confirmation = [x + complete_string for x in listdrs]

# Maps each date to the list of CSV paths whose row for that date passed the
# Entry filter. It must exist before the loop appends to it.
new_table = defaultdict(list)


def get_price_hist(ticker):
    """Load the CSV at the current ``file_path`` and index it by timestamp.

    NOTE(review): ``ticker`` is accepted but never used; the function reads
    the module-level ``file_path`` set by the loop below, so it is only
    meaningful while (or after) that loop runs. Nothing in this script calls
    it -- confirm whether it is still needed.

    Returns:
        The loaded DataFrame with its "date" column converted to
        ``pd.Timestamp`` and moved into the index.
    """
    data = pd.read_csv(file_path)
    # Convert date to timestamp and make it the index.
    data.index = data["date"].apply(lambda x: pd.Timestamp(x))
    data.drop("date", axis=1, inplace=True)
    return data


# For every CSV, keep the rows whose Entry exceeds 3 and record the file
# under each of the surviving dates.
for file_path in listdrs_path:
    data = pd.read_csv(file_path, index_col=0)
    data = data[data.Entry > 3]
    for date in data.date:
        new_table[date].append(file_path)

# Build the summary once, after all files are processed. DataFrame is a
# pandas constructor (pd.DataFrame), not an attribute of a DataFrame instance.
new_table_data = pd.DataFrame(
    [(k, ','.join(new_table[k])) for k in sorted(new_table)],
    columns=['date', 'table names'],
)

print(new_table_data)

2条回答

网友

1楼 · 编辑于 2024-10-02 10:22:16

我会这样做，您可以根据自己的需求修改下面的代码段：

import pandas as pd
from glob import glob
from collections import defaultdict

# Sample data: three small frames sharing the same 'date' values.
df1 = pd.DataFrame({'date': [1, 2, 3], 'entry': [4, 3, 2]})
df2 = pd.DataFrame({'date': [1, 2, 3], 'entry': [1, 2, 4]})
df3 = pd.DataFrame({'date': [1, 2, 3], 'entry': [3, 1, 5]})

# Persist each frame so the rest of the script can work from CSV files.
for frame, csv_name in ((df1, 'table1.csv'), (df2, 'table2.csv'), (df3, 'table3.csv')):
    frame.to_csv(csv_name)

# Every CSV in the current working directory is a candidate table.
tables = glob('*.csv')

# date -> names of the CSV files whose 'entry' for that date is above 2.
new_table = defaultdict(list)

for table in tables:
    df = pd.read_csv(table)
    # Keep only the rows that pass the threshold.
    df = df.loc[df['entry'] > 2]
    for date in df['date']:
        new_table[date].append(table)

# One output row per date, with the matching file names comma-joined.
rows = [(key, ','.join(new_table[key])) for key in sorted(new_table)]
new_table_df = pd.DataFrame(rows, columns=['date', 'table names'])
print(new_table_df)

   date            table names
0     1  table3.csv,table1.csv
1     2             table1.csv
2     3  table2.csv,table3.csv

网友

2楼 · 编辑于 2024-10-02 10:22:16

其他代码存在一些问题，下面是我最终想出的解决方案：

    if 'Entry' in data:
        # Keep only the rows whose Entry exceeds the threshold.
        data = data[data.Entry > 3]
        # The date column may be spelled 'date' or 'Date'; the lowercase
        # spelling wins when both are present.
        date_col = 'date' if 'date' in data else 'Date' if 'Date' in data else None
        if date_col is not None:
            # Walk dates and entries in lockstep instead of re-filtering the
            # frame for every date. This avoids calling int() on a Series
            # (deprecated in pandas, and an error when a date repeats).
            for date, entry in zip(data[date_col], data.Entry):
                # One single-row frame per (file, date) hit, grouped by date.
                new_table.setdefault(date, []).append(
                    pd.DataFrame({'FileName': [file_name], 'Entry': [int(entry)]}))

# sorted(new_table, key=lambda x: x[0])

def find_max(tbl):
    """Return, for each date, the file names holding the largest Entry.

    Args:
        tbl: Mapping of date -> list of DataFrames, each having 'FileName'
            and 'Entry' columns.

    Returns:
        A dict, filled in ascending date order, mapping each date to the list
        of FileName values whose Entry equals that date's maximum (ties are
        all kept).
    """
    winners = {}
    for day in sorted(tbl):
        # Stack all frames recorded for this date into one frame.
        combined = pd.concat(tbl[day])
        top_entry = max(combined['Entry'].tolist())
        is_top = combined['Entry'] == top_entry
        winners[day] = combined.loc[is_top, 'FileName'].tolist()
    return winners


# Reduce the per-date frames to {date: [file names with the highest Entry]}.
new_table_data = find_max(new_table)

# Earlier drafts wrote `new_table` straight to a CSV, or joined the names into
# a 'date' / 'table names' frame; both were dropped in favour of find_max.
print(new_table_data)

相关问题更多 >

编程相关推荐

热门问题

热门文章