使用CSV：按正确的顺序读写数据

import csv

# Script purpose: look up the tweet ID for every coded tweet in first20.csv
# (matching on tweet text against gg_originals.csv) and write a copy of
# first20.csv to test2.csv with the ID prepended as a new first column.

# creates empty dictionary in which to store tweetIDs and tweet text
originals_data = {}

# declares an empty list to hold tweet text from coded datafile
# will be used to compare against the dictionary created earlier
coded_data = []
coded_all = []  # for all, not just text

# list to hold the IDs belonging to coded tweets for the round
tweet_IDs_for_coded = []

with open('first20.csv', 'rt') as round_in, open('gg_originals.csv', 'rt') as original_in:
    # reader object for gg_originals
    readOrigin = csv.reader(original_in, delimiter=',')

    # adds values from .csv file into the dictionary (column 0 -> column 1)
    for row in readOrigin:
        originals_data[row[0]] = row[1]

    # reader object for round_x data
    readRound = csv.reader(round_in, delimiter=",")

    # appends the tweet text (first column) to a list; the header row is
    # appended too, because nothing skips it here
    for row in readRound:
        coded_data.append(row[0])

# NOTE(review): the outer loop walks the dictionary, so IDs are appended in
# the order the keys appear in originals_data, not in the order the tweets
# appear in coded_data. The write loop below pairs tweet_IDs_for_coded[i]
# with the i-th data row of first20.csv, so the two can fall out of step.
# iterates over id:text dictionary
for tweet_id in originals_data:
    # iterates over coded_data
    for tweet in coded_data:
        # When tweet in list matches text in dict, sends key to list
        if tweet == originals_data[tweet_id]:
            tweet_IDs_for_coded.append(tweet_id)

with open('first20.csv', 'rt') as round_in, open('test2.csv', 'wt') as output:
    # reader object for round_x data
    readRound = csv.reader(round_in, delimiter=",")

    # creates writer object to write new csv file with IDs
    writeNew = csv.writer(output, delimiter=",")

    # list that holds everything that's going into the csv file
    everything = []

    # sets row to equal a single row from round data (the header row)
    row = next(readRound)
    row.insert(0, 'ID')

    # appends ID and then all existing data to list of rows
    everything.append(row)
    for i, row in enumerate(readRound):
        everything.append([str(tweet_IDs_for_coded[i])] + row)

    writeNew.writerows(everything)

tweet_id_str,text
534974890168700930,abcd
534267820071084033,abce
539572102441877504,abcf
539973576108294145,abcg
529278820876943361,abch
529583601244176384,abci
535172191743397888,abcj
532195210059874304,abck
537812033895669760,abcl
,
,

1条回答

网友

1楼 · 发布于 2024-10-06 11:31:08

好吧，这段代码（我想）实现了你想要的功能。我之所以问你用的是什么操作系统，是因为在Windows上用wt模式写出的csv会多出空行（双倍行距），所以我不得不改用wb（这是Python 2的做法；在Python 3中应在open()里传入newline=''）。此外，在单元格A1中写入大写的“ID”会导致用Excel打开时出现文件类型识别问题。真是乐趣多多：）

我没有足够的时间既追查你代码中的错误又给出答案，所以我先把答案写了出来；如果有机会，我会回头指出你的代码是在哪里出现不同步的（这次让我分心的是：我以前从没在Excel里遇到过SYLK错误！）。

我把你的字典的键和值对调了：tweet文本本身成为dict的键，这样就不必再遍历字典了。这也意味着你只需要打开first20.csv一次。你原来的方法有点复杂。

import csv

# Build a tweet-text -> tweet-ID lookup from gg_originals.csv so that each
# coded tweet can be resolved with a single dictionary access.
# newline='' is what the csv module documents for any file handed to
# csv.reader / csv.writer; without it the writer emits an extra blank line
# after every row on Windows.
with open('gg_originals.csv', 'rt', newline='') as original_in:
    readOrigin = csv.reader(original_in, delimiter=',')
    # Rows with fewer than two cells (e.g. blank lines) cannot supply both
    # an ID and a text, so they are skipped instead of raising IndexError.
    originals_data = {row[1]: row[0] for row in readOrigin if len(row) > 1}

with open('first20.csv', 'rt', newline='') as round_in:
    input_data = csv.reader(round_in)
    # First column of every non-empty row, kept in file order.
    data_to_match = [row[0] for row in input_data if row]

# One output row per coded tweet, in first20.csv order: [text, ID].
# A text that has no entry in originals_data raises KeyError here.
compiled_list = [[item, originals_data[item]] for item in data_to_match]

with open('testoutput.csv', 'wt', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerows(compiled_list)

相关问题更多 >

编程相关推荐

热门问题

热门文章