Python如何获取每个文件中唯一ID的数量，并将文件名和每个文件的唯一ID数量存储在CSV文件中？

import csv
import json
import os
import sys
from collections import Counter

# Shared state: filled by get_FilePathList() and consumed by open_FilePath().
FName = []      # base names of the matched files
UList = []      # unique-commenter count of each processed file, in order
TextFiles = []  # full paths of the matched files


def get_FilePathList(base_dir="/Users/ammcc/Desktop/"):
    """Record every file under *base_dir* whose name ends in ``chatinfo.txt``.

    Full paths are appended to ``TextFiles`` and base names to ``FName``.

    Args:
        base_dir: Directory tree to search; defaults to the original
            hard-coded desktop folder.
    """
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith("chatinfo.txt"):
                # os.walk descends into sub-directories, so the path has to be
                # built from ``root``; joining with the top-level folder points
                # at a file that does not exist for nested matches.
                TextFiles.append(os.path.join(root, file))
                FName.append(file)


def open_FilePath(output_path="Commenter.csv"):
    """Append one ``[filename, unique commenter count]`` row per input file.

    Each path in ``TextFiles`` is read as one JSON document per line. The
    distinct ``["commenter"]["_id"]`` values are counted per file; the count
    is appended to ``UList`` and written to the CSV.

    Args:
        output_path: CSV file to create or append to.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a ``commenter`` object has no ``_id`` field.
    """
    if not TextFiles:
        # Nothing to report; do not create an empty CSV.
        return

    # newline="" is what the csv module expects, otherwise blank rows can
    # appear between records on Windows.
    with open(output_path, "a", newline="") as csvfile:
        filewriter = csv.writer(
            csvfile,
            delimiter=",",
            quotechar="|",
            quoting=csv.QUOTE_MINIMAL,
        )
        for path in TextFiles:
            # A fresh set per file keeps IDs from leaking between files.
            commenter_ids = set()
            with open(path) as open_file:
                for line in open_file:
                    if not line.strip():
                        continue  # tolerate blank / trailing empty lines
                    jsondata = json.loads(line)
                    if "commenter" in jsondata:
                        commenter_ids.add(jsondata["commenter"]["_id"])

            unique_count = len(commenter_ids)
            UList.append(unique_count)
            # The name is derived from the path being processed. The earlier
            # version wrote a module-level ``filename`` that was never
            # updated, which is why the CSV rows came out wrong.
            filewriter.writerow([os.path.basename(path), unique_count])


## Call the functions
get_FilePathList()
open_FilePath()

import json
import os
import pandas as pd
import numpy as np
from collections import Counter

TextFiles = []  # full paths of the matched files
FName = []      # base names of the matched files
csv_rows = []   # one [filename, unique commenter count] row per file


def count_unique_commenters(filepath):
    """Return the number of distinct commenter IDs found in *filepath*.

    The file is read as one JSON document per line; blank lines are skipped.
    Only lines that contain a ``commenter`` field contribute, through their
    ``["commenter"]["_id"]`` value.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a ``commenter`` object has no ``_id`` field.
    """
    commenter_ids = set()
    with open(filepath) as open_file:
        for line in open_file:
            if not line.strip():
                continue
            jsondata = json.loads(line)
            if "commenter" in jsondata:
                commenter_ids.add(jsondata["commenter"]["_id"])
    return len(commenter_ids)


for root, dirs, files in os.walk("/Users/ammcc/Desktop/"):
    for file in files:
        if file.endswith("chatinfo.txt"):
            # Join with ``root``: os.walk also yields files from
            # sub-directories, and joining with the top-level folder would
            # produce a path that does not exist for those.
            filepath = os.path.join(root, file)
            TextFiles.append(filepath)
            FName.append(file)
            csv_rows.append([file, count_unique_commenters(filepath)])

df = pd.DataFrame(csv_rows, columns=['FileName', 'Unique_Commenters'])
df.to_csv('CommeterID.csv', index=False)

1条回答

网友

1楼 · 发布于 2024-10-02 20:42:56

试试这个：

import json
import os
import pandas as pd

TextFiles = []  # full paths of the matched files
FName = []      # base names of the matched files
csv_rows = []   # one [filename, unique commenter count] row per file


def count_unique_commenters(filepath):
    """Return the number of distinct commenter IDs found in *filepath*.

    The file is read as one JSON document per line; blank lines are skipped.
    Only lines that contain a ``commenter`` field contribute, through their
    ``["commenter"]["_id"]`` value. Collecting the IDs in a set makes repeated
    comments by the same commenter count once, instead of once per line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a ``commenter`` object has no ``_id`` field.
    """
    commenter_ids = set()
    with open(filepath) as open_file:
        ##Read that file line by line
        for line in open_file:
            if not line.strip():
                continue
            ##Parse the Json of the line into jsondata
            jsondata = json.loads(line)
            ## if the field commenter is found in jsondata
            if "commenter" in jsondata:
                commenter_ids.add(jsondata["commenter"]["_id"])
    return len(commenter_ids)


for root, dirs, files in os.walk("/Users/ammcc/Desktop/"):
    for file in files:
        ##Find File with specific ending
        if file.endswith("chatinfo.txt"):
            ##Get the File Path of the file. It is joined with ``root``
            ##because os.walk also yields files from sub-directories.
            filepath = os.path.join(root, file)
            ##Append all Filepaths of files ending in chatinfo.txt to TextFiles array/list
            TextFiles.append(filepath)
            ##Append all Filenames of files ending in chatinfo.txt to FName array/list
            FName.append(file)

            csv_rows.append([file, count_unique_commenters(filepath)])


df = pd.DataFrame(csv_rows, columns=['filename', 'n_commenters'])

df.to_csv('some_filename.csv', index=False)

相关问题更多 >

编程相关推荐

热门问题

热门文章