Python如何获取每个文件中唯一ID的数量,并将文件名和每个文件的唯一ID存储在CSV文件中?

2024-10-02 20:42:56 发布

您现在位置:Python中文网/ 问答频道 /正文

我在一个项目上的工作,以计数聊天中唯一的评论者,并存储在一个csv文件中的文件名和该聊天的评论者人数为每个文件。但是,我现在的代码是打开所有文档,并计算多个文件中的所有注释者。因此,它不是为每个文件获取单个唯一的评论者,而是计算多个文件中的所有评论者。所有文件中都有10个唯一的注释者,但是,我需要能够看到每个文件的唯一注释者的数量,并将这些数据存储在csv文件中(请参见csv文件图片的所需输出)。我觉得我很接近,但我被卡住了。有没有人能帮助解决这个问题,或者建议其他方法?你知道吗

import os, sys, json
from collections import Counter
import csv
filename=""
filepath = ""
jsondata = ""
dictjson = ""
commenterid = []
FName = []
UList = []
TextFiles = []
UCommenter = 0


def get_FilePathList():
    for root, dirs, files in os.walk("/Users/ammcc/Desktop/"):
        for file in files:
            ##Find File with specific ending
            if file.endswith("chatinfo.txt"):
                path = "/Users/ammcc/Desktop/"
                ##Get the File Path of the file
                filepath = os.path.join(path,file)
                ##Get the Filename of the file ending in chatinfo.txt
                head, filename = os.path.split(filepath)
                ##Append all Filepaths of files ending in chatinfo.txt to TextFiles array/list
                TextFiles.append(filepath)
                ##Append all Filenames of files ending in chatinfo.txt to FName array/list
                FName.append(filename)

def open_FilePath():
     for x in TextFiles:
            ##Open each filepath in TextFiles one by one
            open_file = open(x)
            ##Read that file line by line
            for line in open_file:
                    ##Parse the Json of the file into jsondata
                    jsondata = json.loads(line)
                    ##Line not needed but, Parse the Json of the file into dictjson as Dictionary
                    dictjson = json.dumps(jsondata)

                    ## if the field commenter is found in jsondata
                    if "commenter" in jsondata:
                                    ##Then, append the field  ["commenter"]["_id"] **(nested value in the json)** into list commenterid
                                    commenterid.append(jsondata["commenter"]["_id"])
                                    ##Get and count the unique ids for the commenter
                                    Ucommenter = (len(set(commenterid)))
                                    ##Appended that unique count in UList
                                    UList.append(Ucommenter)


                                    ## create or append to the Commenter.csv file
                                    with open('Commenter.csv', 'a') as csvfile:
                                            filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
                                            ##Write the individual filename and the unique commenters for that file
                                            filewriter.writerow([filename, Ucommenter])
            commenterid.clear()

                                            ##Issue: Not counting the commenters for each file and storing the filename and its specific number of commneters in csv.
                                            ##the cvs is being created but the rows in the csv is not generating correctly.

##Call the functions                                                     
get_FilePathList()
open_FilePath()

csv文件中的当前输出

enter image description here

csv文件的所需输出

enter image description here

建议后输出

enter image description here

Nemanja Radojković溶液后的输出和代码:

**正确的输出格式,但仍不计算每个文件的唯一注释者。你知道吗

enter image description here

import json, os
import pandas as pd
import numpy as np
from collections import Counter
TextFiles = []
FName = []
csv_rows = []
commenterid = []
unique_id = []
NC = []
for root, dirs, files in os.walk("/Users/ammcc/Desktop/"):
 for file in files:
     if file.endswith("chatinfo.txt"):
         path = "/Users/ammcc/Desktop/"
         filepath = os.path.join(path,file)
         head, filename = os.path.split(filepath)
         TextFiles.append(filepath)
         FName.append(filename)

         n_commenters = 0
         with open(filepath) as open_file:
             for line in open_file:
                 jsondata = json.loads(line)
                 if "commenter" in jsondata:
                     commenterid.append(jsondata["commenter"]["_id"])

                     list_set = set(commenterid)
                     unique_list = (list(list_set))

                 for x in list_set:
                     n_commenters += 1

                     commenterid.clear()
             csv_rows.append([filename, n_commenters])
df = pd.DataFrame(csv_rows, columns=['FileName', 'Unique_Commenters'])
df.to_csv('CommeterID.csv', index=False)

Tags: 文件csvthepathinforosopen
1条回答
网友
1楼 · 发布于 2024-10-02 20:42:56

试试这个:

import json
import os
import pandas as pd

TextFiles = []
FName = []
csv_rows = []

for root, dirs, files in os.walk("/Users/ammcc/Desktop/"):

    for file in files:
        ##Find File with specific ending
        if file.endswith("chatinfo.txt"):

            path = "/Users/ammcc/Desktop/"
            ##Get the File Path of the file
            filepath = os.path.join(path,file)
            ##Get the Filename of the file ending in chatinfo.txt
            head, filename = os.path.split(filepath)
            ##Append all Filepaths of files ending in chatinfo.txt to TextFiles array/list
            TextFiles.append(filepath)
            ##Append all Filenames of files ending in chatinfo.txt to FName array/list
            FName.append(filename)

            n_commenters = 0

            with open(filepath) as open_file:
                ##Read that file line by line


                for line in open_file:
                        ##Parse the Json of the file into jsondata
                        jsondata = json.loads(line)

                        ## if the field commenter is found in jsondata
                        if "commenter" in jsondata:

                            n_commenters += 1

            csv_rows.append([filename, n_commenters])


df = pd.DataFrame(csv_rows, columns=['filename', 'n_commenters'])

df.to_csv('some_filename.csv', index=False)

相关问题 更多 >