Python：检查列表中的文件是否存在，只有存在时才执行函数

# Code as posted in the question (Python 2).  Line breaks and indentation
# have been restored; the statements themselves are unchanged.
import os, csv

def chkifexists():
    # Looks for the known input files and returns the name of one together
    # with the two column numbers hard-coded for it.
    files = ['A.csv', 'B.csv', 'C.csv']
    for fname in files:
        if os.path.isfile(fname):
            if fname == "A.csv":
                hashcolumn = 7
                filepathNum = 5
            elif fname == "B.csv":
                hashcolumn = 15
                filepathNum = 5
            elif fname == "C.csv":
                hashcolumn = 1
                filepathNum = 0
            # NOTE(review): the original indentation was lost, so the depth
            # of this return is a reconstruction -- confirm against the
            # original post.
            return fname, hashcolumn, filepathNum

def removedupes(infile, outfile, hashcolumn):
    # Copies rows from infile to outfile, skipping rows whose value in
    # column `hashcolumn` has already been written.
    # NOTE(review): chkifexists() is called again here and its result
    # overwrites the hashcolumn argument.
    fname, hashcolumn, filepathNum = chkifexists()
    r1 = file(infile, 'rb')
    r2 = csv.reader(r1)
    w1 = file(outfile, 'wb')
    w2 = csv.writer(w1)
    hashes = set()
    for row in r2:
        # Rows with an empty hash field are always written.
        if row[hashcolumn] =="":
            w2.writerow(row)
            hashes.add(row[hashcolumn])
        if row[hashcolumn] not in hashes:
            w2.writerow(row)
            hashes.add(row[hashcolumn])
    w1.close()
    r1.close()

def bakcount(origfile1, origfile2):
    '''This function creates a .bak file of the original and does a row count to determine the number of rows removed'''
    os.rename(origfile1, origfile1+".bak")
    count1 = len(open(origfile1+".bak").readlines())
    #print count1
    # The de-duplicated file takes over the original file's name.
    os.rename(origfile2, origfile1)
    count2 = len(open(origfile1).readlines())
    #print count2
    print str(count1 - count2) + " duplicate rows removed from " + str(origfile1) +"!"

def CleanAndPrettify():
    # Entry point: de-duplicate the file reported by chkifexists() into
    # "<name>2.csv", then swap it in via bakcount().
    print "Removing duplicate rows from input files..."
    fname, hashcolumn, filepathNum = chkifexists()
    removedupes(fname, os.path.splitext(fname)[0] + "2.csv", hashcolumn)
    bakcount (fname, os.path.splitext(fname)[0] + "2.csv")

CleanAndPrettify()

3条回答

网友

1楼 · 编辑于 2024-10-01 07:41:38

你的代码有几个问题。

首先，chkifexists一旦找到一个存在的文件就会立即return，因此不会再检查其余的文件名；另外，如果没有找到任何文件，hashcolumn和filepathNum就不会被赋值，从而引发UnboundLocalError。

第二，chkifexists在两个地方被调用：分别来自removedupes和CleanAndPrettify。因此removedupes将为每个现有文件运行，而这并不是您想要的！事实上，既然CleanAndPrettify刚刚已经验证了该文件存在，removedupes只需直接使用传给它的参数即可。

至少有三种方法可以处理找不到文件的情况：让chkifexists引发异常；在CleanAndPrettify中设置一个标记来跟踪是否找到了文件；或者将chkifexists的结果转换为list，然后检查其是否为空。

在修改后的代码中，我将文件移到一个字典中，名称作为键，值为hashcolumn和filepathNum的元组。chkifexists现在接受这个字典作为参数，每找到一个存在的文件就yield对应的值；如果没有找到任何文件，则引发NoFilesFound异常。

代码如下：

import os, csv

# Per-file settings, kept in one place so they are easy to change.
# Each key is an input filename; each value is (hashcolumn, filepathNum).
files = {
    'A.csv': (7, 5),
    'B.csv': (15, 5),
    'C.csv': (1, 0),
}

class NoFilesFound(Exception):
    """No .csv files were found to clean up"""

def chkifexists(somefiles):
    """Yield the settings of every file in somefiles that exists on disk.

    somefiles maps a filename to a (hashcolumn, filepathNum) pair.  For each
    filename for which os.path.isfile() is true, the generator yields
    (fname, hashcolumn, filepathNum).

    Raises NoFilesFound once the mapping has been exhausted if none of the
    files existed.  Because this is a generator, that happens while the
    caller is iterating, not when chkifexists() is called.
    """
    found_count = 0
    for fname, settings in somefiles.items():
        hashcolumn, filepathNum = settings
        if not os.path.isfile(fname):
            continue
        found_count += 1
        yield fname, hashcolumn, filepathNum
    if found_count == 0:
        raise NoFilesFound

def removedupes(infile, outfile, hashcolumn, filepathNum):
    """Copy infile to outfile, dropping rows whose hash value was already seen.

    Rows are read with csv.reader and compared on row[hashcolumn].  The
    first row carrying a given value is written; later rows with the same
    value are skipped.  Rows whose hash field is the empty string are
    always written.

    infile      -- path of the CSV file to read
    outfile     -- path of the CSV file to write
    hashcolumn  -- index of the column to de-duplicate on
    filepathNum -- accepted so callers can pass the full settings tuple;
                   not used by this function

    Raises IndexError if a row has no column at index hashcolumn.
    """
    # Binary mode is what the Python 2 csv module expects.  The with-blocks
    # close both files even if reading or writing fails part-way through.
    with open(infile, 'rb') as src:
        with open(outfile, 'wb') as dst:
            writer = csv.writer(dst)
            seen = set()
            for row in csv.reader(src):
                key = row[hashcolumn]
                # Empty keys are never treated as duplicates of each other.
                if key == "" or key not in seen:
                    writer.writerow(row)
                    seen.add(key)


def bakcount(origfile1, origfile2):
    '''Back up the original file, swap in the de-duplicated one and report.

    origfile1 is renamed to origfile1 + ".bak", then origfile2 is renamed to
    origfile1.  The line counts of the backup and of the new origfile1 are
    compared and the difference is printed as the number of duplicate rows
    removed.

    origfile1 -- path of the original input file
    origfile2 -- path of the de-duplicated copy that replaces it
    '''
    def count_lines(path):
        # Stream the file rather than holding every line in memory, and
        # make sure the handle is closed as soon as counting is done.
        with open(path) as handle:
            return sum(1 for _ in handle)

    backup = origfile1 + ".bak"
    os.rename(origfile1, backup)
    count1 = count_lines(backup)

    os.rename(origfile2, origfile1)
    count2 = count_lines(origfile1)

    print(str(count1 - count2) + " duplicate rows removed from "
          + str(origfile1) + "!")


def CleanAndPrettify():
    """De-duplicate every known input file that exists on disk.

    For each (fname, hashcolumn, filepathNum) produced by
    chkifexists(files), the de-duplicated rows are written to
    "<name without extension>2.csv" by removedupes, and both names are then
    handed to bakcount.  If chkifexists raises NoFilesFound, a short notice
    is printed instead.
    """
    print("Removing duplicate rows from input files...")
    try:
        for fname, hashcolumn, filepathNum in chkifexists(files):
            deduped = os.path.splitext(fname)[0] + "2.csv"
            removedupes(fname, deduped, hashcolumn, filepathNum)
            bakcount(fname, deduped)
    except NoFilesFound:
        print("no files to clean up")

CleanAndPrettify()

无法测试，因为我没有A、B和C.csv文件，但希望这能让你找到正确的方向。如您所见，raise NoFilesFound方案使用标记（flag）方法来跟踪是否找到了文件；下面是list方法：

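# list method (reconstructed sketch): collect the generator's results in a
# list, then check whether the list is empty
def chkifexists(somefiles):
    for fname, (hashcolumn, filepathNum) in somefiles.items():
        if os.path.isfile(fname):
            yield fname, hashcolumn, filepathNum

def CleanAndPrettify():
    print "Removing duplicate rows from input files..."
    found_files = list(chkifexists(files))
    if not found_files:
        print "no files to clean up"
    else:
        for fname, hashcolumn, filepathNum in found_files:
            removedupes(
                   fname,
                   os.path.splitext(fname)[0] + "2.csv",
                   hashcolumn,
                   filepathNum,
                   )
            bakcount (fname, os.path.splitext(fname)[0] + "2.csv")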

网友

2楼 · 编辑于 2024-10-01 07:41:38

当然，它在第一个匹配之后就停止了，因为您在函数中执行了return。相反，您应该在循环中填充一个列表并在最后return，或者在每次迭代中使用yield创建一个生成器，并另外处理找不到任何文件的情况。第一种方法更简单，也更接近您的解决方案，代码如下：

import os, csv

def chkifexists():
    """Return the settings of every known input file that exists.

    The result is a list with one dict per existing file, in the order
    A.csv, B.csv, C.csv.  Each dict has the keys 'fname', 'hashcolumn' and
    'filepathNum'.  The list is empty when none of the files exist.
    """
    # (filename, hashcolumn, filepathNum) for each known file, in scan order
    known = [
        ('A.csv', 7, 5),
        ('B.csv', 15, 5),
        ('C.csv', 1, 0),
    ]
    found = []
    for fname, hashcolumn, filepathNum in known:
        if not os.path.isfile(fname):
            continue
        found.append(dict(fname=fname,
                          hashcolumn=hashcolumn,
                          filepathNum=filepathNum))
    return found

# List every input file that was found, or say so when there are none.
found = chkifexists()
if not found:
    print('No files to scan')
else:
    for f in found:
        # One line per file: fname, hashcolumn and filepathNum, separated
        # by single spaces.
        print('%s %s %s' % (f['fname'], f['hashcolumn'], f['filepathNum']))

网友

3楼 · 编辑于 2024-10-01 07:41:38

您使用的检查条件不是Python中比较两个字符串的推荐方式。除非显式地对字符串进行interning，否则不应使用is进行比较，因为无法保证它会返回True，请改用==。

或者，可以执行以下操作：

import os

# Names to look for, in the order they should be reported.
files = ['A.csv', 'B.csv', 'C.csv']

# Settings per filename: (hashcolumn, filepathNum).  The dict has to exist
# before entries can be assigned to it.
filedict = {}
filedict['A.csv'] = (7, 5)
filedict['B.csv'] = (15, 5)
filedict['C.csv'] = (1, 0)

# Keep only names that have settings and exist on disk.  `in` replaces the
# deprecated dict.has_key().
print([(fname, filedict[fname]) for fname in files
       if fname in filedict and os.path.isfile(fname)])

相关问题更多 >

编程相关推荐

热门问题

热门文章