如何使用列表目录和路径搜索优化搜索？

count = 0


def SearchFiles1(path):
    """Recursively count the files under ``path`` that have a wanted extension.

    Every regular file whose extension is exactly ``.ext1``, ``.ext2`` or
    ``.ext3`` increments the module-level ``count``. Sub-directories named
    ``UselessFolder1`` or ``UselessFolder2`` are not descended into.

    Args:
        path: Directory to search.
    """
    global count
    wanted_extensions = (".ext1", ".ext2", ".ext3")
    skipped_dirs = ("UselessFolder1", "UselessFolder2")
    for entry in os.listdir(path):
        subPath = os.path.join(path, entry)
        if os.path.isfile(subPath):
            # Exact extension match: a substring test would also accept
            # look-alikes such as ".ext10".
            if os.path.splitext(entry)[1] in wanted_extensions:
                count += 1
                # do stuff . . .
        elif os.path.isdir(subPath) and entry not in skipped_dirs:
            # Test the entry name only, so a matching fragment elsewhere in
            # the path (e.g. in the search root) does not block recursion.
            SearchFiles1(subPath)

2条回答

网友

1楼 · 编辑于 2024-06-02 18:02:24

因此，经过测试和与tdelaney的讨论，我优化了两种解决方案，如下所示：

import os

# Running total of matching files, updated by SearchFiles1 via `global`.
count = 0
# File extensions (including the leading dot) that count as a match.
target_files = {".ext1", ".ext2", ".ext3"}  # etc
# Names of directories that must not be descended into.
useless_dirs = {"UselessFolder1", "UselessFolder2"}  # etc
# This could instead be a whitelist (target_dirs): swap `in` and `not in`
# wherever the set is compared.

def SearchFiles1(path):
    """Recursively count matching files under ``path`` using ``os.listdir``.

    Each regular file whose extension is in the module-level ``target_files``
    increments the module-level ``count``. Sub-directories whose name is in
    the module-level ``useless_dirs`` are skipped entirely.

    Args:
        path: Directory to search.
    """
    global count
    for content in os.listdir(path):
        fullPath = os.path.join(path, content)
        if os.path.isfile(fullPath):
            if os.path.splitext(fullPath)[1] in target_files:
                count += 1
                # do stuff with 'fullPath' . . .
        elif os.path.isdir(fullPath) and content not in useless_dirs:
            # Compare the bare entry name: `useless_dirs` holds directory
            # names, so testing the joined path would never match and the
            # excluded folders would still be searched.
            SearchFiles1(fullPath)

def SearchFiles2(path):
    """Count matching files under ``path`` using ``os.walk``.

    A file matches when its extension is in the module-level
    ``target_files``. Directories whose name is in the module-level
    ``useless_dirs`` are pruned from the walk.

    Args:
        path: Directory to search.

    Returns:
        The number of matching files found.
    """
    matches = 0
    for folder, children, filenames in os.walk(path):
        # Edit the list in place: os.walk only descends into the names that
        # remain in it.
        skipped = set(children) & useless_dirs
        children[:] = [child for child in children if child not in skipped]
        for filename in filenames:
            if os.path.splitext(filename)[1] not in target_files:
                continue
            matches += 1
            fullPath = os.path.join(folder, filename)
            # do stuff with 'fullPath' . . .
    return matches

它在 Mac 和 PC 上的 Python 2.7.5 中运行良好。

在速度方面，这两种方案完全相当。

网友

2楼 · 编辑于 2024-06-02 18:02:24

您的第一个解决方案是合理的，不过可以改用 os.path.splitext 来提取扩展名。第二个解决方案则不正确，因为您重复访问了每个子目录的文件列表，而不是只处理一次。使用 os.walk 的诀窍在于：从 subdirs 中原地删除的目录不会出现在后续的枚举中。

def SearchFiles2(path):
    """Walk ``path``, print each non-excluded file and return how many there are.

    Files whose extension is ``.ext1`` or ``.ext2`` are ignored, and
    directories named ``UselessFolder1`` or ``UselessFolder2`` are not
    descended into. Each remaining file is printed with its ``os.stat``
    result.

    Args:
        path: Directory to search.

    Returns:
        The number of files that were not excluded.
    """
    useless_dirs = {"UselessFolder1", "UselessFolder2"}
    useless_files = {".ext1", ".ext2"}
    count = 0
    for dirpath, subdirs, files in os.walk(path):
        # Remove unwanted subdirs in place so os.walk skips them on the
        # following iterations.
        for name in set(subdirs) & useless_dirs:
            subdirs.remove(name)
        # list of interesting files
        myfiles = [
            os.path.join(dirpath, name)
            for name in files
            if os.path.splitext(name)[1] not in useless_files
        ]
        count += len(myfiles)
        for filepath in myfiles:
            # example shows file stats
            print(filepath, os.stat(filepath))
    return count

对单个存储设备的枚举速度存在上限，无法再明显提升。加快速度的最佳方法是在不同的线程中分别枚举不同的存储设备。

相关问题更多 >

编程相关推荐

热门问题

热门文章