Python中的多线程文件传输？

import queue, threading, os, time
import shutil

bfr_drive = '/home/test_folder' # cache
ext = ".dat" # data file extension
array = 0 # simulated array as t0-t6
fileList = [] # list of files to be moved from cache to storage
destPath = '/home/test_folder/t'
fileQueue = queue.Queue()


class ThreadedCopy:
    """Copy every ``ext`` file found in ``bfr_drive`` to the targets t0..t6.

    Constructing an instance does all the work: it collects the file
    names, starts four daemon worker threads and blocks until the queue
    has been drained.
    """
    totalFiles = 0
    copyCount = 0
    array = 0
    lock = threading.Lock()

    def __init__(self):
        """Collect the files to copy, report the count and run the copy."""
        for file_name in os.listdir(bfr_drive):
            if file_name.endswith(ext):
                fileList.append(os.path.join(bfr_drive, file_name))
                fileList.sort()

        self.totalFiles = len(fileList)

        print (str(self.totalFiles) + " files to copy.")
        self.threadWorkerCopy(fileList)

    def CopyWorker(self):
        """Worker loop: take file names from the queue and copy them.

        The target directory is ``destPath`` followed by the current value
        of the module-level ``array``, which cycles through 0..6.
        """
        global array
        while True:
            fileName = fileQueue.get()
            # NOTE: ``array`` is read and updated here without holding a
            # lock, so concurrently running workers can race on it.
            shutil.copy(fileName, destPath+str(array))
            array += 1
            if array > 6:
                array = 0
            # NOTE: task_done() is called before the progress output below,
            # so fileQueue.join() in the main thread can return while this
            # worker is still printing.
            fileQueue.task_done()
            with self.lock:
                self.copyCount += 1
                percent = (self.copyCount * 100) / self.totalFiles
                print (str(percent) + " percent copied.")

    def threadWorkerCopy(self, fileNameList):
        """Start four daemon workers, queue every file name and wait.

        Args:
            fileNameList: iterable of file paths to put on the queue.
        """
        # global array
        for i in range(4):
            t = threading.Thread(target=self.CopyWorker)
            t.daemon = True
            t.start()
            # array += 1
            # if array > 6:
            #     array = 0
            print ("current array is:" + str(array)) # output prints array0 for 4 times, did not iterate
        for fileName in fileNameList:
            fileQueue.put(fileName)
        fileQueue.join()


ThreadedCopy()

1条回答

网友

1楼 · 发布于 2024-06-25 05:51:25

问题是您不会在工作线程中生成array的新值，而只是在threadWorkerCopy中创建线程时才生成。
结果将取决于系统上的实际时序。每个工作线程使用的都是它读取那一刻array的值。这次读取可能与threadWorkerCopy递增该值同时发生，也可能发生在其之后，因此文件可能被分散到不同的目录，也可能全部落在同一个目录中。

要为每个复制进程获取一个新的编号，必须在工作线程中增加array中的编号。在这种情况下，必须防止两个或多个线程同时访问array。您可以使用另一个锁来实现这一点

为了进行测试，我将目录列表替换为示例文件名的硬编码列表，并将复制替换为打印值

import queue, threading, os, time
import shutil

# Module-level configuration and shared state used by ThreadedCopy.
bfr_drive = '/home/test_folder' # cache directory that holds the source files
ext = ".dat" # data file extension
array = 0 # simulated array index; selects the target t0-t6
fileList = [] # list of files to be moved from cache to storage
destPath = '/home/test_folder/t' # target path prefix; the array index is appended to it
fileQueue = queue.Queue() # file names waiting to be processed by the worker threads


class ThreadedCopy:
    """Distribute files round-robin over the targets t0..t6 with worker threads.

    Constructing an instance does all the work: it fills the module-level
    ``fileList``, starts four daemon worker threads and blocks until every
    queued file name has been processed.

    In this test version the directory listing is replaced by a hard-coded
    list of names and the actual copy is replaced by a ``print``.
    """
    totalFiles = 0
    copyCount = 0
    # Not used by the methods below (they use the module-level ``array``);
    # kept so that existing references to ``ThreadedCopy.array`` still work.
    array = 0
    # Guards ``copyCount`` and keeps the progress lines consistent.
    lock = threading.Lock()
    # Guards the module-level ``array`` counter.
    lockArray = threading.Lock()

    def __init__(self):
        """Collect the file names, report how many there are and process them."""
        # directory listing replaced with hard-coded list for testing
        for file_name in [ 'foo.dat', 'bar.dat', 'baz.dat', 'a.dat', 'b.dat', 'c.dat', 'd.dat', 'e.dat', 'f.dat', 'g.dat' ] :
        #for file_name in os.listdir(bfr_drive):
            if file_name.endswith(ext):
                fileList.append(os.path.join(bfr_drive, file_name))

        # Sort once after collecting instead of after every append; the
        # resulting order is the same.
        fileList.sort()

        self.totalFiles = len(fileList)

        print (str(self.totalFiles) + " files to copy.")
        self.threadWorkerCopy(fileList)

    def CopyWorker(self):
        """Worker loop: take file names from ``fileQueue`` and process them.

        Each file gets the next target number from the module-level
        ``array`` counter, which cycles through 0..6. The counter is read
        and advanced under ``lockArray`` so that no two workers obtain the
        same number for consecutive files.

        The loop never returns; the thread is expected to be a daemon.
        """
        global array
        while True:
            fileName = fileQueue.get()
            try:
                # Take this file's target number and advance the counter
                # atomically with respect to the other workers.
                with self.lockArray:
                    myArray = array
                    array += 1
                    if array > 6:
                        array = 0

                # actual copying replaced with output for testing
                print('copying', fileName, destPath+str(myArray))
                #shutil.copy(fileName, destPath+str(myArray))

                with self.lock:
                    self.copyCount += 1
                    percent = (self.copyCount * 100) / self.totalFiles
                    print (str(percent) + " percent copied.")
            finally:
                # Kept as the last step so that fileQueue.join() in the main
                # thread cannot return before this worker's output is done,
                # and placed in ``finally`` so that an exception while
                # processing a file cannot leave join() waiting forever.
                fileQueue.task_done()

    def threadWorkerCopy(self, fileNameList):
        """Start four daemon workers, queue every file name and wait.

        Args:
            fileNameList: iterable of file paths to put on ``fileQueue``.

        Returns only after ``task_done()`` has been called for every
        queued item.
        """
        for _ in range(4):
            worker = threading.Thread(target=self.CopyWorker, daemon=True)
            worker.start()

        for fileName in fileNameList:
            fileQueue.put(fileName)
        fileQueue.join()

# Run the job: constructing ThreadedCopy starts the workers and blocks until the queue is drained.
ThreadedCopy()

这会打印如下内容（在不同运行之间可能会发生变化）：

10 files to copy.
copying /home/test_folder\a.dat /home/test_folder/t0
10.0 percent copied.
copying /home/test_folder\baz.dat /home/test_folder/t3
20.0 percent copied.
copying /home/test_folder\b.dat /home/test_folder/t1
copying /home/test_folder\c.dat /home/test_folder/t4
copying /home/test_folder\bar.dat /home/test_folder/t2
copying /home/test_folder\d.dat /home/test_folder/t5
30.0 percent copied.
copying /home/test_folder\e.dat /home/test_folder/t6
40.0 percent copied.
copying /home/test_folder\f.dat /home/test_folder/t0
50.0 percent copied.
copying /home/test_folder\foo.dat /home/test_folder/t1
60.0 percent copied.
copying /home/test_folder\g.dat /home/test_folder/t2
70.0 percent copied.
80.0 percent copied.
90.0 percent copied.
100.0 percent copied.

注意事项：

我将行fileQueue.task_done()移到了CopyWorker的末尾。否则，我不会得到所有百分比输出行，有时还会收到错误消息

Fatal Python error: could not acquire lock for <_io.BufferedWriter name='<stdout>'> at interpreter shutdown, possibly due to daemon threads

也许您应该在主线程结束之前等待所有工作线程的结束

我没有检查代码中是否还有其他错误

更改问题中的代码后编辑：

修改后的代码仍然存在一个问题：工作线程在调用fileQueue.task_done()之后还会执行一些输出，因此主线程可能在工作线程完成输出之前就结束。

修改后的代码在工作线程访问array时存在竞争条件，因此其行为可能不符合预期。

相关问题更多 >

编程相关推荐

热门问题

热门文章