我有许多文件(超过4000个)要同时加载到PostgreSQL中。我已经将它们分成4个不同的文件列表,我希望有一个线程遍历加载数据的每个列表。在
我的问题是我使用操作系统调用加载程序,但这会阻止其他线程同时运行。如果我使用子流程.Popen然后它们同时运行,但是线程认为它们已经完成了执行,所以请转到我脚本的下一部分。在
我这样做对吗?或者是否有更好的方法从线程中调用子进程。在
def thread1Load(self, thread1fileList):
connectionstring = settings.connectionstring
postgreshost = settings.postgreshost
postgresdatabase = settings.postgresdatabase
postgresport = settings.postgresport
postgresusername = settings.postgresusername
postgrespassword = settings.postgrespassword
tablename = None
encoding = None
connection = psycopg2.connect(connectionstring)
for filename in thread1fileList:
load_cmd = #load command
run = subprocess.Popen(load_cmd, shell=True)
print "finished loading thread 1"
def thread2Load(self, thread2fileList):
connectionstring = settings.connectionstring
postgreshost = settings.postgreshost
postgresdatabase = settings.postgresdatabase
postgresport = settings.postgresport
postgresusername = settings.postgresusername
postgrespassword = settings.postgrespassword
tablename = None
connection = psycopg2.connect(connectionstring)
for filename in thread2fileList:
load_cmd = #load command
run = subprocess.Popen(load_cmd, shell=True)
print "finished loading thread 2"
def thread3Load(self, thread3fileList):
connectionstring = settings.connectionstring
postgreshost = settings.postgreshost
postgresdatabase = settings.postgresdatabase
postgresport = settings.postgresport
postgresusername = settings.postgresusername
postgrespassword = settings.postgrespassword
tablename = None
connection = psycopg2.connect(connectionstring)
for shapefilename in thread3fileList:
load_cmd = #load command
run = subprocess.Popen(load_cmd, shell=True)
print "finished loading thread 3"
def thread4Load(self, thread4fileList):
connectionstring = settings.connectionstring
postgreshost = settings.postgreshost
postgresdatabase = settings.postgresdatabase
postgresport = settings.postgresport
postgresusername = settings.postgresusername
postgrespassword = settings.postgrespassword
tablename = None
connection = psycopg2.connect(connectionstring)
for filename in thread4fileList:
load_cmd = #load command
run = subprocess.Popen(load_cmd, shell=True)
print "finished loading thread 4"
def finishUp(self):
print 'finishing up'
def main():
load = Loader()
thread1 = threading.Thread(target=(load.thread1Load), args=(thread1fileList, ))
thread2 = threading.Thread(target=(load.thread2Load), args=(thread2fileList, ))
thread3 = threading.Thread(target=(load.thread3Load), args=(thread3fileList, ))
thread4 = threading.Thread(target=(load.thread4Load), args=(thread4fileList, ))
threads = [thread1, thread2, thread3, thread4]
for thread in threads:
thread.start()
thread.join()
load.finishUp(connectionstring)
if __name__ == '__main__':
main()
threadLoad
方法就足够了。这样,如果需要修改方法中的某些内容,则不需要在4个不同的位置进行相同的修改。在run.communicate()
来阻止,直到子进程完成。在从一个线程开始,然后阻塞,直到该线程结束,然后 启动另一个线程等:
相反,请先启动所有线程,然后连接所有线程:
^{2美元相关问题 更多 >
编程相关推荐