我有以下代码:
# NOTE(review): this is a fragment from inside a method (it uses `self` and
# `return`); the enclosing `def` and the original indentation are not visible.
num_of_pages = 20
# num_of_workers is assigned here but not read anywhere in the visible lines.
num_of_workers = num_of_pages
# Page indices 0 .. num_of_pages - 1.
pages = [i for i in range(0, num_of_pages)]
# Accumulates the records returned by every page fetch.
datarALL = []
# The pool size is the CPU count, not num_of_workers.
with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
# Submit one getdata3 call per page; every call receives the same hed/data/apifolder/additional.
futh = [(executor.submit(self.getdata3, page, hed, data, apifolder,additional)) for page in pages]
# as_completed yields futures in completion order, so datarALL is not ordered by page.
# The loop variable rebinds the name `data`; all submit() calls above have already run by then.
for data in as_completed(futh):
# result() re-raises any exception raised inside the worker.
datarALL.extend(data.result())
# NOTE(review): presumably printed after the `with` block has exited — confirm against the original indentation.
print ("Finished generateing data.")
return datarALL
线程函数是:
def getdata3(self, page, hed, data, apifolder, additional, timeout=(10, 60)):
    """Fetch one page of records from the API, retrying on transient failures.

    Args:
        page: Zero-based page index; the request offset is
            ``page * self.config.page_limit``.
        hed: Headers passed to ``requests.get``.
        data: Request body passed to ``requests.get``.
        apifolder: Path segment inserted into the URL.
        additional: Extra query-string text appended to the URL.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.get``. Without a timeout a stalled connection blocks
            the calling worker thread indefinitely.

    Returns:
        The list found under the ``"results"`` key of the JSON response, or
        an empty list when that key is missing or empty, or when every
        attempt returned a non-200 status.

    Raises:
        ChunkedEncodingError, requests.exceptions.Timeout: re-raised when the
            final attempt fails with one of these errors.
    """
    tries = 10
    value_limit = self.config.page_limit  # limit of records allowed per page
    value_offset = page * value_limit
    # The URL does not change between attempts, so build it once.
    url = 'http://www.url.com/{2}?WithTotal=true&cultureid=2&offset={0}&limit={1}{3}'.format(
        value_offset, value_limit, apifolder, additional)
    datarALL = []

    for n in range(tries):
        try:
            print ("{3} Generate page: #{0} run #{1} with URL: {2}".format(page, n, url,str(datetime.now())))
            responsedata = requests.get(url, data=data, headers=hed, verify=False,
                                        timeout=timeout)
            if responsedata.status_code != 200:  # 200 for successful call
                print ("page {0} run #{1} faild. Returned status code {2}. Message is: {3}. Retry".format(page, n,responsedata.status_code, responsedata.text))
                continue

            jsondata = json.loads(responsedata.text)
            if "results" in jsondata and jsondata["results"]:
                datarALL.extend(jsondata["results"])
            # NOTE(review): a 200 response ends the retry loop even when it
            # carries no results — confirm against the original indentation.
            break
        except (ChunkedEncodingError, requests.exceptions.Timeout):
            print ("page #{0} run #{1} failed. Retry.".format(page, n))
            if n == tries - 1:
                print ("page {0} could not be imported. Max retried reached.".format(page))
                print("Unexpected error:", sys.exc_info()[0])
                # Bare raise keeps the original traceback intact.
                raise

    print ("{2} page {0} finished. Length is {1}".format(page,len(datarALL),str(datetime.now())))
    return datarALL
这将创建线程,并在所有线程完成后返回名为datarALL的列表
这意味着最后一次打印已完成,函数返回。
然而,在htop上,我看到:
在打印 "Finished generateing data." 之后,我不应该再看到这些线程,而应该只看到主进程。由于某些原因,并非所有线程都被关闭了。我不明白为什么。
需要说明的是,这种情况并不总是发生,有时一切正常。
我找不出是什么问题
但是,即使我在 Ubuntu 上杀死了该进程,htop 上的线程仍然如下所示(只有主进程被移除了):
只有运行
ps -ef | grep name | grep -v grep | awk '{print $2}' | xargs kill -9
才能清除这些残留的条目。
这是怎么回事
目前没有回答
相关问题 更多 >
编程相关推荐