我真的不明白发生了什么事。我刚才正在执行这段代码,它工作了,然后返回了一个错误
已编辑
代码从euronext.index()中获取1700个(或多或少)索引的列表。我认为问题在列表的长度上:对于较小的数字(小于60),它工作得很好。当我使用整个列表时,它会输出该错误。(我从Windows运行它)。
TrendReq是一个模块python -m pip install pytrends
,用于下载google趋势数据
import pandas as pd
import numpy as np
import multiprocessing as mp
from multiprocessing.pool import ThreadPool
from pytrends.request import TrendReq
import requests
from bs4 import BeautifulSoup
def index():
url = 'https://live.euronext.com/en/pd/data/stocks'
params = {
"mics": "ALXB,ALXL,ALXP,XPAR,XAMS,XBRU,XLIS,XMLI,MLXB,ENXB,ENXL,TNLA,TNLB,XLDN,XESM,XMSM,XATL,VPXB,XOSL,XOAS,MERK",
"display_datapoints": "dp_stocks",
"display_filters": "df_stocks"
}
data = {
"draw": "1",
"columns[0][data]": "0",
"columns[0][name]": "",
"columns[0][searchable]": "true",
"columns[0][orderable]": "true",
"columns[0][search][value]": "",
"columns[0][search][regex]": "false",
"columns[1][data]": "1",
"columns[1][name]": "",
"columns[1][searchable]": "true",
"columns[1][orderable]": "false",
"columns[1][search][value]": "",
"columns[1][search][regex]": "false",
"columns[2][data]": "2",
"columns[2][name]": "",
"columns[2][searchable]": "true",
"columns[2][orderable]": "false",
"columns[2][search][value]": "",
"columns[2][search][regex]": "false",
"columns[3][data]": "3",
"columns[3][name]": "",
"columns[3][searchable]": "true",
"columns[3][orderable]": "false",
"columns[3][search][value]": "",
"columns[3][search][regex]": "false",
"columns[4][data]": "4",
"columns[4][name]": "",
"columns[4][searchable]": "true",
"columns[4][orderable]": "false",
"columns[4][search][value]": "",
"columns[4][search][regex]": "false",
"columns[5][data]": "5",
"columns[5][name]": "",
"columns[5][searchable]": "true",
"columns[5][orderable]": "false",
"columns[5][search][value]": "",
"columns[5][search][regex]": "false",
"columns[6][data]": "6",
"columns[6][name]": "",
"columns[6][searchable]": "true",
"columns[6][orderable]": "false",
"columns[6][search][value]": "",
"columns[6][search][regex]": "false",
"order[0][column]": "0",
"order[0][dir]": "asc",
"start": "0",
"length": "100",
"search[value]": "",
"search[regex]": "false",
"iDisplayLength": "2000",
"iDisplayStart": "0",
"sSortDir_0": "asc"
}
r = requests.post(url, params=params, data=data).json()
allin = {'Names': []}
for x in r['aaData']:
soup = BeautifulSoup(x[0], 'lxml')
allin['Names'].append(soup.a.text)
df = pd.DataFrame(allin)
index_list = df["Names"].tolist()
return index_list
def collecting_data(index):
pytrend = TrendReq()
pytrend.build_payload(kw_list=[index])
interest_over_time_df = pytrend.interest_over_time()
try:
interest_over_time_df.columns = ["Score", "isPartial"]
returned_variable = interest_over_time_df.mean(axis = 0)[0]
except:
returned_variable = np.nan
print("[ ]", index, " - processed")
return (index, ("Score", returned_variable))
if __name__ == "__main__":
index_list = index()
col = ["Score"]
df = pd.DataFrame(index=index_list, columns=col)
pool_size = min(mp.cpu_count(), len(index_list))
pool = mp.Pool(pool_size)
for result in pool.imap_unordered(collecting_data, index_list):
index, values = result
col, value = values
df.at[index, col] = value
print(df)
pool.close()
它返回:
Exception in thread Thread-3:
Traceback (most recent call last):
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\pool.py", line 576, in _handle_results
task = get()
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
TypeError: __init__() missing 1 required positional argument: 'response'
代码没有结束。它就在那里
我切换到使用线程池进行诊断,并注意到我将看到:
我认为,这意味着你发出了太多的请求。对于每单位时间内可以发出多少请求,必须有一些限制。因此,我恢复到像以前一样使用处理池,但修改代码如下,以捕获429异常。正如您所看到的,我现在只收到429个异常,因为我可能在测试中发出了太多的请求。但您需要研究对发出请求的限制(并可能放弃多处理)
但几乎每个请求都以谷歌429错误结束:
相关问题 更多 >
编程相关推荐