我特别需要帮助理解为什么这段代码一分钟有效,下一分钟无效那么,有人能帮我解释一下为什么会发生这种情况吗我运行下面的代码只是为了从Yahoo Finance中提取一些数据。我不时得到以下set_indexKeyError,其中引用了3条独立但相似的行(确切地说是55、93和131,我在其旁边添加了注释):
File "/Users/daniel/Desktop/CG.py", line 55, in <module>
df_balancesheet = df_balancesheet.set_index('0', drop=True, append=False,inplace=False, verify_integrity=False)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/frame.py", line 4303, in set_index
raise KeyError(f"None of {missing} are in the columns")
KeyError: "None of ['0'] are in the columns"
这个错误可能会连续发生15次,然后突然代码正常工作并运行……直到它不正常为止。下面是我正在运行的大部分代码:
from datetime import datetime
import lxml
from lxml import html
import requests
import numpy as np
import pandas as pd
from urllib.request import urlopen as ur
from bs4 import BeautifulSoup as soup
print('Running Screen...')
collection = ['ar=180','r=21&ar=180','r=41&ar=180','r=61&ar=180','r=81&ar=180','r=101&ar=180','r=121&ar=180','r=141&ar=180','r=181&ar=180','r=201&ar=180','r=221&ar=180','r=241&ar=180']
for url in collection: #scrape multiple pages
my_url = 'https://finviz.com/screener.ashx?v=141&f=cap_smallover,fa_eps5years_pos,fa_grossmargin_o10,fa_netmargin_pos,fa_opermargin_pos,fa_sales5years_o5,geo_usa,sh_avgvol_o1000,ta_beta_o0.5&ft=2&' + str(url)
uClient = ur(my_url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, "html.parser") #find ticker
stock_tickers = page_soup.findAll("a", {"class":"screener-link-primary"})
for tickers in stock_tickers: #find all of the tickers
ticker = tickers.text
collection = [ticker]
for url in collection: #scrape multiple pages
#balance sheet data
my_url1 = 'https://finance.yahoo.com/quote/' + str(url) + '/balance-sheet?p=' + str(url)
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.9',
'Cache-Control': 'max-age=0',
'Pragma': 'no-cache',
'Referrer': 'https://google.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
}
page = requests.get(my_url1, headers)
tree = html.fromstring(page.content)
tree.xpath("//h1/text()")
table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
#assert len(table_rows) > 0
parsed_rows = []
for table_row in table_rows:
parsed_row = []
el = table_row.xpath("./div")
none_count = 0
for rs in el:
try:
(text,) = rs.xpath('.//span/text()[1]')
parsed_row.append(text)
except ValueError:
parsed_row.append(np.NaN)
none_count += 1
if (none_count < 4):
parsed_rows.append(parsed_row)
df_balancesheet = pd.DataFrame(parsed_rows)
df_balancesheet = pd.DataFrame(parsed_rows)
df_balancesheet = df_balancesheet.set_index('0', drop=True, append=False,inplace=False, verify_integrity=False) ***#ERROR LINE 1***
df_balancesheet = df_balancesheet.transpose()
cols = list(df_balancesheet.columns)
cols[0] = 'Date'
df_balancesheet = df_balancesheet.set_axis(cols, axis='columns', inplace=False)
numeric_columns = list(df_balancesheet.columns)[1::]
#income statement data
my_url2 = 'https://finance.yahoo.com/quote/' + str(url) + '/financials?p=' + str(url)
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.9',
'Cache-Control': 'max-age=0',
'Pragma': 'no-cache',
'Referrer': 'https://google.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
}
page = requests.get(my_url2, headers)
tree = html.fromstring(page.content)
tree.xpath("//h1/text()")
table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
#assert len(table_rows) > 0
parsed_rows = []
for table_row in table_rows:
parsed_row = []
el = table_row.xpath("./div")
none_count = 0
for rs in el:
try:
(text,) = rs.xpath('.//span/text()[1]')
parsed_row.append(text)
except ValueError:
parsed_row.append(np.NaN)
none_count += 1
if (none_count < 4):
parsed_rows.append(parsed_row)
df_incomestatement = pd.DataFrame(parsed_rows)
df_incomestatement = pd.DataFrame(parsed_rows)
df_incomestatement = df_incomestatement.set_index('0', drop=True, append=False,inplace=False, verify_integrity=False) ***#ERROR LINE 2***
df_incomestatement = df_incomestatement.transpose()
cols = list(df_incomestatement.columns)
cols[0] = 'Date'
df_incomestatement = df_incomestatement.set_axis(cols, axis='columns', inplace=False)
numeric_columns = list(df_incomestatement.columns)[1::]
# cash flow data
my_url3 = 'https://finance.yahoo.com/quote/' + str(url) + '/cash-flow?p=' + str(url)
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.9',
'Cache-Control': 'max-age=0',
'Pragma': 'no-cache',
'Referrer': 'https://google.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
}
page = requests.get(my_url3, headers)
tree = html.fromstring(page.content)
tree.xpath("//h1/text()")
table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
#assert len(table_rows) > 0
parsed_rows = []
for table_row in table_rows:
parsed_row = []
el = table_row.xpath("./div")
none_count = 0
for rs in el:
try:
(text,) = rs.xpath('.//span/text()[1]')
parsed_row.append(text)
except ValueError:
parsed_row.append(np.NaN)
none_count += 1
if (none_count < 4):
parsed_rows.append(parsed_row)
df_cashflow = pd.DataFrame(parsed_rows)
df_cashflow = pd.DataFrame(parsed_rows)
df_cashflow = df_cashflow.set_index('0', drop=True, append=False,inplace=False, verify_integrity=False) ***#ERROR LINE 3***
df_cashflow = df_cashflow.transpose()
cols = list(df_cashflow.columns)
cols[0] = 'Date'
df_cashflow = df_cashflow.set_axis(cols, axis='columns', inplace=False)
numeric_columns = list(df_cashflow.columns)[1::]
writer = pd.ExcelWriter(ticker + '.xlsx')
df_incomestatement.to_excel(writer,'Income Statement')
df_balancesheet.to_excel(writer,'Balance Sheet')
df_cashflow.to_excel(writer,'Statement of Cash Flows')
writer.save()
print('Collecting data for ' + ticker + '...')
如果您能对此有所了解,请提前向您表示感谢
目前没有回答
相关问题 更多 >
编程相关推荐