from bs4 import BeautifulSoup
import requests, re
# Download one SEC EDGAR 10-K filing and print every ZIP+4 code
# (five digits, a hyphen, four digits) found in its centre-aligned table cells.
url = "https://www.sec.gov/Archives/edgar/data/20/000095012310024631/c97665e10vk.htm"

# A timeout keeps the script from hanging forever on an unresponsive server.
# NOTE(review): SEC EDGAR reportedly rejects requests that do not declare a
# User-Agent -- confirm, and pass headers={"User-Agent": ...} if required.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")

# Compiled once outside the loop; a raw string keeps `\d` from being treated
# as an (invalid) string escape. Swap in your own pattern here if needed.
zip_pattern = re.compile(r"(\d{5}-\d{4})")

for s in soup.find_all("td", attrs={"align": "center"}):
    # Search the cell's full markup (tags included), as the original did.
    zipcode = zip_pattern.findall(str(s))
    if zipcode:
        print(zipcode)
感谢您的帮助。我需要从一个文件夹中的多个文件里提取邮政编码和城市信息;我的代码如下,不过会根据您的正则表达式进行修改。下一步是提取城市信息并将其保存到 CSV 文件中。
试试这个。首先,使用 BeautifulSoup 获取 HTML,并在 HTML 中查找所有 td 标记;然后,使用正则表达式(regex)提取邮政编码(zipcode)。输出:
相关问题 更多 >
编程相关推荐