下面是一个chainmap/beautifulsoup scraper,它从这个website中获取医生的个人资料信息
from bs4 import BeautifulSoup
import requests
import csv
from collections import ChainMap
def get_data(soup):
default_data = {'name': 'n/a', 'clinic': 'n/a', 'profession': 'n/a', 'region': 'n/a', 'city': 'n/a'}
for doctor in soup.select('.view-practitioners .practitioner'):
doctor_data = {}
if doctor.select_one('.practitioner__name').text.strip():
doctor_data['name'] = doctor.select_one('.practitioner__name').text
if doctor.select_one('.practitioner__clinic').text.strip():
doctor_data['clinic'] = doctor.select_one('.practitioner__clinic').text
if doctor.select_one('.practitioner__profession').text.strip():
doctor_data['profession'] = doctor.select_one('.practitioner__profession').text
if doctor.select_one('.practitioner__region').text.strip():
doctor_data['region'] = doctor.select_one('.practitioner__region').text
if doctor.select_one('.practitioner__city').text.strip():
doctor_data['city'] = doctor.select_one('.practitioner__city').text
yield ChainMap(doctor_data, default_data)
url = 'https://sportmedbc.com/practitioners?field_profile_first_name_value=&field_profile_last_name_value=&field_pract_profession_tid=All&city=&taxonomy_vocabulary_5_tid=All&page=%s'
for i in range(5):
page=requests.get(url % i)
soup = BeautifulSoup(page.text, 'lxml')
def print_data(header_text, data, key):
print(header_text)
for d in data:
print(d[key])
print()
data = list(get_data(soup))
print_data('[Names]', data, 'name')
print_data('[Clinic]', data, 'clinic')
print_data('[Profession]', data, 'profession')
print_data('[Taxonomy]', data, 'region')
print_data('[City]', data, 'city')
f=csv.writer('Sports_Medicine_List','w')
f.writerow(['Names','Clinic', 'Profession','Taxonomy','City'])
for i in range(len('Names')):
f.writerow(['Names'[i],'Clinic'[i], 'Profession'[i],'Taxonomy'[i],'City'[i]])
代码运行时没有错误,但是csv输出没有显示在我的IDE中。我认为这是因为我没有正确地解释chainmap变量,但我不能完全确定。有人知道这是为什么吗?提前谢谢
这是您可以考虑尝试的另一种方式:
要编写csv字典,可以使用
csv.DictWriter
(docs here,ChainMap
只是字典的一个版本):()
这会将所有数据输出到
data.csv
文件。我的图书馆截图:相关问题 更多 >
编程相关推荐