<p>你的代码基本没有问题,只需把调用 <code>get_soup()</code> 的那个循环放到第一个循环内部即可:</p>
<pre><code>from bs4 import BeautifulSoup
import requests
from collections import ChainMap
def get_data(soup):
    """Yield one record per practitioner listing found in ``soup``.

    Args:
        soup: A parsed document exposing ``select()``; every element it
            returns for ``.view-practitioners .practitioner`` must expose
            ``select_one()``.

    Yields:
        A ``ChainMap`` with the keys ``name``, ``clinic``, ``profession``,
        ``region`` and ``city``. A key resolves to ``'n/a'`` when the
        matching element is absent or contains only whitespace; otherwise
        it holds the element's text exactly as found (not stripped).
    """
    default_data = {'name': 'n/a', 'clinic': 'n/a', 'profession': 'n/a', 'region': 'n/a', 'city': 'n/a'}
    # Record key -> CSS selector, relative to a single practitioner element.
    field_selectors = (
        ('name', '.practitioner__name'),
        ('clinic', '.practitioner__clinic'),
        ('profession', '.practitioner__profession'),
        ('region', '.practitioner__region'),
        ('city', '.practitioner__city'),
    )
    for doctor in soup.select('.view-practitioners .practitioner'):
        doctor_data = {}
        for key, selector in field_selectors:
            element = doctor.select_one(selector)
            # select_one() gives None when nothing matches; guard it so a
            # listing lacking one field falls back to the default instead
            # of raising AttributeError on ``.text``.
            if element is None:
                continue
            text = element.text
            if text.strip():
                doctor_data[key] = text
        # Lookups hit doctor_data first and fall through to the defaults.
        yield ChainMap(doctor_data, default_data)
# Listing URL; the trailing %s is filled with the zero-based page index.
url = 'https://sportmedbc.com/practitioners?field_profile_first_name_value=&field_profile_last_name_value=&field_pract_profession_tid=All&city=&taxonomy_vocabulary_5_tid=All&page=%s'

PAGE_COUNT = 5  # number of listing pages to fetch
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get() can block forever

for i in range(PAGE_COUNT):
    page = requests.get(url % i, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(page.text, 'lxml')

    # The site's page parameter is zero-based; show a one-based number.
    print('Page {}'.format(i + 1))
    print('#' * 80)

    # Parsing happens inside the page loop so every fetched page is
    # processed, not only the last one.
    for doctor in get_data(soup):
        print('name:\t\t', doctor['name'])
        print('clinic:\t\t',doctor['clinic'])
        print('profession:\t',doctor['profession'])
        print('city:\t\t',doctor['city'])
        print('region:\t\t',doctor['region'])
        # Separator after each practitioner record.
        print('-' * 80)
</code></pre>
<p>输出结果:</p>
<pre><code>Page 1
################################################################################
name: Jaimie Ackerman
clinic: n/a
profession: n/a
city: n/a
region: n/a
name: Marilyn Adams
clinic: Fortius Sport & Health
profession: Physiotherapist
city: n/a
region: Fraser River Delta
name: Mahsa Ahmadi
clinic: Wellpoint Acupuncture (Sports Medicine)
profession: Acupuncturist
city: Vancouver
region: Vancouver & Sea to Sky
name: Tracie Albisser
clinic: Pacific Sport Northern BC, Tracie Albisser
profession: Strength and Conditioning Specialist, Exercise Physiologist
city: n/a
region: Cariboo - North East
name: Christine Alder
clinic: n/a
profession: n/a
city: Vancouver
region: Vancouver & Sea to Sky
name: Steacy Alexander
clinic: Go! Physiotherapy Sports and Wellness Centre
profession: Physiotherapist
city: Vancouver
region: Vancouver & Sea to Sky
name: Page Allison
clinic: AET Clinic, .
profession: Athletic Therapist
city: Victoria
region: Vancouver Island - Central Coast
name: Dana Alumbaugh
clinic: n/a
profession: Podiatrist
city: Squamish
region: Vancouver & Sea to Sky
name: Manouch Amel
clinic: Mountainview Kinesiology Ltd.
profession: Strength and Conditioning Specialist
city: Anmore
region: Vancouver & Sea to Sky
name: Janet Ames
clinic: Dr. Janet Ames
profession: Physician
city: Prince George
region: Cariboo - North East
name: Greg Anderson
clinic: University of the Fraser Valley
profession: Exercise Physiologist
city: Mission
region: Fraser Valley
name: Sandi Anderson
clinic: n/a
profession: n/a
city: Coquitlam
region: Fraser Valley
Page 2
################################################################################
... and so on.
</code></pre>