<p>本例将定位 <code>&lt;script&gt;</code> 标签并从中解析一些数据:</p>
<pre><code>import re
import json
import requests
from bs4 import BeautifulSoup
# Matches either a complete double-quoted string literal (group 1) or a bare
# word that is directly followed by a colon (group 2, i.e. an object key).
# Consuming string literals whole keeps text such as "Note: sold" inside a
# value from being mistaken for a key.
JS_TOKEN_RE = re.compile(r'("(?:\\.|[^"\\])*")|([\w$]+)(?=\s*:)')


def quote_js_keys(js_object_text):
    """Return js_object_text with its unquoted object keys wrapped in double quotes.

    The page embeds JavaScript object literals whose keys are bare words
    (``price: 1``), which ``json.loads`` rejects.  Double-quoted string
    literals are copied through unchanged, so colons inside values are safe.
    """
    def fix(match):
        literal, key = match.groups()
        if literal is not None:
            return literal
        return '"' + key + '"'

    return JS_TOKEN_RE.sub(fix, js_object_text)


def extract_js_object(script_text, variable):
    """Parse the object literal assigned to ``variable`` in script_text.

    Looks for ``variable = {...};`` and decodes the braces part as JSON after
    quoting its keys.  The match is non-greedy, so it stops at the first
    ``};`` following the assignment.

    Raises:
        ValueError: if no such assignment exists, or (as its subclass
            ``json.JSONDecodeError``) if the literal still is not valid JSON.
    """
    pattern = re.escape(variable) + r'\s*=\s*(\{.*?\});'
    found = re.search(pattern, script_text, flags=re.S)
    if found is None:
        raise ValueError('no object assigned to %r found in the script' % variable)
    return json.loads(quote_js_keys(found.group(1)))


if __name__ == '__main__':
    url = 'https://www.zoopla.co.uk/for-sale/details/53818653?search_identifier=7e57533214fc2402ba53dd6c14b624f8'

    # locate the tag
    # A timeout stops the script from hanging forever on a stalled connection,
    # and raise_for_status() reports HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    # find() with a compiled pattern avoids the deprecated ":contains()" selector.
    script = soup.find('script', string=re.compile(r'ZPG\.trackData\.taxonomy'))
    if script is None:
        raise SystemExit('could not find the ZPG.trackData script tag in ' + url)

    # parse some data from script
    data1 = extract_js_object(script.string, 'ZPG.trackData.ecommerce')
    data2 = extract_js_object(script.string, 'ZPG.trackData.taxonomy')

    # print the data
    print(json.dumps(data1, indent=4))
    print(json.dumps(data2, indent=4))
</code></pre>
<p>输出:</p>
<pre><code>{
    "detail": {
        "products": [
            {
                "brand": "Walton and Allen Estate Agents Ltd",
                "category": "for-sale/resi/agent/pre-owned/gb",
                "id": 53818653,
                "name": "FS_Contact",
                "price": 1,
                "quantity": 1,
                "variant": "standard"
            }
        ]
    }
}
{
    "signed_in_status": "signed out",
    "acorn": 44,
    "acorn_type": 44,
    "area_name": "Aspley, Nottingham",
    "beds_max": 3,
    "beds_min": 3,
    "branch_id": "43168",
    "branch_logo_url": "https://st.zoocdn.com/zoopla_static_agent_logo_(586192).png",
    "branch_name": "Walton & Allen Estate Agents",
    "brand_name": "Walton and Allen Estate Agents Ltd",
    "chain_free": false,
    "company_id": "21619",
    "country_code": "gb",
    "county_area_name": "Nottingham",
    "currency_code": "GBP",
    "display_address": "Melbourne Road, Aspley, Nottingham NG8",
    "furnished_state": "",
    "group_id": "",
    "has_epc": false,
    "has_floorplan": true,
    "incode": "5HN",
    "is_retirement_home": false,
    "is_shared_ownership": false,
    "listing_condition": "pre-owned",
    "listing_id": 53818653,
    "listing_status": "for_sale",
    "listings_category": "residential",
    "location": "Aspley",
    "member_type": "agent",
    "num_baths": 1,
    "num_beds": 3,
    "num_images": 15,
    "num_recepts": 1,
    "outcode": "NG8",
    "post_town_name": "Nottingham",
    "postal_area": "NG",
    "price": 150000,
    "price_actual": 150000,
    "price_max": 150000,
    "price_min": 150000,
    "price_qualifier": "guide_price",
    "property_highlight": "",
    "property_type": "semi_detached",
    "region_name": "East Midlands",
    "section": "for-sale",
    "size_sq_feet": "",
    "tenure": "",
    "zindex": "129806"
}
</code></pre>