<p>很棒的教程:</p>
<h2><a href="https://www.youtube.com/watch?v=ind-mugxMxk" rel="nofollow noreferrer">https://www.youtube.com/watch?v=ind-mugxMxk</a></h2>
<pre><code>import re
import requests
from bs4 import BeautifulSoup
from babel.numbers import format_currency
# Fetch one page of search results and print every listing price found on it,
# formatted as a euro amount for the fr_FR locale.
session = requests.Session()
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',  # these parameters can be changed as needed
    # NOTE(review): 'br' advertises Brotli support - confirm the runtime can decode it.
    'Accept-Encoding': 'gzip, deflate, br',
    'content-type': 'application/json',
    'skip-caching': 'true',
    'DNT': '1',
    'Connection': 'keep-alive',
    'TE': 'Trailers',
}
url = 'https://www.seloger.com/list.htm?tri=initial&amp;idtypebien=1,2&amp;pxMax=3000000&amp;div=2238&amp;idtt=2,5&amp;naturebien=1,2,4&amp;LISTING-LISTpg=2'

# Without an explicit timeout the request can block forever on a stalled server.
response = session.get(url, headers=headers, timeout=30)
# Raise on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")

# Matches runs of characters that are neither ASCII alphanumerics nor in the
# listed Unicode ranges (specialized language characters are left in place).
# Compiled once here because the pattern does not change between listings.
non_alphanumeric = re.compile('[^0-9A-Za-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02af\u1d00-\u1d25\u1d62-\u1d65\u1d6b-\u1d77\u1d79-\u1d9a\u1e00-\u1eff\u2090-\u2094\u2184-\u2184\u2488-\u2490\u271d-\u271d\u2c60-\u2c7c\u2c7e-\u2c7f\ua722-\ua76f\ua771-\ua787\ua78b-\ua78c\ua7fb-\ua7ff\ufb00-\ufb06]+')

for i, div in enumerate(soup.find_all('div', {'class': 'c-pa-price'}), 1):
    # Strip spaces, currency signs and other punctuation from the price text.
    price = non_alphanumeric.sub('', div.text)
    # remove extra word Bouquet - optional: keep only what follows it
    if 'Bouquet' in price:
        price = price.split('Bouquet')[1]
    # int() raises ValueError if the cleaned text is not a plain number.
    price = format_currency(int(price), 'EUR', locale='fr_FR')
    print(f'Inscription {i}:', price)
</code></pre>