擅长:python、mysql、java
<p>你可以试试这个:</p>
<pre><code>import csv, requests, re
from urllib.parse import urlparse
from bs4 import BeautifulSoup as soup
d = soup(requests.get('https://www.pistonheads.com/classifieds?Category=used-cars&ResultsPerPage=100').text, 'html.parser')
def extract_details(_s:soup) -> list:
_link = _s.find('a', {'href':re.compile('/classifieds/used\-cars/')})['href']
_, _, make, model, *_ = _link[1:].split('/')
price, img = _s.find('div', {'class':'price'}).text, [i['src'] for i in _s.find_all('img')]
return [_link, make, model, price, 'N/A' if not img else img[0]]
with open('filename.csv', 'w') as f:
_listings = [extract_details(i) for i in d.find_all('div', {'class':'ad-listing'}) if i.find('div', {'class':'price'})]
write = csv.writer(f)
write.writerows([['make', 'model', 'price', 'img'], *_listings])
</code></pre>