擅长:python、mysql、java
<p>有一个更简单的方法来获取标题。每个页面都有一个<code>&lt;title&gt;</code>元素,其中正好包含您需要的信息:</p>
<pre class="lang-py prettyprint-override"><code>import requests # Simpler HTTP requests
from bs4 import BeautifulSoup # Python package for pulling data out of HTML and XML files
#import pandas as pd # Python package for data manipulation and analysis
import re # regular expressions
import json # Python package used to work with JSON data
#from tqdm import tqdm # python for displaying progressbar
from datetime import datetime
# Scrape the IMDb top chart: collect the link of every listed movie, then read
# each movie's name from the title element of its own page.

# Seconds to wait for a response; without a timeout, requests can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10

url = 'https://www.imdb.com/chart/top'
template = 'https://www.imdb.com%s'
movie_names = []

# A single Session reuses the connection for all requests to the same host,
# and the with-block closes it when the scrape is done.
with requests.Session() as session:
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    url_text = response.text
    url_soup = BeautifulSoup(url_text, 'html.parser')

    # Anchors without an href are skipped so that no '...comNone' URL is built.
    title_links = [
        template % a['href']
        for a in url_soup.select('td.titleColumn a')
        if a.get('href')
    ]

    for title_link in title_links:
        page_response = session.get(title_link, timeout=REQUEST_TIMEOUT)
        page_response.raise_for_status()
        page_soup = BeautifulSoup(page_response.text, 'html.parser')
        # Keep only the text before the first ' (' of the page title
        # (presumably the title looks like 'Name (year) ...' - confirm
        # against a live page).
        movie_names.append(page_soup.title.get_text(strip=True).split(' (')[0])

print(movie_names)
</code></pre>