擅长:Python、MySQL、Java
<p>您可以使用 while 循环逐页抓取评论<br/>
(即只要还有下一页,并且已收集的评论少于 1000 条,就继续抓取下一页)</p>
<pre><code>import urllib.request
from bs4 import BeautifulSoup
import sys
from urllib.parse import urljoin

# Script: walk the paginated comment listing of one mygov.in discussion and
# collect the text of each comment until there is no "next" pager link or
# max_comments comments have been gathered.

# Translation table mapping every code point above U+FFFF to U+FFFD, so
# characters outside the Basic Multilingual Plane are replaced in the
# collected comment text.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
all_comments = []
max_comments = 1000
base_url = 'https://www.mygov.in/'
# urljoin resolves the path against the site root; plain concatenation
# produced a doubled slash ("https://www.mygov.in//group-issue/...").
next_page = urljoin(base_url, '/group-issue/share-your-ideas-pm-narendra-modis-mann-ki-baat-26th-march-2017/')

while next_page and len(all_comments) < max_comments:
    # Close the HTTP response as soon as the page body has been read.
    with urllib.request.urlopen(next_page) as response:
        srcode = response.read()
    soup = BeautifulSoup(srcode, "html.parser")

    for div in soup.find_all('div', class_="comment_body"):
        # Enforce the cap inside a page too, not only between pages.
        if len(all_comments) >= max_comments:
            break
        paragraph = div.find('p')
        if paragraph is None:
            # Comment body without a <p> element: nothing to extract.
            continue
        all_comments.append(paragraph.text.translate(non_bmp_map))

    # Locate the "next page" pager entry; any missing piece (no <li>, no <a>,
    # no href) ends the crawl instead of raising AttributeError.
    pager_item = soup.find('li', class_='pager-next first last')
    link = pager_item.find('a') if pager_item is not None else None
    href = link.get('href') if link is not None else None
    next_page = urljoin(base_url, href) if href else None

    # Progress report after each fetched page.
    print('comments: {}'.format(len(all_comments)))

print(all_comments)
print(len(all_comments))
</code></pre>