擅长:Python、MySQL、Java
<p>下面是在不使用 API 的情况下实现它的方法。主要的难点在于必须在
User-Agent(用户代理)中使用合适的浏览器标识。</p>
<pre><code>import html
import re

import requests
# HTTP headers passed to requests.get() for the profile page fetch.
# NOTE(review): presumably this User-Agent is chosen so the site returns
# markup the regex below can parse — confirm before changing it.
headers = {
    'User-Agent': 'UCWEB/2.0 (compatible; Googlebot/2.1; +google.com/bot.html)',
}
# Matches a single HTML tag. ``[^>]`` (unlike ``.``) also matches newlines,
# so a tag whose attributes wrap onto another line is still removed.
_TAG_RE = re.compile(r'<[^>]*>')


def cleanhtml(raw_html):
    """Return *raw_html* with every ``<...>`` tag removed.

    Only the tag markup is deleted. The text between tags, including any
    HTML entities such as ``&amp;``, is returned unchanged.

    Args:
        raw_html: Markup fragment as a string.

    Returns:
        The fragment with all tags stripped out.
    """
    return _TAG_RE.sub('', raw_html)
# Build one plain-text report in ``content``: a banner for each account,
# followed by the text of every tweet found on that account's profile page.
# Pieces are collected in a list and joined once at the end.
parts = []
for user in ['billgates']:
    parts.append("============================\n\n")
    parts.append(user + "\n\n")
    parts.append("============================\n\n")
    url_twitter = 'https://twitter.com/%s' % user
    # The timeout keeps an unresponsive server from blocking the script forever.
    resp = requests.get(url_twitter, headers=headers, timeout=10)  # Send request
    # [^>]* keeps the attribute match inside the opening <p ...> tag, and
    # DOTALL lets the captured body span line breaks, so multi-line tweets
    # are not skipped.
    res = re.findall(
        r'<p class="TweetTextSize[^>]*tweet-text[^>]*>(.*?)</p>',
        resp.text,
        flags=re.DOTALL,
    )
    # NOTE(review): the original indentation was lost; the " -" separator is
    # assumed to follow every tweet rather than every user — confirm.
    for x in res:
        x = cleanhtml(x)
        # Decode all HTML entities (not just &#39; / &quot; / &nbsp;) after the
        # tags are gone, then turn the no-break space from &nbsp; into a
        # regular space as before.
        x = html.unescape(x).replace("\xa0", " ")
        parts.append(x)
        parts.append("\n\n")
        parts.append(" -")
        parts.append("\n\n")
content = "".join(parts)
</code></pre>