擅长:Python、MySQL、Java
<p>嘿,伙计们,我想我明白了,但如果有人有更好的方法,请告诉我。这是我想到的:</p>
<pre><code>import urllib2
import urllib
import re
def open(url):
    """Fetch *url* over HTTP and return the raw response body.

    The request carries a desktop Firefox ``User-agent`` header.

    NOTE: this function shadows the builtin ``open`` for the rest of the
    module; the name is kept because the script below calls it.

    Args:
        url: Address of the page to download.

    Returns:
        The unmodified body returned by ``response.read()``.

    Raises:
        Whatever ``urllib2.urlopen`` or ``response.read()`` raise; nothing
        is caught here.
    """
    req = urllib2.Request(url)
    req.add_header('User-agent','Mozilla/5.0 (Windows NT 6.1; WOW64; rv:19.0) Gecko/20100101 Firefox/19.0')
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Release the connection even when read() fails part-way through.
        response.close()
# Download the listing page and print one (link, name, image) triple per entry.
url = 'http://goldfilmesonline.com/category/lancamentos/'
content = open(url)

# Each pattern captures one field: entry link, title text, thumbnail source.
match = re.compile('<a href="(.+?)" class="tip_trigger" style="float: left;">').findall(content)
match2 = re.compile('<span>(.+?) </span>').findall(content)
match3 = re.compile('<img width=".+?" height=".+?" src="(.+?)" class="attachment-inicio-thumbnails size-inicio-thumbnails wp-post-image"').findall(content)

# Walk the three result lists in lockstep so every link is printed with its
# own name and image. Separate loops either print every combination or only
# the last values, and rebinding ``url`` in the loop would overwrite the page
# address.
# NOTE(review): assumes each entry yields exactly one hit per pattern, in the
# same page order - verify against the page markup. zip() stops at the
# shortest list, so unmatched extras are silently dropped.
for link, name, image in zip(match, match2, match3):
    print (link, name, image)
</code></pre>