擅长:python、mysql、java
<p>使用BeautifulSoup查找匹配的内容属性,然后将其替换为:</p>
<pre><code>from bs4 import BeautifulSoup
import re
html = """
&lt;meta content="http://www.telegraaf.nl/telesport/voetbal/buitenlands/article22178882.ece" /&gt;
&lt;meta content="http://www.telegraaf.nl/telesport/voetbal/buitenlands/22178882/__Wenger_vreest_het_ergste__.html" /&gt;
"""

# Name the parser explicitly: a bare BeautifulSoup(html) uses whichever
# parser happens to be installed and emits a warning about it.
soup = BeautifulSoup(html, 'html.parser')

# Compile the patterns once, as raw strings. The dot is escaped so only a
# literal ".html" / ".ece" suffix matches (an unescaped '.' matches any
# character).
html_suffix = re.compile(r'\.html$')
ece_suffix = re.compile(r'\.ece$')
article_tail = re.compile(r'article(\d+)\.ece$')

# Reference table: URL prefix (everything before the last '/') mapped to
# the full .html link found in a <meta content="..."> attribute.
html_links = {
    el['content'].rpartition('/')[0]: el['content']
    for el in soup.find_all('meta', content=html_suffix)
}

# For every .ece link, turn the trailing "article<digits>.ece" into just
# "<digits>" so it lines up with a key of html_links, then swap in the
# .html link stored under that key.
for el in soup.find_all('meta', content=ece_suffix):
    url = article_tail.sub(r'\1', el['content'])
    # Leave the tag unchanged when there is no .html counterpart instead of
    # aborting the whole run with a KeyError.
    if url in html_links:
        el['content'] = html_links[url]

# Parenthesised single-argument form runs under both Python 2 and Python 3.
print(soup)
# <meta content="http://www.telegraaf.nl/telesport/voetbal/buitenlands/22178882/__Wenger_vreest_het_ergste__.html"/>
</code></pre>