擅长:python、mysql、java
<p>这应该行得通。</p>
<pre><code>import re
from BeautifulSoup import BeautifulSoup
# Print the text of every <li class="lev1"> item found in the first <h3>
# section of html_doc.
html_doc = '<h3>HeaderName1</h3><ul class="prodoplist"><li>Parent</li><li class="lev1">Child1</li><li class="lev1">Child2</li><li class="lev1">Child3</li></ul> <h3>HeaderName2</h3><ul class="prodoplist"><li>Parent2</li><li class="lev1">Child4</li><li class="lev1">Child5</li><li class="lev1">Child6</li></ul>'

# Isolate the first section: from the first <h3> up to, but not including,
# the next <h3>. The lookahead also accepts end-of-string, so a document with
# a single header still matches instead of leaving m as None.
m = re.search(r'<h3>.*?(?=<h3>|\Z)', html_doc, re.DOTALL)
# With no <h3> at all, fall back to an empty slice rather than raising.
s, e = m.span() if m else (0, 0)
target_html = html_doc[s:e]

new_bs = BeautifulSoup(target_html)
ul_eles = new_bs.findAll('ul', attrs={'class': 'prodoplist'})
for ul_ele in ul_eles:
    # Search inside the current <ul> only; querying new_bs here would repeat
    # every matching <li> once for each list in the fragment.
    li_eles = ul_ele.findAll('li', attrs={'class': 'lev1'})
    for li_ele in li_eles:
        # Single-argument call form behaves the same on Python 2 and 3.
        print(li_ele.text)
</code></pre>