from bs4 import BeautifulSoup
# Sample markup (note that the <div> tags are never closed).
text = '<div class="ds"><div title="Today" class="dh">...<div title="Pazartesi" class="dh">26 Agu Pzt'
soup = BeautifulSoup(text, 'html.parser')

# Print the title of every <div> below .ds that has a title attribute.
for item in soup.select('.ds div[title]'):
    print(item['title'])

# or as a list comprehension
titles = [item['title'] for item in soup.select('.ds div[title]')]
print(titles)
import lxml.html
# Sample markup (note that the <div> tags are never closed).
text = '<div class="ds"><div title="Today" class="dh">...<div title="Pazartesi" class="dh">26 Agu Pzt'
soup = lxml.html.fromstring(text)

# Print the title of every <div> below .ds that has a title attribute;
# attributes are read through the element's .attrib mapping.
for item in soup.cssselect('.ds div[title]'):
    print(item.attrib['title'])

# or as a list comprehension
titles = [item.attrib['title'] for item in soup.cssselect('.ds div[title]')]
print(titles)
import pyquery
# Sample markup (note that the <div> tags are never closed).
text = '<div class="ds"><div title="Today" class="dh">...<div title="Pazartesi" class="dh">26 Agu Pzt'
soup = pyquery.PyQuery(text)

# Calling the PyQuery object with a CSS selector returns the matches;
# each iterated item exposes its attributes through .attrib.
for item in soup('.ds div[title]'):
    print(item.attrib['title'])

# or as a list comprehension
titles = [item.attrib['title'] for item in soup('.ds div[title]')]
print(titles)
import parsel
# Reuses the `text` markup assigned earlier in this script.
sel = parsel.Selector(text)

# Print the title of every <div> below .ds that has a title attribute.
for item in sel.css('.ds div[title]'):
    print(item.attrib['title'])

# or as a list comprehension
titles = [item.attrib['title'] for item in sel.css('.ds div[title]')]
print(titles)
使用 BeautifulSoup、lxml 或类似模块代替正则表达式(regex)。
BeautifulSoup:
lxml:
PyQuery:
parsel(Scrapy 的 Selectors 所使用的库):
相关问题 更多 >
编程相关推荐