import requests
from lxml import html
# Endpoint that crawl() sends its search query parameters to.
SEARCH_URL = "https://www.yellowpages.com/search"
def crawl(name, state, page=1):
    """Yield one tuple per listing found on a single search-results page.

    Args:
        name: Value sent as the ``search_terms`` query parameter.
        state: Value sent as the ``geo_location_terms`` query parameter.
        page: Value sent as the ``page`` query parameter.

    Yields:
        ``(name, address, phone, showing)`` tuples.  The first three are the
        text of the matching child element of each ``div[@class='info']``
        node, or ``None`` when the element is absent.  ``showing`` is the
        first text node matched by the page-level XPath below (``None`` if
        nothing matches) and is the same for every tuple from one page.
    """
    params = {'search_terms': name, 'geo_location_terms': state, 'page': page}
    data = requests.get(SEARCH_URL, params=params).text
    tree = html.fromstring(data)

    # findtext() only accepts relative ElementPath expressions; handing it an
    # absolute path raises "SyntaxError: cannot use absolute path on element".
    # Full XPath (absolute paths, text()) must go through xpath().  The value
    # is document-level, so it is looked up once rather than once per listing.
    showing_nodes = tree.xpath("//*[@id='main-content']/div[2]/div[4]/p/text()")
    showing = showing_nodes[0] if showing_nodes else None

    for item in tree.xpath("//div[@class='info']"):
        # Use a distinct local name so the ``name`` argument is not shadowed.
        business = item.findtext(".//span[@itemprop='name']")
        address = item.findtext(".//span[@class='street-address']")
        phone = item.findtext(".//div[@itemprop='telephone']")
        yield (business, address, phone, showing)
def search(name, state, pages=1):
    """Print every listing tuple from result pages 1 through ``pages``.

    Args:
        name: Search term forwarded to ``crawl``.
        state: Location term forwarded to ``crawl``.
        pages: Number of result pages to fetch, counted from page 1
            (inclusive).  A value below 1 fetches nothing.
    """
    # The previous ``while page is not pages`` loop compared integers by
    # identity (implementation-dependent, and endless when pages < 1) and
    # stopped one page early, so the default pages=1 fetched nothing.
    # range() compares by value, always terminates, and includes the last page.
    for page in range(1, pages + 1):
        for result in crawl(name, state, page=page):
            # A parenthesised single argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(result)
# When executed as a script, run a sample query through search() with pages=10.
if __name__ == '__main__':
    search(name='pizza', state='tx', pages=10)
回溯(Traceback):
SyntaxError: cannot use absolute path on element
问题出在这一行:
showing = items.findtext("//*[@id='main-content']/div[2]/div[4]/p/text()")
将 crawl 函数中的这一行改为通过 tree.xpath() 取值(findtext() 不接受以 / 开头的绝对路径),修改后即可正常输出结果。
相关问题 更多 >
编程相关推荐