import operator
import urllib
#This line will import GoogleSearch, SearchError class from xgoogle/search.py file
from xgoogle.search import GoogleSearch, SearchError
# Script: search Google for a user-supplied word via xgoogle, download every
# result page, and report how often the word occurs in each page.
#
# Maps each result URL (UTF-8 encoded) to the number of times the search word
# occurs in that page's raw content.
my_dict = {}
print("Enter the word to be searched : ")
# Read the search keyword from standard input.
yourword = raw_input()
try:
    # Run the Google search for the keyword through xgoogle.
    gs = GoogleSearch(yourword)
    gs.results_per_page = 80
    results = gs.get_results()
    # Visit every result link and count the keyword in the page content.
    for res in results:
        parsedurl = res.url.encode("utf8")
        try:
            myurl = urllib.urlopen(parsedurl)
            try:
                source = myurl.read()
            finally:
                # Always release the connection, even if the read fails.
                myurl.close()
        except IOError as e:
            # One unreachable page must not abort the whole run and discard
            # the counts already collected; report it and move on.
            print("Could not fetch %s: %s" % (parsedurl, e))
            continue
        # Plain substring count on the raw page: case-sensitive, and it also
        # matches the word inside markup or longer words.
        my_dict[parsedurl] = source.count(yourword)
except SearchError as e:
    print("Search failed: %s" % e)
print(my_dict)
# List the URLs from the highest occurrence count to the lowest.
for key in sorted(my_dict, key=my_dict.get, reverse=True):
    # Format explicitly; print(a, b) under the Python 2 print statement would
    # emit a tuple repr instead of the two values.
    print("%s %d" % (key, my_dict[key]))
这里有一个xgoogle库的链接,它可以完成同样的事情。
我尝试过类似的方法来获得前10个链接,它也可以计算链接中的单词数。我添加了代码片段供您参考:
你用pygoogle遇到了什么问题?我知道它已不再受支持,但我在很多场合都使用过这个项目,它完全可以很好地完成你所描述的这个简单任务。
你的问题确实让我很好奇,所以我去了谷歌,输入了“python谷歌搜索”。Bam,就找到了这个仓库(this repository)。用pip安装之后,浏览文档不到5分钟,就实现了你想要的功能:
也许下次再努力一点,好吗?
使用BeautifulSoup和requests从google搜索结果中获取链接
相关问题 更多 >
编程相关推荐