如何在 Python 中使用 BeautifulSoup 解析 Google 搜索结果

from urllib.request import urlretrieve
import urllib.parse
from urllib.parse import urlencode, urlparse, parse_qs
import webbrowser

from bs4 import BeautifulSoup
import requests

# Base Google search address; the encoded query is appended to it.
# NOTE(review): everything after '#' is a URL fragment, which HTTP clients
# do not send to the server, so the query probably never reaches Google
# with this address -- confirm.
address = 'https://google.com/#q='

# Read the text document that contains the question. The context manager
# closes the file even if reading raises.
with open("OCR.txt", "rt") as file:
    word = file.read()

# The question is on multiple lines, so join them with single spaces.
myList = word.split('\n')
newString = ' '.join(myList)
print(newString)

qstr = urllib.parse.quote_plus(newString)  # Encode the string
newWord = address + qstr  # Combine the base and the encoded query
print(newWord)

# Fetch the page and parse the returned HTML with the lxml parser.
source = requests.get(newWord)
soup = BeautifulSoup(source.text, 'lxml')

1条回答

网友

1楼 · 发布于 2024-09-28 17:30:56

你的网址对我不起作用，但改用 `https://google.com/search?q=` 之后，我就能得到结果。

# Importing the submodule explicitly guarantees that urllib.parse is
# loaded; a bare "import urllib" only works when some other import
# happens to load it first.
import urllib.parse
from bs4 import BeautifulSoup
import requests
import webbrowser

# Example query; quote_plus makes it safe to embed in a query string.
text = 'hello world'
text = urllib.parse.quote_plus(text)

url = 'https://google.com/search?q=' + text

# Without a timeout, requests can wait on an unresponsive server forever.
response = requests.get(url, timeout=10)

# Debug aid: uncomment to save the raw response and open it in a browser,
# which shows the HTML that is actually being parsed.
#with open('output.html', 'wb') as f:
#    f.write(response.content)
#webbrowser.open('output.html')

soup = BeautifulSoup(response.text, 'lxml')
# Print the text of every element carrying the CSS class "g", followed by
# a separator line.
# NOTE(review): presumably "g" marks one search result each -- verify
# against the HTML Google actually returns.
for g in soup.find_all(class_='g'):
    print(g.text)
    print('-----')

更多用法请阅读 Beautiful Soup 官方文档（Beautiful Soup Documentation）。

相关问题更多 >

编程相关推荐

热门问题

热门文章