我正在做一个任务,需要使用 Python 的 requests 库从网页获取详细信息。我已经成功从页面中获取了基本信息,但页面上还有一个“Show details”按钮,它通过 AJAX 调用获取更多细节,而我现在也需要获取这些额外的细节。有人能帮我吗?
以下是链接网址:http://ipindiaonline.gov.in/tmrpublicsearch/frmmain.aspx
网站截图:
这是我写的代码:
from bs4 import BeautifulSoup
import requests,json
def returnJson(wordmark, page_class, timeout=30):
    """Submit a word-mark search to the trademark public-search form.

    The form page is fetched first so the hidden ``__EVENTVALIDATION`` and
    ``__VIEWSTATE`` inputs can be read and echoed back in the search POST.

    Despite the name, this returns the raw HTTP response of the POST, not
    JSON; the HTML is parsed separately by the caller.

    Args:
        wordmark: Text placed in the word-mark search box.
        page_class: Value placed in the class search box.
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout, ``requests`` waits indefinitely.

    Returns:
        The ``requests.Response`` of the search POST.

    Raises:
        TypeError: If either hidden input is missing from the form page
            (``soup.find`` returns ``None``, which cannot be subscripted).
        requests.RequestException: On connection errors or timeouts.
    """
    url = "http://ipindiaonline.gov.in/tmrpublicsearch/frmmain.aspx"
    search_type = 'WM'

    # Use one session for both requests so that any cookies set by the
    # initial GET are sent back with the POST, in case the server ties the
    # hidden-field tokens to a session.
    with requests.Session() as session:
        r_init = session.get(url, timeout=timeout)
        soup = BeautifulSoup(r_init.text, 'html.parser')
        event_validation = soup.find(
            "input", attrs={"name": "__EVENTVALIDATION"})['value']
        view_state = soup.find(
            "input", attrs={"name": "__VIEWSTATE"})['value']

        postdata = {
            'ctl00$ContentPlaceHolder1$DDLFilter': '0',
            'ctl00$ContentPlaceHolder1$DDLSearchType': search_type,
            'ctl00$ContentPlaceHolder1$TBWordmark': wordmark,
            'ctl00$ContentPlaceHolder1$TBClass': page_class,
            '__EVENTVALIDATION': event_validation,
            "__EVENTTARGET": "ctl00$ContentPlaceHolder1$BtnSearch",
            "__VIEWSTATE": view_state,
        }
        # The response body is read eagerly (no stream=True), so it remains
        # usable after the session is closed.
        return session.post(url, data=postdata, timeout=timeout)
def scrapping(r):
    """Extract the search-result rows from a search response.

    Rows are counted as ``<tr>`` elements with class ``row`` plus those with
    class ``alt``. For each row index ``i`` the text of five ``<span>``
    elements is read, each located by a fixed id prefix followed by ``i``.

    Args:
        r: Response object whose ``.text`` holds the result page HTML.

    Returns:
        A list with one dict per row, keyed by 'Wordmark', 'Proprietor',
        'Application Number', 'Class ' (trailing space included) and
        'Status'. Empty when the page contains no result rows.

    Raises:
        AttributeError: If an expected ``<span>`` is absent for some row
            index (``soup.find`` returns ``None``).
    """
    soup = BeautifulSoup(r.text, 'html.parser')
    row_count = (len(soup.find_all('tr', attrs={'class': 'row'}))
                 + len(soup.find_all('tr', attrs={'class': 'alt'})))

    id_prefix = 'ContentPlaceHolder1_MGVSearchResult_'
    # (output key, span id prefix) in the order the keys appear in each dict.
    fields = (
        ('Wordmark', id_prefix + 'lblsimiliarmark_'),
        ('Proprietor', id_prefix + 'LblVProprietorName_'),
        ('Application Number', id_prefix + 'lblapplicationnumber_'),
        # The trailing space in this key is kept on purpose: it is part of
        # the emitted data and existing consumers may depend on it.
        ('Class ', id_prefix + 'lblsearchclass_'),
        ('Status', id_prefix + 'Label6_'),
    )

    words_list = []
    for i in range(row_count):
        words_list.append({
            label: soup.find('span', attrs={'id': span_id + str(i)}).text
            for label, span_id in fields
        })
    return words_list
def showDetails(wordmark, page_class):
    """Run a word-mark search and return the results as a JSON string.

    Args:
        wordmark: Search text; must be longer than two characters.
        page_class: Class value as a string; must be non-empty and
            alphanumeric (``str.isalnum``).

    Returns:
        A JSON string encoding the list of result dicts, or ``None`` after
        printing a message when the arguments fail validation.
    """
    # Guard clause: reject bad input before doing any network work.
    if len(wordmark) <= 2 or not page_class.isalnum():
        print("Please Enter Valid Parameters\n")
        return None
    return json.dumps(scrapping(returnJson(wordmark, page_class)))
# Example run: search for the word mark 'AIWA' with class '2'. The JSON string
# returned by the call is neither printed nor stored here.
showDetails('AIWA','2')
我可不想用 BeautifulSoup。不过,我认为你可以用 Selenium 来做。
借助 Selenium 的功能,您可以单击所选的“Show details”按钮,等待请求的信息出现在右侧面板中,然后大致像使用 BeautifulSoup 那样,从该面板中提取所需的信息。
您需要使用第一个 POST 请求返回的信息再创建一个 POST 请求。下面演示如何从返回的数据中提取 `Goods & Services Description`:

这将显示:
^{pr2}$
注意:返回的数据还包含用 `|` 字符分隔的其他字段。详细信息的 HTML 中碰巧也包含这个字符,因此有必要单独提取字段 7 来获得 HTML。
相关问题 更多 >
编程相关推荐