使用Python POST请求解析ASPX站点

2024-09-29 23:31:06 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试抓取并解析下面这个站点,但当我发送 POST 请求以获取搜索结果时,返回的却是一个错误页面:“请求的 URL 被拒绝。请咨询您的管理员。”

网站:https://prod.ceidg.gov.pl/CEIDG/CEIDG.Public.UI/Search.aspx

我已经从表单中提取了 __VIEWSTATE、__VIEWSTATEGENERATOR 等字段,并随请求一起提交,但仍然不起作用。我遗漏了什么?

#import requests
from bs4 import BeautifulSoup
import lxml
import urllib
from requests_html import HTMLSession
from requests_html import AsyncHTMLSession
import time
# Fetch the search form, copy its ASP.NET state fields, and submit a search
# for "bank" as a form-encoded POST, then print the raw response body.
SEARCH_URL = 'https://prod.ceidg.gov.pl/CEIDG/CEIDG.Public.UI/Search.aspx'

s = HTMLSession()

# Header keys must be the real HTTP field names ('User-Agent', 'Accept').
# 'User_Agent' and 'HTTP_ACCEPT' are not recognised, so the library's default
# User-Agent would still be sent. Content-Type is not set by hand because
# passing ``data=`` below makes the library set the form-encoded type itself.
header_simple = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
}

# Use the same scheme and headers for the GET and the POST so both requests
# look like they come from the same client.
r = s.request('get', SEARCH_URL, headers=header_simple)
soup_dummy = BeautifulSoup(r.content, "lxml")

# Parse the hidden form values that have to be echoed back on submit.
viewstate = soup_dummy.select("#__VIEWSTATE")[0]['value']
viewstategen = soup_dummy.select("#__VIEWSTATEGENERATOR")[0]['value']
eventvalidation = soup_dummy.select("#__EVENTVALIDATION")[0]['value']
english = soup_dummy.select("#hfEnglishWebsiteUrl")[0]['value']

data = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstategen,
    '__EVENTVALIDATION': eventvalidation,
    'ctl00$MainContent$txtName': 'bank',
    'ctl00$MainContent$cbIncludeCeased': 'on',
    'ctl00$MainContent$btnSearch': 'Find',
    'ctl00$hfAuthRequired': 'False',
    'ctl00$hfEnglishWebsiteUrl': english,
    'ctl00$stWarningLength': '30',
    'ctl00$stIdleAfter': '1200',
    'ctl00$stPollingInterval': '60',
    'ctl00$stMultiTabTimeoutSyncInterval': '20',
}

# Short pause between loading the form and submitting it.
time.sleep(3)

# Form fields belong in the request body (``data=``). ``params=`` would put
# them, including the very long __VIEWSTATE, into the URL query string and
# send a POST with an empty body.
p = s.request('post', SEARCH_URL, data=data, headers=header_simple)

print(p.content)

Tags: import ui search application value prod public select
1条回答
网友
1楼 · 发布于 2024-09-29 23:31:06

下面是使用 requests 模块从该页面获取搜索结果的一种方法。发送 POST 请求获取所需内容时,请确保 data 参数中包含表单的所有键和值。

工作脚本:

import lxml
import requests
from pprint import pprint
from bs4 import BeautifulSoup

# Load the search form, post every named <input> back with the search
# criteria filled in, and print the results table from the response.
with requests.Session() as session:
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    })

    form_page = session.get('http://prod.ceidg.gov.pl/CEIDG/CEIDG.Public.UI/Search.aspx')
    form_soup = BeautifulSoup(form_page.text, "lxml")

    # Start from every <input> that has a name; a missing value becomes ''.
    payload = {}
    for field in form_soup.select('input[name]'):
        payload[field['name']] = field.get('value', '')

    # Fill in the search criteria and the button being "clicked".
    payload.update({
        'ctl00$MainContent$txtName': 'bank',
        'ctl00$MainContent$cbIncludeCeased': 'on',
        'ctl00$MainContent$btnSearch': 'Find',
    })

    # Presumably the other buttons on the form; they are removed so that only
    # the search button is submitted. A missing key raises KeyError.
    for unwanted in ('ctl00$MainContent$btnClear', 'ctl00$versionDetails$btnClose'):
        del payload[unwanted]

    # Tip: pprint(payload) here shows the keys and values about to be sent.

    result_page = session.post(
        'https://prod.ceidg.gov.pl/CEIDG/CEIDG.Public.UI/Search.aspx',
        data=payload,
    )
    result_soup = BeautifulSoup(result_page.text, "lxml")
    print(result_soup.select_one("table#MainContent_DataListEntities"))

相关问题 更多 >

    热门问题