python post返回的页面与浏览器返回的页面不同

import urllib   # not used by the code below; kept from the original paste
import urllib2  # not used by the code below; Python 2 only
import requests as rq
import time

# Session counter embedded in the cookie; get_cookie() increments it per call.
_n = 1

url0 = 'http://david.abcc.ncifcrf.gov'
url = 'http://david.abcc.ncifcrf.gov/summary.jsp'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:30.0) Gecko/20100101 Firefox/30.0'


def get_cookie(session_id):
    """Build the value of the 'Cookie' request header for the POST.

    The value is assembled from ``__utma``, ``__utmz`` and ``JSESSIONID``
    entries, following what Firefox was observed to send.

    Side effects: sets the module-level ``_t`` to the current time and
    increments the module-level ``_n``. ``_t0`` and ``_t`` must already be
    defined at module level before the first call.

    Args:
        session_id: the JSESSIONID value returned by the server.

    Returns:
        The cookie string, entries separated by '; '.
    """
    global _t, _n

    domain_hash = '260267544'  # according to what's been sent by firefox
    random_uid = '1113731634'  # according to what's been sent by firefox

    init_time = _t0
    prev_time = _t          # timestamp of the previous call (or of start-up)
    _t = int(time.time())
    curr_time = _t

    _n += 1
    session_count = _n
    campaign_count = 1

    utma = '.'.join(str(x) for x in (
        domain_hash, random_uid, init_time, prev_time, curr_time,
        session_count))
    utmz = '.'.join(str(x) for x in (
        domain_hash, init_time, session_count, campaign_count,
        'utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'))
    cookie = '; '.join(str(x) for x in (
        '__utma=' + utma, '__utmz=' + utmz, 'JSESSIONID=' + session_id))
    return cookie


# first get the session ID
_t = int(time.time())
_t0 = _t
headers = {'User-Agent': user_agent}
r = rq.get(url, headers=headers)
session_id = r.cookies['JSESSIONID']
cookie = get_cookie(session_id)

# get the gene list (one identifier per line); `with` closes the file even
# if reading fails
with open('list.txt', 'r') as fh:
    gene = [line.rstrip('\n') for line in fh]

# then post the form
headers = {
    # all below is according to what's been sent by firefox
    'Host': 'david.abcc.ncifcrf.gov',
    'User-Agent': user_agent,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'Referer': url,
    'Cookie': cookie,
    'Connection': 'keep-alive',
    # 'Content-Type' : 'multipart/form-data; boundary=---------------------------17914945481928137296675300642',
    # 'Content-Length' : '3581'
}
# NOTE(review): the commented-out Content-Type above shows that Firefox
# posted this form as multipart/form-data, whereas passing a dict through
# `data=` makes requests send it form-encoded. That is a possible reason the
# response differs from the browser's -- confirm against the server.
data = {
    # all below is according to what's been sent by firefox
    'idType': 'OFFICIAL_GENE_SYMBOL',
    'uploadType': 'list',
    'multiList': 'false',
    'Mode': 'paste',
    'useIndex': 'null',
    'usePopIndex': 'null',
    'demoIndex': 'null',
    'ids': '\n'.join(gene),
    'removeIndex': 'null',
    'renameIndex': 'null',
    'renamePopIndex': 'null',
    'newName': 'null',
    'combineIndex': 'null',
    'selectedSpecies': 'null',
    # according to the pattern that the last 12 characters of 'JSESSIONID'
    # is sent by firefox
    'SESSIONID': session_id[-12:],
    'uploadHTML': 'null',
    'managerHTML': 'null',
    'sublist': '',
    'rowids': '',
    'convertedListName': 'null',
    'convertedPopName': 'null',
    'pasteBox': '\n'.join(gene),
    'fileBrowser': '',
    'Identifier': 'OFFICIAL_GENE_SYMBOL',
    'rbUploadType': 'list',
}
r = rq.post(url=url, data=data, headers=headers)
if r.status_code == 200:
    # Write the raw response bytes: writing the decoded `r.text` to a
    # text-mode file can fail on non-ASCII characters under Python 2.
    with open("python.html", 'wb') as fh:
        fh.write(r.content)

Apba3 Apoa1bp Dexi Dhps Dnpep Eral1 Gcsh Git1 Grtp1 Guk1 Ifrd2 Lsm3 Map2k1ip1 Med31 Mettl11a Mrpl2 mrpl24 Mrpl30 Mrpl46 Ndufaf3 Nr1h2 Obfc2b Parp3 Pigt Pop5 Ppt2 Ptpmt1 RGD1304567 RGD1306215 RGD1309708 Rras

1条回答

网友

1楼 · 发布于 2024-09-30 08:25:16

使用Selenium：

from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Upper bound, in seconds, for each explicit wait below.
WAIT_TIMEOUT = 10

# Gene identifiers to submit, one per line.
query = """Apba3
Apoa1bp
Dexi
Dhps
Dnpep
Eral1
Gcsh
Git1
Grtp1
Guk1
Ifrd2
Lsm3
Map2k1ip1
Med31
Mettl11a
Mrpl2
mrpl24
Mrpl30
Mrpl46
Ndufaf3
Nr1h2
Obfc2b
Parp3
Pigt
Pop5
Ppt2
Ptpmt1
RGD1304567
RGD1306215
RGD1309708
Rras"""

driver = webdriver.Firefox()
try:
    driver.get('http://david.abcc.ncifcrf.gov/summary.jsp')
    wait = WebDriverWait(driver, WAIT_TIMEOUT)

    # Wait until the paste box exists instead of sleeping a fixed 0.1 s,
    # which is too short whenever the page is slow to render.
    listBox = wait.until(EC.presence_of_element_located((By.ID, "LISTBox")))
    listBox.send_keys(query)

    # Typing "O" into the identifier-type control; presumably this selects
    # the option starting with "O" (OFFICIAL_GENE_SYMBOL) -- verify on the page.
    IDT = driver.find_element(By.ID, "IDT")
    IDT.send_keys("O")

    # Clicks the first element named "rbUploadType".
    radioCheck = driver.find_element(By.NAME, "rbUploadType")
    radioCheck.click()

    submitButton = driver.find_element(By.NAME, "B52")
    submitButton.click()

    # Submitting raises a JavaScript alert; wait for it rather than assuming
    # it has appeared after a fixed delay, then confirm it.
    alert = wait.until(EC.alert_is_present())
    alert.accept()

    # NOTE(review): the original fixed delay is kept here because the
    # structure of the result page is not known from this script; replace it
    # with a wait on a result-page element once one is identified.
    sleep(0.1)
    html = driver.page_source
finally:
    # Always close the browser, even when a step above fails.
    driver.quit()

变量“html”包含页面源代码。

相关问题更多 >

编程相关推荐

热门问题

热门文章