Python requests 未能正确下载 PDF

# First attempt: fetch the PDF with a single GET request, passing the
# credentials as the request payload. No login request or session is involved.
import requests

downloadurl = "https://pedsinreview.aappublications.org/content/pedsinreview/40/10/e35.full.pdf"
username = 'myusername'
password = 'mypassword'
chunk_size = 1024

payload = {'name': username, 'pass': password}
r = requests.get(downloadurl, data=payload, verify=False, stream=True)
#r.raw.decode_content = True

# NOTE(review): the original post lists three ways of writing the response
# (tagged #1#, #2#, #3#), all commented out. Variant #3 is left active here so
# the `with` block has a body and the snippet is valid Python.
with open("file_name.pdf", 'wb') as f:
    #1#f.write(r.content)
    #2#shutil.copyfileobj(r.raw, f)
    #3# stream the body to disk chunk by chunk
    for chunk in r.iter_content(chunk_size):
        if chunk:
            f.write(chunk)

# Second attempt: log in through a requests.Session first, then request the
# PDF through the same session and write it to disk in chunks.
import requests

loginurl = "https://pedsinreview.aappublications.org/user/login"
downloadurl = "https://pedsinreview.aappublications.org/content/pedsinreview/40/10/e35.full.pdf"
username = 'myusername'
password = 'mypassword'
chunk_size = 1024

#r = requests.get(downloadurl, data=payload, verify=False, stream=True)

# Do everything with the context of the session
with requests.Session() as session:
    data = {
        'form_id': 'user_login',
        'name': username,
        'pass': password
    }
    login_request = session.post(loginurl, data=data)
    # returns 200, I think it should be 302 because that's what it shows when
    # I login successfully in browser vs. 200 when I use a wrong password.
    print(login_request.status_code)

    # Now you are logged in and should be able to request the pdf
    r = session.get(downloadurl)
    with open("file_name.pdf", 'wb') as f:
        for chunk in r.iter_content(chunk_size):
            if chunk:
                f.write(chunk)

1条回答

网友

1楼 · 发布于 2024-09-30 06:22:23

你说得对，这是个认证问题。由于你没有登录，服务器会把你重定向到一个 HTML 页面，所以你保存下来的其实是那个 HTML 页面，而不是 PDF。

所以首先，你要做的是：

# Do everything with the context of the session, so the cookies set by the
# login request are reused by the download request.
with requests.Session() as session:
    # Not sure if the last few are required, but I went to the site and looked at
    # the login request and this is everything that was included.
    data = {
        'name': 'myusername',
        'pass': 'mypassword',
        'form_id': 'highwire_user_login',
        'form_build_id': 'form-yXL7wQkB-M6s7VkeYYQMBr0lPt8ICKc1ZFB5Qc-bOJ4',
        'op': 'Log in',
    }
    login_request = session.post("https://pedsinreview.aappublications.org/content/40/10/e35", data=data)
    print(login_request.status_code) # should be 200
    # Now you are logged in and should be able to request the pdf.
    # The request must go through `session` (not the bare `requests` module),
    # otherwise the login cookies are not sent with it.
    r = session.get(downloadurl, verify=False, stream=True)
    ...

相关问题更多 >

编程相关推荐

热门问题

热门文章