有没有办法用Python获取落地页上等待几秒后才自动下载的文件的URL？

from bs4 import BeautifulSoup
import requests

# Exercise pages whose "corrige.php?..." links should be followed.
correction_urls = [
    'http://xmaths.free.fr/1ES/cours/indications.php?nomexo=1ESderiex01',
    'http://xmaths.free.fr/1ES/cours/indications.php?nomexo=1ESderiex02',
    'http://xmaths.free.fr/1ES/cours/indications.php?nomexo=1ESderian02',
]

# Accessing each webpage stored in correction_urls list
for i, correction_url in enumerate(correction_urls, start=1):
    r = requests.get(correction_url)
    html_doc = r.text
    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed and emits a warning.
    soup = BeautifulSoup(html_doc, "html.parser")

    # Iterate over each link on the page
    for link in soup.find_all("a"):
        href = link.get("href")
        # Identify links to corrections (anchors without href yield None)
        if href is None or not href.startswith("corrige.php?"):
            continue

        # Build the full url and access it
        correction_pdf = "http://xmaths.free.fr/1ES/cours/" + href
        r = requests.get(correction_pdf)
        # NOTE(review): corrige.php appears to answer with an HTML page that
        # redirects to the PDF via a <meta http-equiv="Refresh"> tag, so
        # r.content here is probably that page, not the PDF - confirm.

        # Rename and save the pdf file
        with open("math_correction{}.pdf".format(i), "wb") as f:
            f.write(r.content)

1条回答

网友

1楼 · 发布于 2024-10-03 06:25:31

您可以从 <head> 内的 <meta> 标记中提取正确的路径：

<META HTTP-EQUIV="Refresh" CONTENT="1 ; url=../../corrections/rMu623S1NA.pdf">

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


# Page that redirects to the PDF through a <meta http-equiv="Refresh"> tag.
url = 'http://xmaths.free.fr/1ES/cours/corrige.php?nomexo=1ESpctgex01'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

# The content attribute looks like "1 ; url=../../corrections/xxx.pdf".
# Split only on the FIRST ';' and the FIRST '=' so a target that itself
# contains ';' or '=' (e.g. a query string) is kept intact.
_, _, refresh_target = soup.meta['content'].partition(';')
_, _, pdf_path = refresh_target.partition('=')

# Resolve the relative "../.." path against the page URL instead of
# concatenating it onto a second hard-coded copy of the base URL.
r = requests.get(urljoin(url, pdf_path.strip()))

with open('document.pdf', 'wb') as f_out:
    f_out.write(r.content)

相关问题更多 >

编程相关推荐

热门问题

热门文章