我正在尝试使用python从一个站点下载一个文件,这个文件只有在您登录后才能下载,它似乎工作正常!但当我真正尝试下载该文件时,我只收到一个文本文件,说我必须登录。我想我需要得到PHPSESSID cookie并使用它,但不知道怎么做。这是我的代码:
from BeautifulSoup import BeautifulSoup
import re
import requests
import sys
class LegendasTV(object):
URL_BUSCA = 'http://legendas.tv/legenda/busca/%s/1'
URL_DOWNLOAD = 'http://legendas.tv/downloadarquivo/%s'
URL_LOGIN = 'http://legendas.tv/login'
def __init__(self, usuario, senha):
self.usuario = usuario
self.senha = senha
self.cookie = None
self._login()
def _login(self):
s = requests.Session()
url = self.URL_LOGIN
payload = {'data[User][username]': self.usuario, 'data[User][password]': self.senha, "data[lembrar]": "on"}
r = s.post(url, payload)
html = r.content
if "<title>Login - Legendas TV</title>" in html:
return 0
else:
print 'Success login!'
return 1
def _request(self, url, method='GET', data=None):
if method == 'GET':
r = requests.get(url, stream=True)
if method == 'POST' and data:
r = requests.post(url, data=data)
return r
def search(self, q, lang='pt-br', tipo='release'):
if not q:
pass # raise exception
if not lang or not self.LEGENDA_LANG.get(lang):
pass # raise exception
if not tipo or not self.LEGENDA_TIPO.get(tipo):
pass # raise exception
busca = { 'txtLegenda': q,
'int_idioma': self.LEGENDA_LANG[lang],
'selTipo': self.LEGENDA_TIPO[tipo] }
r = self._request(self.URL_BUSCA % q, method='POST', data=busca)
if r:
legendas = self._parser(r.text)
else:
pass # raise exception
return legendas
def _parser(self, data):
legendas = []
html = BeautifulSoup(data)
results = html.findAll("a")
for result in results:
if result.get("href") is not None and "S09E16" in result.get("href"):
path_href = result.get("href").split("/")
unique_id_download = path_href[2]
url = self.URL_DOWNLOAD % unique_id_download
def download(self, url_da_legenda):
r = self._request(url_da_legenda)
if r:
with open("teste.rar", 'wb') as handle:
print u'Baixando legenda:', url_da_legenda
handle.write(r.content)
下面是我如何使用代码下载一个文件:
^{pr2}$我会很感激你的帮助。在
有了这个答案,我终于明白了!在
https://stackoverflow.com/a/12737874/1718174
我试着直接使用饼干,但似乎session已经完成了繁重的工作,并为我们做好了准备。以下是我的代码需要更新的部分:
相关问题 更多 >
编程相关推荐