如何在python中获取会话cookie

2024-10-01 00:14:36 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试使用python从一个站点下载一个文件,这个文件只有在您登录后才能下载,它似乎工作正常!但当我真正尝试下载该文件时,我只收到一个文本文件,说我必须登录。我想我需要得到PHPSESSID cookie并使用它,但不知道怎么做。这是我的代码:

from BeautifulSoup import BeautifulSoup
import re
import requests
import sys

class LegendasTV(object):
    """Small client for legendas.tv: log in, search for subtitles, download one.

    The login is performed once, at construction time, on a
    ``requests.Session`` that is kept in ``self.session``. Every later
    request goes through that same session, so the cookies set by the login
    response are sent back automatically. Issuing requests outside the
    session is what made downloads come back as a "you must log in" page.

    NOTE(review): ``search`` reads ``self.LEGENDA_LANG`` and
    ``self.LEGENDA_TIPO``; neither mapping is defined in the visible part of
    this class, so they have to be provided before ``search`` is used.
    """

    URL_BUSCA = 'http://legendas.tv/legenda/busca/%s/1'

    URL_DOWNLOAD = 'http://legendas.tv/downloadarquivo/%s'

    URL_LOGIN = 'http://legendas.tv/login'

    def __init__(self, usuario, senha):
        """Store the credentials and log in immediately.

        usuario -- account user name.
        senha -- account password.
        """
        self.usuario = usuario
        self.senha = senha
        self.cookie = None
        self.session = None

        self._login()

    def _login(self):
        """Open a fresh session and post the credentials to ``URL_LOGIN``.

        The session is stored on the instance whether or not the login
        succeeds, so later requests still go out (unauthenticated) after a
        failed login instead of crashing.

        Returns 1 on success and 0 when the response is still the login page.
        """
        self.session = requests.Session()
        payload = {
            'data[User][username]': self.usuario,
            'data[User][password]': self.senha,
            "data[lembrar]": "on",
        }
        r = self.session.post(self.URL_LOGIN, payload)

        # Use the decoded body: searching a str pattern in the raw bytes of
        # r.content raises TypeError on Python 3.
        if "<title>Login - Legendas TV</title>" in r.text:
            return 0

        print('Success login!')
        return 1

    def _request(self, url, method='GET', data=None):
        """Send a GET or POST through the logged-in session.

        url -- address to request.
        method -- 'GET' (streamed) or 'POST'.
        data -- form payload for POST; ignored for GET.

        Returns the response object produced by the session.
        Raises ValueError for any other method, instead of failing later on
        an unbound local variable.
        """
        if method == 'GET':
            return self.session.get(url, stream=True)
        if method == 'POST':
            return self.session.post(url, data=data)
        raise ValueError('Unsupported HTTP method: %r' % (method,))

    def search(self, q, lang='pt-br', tipo='release'):
        """Search for subtitles and return their download URLs.

        q -- search term; interpolated into ``URL_BUSCA`` as-is.
        lang -- key of ``self.LEGENDA_LANG``.
        tipo -- key of ``self.LEGENDA_TIPO``.

        Returns the list built by ``_parser``, or an empty list when the
        search response is falsy.
        Raises ValueError when q is empty or lang/tipo is not a known key.
        """
        if not q:
            raise ValueError('q must be a non-empty search term')

        if not lang or lang not in self.LEGENDA_LANG:
            raise ValueError('Unknown lang: %r' % (lang,))

        if not tipo or tipo not in self.LEGENDA_TIPO:
            raise ValueError('Unknown tipo: %r' % (tipo,))

        busca = {
            'txtLegenda': q,
            'int_idioma': self.LEGENDA_LANG[lang],
            'selTipo': self.LEGENDA_TIPO[tipo],
        }

        r = self._request(self.URL_BUSCA % q, method='POST', data=busca)
        if not r:
            # Nothing to parse; hand back an empty result rather than an
            # unbound name.
            return []

        return self._parser(r.text)

    def _parser(self, data, filtro='S09E16'):
        """Extract download URLs from a search-result page.

        data -- HTML text of the result page.
        filtro -- substring an anchor's href must contain to be kept. The
            default preserves the previously hard-coded value; an empty
            string keeps every anchor that has an href.

        Returns a list of ``URL_DOWNLOAD`` addresses, one per kept anchor,
        built from the third "/"-separated piece of the href.
        """
        legendas = []

        html = BeautifulSoup(data)
        for link in html.findAll("a"):
            href = link.get("href")
            if href is None or filtro not in href:
                continue

            partes = href.split("/")
            if len(partes) < 3:
                # No id segment to extract from this href.
                continue

            legendas.append(self.URL_DOWNLOAD % partes[2])

        return legendas

    def download(self, url_da_legenda, destino="teste.rar"):
        """Download one subtitle archive through the logged-in session.

        url_da_legenda -- download URL, e.g. one returned by ``search``.
        destino -- path of the file to write (binary mode).

        Nothing is written when the response is falsy.
        """
        r = self._request(url_da_legenda)
        if r:
            with open(destino, 'wb') as handle:
                print(u'Baixando legenda: %s' % url_da_legenda)
                handle.write(r.content)

下面是我如何使用代码下载一个文件:

^{pr2}$

我会很感激你的帮助。


Tags: 文件 import self url data get if def
1条回答
网友
1楼 · 发布于 2024-10-01 00:14:36

有了这个答案,我终于明白了!

https://stackoverflow.com/a/12737874/1718174

我试着直接使用 cookie,但似乎 session 已经完成了繁重的工作,并为我们准备好了一切。以下是我的代码需要更新的部分:

def _login(self):
    """Log in at ``URL_LOGIN`` and keep the session for later requests.

    Posts the stored credentials on a new ``requests.Session``. On success
    the session is saved in ``self.session``; on failure ``self.session`` is
    set to None.

    Returns 1 on success and 0 when the response is still the login page.
    """
    s = requests.Session()
    payload = {
        'data[User][username]': self.usuario,
        'data[User][password]': self.senha,
        "data[lembrar]": "on",
    }
    r = s.post(self.URL_LOGIN, payload)

    # Use the decoded body: searching a str pattern in the raw bytes of
    # r.content raises TypeError on Python 3.
    if "<title>Login - Legendas TV</title>" in r.text:
        # Always define the attribute so that an `if self.session:` check
        # is a safe test rather than an AttributeError.
        self.session = None
        return 0

    print('Success on login!')
    self.session = s
    return 1

def _request(self, url, method='GET', data=None):
    """Send a GET or POST through the session saved by the login.

    url -- address to request.
    method -- 'GET' (streamed) or 'POST'.
    data -- form payload for POST; ignored for GET.

    Returns the response object, or None when there is no usable session.
    Raises ValueError for any other method, instead of failing on an
    unbound local variable.
    """
    session = getattr(self, 'session', None)
    if not session:
        return None

    # The session's own cookie jar already carries what the login response
    # set; self.cookie is passed only as additional per-request cookies.
    if method == 'GET':
        return session.get(url, cookies=self.cookie, stream=True)
    if method == 'POST':
        return session.post(url, data=data, cookies=self.cookie)

    raise ValueError('Unsupported HTTP method: %r' % (method,))

相关问题 更多 >