使用 Python 登录到棘手的站点

2024-06-02 13:32:10 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试从 https://www.astrill.com/member-zone/tools/vpn-servers 抓取服务器列表,该列表仅对会员开放。登录需要用户名、密码和验证码。如果我用浏览器登录并复制“PHPSESSID”cookie,一切都能正常工作,但我想用 Python 登录。我目前是下载验证码(captcha)图片并手动输入,但无论如何都无法登录。登录 URL:https://www.astrill.com/member-zone/log-in 。有人能帮我吗?

# Page requested after logging in; the script prints its body and cookies.
SERVERS_URL = 'https://www.astrill.com/member-zone/tools/vpn-servers'
# Login page: downloaded to read the captcha image and CSRF token, and also
# the address the login form is posted to.
LOGIN_URL = 'https://www.astrill.com/member-zone/log-in'


def get_capcha(url):
    """Download the login page, save its captcha image, return the CSRF token.

    The captcha image is written to ``captcha.jpg`` inside ``BASE_FOLDER`` so
    that it can be read and typed in by hand.

    Args:
        url: Address of the login page to download.

    Returns:
        The ``value`` attribute of the element whose id is ``csrf_token``.

    Raises:
        requests.RequestException: If the page cannot be downloaded or the
            server answers with an HTTP error status.
    """
    print(f'Scraping url: {url}')
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.RequestException as e:
        print(type(e), e)
        # Without a usable response there is nothing to parse. Falling
        # through would only fail later with an unrelated NameError or
        # TypeError, so surface the real error here.
        raise
    if response.status_code == 200:
        print('Success!')
    soup = bs4.BeautifulSoup(response.content, 'html.parser')
    captcha_url = soup.find('img', alt='captcha')['src']
    captcha_file = os.path.join(BASE_FOLDER, 'captcha.jpg')
    # Deliberately not called `id`, which would shadow the builtin.
    csrf_field = soup.find(id='csrf_token')
    csrf_token = csrf_field['value']
    print(csrf_token)
    print(f'Captcha: {captcha_url}')
    print(response.headers)
    # NOTE(review): presumably urllib's urlretrieve, which issues a separate
    # request that does not carry the cookies received with `response` --
    # confirm whether the site ties the captcha to the session.
    urlretrieve(captcha_url, captcha_file)
    return csrf_token


def login(url, id):
    """Post the login form, then print the server-list page and its cookies.

    The captcha text is read interactively from standard input.

    Args:
        url: Address the login form is posted to.
        id: CSRF token to submit with the form. The name shadows the builtin
            but is kept so that existing callers keep working.
    """
    captcha_text = input('Captcha: ')
    print(id)
    payload = {
        'action': 'log-in',
        'username': 'myusername@a.com',
        'password': '1111111',
        'captcha': captcha_text,
        '_random': 'l4r1b7hf4g',
        'csrf_token': id,
    }

    # requests.Session() replaces the deprecated requests.session() alias;
    # the with-block releases the pooled connections when we are done.
    with requests.Session() as session:
        # The POST response itself is not inspected; only the follow-up
        # request for the server list is printed.
        session.post(url, data=payload)
        r = session.get(SERVERS_URL)
        print(r.text)
        print(r.cookies)


if __name__ == '__main__':
    # Use a descriptive name instead of `id`, which would shadow the builtin
    # at module scope.
    csrf_token = get_capcha(LOGIN_URL)
    login(LOGIN_URL, csrf_token)

Tags: https com log id url zone get response
1条回答
网友
1楼 · 发布于 2024-06-02 13:32:10

首先,我当时不确定需要提交哪些表单字段(payload)。通过 Firefox 开发者工具的“网络”面板可以很容易地查到:你能看到浏览器实际提交了什么。我发现的第二点是,验证码(captcha)图片必须在同一个会话中请求,并且要带上我的请求头和 cookie。因此,我的代码现在如下所示,并且可以正常工作!(其中某些请求头字段可能可以删除)

# Cookies passed explicitly with each request; get_capcha() fills this from
# the login page response.
cookies = {}

# Browser-like request headers (the User-Agent identifies as Firefox 76).
# 'Content-Length' is deliberately absent: a fixed value is only right for
# one exact body size and would also be sent on the body-less captcha GET.
# The HTTP library computes the correct length for the POST body itself.
headers = {
    'Host': 'www.astrill.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Origin': 'https://www.astrill.com',
    'Connection': 'keep-alive',
    'Referer': 'https://www.astrill.com/member-zone/log-in',
}

# Login form fields. 'captcha' and 'csrf_token' start empty and are filled
# in by get_capcha() before the form is posted.
payload = {
    'action': 'log-in',
    'username': 'myusername@a.com',
    'password': '1111111',
    'remember_me': 0,
    'captcha': '',
    '_random': 'somerandom1',
    'csrf_token': '',
}

def get_capcha(url):
    """Load the login page and fill in the CSRF token and captcha answer.

    The page and the captcha image are both requested through the shared
    ``session``. The image is saved as ``captcha.jpg`` inside ``BASE_FOLDER``
    and the user is then asked to type the text it shows.

    Side effects:
        * ``payload['csrf_token']`` is set from the page's ``csrf_token``
          element and ``payload['captcha']`` from the typed answer.
        * ``cookies`` is updated with the cookies of the page response.

    Args:
        url: Address of the login page to download.

    Raises:
        Exception: Whatever the page request raised, including the error
            from ``raise_for_status()`` on an HTTP error status. It is
            printed and then re-raised.
    """
    print(f'Scraping url: {url}')
    try:
        response = session.get(url)
        response.raise_for_status()
    except Exception as e:
        print(type(e), e)
        # Nothing can be parsed without a usable response. Continuing would
        # only fail below with an unrelated NameError or TypeError.
        raise
    if response.status_code == 200:
        print('Success!')
    soup = bs4.BeautifulSoup(response.content, 'html.parser')
    captcha_url = soup.find('img', alt='captcha')['src']
    captcha_file = os.path.join(BASE_FOLDER, 'captcha.jpg')
    payload['csrf_token'] = soup.find(id='csrf_token')['value']
    print(f'csrf_token: {payload["csrf_token"]}')
    print(f'Captcha: {captcha_url}')
    cookies.update(response.cookies)
    # Request the image through the same session, with the same headers and
    # cookies that the login POST will use.
    captcha_img = session.get(captcha_url, headers=headers, cookies=cookies)
    # The with-block closes the file even if the write fails.
    with open(captcha_file, 'wb') as image_file:
        image_file.write(captcha_img.content)
    payload['captcha'] = input('Captcha: ')


def login(url):
    """Submit the prepared login form and print what the site returns.

    Posts the module-level ``payload`` to ``url`` through the shared
    ``session``, prints the response body, then requests ``SERVERS_URL`` and
    prints that response's body and cookies.

    Args:
        url: Address the login form is posted to.
    """
    login_response = session.post(
        url,
        data=payload,
        headers=headers,
        cookies=cookies,
    )
    print(login_response.text)

    servers_response = session.get(SERVERS_URL, cookies=cookies)
    for shown in (servers_response.text, servers_response.cookies):
        print(shown)


def main():
    """Run the two login steps in order against the login page."""
    login_page = LOGIN_URL
    # Step 1: read the CSRF token and ask the user for the captcha text.
    get_capcha(login_page)
    # Step 2: post the form and print the server-list page.
    login(login_page)


# Run the login flow only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

相关问题 更多 >