Python通过Tor连接请求流

1条回答
网友
1楼 · 发布于 2024-09-30 12:22:49
我解决这个问题的办法是：接受连接会被中断这一事实，并另外编写一个函数，从精确的字节偏移量处恢复下载。其原理在这个问题中有解释：How to resume file download in Python?
我的代码（警告：比较混乱）：
def onionrequestthreadeddataleakdownloadresume(onionurl,resume_byte_pos):
    """Resume a partial download by appending from a byte offset.

    Sends a GET for ``onionurl[1]`` with a ``Range: bytes=<resume_byte_pos>-``
    header through the SOCKS5 proxy at ``localhost:9050`` and appends the
    response body to ``<dataleaks root>/<onionurl[0]>/<basename of the URL>``.

    Args:
        onionurl: Two-item sequence ``(companyname, url)``.
        resume_byte_pos: Byte offset to resume from. The file is opened in
            append mode, so this should equal the current size of the
            partial file on disk.

    Returns:
        None. Every failure is logged/printed and swallowed; the caller has
        to compare the on-disk size with the expected size to learn whether
        the resume made progress.
    """
    print("rerunning")
    companyname = onionurl[0]
    url = onionurl[1]
    foldername = '/media/archangel/Elements/clop/dataleaks/'
    companydir = foldername + companyname + "/"
    try:
        # makedirs rather than mkdir: a missing root folder must not prevent
        # the company sub-folder from being created.
        os.makedirs(companydir, exist_ok=True)
    except Exception as e:
        print(e)
        print("folder not created")

    dataloc = companydir + os.path.basename(url)
    print("dataloc")
    print(dataloc)
    print("onionurl")
    print(url)

    try:
        # The context manager closes the session's pooled connections even
        # when the transfer is interrupted.
        with requests.session() as session:
            # socks5h: hostnames are resolved by the proxy, not locally.
            session.proxies = {
                'http': 'socks5h://localhost:9050',
                'https': 'socks5h://localhost:9050',
            }

            # NOTE(review): `Timeout` is defined outside this block and is
            # started but never cancelled, exactly as before. Confirm what it
            # is and whether it should be cancelled once the transfer ends.
            try:
                seconds = 20
                timeout = Timeout(seconds)
                timeout.start()
            except Exception as ex:
                print(ex)

            # 'Accept-Encoding': None drops the default header so the body is
            # not content-encoded and byte offsets match the file on disk.
            resume_header = {'Accept-Encoding': None, 'Range': 'bytes=%d-' % resume_byte_pos}
            # verify=False disables TLS certificate verification.
            with session.get(url, stream=True, verify=False, headers=resume_header, timeout=600) as response:
                status = response.status_code
                # 206 means the Range header was honoured. A plain 200 carries
                # the whole file from byte 0, so appending it is only correct
                # when resuming from 0. Anything else (e.g. 416, or an error
                # page) must never be appended to the partial file.
                range_honoured = status == 206 or (status == 200 and resume_byte_pos == 0)
                if not range_honoured:
                    message = (
                        f'unexpected HTTP status {status} for range request '
                        f'starting at byte {resume_byte_pos}'
                    )
                    logging.error(f'Request failed with error: {message}')
                    print(message)
                    return

                # Informational only: size of the remaining part, if reported.
                print(response.headers.get('Content-Length'))

                try:
                    with open(dataloc, "ab") as out_file:
                        for chunk in response.iter_content(chunk_size=1024*1024):
                            if chunk:
                                out_file.write(chunk)
                                # Flush per chunk so an interrupted transfer
                                # still leaves the received bytes on disk.
                                out_file.flush()
                except Exception as ex:
                    logging.error(f'write failed with error: {ex}')
                    print(ex)

                print("exited with for file")
    except Exception as ex:
        logging.error(f'Request failed with error: {ex}')
        print(ex)












def onionrequestthreadeddataleakdownload2(onionurl, max_stalled_attempts=10):
    """Download a file via the SOCKS5 proxy at localhost:9050, resuming as needed.

    Streams ``onionurl[1]`` to
    ``<dataleaks root>/<onionurl[0]>/<basename of the URL>``. If the file on
    disk ends up shorter than the ``Content-Length`` reported by the first
    response, ``onionrequestthreadeddataleakdownloadresume`` is called
    repeatedly with the current on-disk size until the sizes match.

    Args:
        onionurl: Two-item sequence ``(companyname, url)``.
        max_stalled_attempts: Number of consecutive resume attempts that add
            no bytes before giving up. ``None`` retries without limit, which
            was the previous behaviour.

    Returns:
        ``[local_path, file_basename, url, size_mb]`` once the on-disk size
        equals the reported ``Content-Length`` (``size_mb`` is
        ``Content-Length / 1000000``, or ``1`` for files up to 1,000,000
        bytes). ``None`` when the first request fails, reports no
        ``Content-Length``, or the download cannot be completed.
    """
    companyname = onionurl[0]
    url = onionurl[1]
    foldername = '/media/archangel/Elements/clop/dataleaks/'
    companydir = foldername + companyname + "/"
    try:
        # makedirs rather than mkdir: a missing root folder must not prevent
        # the company sub-folder from being created.
        os.makedirs(companydir, exist_ok=True)
    except Exception as e:
        print(e)
        print("folder not created")

    filenamebasename = os.path.basename(url)
    dataloc = companydir + filenamebasename
    print("dataloc")
    print(dataloc)
    print("onionurl")
    print(url)

    # Stays None unless the server reports a usable Content-Length; without it
    # there is nothing to compare the on-disk size against.
    file_size = None
    try:
        # The context manager closes the session's pooled connections even
        # when the transfer is interrupted.
        with requests.session() as session:
            # socks5h: hostnames are resolved by the proxy, not locally.
            session.proxies = {
                'http': 'socks5h://localhost:9050',
                'https': 'socks5h://localhost:9050',
            }

            # NOTE(review): `Timeout` is defined outside this block and is
            # started but never cancelled, exactly as before. Confirm what it
            # is and whether it should be cancelled once the transfer ends.
            try:
                seconds = 20
                timeout = Timeout(seconds)
                timeout.start()
            except Exception as ex:
                print(ex)

            # 'Accept-Encoding': None drops the default header so the body is
            # not content-encoded and Content-Length matches the bytes written.
            headersac = {'Accept-Encoding': None}
            # verify=False disables TLS certificate verification.
            with session.get(url, stream=True, verify=False, headers=headersac, timeout=600) as response:
                file_size = int(response.headers['Content-Length'])
                print(file_size)
                try:
                    with open(dataloc, "wb") as out_file:
                        for chunk in response.iter_content(chunk_size=1024*1024):
                            if chunk:
                                out_file.write(chunk)
                                # Flush per chunk so an interrupted transfer
                                # still leaves the received bytes on disk for
                                # the resume step.
                                out_file.flush()
                except Exception as ex:
                    # A dropped stream lands here; the resume loop below
                    # picks up from whatever reached the disk.
                    logging.error(f'write failed with error: {ex}')
                    print(ex)
    except Exception as ex:
        logging.error(f'request failed with error: {ex}')
        print(ex)
        print("exited with for file")

    if file_size is None:
        print("FAILED DOWNLOAD 2")
        return None

    if file_size > 1000000:
        filesizemb = file_size / 1000000
    else:
        filesizemb = 1

    def local_size():
        # A file that could not be created counts as zero bytes downloaded.
        try:
            return os.path.getsize(dataloc)
        except OSError:
            return 0

    file_size_offline = local_size()
    print("file size offline")
    stalled_attempts = 0
    while file_size_offline != file_size:
        print(file_size_offline)
        print(file_size)
        print("file size incomplete")

        if file_size_offline > file_size:
            # Appending can only grow the file, so the sizes can never match.
            logging.error(
                f'Attempt failed with error: local file has {file_size_offline} bytes, '
                f'expected {file_size}'
            )
            print("FAILED DOWNLOAD 2")
            return None
        if max_stalled_attempts is not None and stalled_attempts >= max_stalled_attempts:
            logging.error(
                f'Attempt failed with error: no progress after {stalled_attempts} '
                f'resume attempts at byte {file_size_offline}'
            )
            print("FAILED DOWNLOAD 2")
            return None

        try:
            onionrequestthreadeddataleakdownloadresume([companyname, url], file_size_offline)
        except Exception as ex:
            print("redownload failed")
            print(ex)

        new_size = local_size()
        # Only consecutive attempts that add no bytes count towards the limit.
        if new_size > file_size_offline:
            stalled_attempts = 0
        else:
            stalled_attempts += 1
        file_size_offline = new_size

    print("LOOP FINISHED")
    print(file_size)
    print(file_size_offline)
    print(dataloc)

    # [location on disk, file name after the last slash, source URL, size in MB]
    return [dataloc, filenamebasename, url, filesizemb]
相关问题更多 >

编程相关推荐

热门问题

热门文章

Python通过Tor连接请求流

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >