Python: need to download images (JPG and PNG) from URLs in a loop

Posted 2024-10-01 15:39:31


My school provides e-books, but to get at them I have to log in several times, so it takes ages just to open my textbooks. On top of that, I need an internet connection to view them.

But I discovered that the books sit on the publisher's server as individual images (both JPG and PNG), so now I want to download those images and merge them into a PDF file.
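For the merge step I'm planning something along these lines with Pillow (just a rough, untested sketch; "images" is the folder my script below saves into, and "textbook.pdf" is a placeholder name):

import os
from PIL import Image

def images_to_pdf(image_dir, output_pdf):
        #pages are saved as 0000.jpg, 0001.png, ... so sorted filename order is page order
        names = sorted(os.listdir(image_dir))
        pages = [Image.open(os.path.join(image_dir, n)).convert("RGB")  #flatten PNG alpha for PDF
                 for n in names if n.lower().endswith((".jpg", ".png"))]
        if pages:
                pages[0].save(output_pdf, save_all=True, append_images=pages[1:])

images_to_pdf("images", "textbook.pdf")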

The problem I keep running into is that the files usually aren't browsable directly, so I have to hand the script an exact URL, and I can't get it to move on to the next file.
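To tell real pages from missing ones I check for the Dutch "not found" message the publisher sends back (it seems to serve a normal-looking page even when the image doesn't exist, so the status code alone isn't enough). A requests version of the same idea as the link_alive function in my script, just as a sketch:

import requests

#the publisher's "page not found" text; it apparently arrives with HTTP 200,
#which is why the body has to be inspected instead of the status code
NOT_FOUND_MARKER = "De door u gevraagde pagina of resource kan helaas niet worden gevonden."

def link_alive(url):
        try:
                resp = requests.get(url, timeout=10)
                return resp.status_code == 200 and NOT_FOUND_MARKER not in resp.text
        except requests.RequestException:
                return False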

What I have so far: Pastebin link

import os
import urllib
import requests
import sys
from time import sleep
from PIL import Image
from reportlab.lib.utils import ImageReader
from reportlab.lib import utils
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import cm
from subprocess import Popen, PIPE
import shutil
import nltk  
from urllib import urlopen

#Change these variables:

URL = "http://cdpcontent.toegang.nu/c436b908-7a8d-49ce-ae5e-24892fa06fd7/20140808123622/extract/assets/img/layout/page-00"

#_____________________________________________________

FILE_END_JPG = ".jpg"
FILE_END_PNG = ".png"
SAVE_TO_DIRECTORY = "images"

NUM = 1 # Default 1
MAX_NUM = 500

#note: these links are built once, here, before the download loop starts
builded_link_jpg = URL + str(NUM) + FILE_END_JPG
builded_link_png = URL + str(NUM) + FILE_END_PNG

def link_alive(some_url):
        try:
                html = urlopen(some_url).read()
                #the publisher's Dutch "page not found" message
                four_zero_four = "De door u gevraagde pagina of resource kan helaas niet worden gevonden."
                if four_zero_four in html:
                        #print "Link dead."
                        return 0
                else:
                        #print "Link alive."
                        return 1

        except Exception as Error:
                print Error
                print "\nError in check_dead_link function.\n"


def save(NUM, MAX_NUM, SAVE_TO_DIRECTORY, FILE_END_PNG, FILE_END_JPG, URL):
        save_name = 0

        try:
                if not os.path.exists(SAVE_TO_DIRECTORY):
                        os.makedirs(SAVE_TO_DIRECTORY)
                        print SAVE_TO_DIRECTORY + " created."

                print "All images will be saved to the folder:", SAVE_TO_DIRECTORY + "\n"

                while NUM <= MAX_NUM:

                        if link_alive(builded_link_jpg) == 1:
                                print "This is a JPG page\n"
                                save_name = "%04d" % save_name
                                image = str(save_name) + FILE_END_JPG
                                save_name = int(save_name)
                                save_name += 1
                                urllib.urlretrieve(builded_link_jpg, SAVE_TO_DIRECTORY + "//" + image)
                                NUM += 1
                                print builded_link_jpg + " saved.\n"

                        else:
                                print "This is a PNG page\n"
                                save_name = "%04d" % save_name
                                image = str(save_name) + FILE_END_PNG
                                save_name = int(save_name)
                                save_name += 1
                                urllib.urlretrieve(builded_link_jpg, SAVE_TO_DIRECTORY + "//" + image)
                                NUM += 1
                                print builded_link_jpg + " saved.\n"

                print "Done saving all the images!"


        except Exception as Error:
                print Error
                print "\nFail in save function.\n"

save(NUM, MAX_NUM, SAVE_TO_DIRECTORY, FILE_END_PNG, FILE_END_JPG, URL)

The part I'm stuck on is the last while loop: it just keeps downloading the same image over and over, only with an incrementing filename ;/
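I think the fix has to involve rebuilding the link inside the loop instead of once at the top. Is something like this the right direction? (Untested sketch; the URL pattern, the "%04d" naming, the 500-page cap and the Dutch marker are copied from my script above, and stopping at the first number that serves neither extension is a guess on my part.)

import os
import requests

URL = "http://cdpcontent.toegang.nu/c436b908-7a8d-49ce-ae5e-24892fa06fd7/20140808123622/extract/assets/img/layout/page-00"
SAVE_TO_DIRECTORY = "images"
NOT_FOUND_MARKER = "De door u gevraagde pagina of resource kan helaas niet worden gevonden."

if not os.path.exists(SAVE_TO_DIRECTORY):
        os.makedirs(SAVE_TO_DIRECTORY)

for num in range(1, 501):                       #MAX_NUM = 500, as in my script
        for ext in (".jpg", ".png"):
                link = URL + str(num) + ext     #rebuilt on every pass, unlike my version
                resp = requests.get(link, timeout=10)
                if resp.status_code == 200 and NOT_FOUND_MARKER not in resp.text:
                        #save_name started at 0 in my script, hence num - 1
                        name = os.path.join(SAVE_TO_DIRECTORY, "%04d%s" % (num - 1, ext))
                        with open(name, "wb") as f:
                                f.write(resp.content)
                        print(link + " saved.")
                        break
        else:
                break                           #neither extension worked; assume we ran past the last page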

Can anyone help me?


Tags: to, name, from, import, url, save, link, num
