如何在Python中将数据下载到未创建的文件目录中?

2024-09-22 16:38:46 发布

您现在位置:Python中文网/ 问答频道 /正文

#This is my webpage_getinfo.py script used to parse emails, phone numbers and other details from a webpage. It will be needed for the file_download.py script#

import sys, re
import webpage_get

def print_md5s(page):
    print '[*] print_md5s()'
    md5s = re.findall(r'([a-fA-F\d]{32})', page)
    md5s.sort()
    print '[+]', str(len(md5s)), 'MD5 Hashes Found:'
    for md5 in md5s:
        print md5

def print_emails(page):
    print '[*] print_emails()'
    emails = re.findall(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}', page)
    emails.sort()
    print '[+]', str(len(emails)), 'Email Addresses Found:'
    for email in emails:
        print email

def print_phones(page):
    print '[*] print_phones numbers()'
    phones = re.findall(r'\+\d................', page)
    phones.sort()
    print '[+]', str(len(phones)), 'Phone Numbers Found:'
    for phone in phones:
        print phone

def print_images(page):
    print '[*] print_images()'
    images = re.findall(r'\w+.\jpg|\w+.\gif|\w+.\w+\.\wmp', page)
    images.sort()
    print '[+]', str(len(images)), 'Images Found:'
    for image in images:
        print image

def print_documents(page):
    print '[*] print_documents()'
    documents = re.findall(r'\w+\.\wocx', page)
    documents.sort()
    print '[+]', str(len(documents)), 'Documents Found:'
    for document in documents:
        print document

def main():
    page = webpage_get.webget(sys.argv) # getting the webpage from webpage_get.py as 'page'
    print_md5s(page)
    print_emails(page)
    print_phones(page)
    print_images(page)
    print_documents(page) 

if __name__ == '__main__':
    main()



 ## This is the code for file_download.py below. It download the details from webpage_getinfo and stores it in an uncreated file directory, creating the directory as soon as the script is run##  

    import errno
    import sys, os, urllib2, urllib
    import webpage_getinfo

    page = 'http://www.soc.napier.ac.uk/~cs342/CSN08115/cw_webpage/index.html'

    def path_to_download():
        file_name = 'file.txt' # creates the file name
        script_dir = os.path.dirname(os.path.abspath(file_name)) 
        dest_dir = os.path.join(script_dir, 'C:\\temp', 'coursework')

        try:
            os.makedirs(dest_dir)

        except IOError:
            print 'Warning: file already exists'    
        path = os.path.join(dest_dir, file_name)
        # writing the details of each information from webpage_getinfo
        file_dest = open(dest_dir, 'w') # opening the destination directory
        file_dest.write('Here is the site documents\n')
        file_dest.write(md5details)
        file_dest.write(emails)
        file_dest.write(phones)
        file_dest.write(images)
        file_dest.write(documents)

    def main():
        path_to_download()
        md5_info = webpage_getinfo.print_md5s() # getting md5 from webpage_getinfo.py
        email_info = webpage_getinfo.print_emails() # getting emails from webpage_getinfo.py
        phones_info = webpage_getinfo.print_phones() # getting phone numbers from webpage_getinfo.py
        images_info = webpage_getinfo.print_images() # getting images from webpage_getinfo.py
        documents_info = webpage_getinfo.print_documents() # getting documents from webpage_getinfo.py  

    if __name__ == '__main__':
        main()

#每当我运行脚本时,如果文件不存在,它就会告诉我“IOError:Permission denied”。请,任何代码更正和协助通过建议将不胜感激。我是python的业余爱好者,请原谅我的混乱密码。谢谢#


Tags: thefrompydefpagedocumentsfiledest