用Python抓取带导航控件的网页

2024-09-28 22:36:18 发布

您现在位置:Python中文网/ 问答频道 /正文

我是Python新手,需要帮助编写网页抓取代码,以便每周保存一张动态地图。这是我感兴趣的网站。目的是进入页面,选择季节,选择周次,然后将图像下载到本地文件夹。我将把该图像整合到一份用SAS自动生成的每周报告中。

提前谢谢!


Tags: 代码 图像 目的 文件夹 web 报告 site 页面
1条回答
网友
1楼 · 发布于 2024-09-28 22:36:18
import datetime
import os
import sys
import time

import arrow
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile

# Page that getImage() loads in the browser.
BASE_URL = 'https://gis.cdc.gov/grasp/fluview/main.html'
# Value getImage() writes to Firefox's "browser.download.dir" preference.
DOWNLOAD_PATH = "/Users/"

def closeWebDriver(driver):
    """Shut the browser session down in a platform-dependent way.

    On Windows (``os.name == 'nt'``) the driver's ``quit()`` is called;
    on every other platform its ``close()`` is called instead.

    Args:
        driver: Object exposing ``quit()`` and ``close()`` (a Selenium
            WebDriver in this script).
    """
    shutdown = driver.quit if os.name == 'nt' else driver.close
    shutdown()

def getImage():
    """Open BASE_URL in Firefox and click the page's "download image" button.

    Steps:
      1. Build a Firefox profile whose download preferences point at
         DOWNLOAD_PATH and list ``image/png`` as a never-ask MIME type.
      2. Load the page and give it a fixed 5 seconds to render.
      3. Bail out (after printing a message) when isValidTimeFrame()
         rejects the season currently shown.
      4. Step back to the first week via selectFirstWeek().
      5. Click the first ``downloads-button`` whose label is
         "download image" (case-insensitive, surrounding whitespace ignored).

    The browser is always shut down through closeWebDriver(), including
    when one of the element lookups raises.

    Returns:
        None.
    """
    profile = FirefoxProfile()

    profile.set_preference("browser.download.panel.shown", False)
    profile.set_preference("browser.helperApps.neverAsk.openFile", "image/png")
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "image/png")
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.dir", DOWNLOAD_PATH)

    driver = webdriver.Firefox(firefox_profile=profile)
    try:
        driver.get(BASE_URL)

        # Fixed pause so the page can finish rendering.
        time.sleep(5)

        if not isValidTimeFrame(driver):
            print('Not the time to download yet!')
            return

        selectFirstWeek(driver)

        print('- Consume the web.')
        wrapper = driver.find_element_by_class_name('downloads-help-area')

        for el in wrapper.find_elements_by_class_name('downloads-button'):
            # Compare as text: encoding to bytes first would make this
            # comparison always False on Python 3.
            if el.text.strip().lower() == 'download image':
                downloadImage(el)
                break

        # Fixed pause so the browser is not closed right after the click.
        time.sleep(5)
    finally:
        # Release the browser even if a lookup or click above failed.
        closeWebDriver(driver)


def isValidTimeFrame(driver):
    """Tell whether the page currently shows the season starting this year.

    The label of the element with class ``seasons-button`` is trimmed,
    lower-cased, printed, and compared with ``'<YYYY>-<YY>'`` where
    ``YYYY`` is the current local calendar year and ``YY`` is the last
    two digits of the following year (e.g. ``'2024-25'``).

    Args:
        driver: Object exposing ``find_element_by_class_name`` (a Selenium
            WebDriver in this script).

    Returns:
        bool: True when the label equals the expected season string.
    """
    seasons_button = driver.find_element_by_class_name('seasons-button')
    # Keep the label as text; comparing encoded bytes with a str is always
    # False on Python 3.
    time_frame = seasons_button.text.strip().lower()
    print(time_frame)

    # NOTE(review): the expected label is derived from the calendar year, so
    # a season label that started in the previous year will not match after
    # New Year -- confirm this is the intended behaviour.
    this_year = datetime.datetime.now().year
    compare_year = '%d-%02d' % (this_year, (this_year + 1) % 100)

    return time_frame == compare_year

def selectFirstWeek(driver):
    """Click the "previous map" control until the week slider is below 2.

    Each pass prints the slider element and its ``value`` property, clicks
    the element with id ``prevMap``, and then stops if the value read
    *before* the click was less than 2. One click is therefore still issued
    when the slider already reads below 2. A one-second pause follows.

    Args:
        driver: Object exposing ``find_element_by_id`` (a Selenium
            WebDriver in this script).
    """
    back_button = driver.find_element_by_id('prevMap')
    slider = driver.find_element_by_id('weekSlider')

    reached_start = False
    while not reached_start:
        print(slider)
        shown_week = slider.get_property('value')
        print('- Week: ' + shown_week)
        # The click comes first; the stop test uses the pre-click reading.
        back_button.click()
        reached_start = int(shown_week) < 2

    time.sleep(1)


def downloadImage(el):
    """Announce the element's label on stdout, then click it.

    Args:
        el: Object exposing a ``text`` attribute and a ``click()`` method
            (a Selenium WebElement in this script).
    """
    label = el.text
    print('- Click on ' + label)
    el.click()


# Module-level call: the download runs whenever this file is executed or imported.
getImage()

相关问题 更多 >