我想先从多个 URL 下载多个 CSV 文件，之后再把这些信息解析到单个 CSV 中，但在下载环节遇到了困难。下面是我的代码，我不确定用 Scrapy 处理这一逻辑的最佳方式是什么。另外，有没有更智能的方法可以按文件名批量下载这些 CSV，而不必逐个传入列表中的每个 URL？任何帮助都将不胜感激。
import glob
import os
import scrapy
import pandas as pd
import requests
from datetime import datetime
from scrapy.utils.project import get_project_settings
from Scrapy.spiders.ao_base_spider import AoBaseSpider
class IrsMigInflow(AoBaseSpider):
    """Spider that downloads the IRS county migration *inflow* CSV files.

    Fixes over the original version:

    * ``requests.get`` was called with the whole URL **list** instead of the
      current URL inside the loop, which raises at runtime.
    * Every download was written to the same ``irs.csv`` path, so each file
      silently overwrote the previous one; files are now named after the
      URL basename (e.g. ``countyinflow1112.csv``).
    * The directory setup is loop-invariant and is done once, not per URL.
    """

    name = "irs_mig_inflow"

    # One county-inflow CSV per migration year; the basename encodes the
    # two-year span (e.g. ``countyinflow1112.csv`` = 2011 -> 2012).
    URLS = [
        'https://www.irs.gov/pub/irs-soi/countyinflow1112.csv',
        'https://www.irs.gov/pub/irs-soi/countyinflow1213.csv',
        'https://www.irs.gov/pub/irs-soi/countyinflow1314.csv',
        'https://www.irs.gov/pub/irs-soi/countyinflow1415.csv',
        'https://www.irs.gov/pub/irs-soi/countyinflow1516.csv',
        'https://www.irs.gov/pub/irs-soi/countyinflow1617.csv',
        'https://www.irs.gov/pub/irs-soi/countyinflow1718.csv',
    ]

    def download_file(self, url=None):
        """Download the county-inflow CSVs into the spider's download dir.

        Parameters
        ----------
        url : iterable of str, optional
            URLs to fetch. Defaults to ``URLS``. (The original signature
            took a ``url`` argument but ignored it; keeping the name
            preserves backward compatibility for existing callers.)

        Returns
        -------
        list of str
            Absolute paths of the files that were written, one per URL.

        Raises
        ------
        requests.HTTPError
            If any download responds with a 4xx/5xx status, instead of
            silently writing an error page to disk.
        """
        urls = self.URLS if url is None else url
        # NOTE(review): get_download_base_path/make_dir come from
        # AoBaseSpider, which is not visible here — assumed to return/create
        # a writable directory.
        base_dir = os.path.abspath(self.get_download_base_path())
        self.make_dir(base_dir)

        paths = []
        for u in urls:
            # Name each file after its URL basename so downloads do not
            # overwrite one another.
            path = os.path.join(base_dir, os.path.basename(u))
            response = requests.get(u)
            response.raise_for_status()
            with open(path, 'wb') as f:
                f.write(response.content)
            paths.append(path)
        return paths
目前没有回答
相关问题 更多 >
编程相关推荐