创建不同类型嵌套列表的快速方法：numpy、pandas或列表串联？

import datetime
import time

import numpy as np
import pandas as pd


def overflow_check(x, p=13, s=3):
    """Clamp every element of ``x`` to what a SQL ``decimal(p, s)`` can hold.

    The limit is the number made of ``p - s`` nines, a decimal point and
    ``s`` nines; values whose magnitude exceeds it are replaced by the
    limit carrying the sign of the original value.

    NOTE(review): the target column was described as decimal(13, 2), but the
    scale used by this code has always been 3. The defaults keep the
    original limit (9999999999.999) -- confirm which scale is intended.

    Args:
        x: Non-empty ``numpy.ndarray`` of numeric values.
        p: Total number of digits of the decimal column.
        s: Number of digits after the decimal point.

    Returns:
        An array shaped like ``x`` with every value clamped to
        ``[-limit, limit]``.

    Raises:
        ValueError: If ``x`` is not a ``numpy.ndarray`` or is empty.
    """
    if not isinstance(x, np.ndarray) or len(x) < 1:
        raise ValueError("Non-numeric or empty array.")
    max_limit = float("9" * (p - s) + "." + "9" * s)
    # np.clip also clamps +/-inf; multiplying by a boolean mask would turn
    # infinities into NaN because inf * 0 is NaN.
    return np.clip(x, -max_limit, max_limit)


def list_creation(y_forc):
    """Build the forecast rows by filling a DataFrame and dumping it to lists.

    Args:
        y_forc: pandas object with a ``DatetimeIndex`` whose ``.values`` hold
            the forecast volumes (the benchmark passes a one-column
            DataFrame).

    Returns:
        A list with one 9-item list per index entry, ordered as: TypeId, id2,
        Daily, ForecastDate, ReportDate, ForecastMethodId, ForecastVolume,
        CreatedBy, CreatedDt.
    """
    # Evaluate "today" once so ReportDate and CreatedDt describe the same
    # instant.
    today = pd.to_datetime('today')

    backcast = pd.DataFrame(data=np.full(len(y_forc), 2), columns=['TypeId'])
    backcast['id2'] = None
    backcast['Daily'] = 1
    backcast['ForecastDate'] = y_forc.index.strftime('%Y-%m-%d')
    backcast['ReportDate'] = today.strftime('%Y-%m-%d')
    backcast['ForecastMethodId'] = 1
    backcast['ForecastVolume'] = overflow_check(y_forc.values)
    backcast['CreatedBy'] = 'test'
    backcast['CreatedDt'] = today
    return backcast.values.tolist()


# Benchmark input: one random volume per day from 5 Jan 2010 to 21 May 2018.
# ISO dates are used because '05-01-2010' is parsed month-first while
# '21-05-2018' can only be read day-first, so the original pair of bounds
# was interpreted inconsistently.
i = pd.date_range('2010-01-05', '2018-05-21', freq='D')
x = pd.DataFrame(index=i, data=np.random.randint(0, 100, len(i)))

t = time.perf_counter()
y = list_creation(x)
print(time.perf_counter() - t)

1条回答

网友

1楼 · 发布于 2024-10-06 11:35:05

这应该快一点，它只是直接创建列表：

def list_creation1(y_forc):
    """Build the forecast rows directly as Python lists, without a DataFrame.

    Args:
        y_forc: pandas object with a ``DatetimeIndex`` whose ``.values`` is a
            2-D array; only its first column is used.

    Returns:
        A list with one 9-item list per index entry: two constants, a
        constant 1, the formatted index date, today's date string, a
        constant 1, the clamped volume, ``'test'`` and today's timestamp.
    """
    date_labels = y_forc.index.strftime('%Y-%m-%d')
    # overflow_check keeps the 2-D shape of .values; take the first column.
    volumes = overflow_check(y_forc.values)[:, 0]
    report_date = pd.to_datetime('today').strftime('%Y-%m-%d')
    created_at = pd.to_datetime('today')

    rows = []
    for label, volume in zip(date_labels, volumes):
        rows.append(
            [2, None, 1, label, report_date, 1, volume, 'test', created_at]
        )
    return rows


%%timeit
list_creation(x)
> 29.3 ms ± 468 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

%%timeit
list_creation1(x)
> 17.1 ms ± 517 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

编辑：速度慢的一个主要原因是把 datetime 转换为指定格式字符串所花的时间。如果把函数改写成下面这样，就可以省掉这一步：

def list_creation1(i, v):
    """Build the forecast rows from pre-formatted dates and raw volumes.

    Args:
        i: Iterable of already formatted forecast-date strings.
        v: Forecast volumes, one per entry of ``i`` (``numpy.ndarray`` or any
            other iterable of numbers).

    Returns:
        A list with one 9-item list per ``(date, volume)`` pair: two
        constants, a constant 1, the date string, today's date string, a
        constant 1, the clamped volume, ``'test'`` and today's timestamp.
    """
    # Wrapping every value in its own one-element list only to slice column 0
    # back off is a Python-level loop with no effect on the result; hand the
    # values to overflow_check as they are.
    values = v if isinstance(v, np.ndarray) else np.array(list(v))
    volumes = overflow_check(values)

    # Evaluate "today" once so both date fields describe the same instant.
    now = pd.to_datetime('today')
    report_date = now.strftime('%Y-%m-%d')

    return [
        [2, None, 1, date_label, report_date, 1, volume, 'test', now]
        for date_label, volume in zip(i, volumes)
    ]

# Pre-format the forecast dates with the standard library so the timed
# function does not pay for the datetime -> string conversion.
start = datetime.datetime(2010, 1, 5)
end = datetime.datetime(2018, 5, 21)
# "+ 1" keeps the end date, as the inclusive pd.date_range in the question
# does; the strings use the same '%Y-%m-%d' layout as the functions being
# replaced so the generated rows stay comparable.
i = [
    (start + datetime.timedelta(days=offset)).strftime("%Y-%m-%d")
    for offset in range((end - start).days + 1)
]
x = np.random.randint(0, 100, len(i))

那么现在这要快得多：

%%timeit
list_creation1(i, x)
> 1.87 ms ± 24.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

相关问题更多 >

编程相关推荐

热门问题

热门文章