Numpy数组条件匹配

import time
import datetime as dt

import numpy as np


def combineArs(dict1, dict2):
    """Align secondary readings with the primary timestamp series.

    For every timestamp of the primary series (``dict1``) the secondary
    series (``dict2``) is searched for a record whose timestamp lies within
    five minutes of it.  The window is half-open: a primary timestamp ``p``
    matches a secondary timestamp ``s`` when ``s - 5min <= p < s + 5min``.

    Both series are walked once, front to back, so they are assumed to be
    sorted in ascending order.

    Args:
        dict1: Mapping holding the primary series under the key
            ``'pwfs2:dc:seeing'``.  Its timestamps are read from
            ``'datetimes'``; ``'datetime'`` is accepted as a fallback.
        dict2: Mapping holding the secondary series under the key
            ``'ws:seeFwhm'`` with parallel ``'datetime'`` and ``'values'``
            sequences.

    Returns:
        numpy.ndarray with exactly one entry per primary timestamp: the
        value of the first secondary record whose window contains the
        timestamp, or 0 when no secondary record matches.
    """
    # Specify the keys to use
    pwfs2Key = 'pwfs2:dc:seeing'
    dimmKey = 'ws:seeFwhm'

    # The primary timestamps were historically stored under 'datetimes',
    # while the secondary series uses 'datetime'; accept either spelling.
    primSeries = dict1[pwfs2Key]
    try:
        datesPrimDict = primSeries['datetimes']
    except KeyError:
        datesPrimDict = primSeries['datetime']
    datesPrimDictIter = iter(datesPrimDict)

    # Split the second dictionary into lists
    datesSecondDict = dict2[dimmKey]['datetime']
    valsSecondDict = dict2[dimmKey]['values']

    # Define time window
    fiveMins = dt.timedelta(minutes=5)

    data = []
    try:
        # Invariant: nextDatePrimDict is always a primary timestamp that
        # has been fetched but not yet recorded in ``data``.
        nextDatePrimDict = next(datesPrimDictIter)
        for i, nextDateSecondDict in enumerate(datesSecondDict):
            windowStart = nextDateSecondDict - fiveMins
            windowEnd = nextDateSecondDict + fiveMins
            while nextDatePrimDict < windowStart:
                # Too early for this secondary record: no match
                data.append(0)
                nextDatePrimDict = next(datesPrimDictIter)
            while nextDatePrimDict < windowEnd:
                # Inside the window: take the secondary value
                data.append(valsSecondDict[i])
                nextDatePrimDict = next(datesPrimDictIter)
    except StopIteration:
        # Primary series exhausted (or empty): every primary timestamp
        # already has its entry.
        pass
    else:
        # Secondary series ran out first: the pending primary timestamp and
        # all remaining ones have no possible match.
        data.append(0)
        data.extend(0 for _ in datesPrimDictIter)

    return np.array(data)

3条回答

网友

1楼 · 编辑于 2024-06-28 21:00:16

基于 joaquin 的思路：

import datetime as dt
import itertools

def combineArs(dict1, dict2, delta=dt.timedelta(minutes=5)):
    """Attach a value from ``dict1`` to every timestamp of ``dict2``.

    Both timestamp sequences are walked once, front to back, so they are
    assumed to be sorted in ascending order.  A timestamp ``p`` of ``dict2``
    matches a mark ``m`` of ``dict1`` when ``m - delta <= p < m + delta``.

    Args:
        dict1: Mapping with parallel ``'datetime'`` and ``'values'``
            sequences (the series providing the values).
        dict2: Mapping with a ``'datetime'`` sequence (the series that
            defines the output length).
        delta: Half-width of the matching window.

    Returns:
        A list with one entry per timestamp in ``dict2['datetime']``: the
        value of the first matching mark, or 0 when no mark matches.
    """
    marks = dict1['datetime']
    values = dict1['values']
    pdates = iter(dict2['datetime'])

    data = []
    try:
        # Invariant: datei is fetched but not yet recorded in ``data``.
        datei = next(pdates)
        # Built-in zip works on both Python 2 and 3 (izip is Python 2 only).
        for datej, val in zip(marks, values):
            while datei < datej - delta:
                data.append(0)
                datei = next(pdates)
            while datei < datej + delta:
                data.append(val)
                datei = next(pdates)
    except StopIteration:
        # dict2's timestamps are exhausted (or were empty): all recorded.
        return data

    # The marks ran out first: the pending timestamp and every remaining
    # one cannot match anything, so they are recorded as 0.
    data.append(0)
    data.extend(0 for _ in pdates)
    return data

# Sample inputs for the demo below: dict1 carries timestamps with values,
# dict2 carries only timestamps.  All timestamps fall on 2011-12-19 in the
# 12 o'clock hour and differ only in the minute.
dict1 = {
    'ws:seeFwhm': {
        'datetime': [
            dt.datetime(2011, 12, 19, 12, minute, 0)
            for minute in (0, 1, 20, 22, 40)
        ],
        'values': [1, 2, 3, 4, 5],
    },
}
dict2 = {
    'pwfs2:dc:seeing': {
        'datetime': [
            dt.datetime(2011, 12, 19, 12, minute)
            for minute in (9, 19, 29, 39)
        ],
    },
}

if __name__ == '__main__':
    # Demo: print the combined list for the two sample series defined above.
    dimmKey, pwfs2Key = 'ws:seeFwhm', 'pwfs2:dc:seeing'
    print(combineArs(dict1[dimmKey], dict2[pwfs2Key]))

输出：

[0, 3, 0, 5]

网友

2楼 · 编辑于 2024-06-28 21:00:16

数组中的日期是否已排序？

如果是的话，当内层循环的日期大于外层循环给出的日期时就跳出内层循环，这样可以加快比较速度。通过这种方式，您只需做一次遍历比较，而不必对dimVals的每一项都循环len(pwfs2Vals)次
如果不是，也许您应该把当前的pwfs2Dates数组转换成例如由二元组构成的数组[(date, array_index),...]，然后把所有数组都按日期排序，这样既能进行上面所说的一次遍历比较，又能取得设置data[i]所需的原始索引

例如，如果数组已经排序（我在这里使用列表，不确定您是否需要数组）：（已编辑：现在使用迭代器，避免每一步都从头遍历pwfs2Dates）：

# Single pass over two date sequences that are both sorted ascending:
# pwfs2Dates is consumed through an iterator, so it is never rescanned from
# the start.  Each pwfs2 date lying within fiveMinutes of a dimm date gets
# that dimm value written to data at the pwfs2 date's own index.
pdates = iter(enumerate(pwfs2Dates))
try:
    # next(iterator) works on Python 2.6+ and 3; iterator.next() is
    # Python 2 only.
    i, datei = next(pdates)

    for datej, valuej in zip(dimmDates, dimvals):
        # Skip pwfs2 dates that are too early to match this dimm date.
        while datei < datej - fiveMinutes:
            i, datei = next(pdates)
        # Record the dimm value for every pwfs2 date inside the window.
        while datei < datej + fiveMinutes:
            data[i] = valuej
            i, datei = next(pdates)
except StopIteration:
    # pwfs2Dates is exhausted (or empty): nothing left to fill in.
    pass

否则，如果它们没有被排序，并且您创建了如下排序的索引列表：

^{pr2}$

代码应该是：
（已编辑：现在使用迭代器，避免每一步都从头遍历pwfs2Dates）：

^{3}$ 太好了！在

。。在

请注意，dimVals：
```
dimVals  = np.array(dict1[dimmKey]['values'])
```
不在代码中使用，可以消除。
请注意，直接遍历数组本身而不是使用xrange，可以让代码大为简化。

编辑：unutbu的答案解决了上面代码中的一些薄弱环节。为完整起见，我在这里把它们列出来：

使用next：next(iterator)优于iterator.next()。iterator.next()是常规命名规则的一个例外，这一点已在py3k中修复，该方法被重命名为iterator.__next__()。
使用try/except检查迭代器是否结束。迭代器中的所有项都取完之后，下一次调用next()会抛出StopIteration异常。使用try/except可以在这种情况发生时平稳地跳出循环。对于提问者的具体情况，这不是问题：两个数组大小相同，for循环与迭代器同时结束，所以不会抛出异常。但是，dict1和dict2的大小可能并不相同，这时就可能抛出异常。问题在于：是使用try/except更好，还是在循环之前先把两个数组截成与较短者等长更好。

网友

3楼 · 编辑于 2024-06-28 21:00:16

我想你可以少做一个循环：

import datetime
import numpy

# Test data

# Reference instant from which every test date is derived
n = datetime.datetime.now()

# Dense series: one date per minute, 1 to 20 minutes after the reference
m = range(1, 21)
a = numpy.array([n + datetime.timedelta(minutes=offset) for offset in m])

# Sparse series: only the dates 5, 10 and 15 minutes after the reference
m = [5, 10, 15]
b = numpy.array([n + datetime.timedelta(minutes=offset) for offset in m])

# End of test data

def date_range(date_array, single_date, delta):
    """Select the dates lying strictly within ``delta`` minutes of a date.

    Args:
        date_array: Array of dates supporting elementwise comparison and
            boolean-mask indexing (e.g. a numpy array).
        single_date: The date at the centre of the window.
        delta: Half-width of the window, in minutes.

    Returns:
        The elements ``d`` of ``date_array`` for which
        ``single_date - delta < d < single_date + delta`` (both bounds
        exclusive).
    """
    half_width = datetime.timedelta(minutes=delta)
    upper_bound = single_date + half_width
    lower_bound = single_date - half_width
    # Elementwise AND of the two boolean masks
    inside = (date_array < upper_bound) & (date_array > lower_bound)
    return date_array[inside]

# For every reference date in ``b``, collect the dates of ``a`` that lie
# within 5 minutes of it.
dates = [date_range(a, i, 5) for i in b]

# Merge the per-date match arrays and drop duplicates.  concatenate joins
# the arrays end to end, so it also works when they differ in length
# (numpy.array(dates).flatten() requires equal lengths).  concatenate needs
# at least one array, hence the guard for an empty ``b``.
if dates:
    all_matches = numpy.unique(numpy.concatenate(dates))
else:
    all_matches = numpy.array([])

当然还有更好的方法来收集和合并这些匹配结果，不过大致思路就是这样……你也可以返回匹配项的索引，而不是日期本身。

相关问题更多 >

编程相关推荐

热门问题

热门文章