Python:改进长累积和

# Walk the records in chronological order so that every running total below
# is a cumulative sum over time.
dataset.sort(key=operator.attrgetter('time_point'))

# For the whole set
sys_qty1 = sys_qty2 = sys_combo = sys_max = 0

# For the cluster grouping (each name gets its own independent defaultdict)
cluster_qty1, cluster_qty2, cluster_combo, cluster_max, cluster_peak = (
    defaultdict(int) for _ in range(5)
)

# For the node grouping (each name gets its own independent defaultdict)
node_qty1, node_qty2, node_combo, node_max, node_peak = (
    defaultdict(int) for _ in range(5)
)

for record in dataset:
    cluster_key = record.cluster
    node_key = record.node

    # For the whole system ##################################################
    sys_qty1 += record.qty1
    sys_qty2 += record.qty2
    sys_combo = sys_qty1 + sys_qty2
    if sys_combo > sys_max:
        sys_max = sys_combo
        # The Peak class is to record the time point and the cumulative
        # quantities.  system_peak is only bound once a combined total
        # exceeds the initial maximum of 0.
        system_peak = Peak(time_point=record.time_point,
                           qty1=sys_qty1,
                           qty2=sys_qty2)

    # For the cluster grouping ##############################################
    cluster_qty1[cluster_key] += record.qty1
    cluster_qty2[cluster_key] += record.qty2
    cluster_combo[cluster_key] = (cluster_qty1[cluster_key]
                                  + cluster_qty2[cluster_key])
    if cluster_combo[cluster_key] > cluster_max[cluster_key]:
        cluster_max[cluster_key] = cluster_combo[cluster_key]
        cluster_peak[cluster_key] = Peak(time_point=record.time_point,
                                         qty1=cluster_qty1[cluster_key],
                                         qty2=cluster_qty2[cluster_key])

    # For the node grouping #################################################
    node_qty1[node_key] += record.qty1
    node_qty2[node_key] += record.qty2
    node_combo[node_key] = node_qty1[node_key] + node_qty2[node_key]
    if node_combo[node_key] > node_max[node_key]:
        node_max[node_key] = node_combo[node_key]
        node_peak[node_key] = Peak(time_point=record.time_point,
                                   qty1=node_qty1[node_key],
                                   qty2=node_qty2[node_key])

# Walk the records in chronological order so the running totals are
# cumulative over time.
dataset.sort(key=operator.attrgetter('time_point'))


def cuml_sum(seq):
    """Return a list holding the running totals of the items in *seq*.

    Element ``k`` of the result is the sum of the first ``k + 1`` items of
    *seq*; an empty input yields an empty list.
    """
    rseq = []
    t = 0
    for i in seq:
        t += i
        rseq.append(t)
    return rseq


time_get = operator.attrgetter('time_point')
q1_get = operator.attrgetter('qty1')
q2_get = operator.attrgetter('qty2')

# Parallel lists: position k in each list describes the k-th record.
timeline = [time_get(t) for t in dataset]
cuml_qty1 = cuml_sum([q1_get(t) for t in dataset])
cuml_qty2 = cuml_sum([q2_get(t) for t in dataset])
cuml_combo = [q1 + q2 for q1, q2 in zip(cuml_qty1, cuml_qty2)]

# NOTE: max() raises ValueError when dataset is empty.
combo_max = max(cuml_combo)

# Locate the peak by its position in cuml_combo (list.index returns the
# first occurrence, i.e. the earliest time the maximum is reached) and read
# the parallel lists at that position.  Looking combo_max up in timeline, or
# a time value up in the quantity lists, compares unrelated kinds of values.
peak_index = cuml_combo.index(combo_max)
time_max = timeline[peak_index]
q1_at_max = cuml_qty1[peak_index]
q2_at_max = cuml_qty2[peak_index]

# One entry per cluster; every key used below is assigned explicitly, so the
# defaultdict factory is never triggered by this loop.
timeline = defaultdict(int)
cuml_qty1 = defaultdict(int)
# ...etc.

for c in cluster_list:
    # Select this cluster's records once and derive each list from them.
    members = [t for t in dataset if t.cluster == c]
    timeline[c] = [time_get(t) for t in members]
    cuml_qty1[c] = [q1_get(t) for t in members]
    # ...etc.

1条回答

网友

1楼 · 发布于 2024-07-08 11:05:09

这似乎是运用面向对象思想的典型场景。我建议把派生数据封装成一个类，并把累积和的计算抽象为该类的一个方法。

比如：

class DerivedData(object):
    """Running totals of two quantities plus the highest combined total seen.

    Attributes:
        qty1: cumulative sum of the ``qty1`` values accumulated so far.
        qty2: cumulative sum of the ``qty2`` values accumulated so far.
        combo: ``qty1 + qty2`` after the most recent accumulation.
        max: largest ``combo`` observed so far (starts at 0.0).
        peak: ``Peak`` snapshot taken when ``max`` was last raised.
    """

    def __init__(self):
        """Start every total at zero with an all-zero placeholder peak."""
        self.qty1 = self.qty2 = 0.0
        self.combo = self.max = 0.0
        self.peak = Peak(time_point=0.0, qty1=0.0, qty2=0.0)

    def accumulate(self, data):
        """Fold one record into the totals and refresh the peak if needed.

        Args:
            data: object exposing ``qty1`` and ``qty2``; its ``time_point``
                is read only when this record sets a new maximum.
        """
        self.qty1 += data.qty1
        self.qty2 += data.qty2
        self.combo = self.qty1 + self.qty2

        # Only a strictly larger combined total replaces the recorded peak,
        # so on ties the earliest record keeps it.
        if not (self.combo > self.max):
            return

        self.max = self.combo
        self.peak = Peak(
            time_point=data.time_point,
            qty1=self.qty1,
            qty2=self.qty2,
        )

# Records must be visited in chronological order for the running totals to
# be cumulative over time.
dataset.sort(key=operator.attrgetter('time_point'))

# One accumulator for the whole system, plus one per cluster and per node
# created on first use.
# NOTE(review): the name `sys` would shadow the standard-library module of
# the same name if this module also imports it.
sys = DerivedData()
clusters = defaultdict(DerivedData)
nodes = defaultdict(DerivedData)

# Single pass: each record updates the system-wide, per-cluster and per-node
# accumulators, in that order.
for record in dataset:
    for tracker in (sys, clusters[record.cluster], nodes[record.node]):
        tracker.accumulate(record)

这个解决方案把寻找峰值的逻辑抽象了出来，同时仍然只需遍历数据集一次。

相关问题更多 >

编程相关推荐

热门问题

热门文章