Python试图重构(耗尽)长控件F

2024-09-28 21:20:30 发布

您现在位置:Python中文网/ 问答频道 /正文

我从和SQL查询中获取了大量数据,这些数据需要很长时间才能运行。由于SQL查询需要很长时间才能运行,所以我从数据库中以最细粒度的形式获取数据。然后我循环使用这些数据一次,并将其聚合为对我有用的表单。在

我的问题是我一次又一次地重复自己。但是,我不确定重构此控制流的最佳方法。提前谢谢!在

def processClickOutData(cls, raw_data):
    singles = {}
    total={}
    absolute_total = 0
    channels = {}

    singles_true = {}
    total_true={}
    channels_true = {}
    absolute_total_true = 0

    list_channels = set([])
    list_tids = set([])


    total_position = {}
    total_position_true = {}
    tid_position = {}
    channel_position = {}
    channel_position_true = {}
    tid_position_true = {}

    for row in raw_data:
        gap=row[0]
        count=row[1]
        tid=row[2]
        prefered=row[3]
        channel=row[4]
        position=row[5]

        list_channels.add(channel)
        list_tids.add(tid)


        absolute_total += int(count)

        if total.has_key(gap):
            total[gap] += count
        else:
            total[gap] = count

        if singles.has_key(gap) and singles[gap].has_key(tid):
            singles[gap][tid] += count
        elif singles.has_key(gap):
            singles[gap][tid] = count
        else:
            singles[gap] = {}
            singles[gap][tid] = count

        if channels.has_key(gap) and channels[gap].has_key(channel):
            channels[gap][channel] += count
        elif channels.has_key(gap):
            channels[gap][channel] = count
        else:
            channels[gap] = {}
            channels[gap][channel] = count
        if total_position.has_key(position):
            total_position[position] += count
        else:
            total_position[position] = count
        if tid_position.has_key(position) and tid_position[position].has_key(tid):
            tid_position[position][tid] += count     
        elif tid_position.has_key(position):
            tid_position[position][tid] = count
        else:
            tid_position[position] = {}
            tid_position[position][tid] = count

        if channel_position.has_key(position) and channel_position[position].has_key(channel):
            channel_position[position][channel] += count     
        elif channel_position.has_key(position):
            channel_position[position][channel] = count
        else:
            channel_position[position] = {}
            channel_position[position][channel] = count

        if prefered == 0:
            absolute_total_true += count
            if total_true.has_key(gap):
                total_true[gap] += count
            else:
                total_true[gap] = count

            if singles_true.has_key(gap) and singles_true[gap].has_key(tid):
                singles_true[gap][tid] += count
            elif singles_true.has_key(gap):
                singles_true[gap][tid] = count
            else:
                singles_true[gap] = {}
                singles_true[gap][tid] = count

            if channels_true.has_key(gap) and channels_true[gap].has_key(channel):
               channels_true[gap][channel] += count
            elif channels_true.has_key(gap):
               channels_true[gap][channel] = count
            else:
               channels_true[gap] = {}
               channels_true[gap][channel] = count

            if total_position_true.has_key(position):
               total_position_true[position] += count
            else:
               total_position_true[position] = count 

            if tid_position_true.has_key(position) and tid_position_true[position].has_key(tid):
               tid_position_true[position][tid] += count     
            elif tid_position_true.has_key(position):
               tid_position_true[position][tid] = count
            else:
               tid_position_true[position] = {}
               tid_position_true[position][tid] = count

            if channel_position_true.has_key(position) and channel_position_true[position].has_key(channel):
               channel_position_true[position][channel] += count     
            elif channel_position_true.has_key(position):
               channel_position_true[position][channel] = count
            else:
               channel_position_true[position] = {}
               channel_position_true[position][channel] = count




    final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
                     "total_position" :  total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
    return final_values

Tags: keytrueifcountchannelpositionelselist
1条回答
网友
1楼 · 发布于 2024-09-28 21:20:30

您用于存储数据的整个结构可能是错误的,但是由于我不知道您是如何使用它的,所以我无法帮助您。在

您可以通过使用^{}删除所有这些has_key()调用的所有。注意thedict.has_key(key)无论如何都是不推荐使用的,您应该只使用key in thedict。在

看看我是如何更改for循环的—您可以在for语句中为名称赋值,无需单独进行。在

from collections import defaultdict

def processClickOutData(cls, raw_data):
    absolute_total = 0
    absolute_total_true = 0

    list_channels = set()
    list_tids = set()

    total = defaultdict(int)
    total_true = defaultdict(int)
    total_position = defaultdict(int)
    total_position_true = defaultdict(int)

    def defaultdict_int():
        return defaultdict(int)

    singles = defaultdict(defaultdict_int)
    singles_true = defaultdict(defaultdict_int)
    channels = defaultdict(defaultdict_int)
    channels_true = defaultdict(defaultdict_int)
    tid_position = defaultdict(defaultdict_int)
    tid_position_true = defaultdict(defaultdict_int)
    channel_position = defaultdict(defaultdict_int)
    channel_position_true = defaultdict(defaultdict_int)    

    for gap, count, prefered, channel, position in raw_data:
        list_channels.add(channel)
        list_tids.add(tid)

        absolute_total += count
        total[gap] += count
        singles[gap][tid] += count
        channels[gap][channel] += count
        total_position[position] += count
        tid_position[position][tid] += count
        channel_position[position][channel] += count

        if prefered == 0:
            absolute_total_true += count
            total_true[gap] += count
            singles_true[gap][tid] += count
            channels_true[gap][channel] += count
            total_position_true[position] += count
            tid_position_true[position][tid] += count
            channel_position_true[position][channel] += count




    final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
                     "total_position" :  total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
    return final_values

如果键不存在,它会自动填充正确的默认值。这里有两种。在添加ints的地方,您需要从0开始,如果它不存在,int会返回,因此defaultdict(int)。在添加添加ints的字典时,需要使用一个返回defaultdict(int)的函数,defaultdict_int就是这个函数。在

编辑:建议的备用字典结构:

^{pr2}$

_true版本也做同样的操作,您已经从12dict变成了4。在

相关问题 更多 >