根据显著性标记来自Tukey测试结果的组

1条回答
网友
1楼 · 发布于 2024-06-16 14:24:29
所以，经过几天的研究，在没有其他用户给出答案或评论的情况下，我想我已经弄明白了。假设我问题中的表名为 df。下面的脚本是针对我自己的需求编写的，但我希望它也能帮助其他人。我添加了一些注释以便于理解。
    # Build letter labels for groups from a table of pairwise comparisons.
    #
    # Names read from the enclosing scope (none are defined in this snippet):
    #   df          - DataFrame with columns ``group1``, ``group2`` and
    #                 ``reject`` (presumably a Tukey HSD result table where
    #                 ``reject`` is True for significantly different pairs
    #                 -- confirm against the caller).
    #   group_names - iterable of every group name that must receive a label.
    #   string      - the standard-library ``string`` module must be imported.
    #
    # Result: ``dictionary`` maps each group name to a string of letters.
    #
    # NOTE: letters are taken from the 26 lowercase ASCII letters and ``n``
    # grows by one per differing pair, so ``letters[n]`` / ``letters[n+1]``
    # may raise IndexError from the 26th differing pair onward.

    def _share_letter(first_label, second_label):
        # True when the two label strings have at least one character in common.
        return any(c in second_label for c in first_label)

    # Keep only the rows whose ``reject`` flag is True.
    df_True = df.loc[df.reject==True,:]

    letters = list(string.ascii_lowercase)
    n = 0  # index into ``letters``; advanced once per differing pair

    group1_list = df_True.group1.tolist()  # groups appearing in the rejected rows
    group2_list = df_True.group2.tolist()
    group3 = group1_list + group2_list  # both columns together
    group4 = list(set(group3))  # unique group values
    group5 = sorted(str(i) for i in group4)  # as str, in sorted order

    # 0 means "no label assigned yet"; assigned labels are strings of letters.
    dictionary = {name: 0 for name in group5}

    # Every unordered pair of groups, in sorted order.
    group6 = [(group5[i], group5[j]) for i in range(len(group5)) for j in range(i + 1, len(group5))]

    for pairs in group6:
        first, second = pairs

        print(n)
        print(dictionary)

        # Look the pair up among the rejected rows.  The original handler was a
        # bare ``except`` whose body only evaluated a comparison and threw the
        # result away, leaving ``a`` unbound (first pair) or stale (later
        # pairs).  Presumably the intent was "treat a failed lookup as pair not
        # found", so fall back to an empty frame explicitly.
        try:
            a = df_True.loc[(df_True.group1 == first) & (df_True.group2 == second), :]
        except (KeyError, TypeError, ValueError):
            a = df_True.iloc[0:0]

        if a.shape[0] == 0:
            # The pair is absent from the rejected rows, so the two groups are
            # treated as equal and should share a letter.
            print('equal')

            if dictionary[first] != 0 and dictionary[second] == 0:
                # Only the first has a label: the second inherits it.
                print("1st is populated and 2nd is empty")
                dictionary[second] = dictionary[first]

            elif dictionary[first] != 0 and dictionary[second] != 0:
                print("both are populated")
                if _share_letter(dictionary[first], dictionary[second]):
                    print("they have a shared character")
                else:
                    print("equal but have different labels")
                    # A label is "unique" when no other group's label shares a
                    # character with it.
                    first_is_unique = not any(
                        key != first and _share_letter(dictionary[first], value)
                        for key, value in dictionary.items()
                    )
                    second_is_unique = not any(
                        key != second and _share_letter(dictionary[second], value)
                        for key, value in dictionary.items()
                    )
                    if first_is_unique and second_is_unique:
                        # Neither label is shared with anyone else: the second
                        # simply takes over the first letter of the first.
                        print("unique")
                        dictionary[second] = dictionary[first][0]
                    else:
                        # Otherwise keep the second's letters and append the
                        # first letter of the first group's label.
                        print("there is at least one group in the dict that shares a char with the 1st group")
                        dictionary[second] = dictionary[second] + dictionary[first][0]

            else:
                # Reached whenever the first group has no label yet; both
                # groups are given the current letter.
                print("both are empty")
                dictionary[first] = letters[n]
                dictionary[second] = letters[n]

        else:
            # The pair is present in the rejected rows: the groups differ and
            # must not share a letter.
            print("not equal")

            if dictionary[first] != 0:
                print('1st is populated')
                if dictionary[second] != 0 and not _share_letter(dictionary[first], dictionary[second]):
                    # Already labelled differently.
                    print("no change")

                elif dictionary[second] == 0:
                    # The second has no label yet: give it the next letter.
                    dictionary[second] = letters[n+1]

                else:
                    # Both are labelled and share a letter: move the second to
                    # the current letter, then extend every other group that
                    # overlapped the second's old label with that letter too.
                    print("need to add a letter")
                    original_value = dictionary[second]
                    dictionary[second] = letters[n]
                    # Iterate over a snapshot because values are reassigned.
                    for key, value in list(dictionary.items()):
                        if key != first and _share_letter(original_value, value):
                            dictionary[key] = original_value + letters[n]

            else:
                print('1st is empty')
                dictionary[first] = letters[n]
                dictionary[second] = letters[n+1]
                print(dictionary)
            n += 1

    # Collect the distinct labels that were handed out and concatenate them.
    labels = list(dictionary.values())
    labels1 = sorted(set(labels))
    final_label = ''.join(labels1)

    # Groups that never appeared in a rejected row receive the concatenation
    # of all labels.
    for GroupName in group_names:
        if GroupName in dictionary:
            print("already exists")
        else:
            dictionary[GroupName] = final_label

    # Reduce every label to its distinct letters.  The letters are sorted so
    # the result is deterministic; joining a bare set gave an arbitrary order.
    for key, value in list(dictionary.items()):
        dictionary[key] = ''.join(sorted(set(value)))
相关问题更多 >

编程相关推荐

热门问题

热门文章

根据显著性标记来自Tukey测试结果的组

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >