如果值有新数据，则为dataframe迭代列填充

1条回答

网友

1楼 · 发布于 2024-09-30 10:31:12

做法是先定义一个函数，用于检查某个基础零件号（base_partnum）是否出现在 df5 某一行 "Data" 列保存的零件列表字符串中：

def right_tree_pn(df5, base_partnum):
    """Tell whether ``base_partnum`` occurs in a stringified list of parts.

    ``df5`` is the text form of a Python list (for example
    ``"['F104', 'F105']"``). Square brackets and single quotes are removed
    and the remainder is split on ``", "`` to recover the individual parts.

    Returns the string ``"Yes"`` when ``base_partnum`` is one of those parts
    and ``"No"`` otherwise; a zero-length ``df5`` always gives ``"No"``.
    """
    # A zero-length input holds no parts, so the lookup cannot succeed.
    if len(df5) == 0:
        return "No"

    stripped = df5
    for punctuation in ("[", "]", "'"):
        stripped = stripped.replace(punctuation, "")
    known_parts = stripped.split(", ")

    return "Yes" if base_partnum in known_parts else "No"

接着用 for 循环逐行遍历 df4，把每一行的基础零件及其关联零件写入 df5：

# Walk df4 row by row and fold each (base part, associated part) pair into df5.
# Every df5 row keeps one ordered list of parts, stored as the str() of a
# Python list in "Data", plus a "Base_PN" column.
# NOTE(review): df4, df5, right_tree_pn and OrderedDict are not defined in this
# loop and must exist beforehand; rows are fetched with .loc[i], so this
# presumably relies on df4 (and df5) having the default 0..n-1 index -- confirm.
for i in range(len(df4)):
    print(i)
    base = df4.loc[i]["Data1"]
    type_part = df4.loc[i]["Description"]
    associate_data = df4.loc[i]["Associated Data"]
    # Comma-split view of base plus associated data; used further down to test
    # whether an existing df5 row already contains every piece.
    test_parts2 = base + "," + associate_data
    test_parts =test_parts2.split(",")
    # Only rows whose Description is exactly "TO" are processed.
    if((str(type_part) == "TO") == True):
        # Case 1: df5 is still empty, so this pair starts the first row.
        if(len(df5) == 0):
           parts_list = []
           # parts_list was just reset to [], so only the else branch can run.
           if(len(parts_list) > 0):
                index = parts_list.index(base)
           else:
                index = 0
                index_val = len(df5)
                # Put the base value at the front of the list.
                parts_list.insert(index, base)
           if(associate_data in parts_list):
                associate_index = parts_list.index(associate_data)
           else:
                associate_index = index
           # The condition is true exactly when associate_index != index, which
           # only happens when the associated value was found at another
           # position in the list; nothing is inserted in that case.
           if((associate_index < index) or (associate_index > index) == True):
                pass
           else:
                # Add the associated value directly after the base value.
                 parts_list.insert(index + 1,associate_data)
           df5.loc[index_val ,"Data"] = str(parts_list)
           df5.loc[index_val ,"Base_PN"] = str(parts_list[0])
        # Case 2: df5 already has rows.
        else:
            df_index = []
            # Collect the position of every df5 row whose "Data" list contains
            # the base value, as reported by right_tree_pn.
            for x in range(len(df5)):
                df6 = right_tree_pn(df5.loc[x]["Data"],base)
                if(str(df6) == "Yes"):
                    df_index1 = x
                    df_index.append(df_index1)
            # Case 2a: exactly one existing row contains the base value.
            if(len(df_index) == 1):
                # df_index is rebound from a list to the single row position.
                df_index = df_index[0]
                new_pn_list = df5.loc[df_index]["Data"]
                # Parse the stored list text back into a list of part strings.
                # NOTE(review): this strips all spaces and splits on ",", while
                # right_tree_pn splits on ", " without stripping spaces; values
                # containing spaces would be parsed differently by the two --
                # confirm the data never contains them.
                parts_list1 = new_pn_list.replace("[","").replace("]","").replace("'","").replace(" ","").split(",")
                # If the row already holds every piece of this pair there is
                # nothing to do; everything else for this case lives in the else.
                if(all(w in parts_list1 for w in test_parts) == True):
                    pass
                else:
                    # Raises ValueError if base is not in parts_list1.
                    index = parts_list1.index(base)
                    # Base is the first element of the row. len(parts_list1)
                    # cannot be 0 here because .index() just succeeded.
                    if((index == 0 and len(parts_list1) != 0)) == True:
                        # NOTE(review): raises IndexError when the row holds
                        # only the base value (no element at index + 1).
                        ass_pasrt2 = parts_list1[index + 1]
                        # A different successor than the incoming associated
                        # value: start a fresh list destined for a new row.
                        if(index == 0 and associate_data != ass_pasrt2) == True:
                            parts_list = []
                            index_val = len(df5)
                        # Same successor: keep working on the existing row.
                        else:
                             parts_list = parts_list1
                             index_val = df_index
                    # Unreachable: .index() would have raised on an empty list.
                    elif(((index == 0 and len(parts_list1) == 0)) ==True):
                            parts_list = []
                            index_val = len(df5)
                    # Base sits somewhere after the first element.
                    else:
                        index2 = len(parts_list1) - 1
                        # ass_pasrt2 is the element following base, or "" when
                        # base is the last element of the row.
                        if(index2 != index):
                            ass_pasrt2 = parts_list1[index + 1]
                        else:
                            ass_pasrt2 = ""
                        # Successor equals the incoming associated value:
                        # update the existing row.
                        if((associate_data == ass_pasrt2)== True):
                            parts_list = parts_list1
                            index_val = df_index
                        # Base is last in the row: update the existing row.
                        elif((ass_pasrt2 == "") == True):
                            parts_list = parts_list1
                            index_val = df_index
                        # Different successor: drop its first occurrence from
                        # the list and write the result to a new row.
                        # parts_list is the same list object as parts_list1.
                        else:
                            parts_list = parts_list1
                            if(ass_pasrt2 in parts_list):
                                parts_list.remove(ass_pasrt2)
                            index_val = len(df5)
                    # Locate base in the working list, seeding the list with
                    # base when it is empty.
                    if(len(parts_list) > 0):
                        index = parts_list.index(base)
                    else:
                            index = 0
                            parts_list.insert(index, base)
                    if(associate_data in parts_list):
                        associate_index = parts_list.index(associate_data)
                    else:
                        associate_index = index
                    # True exactly when associate_index != index (associated
                    # value already present at another position).
                    # NOTE(review): parts_list2 is assigned only in the else
                    # branch, yet it is read unconditionally right after; when
                    # the pass branch is taken the writes below use parts_list2
                    # from an earlier iteration, or raise NameError if it was
                    # never assigned.
                    if((associate_index < index) or (associate_index > index) == True):
                        pass
                    else:
                        # Insert after base, then drop duplicates while keeping
                        # first-seen order.
                        parts_list.insert(index + 1,associate_data) 
                        parts_list2 = list(OrderedDict.fromkeys(parts_list))
                    df5.loc[index_val,"Data"] = str(parts_list2)
                    df5.loc[index_val ,"Base_PN"] = str(parts_list2[0])
            # Case 2b: no existing row contains the base value; build a new row
            # the same way as in case 1.
            elif(len(df_index) == 0):
                parts_list = []
                # parts_list was just reset to [], so only the else branch can run.
                if(len(parts_list) > 0):
                    index = parts_list.index(base)
                else:
                    index = 0
                    parts_list.insert(index, base)
                    index_val = len(df5)
                if(associate_data in parts_list):
                    associate_index = parts_list.index(associate_data)
                else:
                    associate_index = index
                # True exactly when associate_index != index.
                if((associate_index < index) or (associate_index > index) == True):
                    pass
                else:
                    parts_list.insert(index + 1,associate_data)
                df5.loc[index_val ,"Data"] = str(parts_list)
                df5.loc[index_val ,"Base_PN"] = str(base)
            # Case 2c: several existing rows contain the base value; each of
            # them is visited and rewritten in place.
            else:
                for z in range(len(df_index)):
                    index_value = df_index[z]
                    new_pn_list = df5.loc[index_value]["Data"]
                    # Here the list text is split on ", " (spaces are kept),
                    # unlike the single-row case above.
                    parts_list1 = new_pn_list.replace("[","").replace("]","").replace("'","").split(", ")
                    # NOTE(review): when the row already holds every piece this
                    # branch only does `pass`, but the statements after the
                    # if/else still run. parts_list is then whatever an earlier
                    # iteration left behind (or undefined), and that stale
                    # value ends up written to this row -- confirm intent.
                    if(all(w in parts_list1 for w in test_parts) == True):
                        pass
                    else:
                        index = parts_list1.index(base)
                        # Both reachable branches assign parts_list1; the elif
                        # cannot be reached because .index() succeeded above.
                        if((index == 0 and len(parts_list1) != 0)) ==True:
                            parts_list = parts_list1
                        elif(((index == 0 and len(parts_list1) == 0)) ==True):
                            parts_list = []
                        else:
                            parts_list = parts_list1
                    # Locate base in the working list, seeding the list with
                    # base when it is empty.
                    if(len(parts_list) > 0):
                        index = parts_list.index(base)
                    else:
                            index = 0
                            parts_list.insert(index, base)
                    if(associate_data in parts_list):
                        associate_index = parts_list.index(associate_data)
                    else:
                        associate_index = index
                    # True exactly when associate_index != index.
                    # NOTE(review): as in the single-row case, parts_list2 is
                    # assigned only in the else branch but read unconditionally
                    # below (stale value or NameError when pass is taken).
                    if((associate_index < index) or (associate_index > index) == True):
                        pass
                    else:
                        # Insert after base, then drop duplicates while keeping
                        # first-seen order.
                        parts_list.insert(index + 1,associate_data)
                        parts_list2 = list(OrderedDict.fromkeys(parts_list))
                    df5.loc[index_value ,"Data"] = str(parts_list2)
                    df5.loc[index_value ,"Base_PN"] = str(parts_list2[0])

df5 填充完成后，还需要把 "Data" 列中的零件列表拆分成多列：

# Keep only the two result columns. The explicit copy makes df5 an independent
# frame, so adding "Data2" below is not an assignment into a selection of the
# previous df5 (the pattern that can trigger pandas' SettingWithCopyWarning).
df5 = df5[["Base_PN", "Data"]].copy()
# Strip the list punctuation ([, ], ') and spaces from the stringified lists.
# regex=False forces literal matching: "[" and "]" are regex metacharacters and
# the default value of `regex` differs between pandas versions.
df5["Data2"] = (
    df5["Data"]
    .str.replace("[", "", regex=False)
    .str.replace("]", "", regex=False)
    .str.replace("'", "", regex=False)
    .str.replace(" ", "", regex=False)
)
# Split each cleaned string on "," into one column per part; add_prefix names
# the resulting columns Level_0, Level_1, ...
df6 = df5.Data2.apply(lambda x: pd.Series(str(x).split(","))).add_prefix('Level_')

输出：

Level_0 Level_1 Level_2 Level_3 Level_4 
                    
F104    F5334       
F104    F105    F1212 
F104    F105    F5332   
F104    F105    F1212 F1000 
F104    F105    F1212 F1242B    F1015B

相关问题更多 >

编程相关推荐

热门问题

热门文章

如果值有新数据，则为dataframe迭代列填充

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >