Featuretools AssertionError: Index is not unique on dataframe(数据框的索引不唯一)

2024-09-28 23:50:49 发布

您现在位置:Python中文网/ 问答频道 /正文

我是第一次使用 Python 中的 featuretools 包。在尝试执行以下命令时,我得到了“AssertionError: Index is not unique on dataframe”。我仔细检查了各个列,没有发现任何名称相似的列。对于可能出错的地方,有什么提示或建议吗?

实体(Entities): 客户数据框由 Key、CustomerID、AccountNumber、Account Open Date 组成

交易数据框包含:KeyT、CustomerIDT、AccountNumberT、TransactionDateTime、TransactionAmount

(注意:Key 和 KeyT、CustomerID 和 CustomerIDT、AccountNumber 和 AccountNumberT 包含相似的信息,只是列名不同)。

客户与交易之间是一对多关系。客户数据集中的所有记录都是唯一的。

Key CustomerID  AccountNumber   Account Open Date
123123  123 123 6/16/16
345345  345 345 9/20/16
345789  345 789 12/25/16

KeyT    CustomerIDT AccountNumberT  TransactionDateTime TransactionAmount
123123  123 123 07/20/2016T09:20:33 50
123123  123 123 07/24/2016T15:30:11 100
123123  123 123 07/24/2016T21:15:01 175
123123  123 123 07/28/2016T08:30:00 75
import featuretools as ft
import pandas as pd

# Initialise the customer data: one row per account, so every "Key" value
# is distinct and can serve as the index of the "customers" entity.
Customer_Df = pd.DataFrame({
    'Key': [123123, 345345, 345789],
    'CustomerId': [123, 345, 345],
    'AccountNumber': [123, 345, 789],
    'AccountOpenDate': ['6/16/16', '9/20/16', '12/25/16'],
})

# Print the output.
print(Customer_Df)

# Initialise the transaction data: several rows share the same "KeyT"
# because one account has many transactions.
Transaction_Df = pd.DataFrame({
    'KeyT': [123123, 123123, 123123, 123123, 345345, 345345, 345345, 345789, 345789],
    'CustomerIdT': [123, 123, 123, 123, 345, 345, 345, 345, 345],
    'AccountNumberT': [123, 123, 123, 123, 345, 345, 345, 789, 789],
    'TransactionDateTime': [
        '07/20/2016T09:20:33', '07/24/2016T15:30:11', '07/24/2016T21:15:01',
        '07/28/2016T08:30:00', '10/01/2016T08:15:13', '10/04/2016T19:12:12',
        '10/10/2016T16:12:55', '01/04/2017T09:20:22', '01/18/2017T18:18:19',
    ],
    'TransactionAmount': [50, 100, 175, 75, 44, 9, 37.5, 55, 157],
})

# Print the output.
print(Transaction_Df)

# Each entity is described as (dataframe, index column[, time index column]).
# The names must match the variables and columns defined above exactly
# (Python identifiers and pandas column labels are case-sensitive).
#
# NOTE: "KeyT" is passed as the index of "transactions" although its values
# repeat, so featuretools rejects it with
# "AssertionError: Index is not unique on dataframe (Entity transactions)".
# "KeyT" is the foreign key to customers; the index presumably needs to be a
# column that is unique per transaction row instead -- confirm against the
# featuretools documentation for the installed version.
entities = {
    "customers": (Customer_Df, "Key"),
    "transactions": (Transaction_Df, "KeyT", "TransactionDateTime"),
}
relationships = [("customers", "Key", "transactions", "KeyT")]

feature_matrix_transactions, features_defs = ft.dfs(
    entities=entities,
    relationships=relationships,
    target_entity="customers")
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-31-8906bed57593> in <module>
      4                                                      entities=entities,
      5                                                      relationships=relationships,
----> 6                                                      target_entity="customers")

~/anaconda3/lib/python3.7/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
     44                     ep.on_error(error=e,
     45                                 runtime=runtime)
---> 46                 raise e
     47 
     48             # send return value

~/anaconda3/lib/python3.7/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
     36                 # call function
     37                 start = time.time()
---> 38                 return_value = func(*args, **kwargs)
     39                 runtime = time.time() - start
     40             except Exception as e:

~/anaconda3/lib/python3.7/site-packages/featuretools/synthesis/dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types)
    180     '''
    181     if not isinstance(entityset, EntitySet):
--> 182         entityset = EntitySet("dfs", entities, relationships)
    183 
    184     dfs_object = DeepFeatureSynthesis(target_entity, entityset,

~/anaconda3/lib/python3.7/site-packages/featuretools/entityset/entityset.py in __init__(self, id, entities, relationships)
     82                                        index=index_column,
     83                                        time_index=time_column,
---> 84                                        variable_types=variable_types)
     85 
     86         for relationship in relationships:

~/anaconda3/lib/python3.7/site-packages/featuretools/entityset/entityset.py in entity_from_dataframe(self, entity_id, dataframe, index, variable_types, make_index, time_index, secondary_time_index, already_sorted)
    486             secondary_time_index=secondary_time_index,
    487             already_sorted=already_sorted,
--> 488             make_index=make_index)
    489         self.entity_dict[entity.id] = entity
    490         self.reset_data_description()

~/anaconda3/lib/python3.7/site-packages/featuretools/entityset/entity.py in __init__(self, id, df, entityset, variable_types, index, time_index, secondary_time_index, last_time_index, already_sorted, make_index, verbose)
     79 
     80         self.df = df[[v.id for v in self.variables]]
---> 81         self.set_index(index)
     82 
     83         self.time_index = None

~/anaconda3/lib/python3.7/site-packages/featuretools/entityset/entity.py in set_index(self, variable_id, unique)
    451         self.df.index.name = None
    452         if unique:
--> 453             assert self.df.index.is_unique, "Index is not unique on dataframe (Entity {})".format(self.id)
    454 
    455         self.convert_variable_type(variable_id, vtypes.Index, convert_data=False)

AssertionError: Index is not unique on dataframe (Entity transactions)

Tags: in self id df index time lib site