我是第一次使用Python中的featuretool包。在尝试执行以下命令时,我得到“AssertionError:Index is not unique on dataframe”。我仔细查看了我的专栏,没有发现任何有类似名称的专栏。有什么关于可能出错的提示/建议吗?你知道吗
课程: 客户数据框由Key、CustomerID、AccountNumber、Account Open Date组成
事务数据包含:KeyT、CustomerIDT、AccountNumberT、TransactionDateTime、transactionmount
(注意:Key和KeyT;CustomerID和CustomerIDT;AccountNumber和AccountNumberT包含相似的信息,只是列名不同)。你知道吗
客户到交易将有一对多关系。 客户数据集中的所有记录都是唯一的。你知道吗
Key CustomerID AccountNumber Account Open Date
123123 123 123 6/16/16
345345 345 345 9/20/16
345789 345 789 12/25/16
KeyT CustomerIDT AccountNumberT TransactionDateTime TransactionAmount
123123 123 123 07/20/2016T09:20:33 50
123123 123 123 07/24/2016T15:30:11 100
123123 123 123 07/24/2016T21:15:01 175
123123 123 123 07/28/2016T08:30:00 75
import pandas as pd
# intialise data of lists.
Customer_Df = {'Key':[123123, 345345, 345789], 'CustomerId':[123, 345, 345], 'AccountNumber':[123, 345, 789],'AccountOpenDate':['6/16/16','9/20/16','12/25/16']}
# Create DataFrame
Customer_Df = pd.DataFrame(Customer_Df)
# Print the output.
print(Customer_Df)
# intialise data of lists.
Transaction_Df = {'KeyT':[123123, 123123, 123123,123123,345345, 345345, 345345,345789,345789], 'CustomerIdT':[123, 123, 123, 123,345,345,345,345,345], 'AccountNumberT':[123, 123, 123,123,345,345,345,789,789],'TransactionDateTime':['07/20/2016T09:20:33','07/24/2016T15:30:11','07/24/2016T21:15:01','07/28/2016T08:30:00','10/01/2016T08:15:13','10/04/2016T19:12:12','10/10/2016T16:12:55','01/04/2017T09:20:22','01/18/2017T18:18:19'],'TransactionAmount':[50,100,175,75,44,9,37.5,55,157]}
# Create DataFrame
Transaction_Df = pd.DataFrame(Transaction_Df)
# Print the output.
print(Transaction_Df)
entities = {
"customers" : (Customer_df, "Key"),
"transactions" : (Transaction_df, "KeyT", "transactionDateTime")
}
relationships = [("customers", "Key", "transactions", "KeyT")]
feature_matrix_transactions,features_defs = ft.dfs(
entities=entities,
relationships=relationships,
target_entity="customers")
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-31-8906bed57593> in <module>
4 entities=entities,
5 relationships=relationships,
----> 6 target_entity="customers")
~/anaconda3/lib/python3.7/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
44 ep.on_error(error=e,
45 runtime=runtime)
---> 46 raise e
47
48 # send return value
~/anaconda3/lib/python3.7/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
36 # call function
37 start = time.time()
---> 38 return_value = func(*args, **kwargs)
39 runtime = time.time() - start
40 except Exception as e:
~/anaconda3/lib/python3.7/site-packages/featuretools/synthesis/dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types)
180 '''
181 if not isinstance(entityset, EntitySet):
--> 182 entityset = EntitySet("dfs", entities, relationships)
183
184 dfs_object = DeepFeatureSynthesis(target_entity, entityset,
~/anaconda3/lib/python3.7/site-packages/featuretools/entityset/entityset.py in __init__(self, id, entities, relationships)
82 index=index_column,
83 time_index=time_column,
---> 84 variable_types=variable_types)
85
86 for relationship in relationships:
~/anaconda3/lib/python3.7/site-packages/featuretools/entityset/entityset.py in entity_from_dataframe(self, entity_id, dataframe, index, variable_types, make_index, time_index, secondary_time_index, already_sorted)
486 secondary_time_index=secondary_time_index,
487 already_sorted=already_sorted,
--> 488 make_index=make_index)
489 self.entity_dict[entity.id] = entity
490 self.reset_data_description()
~/anaconda3/lib/python3.7/site-packages/featuretools/entityset/entity.py in __init__(self, id, df, entityset, variable_types, index, time_index, secondary_time_index, last_time_index, already_sorted, make_index, verbose)
79
80 self.df = df[[v.id for v in self.variables]]
---> 81 self.set_index(index)
82
83 self.time_index = None
~/anaconda3/lib/python3.7/site-packages/featuretools/entityset/entity.py in set_index(self, variable_id, unique)
451 self.df.index.name = None
452 if unique:
--> 453 assert self.df.index.is_unique, "Index is not unique on dataframe (Entity {})".format(self.id)
454
455 self.convert_variable_type(variable_id, vtypes.Index, convert_data=False)
AssertionError: Index is not unique on dataframe (Entity transactions)
目前没有回答
相关问题 更多 >
编程相关推荐