尝试在python上运行市场篮子分析。最后一步是 一个空的数据集,有人能建议为什么以及如何修复它吗
#importing relevant packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
#loading data
dataset=pd.read_excel('http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx')
#Data preprocessing
#creating new seperate columns for invoice date and time
dataset['Date'] = pd.to_datetime(dataset['InvoiceDate']).dt.date
dataset['Time'] = pd.to_datetime(dataset['InvoiceDate']).dt.time
#converting InvoiceNo to numeric datatype and to check if there is other value than number
#it will convert into Nan
dataset['InvoiceNo'] = pd.to_numeric(dataset['InvoiceNo'],errors='coerce')
#removing rows with missing values
dataset=dataset.dropna()
baskets = (dataset.groupby(['InvoiceNo', 'Description'])['Quantity']
.sum().unstack().reset_index().fillna(0)
.set_index('InvoiceNo'))
baskets.head()
from mlxtend.frequent_patterns import apriori, association_rules
def hot_encode(x):
if(x<= 0):
return 0
if(x>= 1):
return 1
baskets_encoded = baskets.applymap(hot_encode)
baskets = baskets_encoded
frq_items = apriori(baskets, min_support = 0.05, use_colnames = True)
rules = association_rules(frq_items, metric ="confidence", min_threshold = 0.8)
rules = rules.sort_values('confidence', ascending =False)
print(rules.head())
下面是空数据框:
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction]
Index: []
目前没有回答
相关问题 更多 >
编程相关推荐