from sklearn.tree import DecisionTreeClassifier

# Lookup table of candidate classifiers, keyed by a short code.
# Each value is an estimator instance built with fixed hyperparameters; nothing
# is fitted here.
# NOTE(review): apart from DecisionTreeClassifier, the estimator names used
# below are presumably imported elsewhere in this file -- confirm.
clfs = {'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        }
class Model:
    """Prepare a tabular dataset for scikit-learn classifiers.

    The constructor one-hot encodes every object-dtype feature column,
    splits the dependent variable off from the features, label-encodes it
    and optionally applies entropy-based feature selection.
    """

    def __init__(self, dataset, dependentVar, doFeatureSelection=True, doPCA=False, nComponents=10):
        """Encode the features of ``dataset`` and separate out the target.

        Args:
            dataset: pandas DataFrame holding the features and the target column.
            dependentVar: label of the column in ``dataset`` to be predicted.
            doFeatureSelection: when true, reduce the feature matrix with a
                decision tree built using the entropy criterion.
            doPCA: not used by the code shown here.
            nComponents: not used by the code shown here.

        Raises:
            KeyError: if ``dependentVar`` is not a column of ``dataset``.
        """
        # The body refers to the frame as "dataSet"; bind that name to the
        # parameter so it is defined (rebinding below never mutates the
        # caller's frame, since concat/drop return new objects).
        dataSet = dataset

        # Choose the columns to encode up front and address them by label.
        # Encoding appends dummy columns and drops the source column, so
        # positional indexes taken from the original frame would go stale
        # after the first encoded column. The dependent variable is left
        # alone: it is label-encoded further down, and one-hot encoding it
        # would remove the column that is looked up next.
        categorical = [column
                       for column, tp in zip(dataSet.columns, dataSet.dtypes)
                       if tp == 'object' and column != dependentVar]

        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 function.
        for column in categorical:
            print('Encoding feature "' + str(column) + '" ...')
            print('Old dataset shape: ' + str(dataSet.shape))
            dummies = pd.get_dummies(dataSet[column], prefix=column)
            dataSet = pd.concat([dataSet, dummies], axis=1).drop(column, axis=1)
            print('New dataset shape: ' + str(dataSet.shape))

        # Set the dependent variable (y) to the appropriate column
        y = dataSet.loc[:, dependentVar]

        # Transform that information to a format that scikit-learn understands
        # This may be redundant at times
        labels = preprocessing.LabelEncoder().fit_transform(y)

        # Remove the dependent variable from training sets
        X = dataSet.drop(dependentVar, axis=1).values

        # Perform entropy-based feature selection
        if doFeatureSelection:
            print('Performing Feature Selection:')
            print('Shape of dataset before feature selection: ' + str(X.shape))
            clf = DecisionTreeClassifier(criterion='entropy')
            # NOTE(review): this statement was truncated in the source
            # ("X =, y).transform(X)"); it is restored here as fit-then-
            # transform on the tree above -- confirm against the original.
            # Estimator-level transform() is presumably only available on old
            # scikit-learn releases; SelectFromModel is the documented
            # replacement -- verify against the installed version.
            X = clf.fit(X, y).transform(X)
            print('Shape of dataset after feature selection: ' + str(X.shape) + '\n')

