# load example data and classifier
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
# for determining distance
from sklearn import tree
import networkx as nx
import pydot
# Load the wine dataset, hold out a test split, and fit a shallow decision
# tree on the training portion. random_state is pinned on both the split and
# the classifier so repeated runs use the same seed.
X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
clf = DecisionTreeClassifier(max_depth=3, random_state=42)
clf.fit(X_train, y_train)
def dt_to_mg(clf):
    """Convert a fitted DecisionTreeClassifier to an undirected networkx graph.

    The tree is exported to DOT text, parsed with pydot, imported into
    networkx, and finally stripped of edge direction so that path lengths can
    be measured between any two nodes regardless of parent/child order.

    Args:
        clf: A decision tree classifier accepted by
            ``sklearn.tree.export_graphviz``.

    Returns:
        The undirected copy of the graph that networkx builds from the DOT
        export of ``clf``.
    """
    # Export the classifier to a string in DOT format.
    dot_data = tree.export_graphviz(clf)
    # graph_from_dot_data returns a list of graphs; the export holds one tree.
    dot_graph = pydot.graph_from_dot_data(dot_data)[0]
    # Import the pydot graph into networkx.
    MG = nx.drawing.nx_pydot.from_pydot(dot_graph)
    # Drop edge direction so distances are symmetric between nodes.
    return MG.to_undirected()
uMG = dt_to_mg(clf)

# Collect the ids of the leaves that the samples land in, converted to
# strings because the graph lookups below address nodes by string name.
leaves = {str(x) for x in clf.apply(X)}
print(leaves)
# Output (set ordering may vary):
# {'10', '7', '9', '5', '3', '4'}

# Find the distance (number of edges on the shortest path) between two leaves.
print(nx.shortest_path_length(uMG, source='9', target='5'))
# Output: 5

# The graph is undirected, so swapping source and target also works.
print(nx.shortest_path_length(uMG, source='5', target='9'))
# Output: 5
这是一个依赖其他Python包(即networkx和pydot)的示例,因此解决方案中附有详尽的注释。这个问题带有
scikit-learn
标签,因此解决方案用Python编写。首先准备一些数据和一个通用的
DecisionTreeClassifier
:此函数使用`tree.export_graphviz`、`pydot.graph_from_dot_data`、`nx.drawing.nx_pydot.from_pydot`和`to_undirected`将已拟合的
DecisionTreeClassifier
转换为networkx无向MultiGraph
。使用`nx.shortest_path_length`查找树中任意两个节点之间的距离。
shortest_path_length
返回source
和target
之间的边数。这并不是提问者(OP)所要求的距离度量。我认为它们之间的节点数应该是n_edges - 1
。或者,也可以计算所有叶节点之间的距离,并将它们存储在字典或其他合适的对象中,以便进行下游计算。
相关问题 更多 >
编程相关推荐