Python sklearn.tree 模块,export_graphviz() 实例源码
我们从Python开源项目中,提取了以下30个代码示例,用于说明如何使用sklearn.tree.export_graphviz()。
def analyseReasonWithDecisonTree(anamolySample,normalSample,name):
    """Fit a decision tree that separates anomalous from normal samples and save it as a PDF.

    Rows of ``anamolySample`` are labelled 1 and rows of ``normalSample`` are
    labelled 0. The fitted tree is exported as DOT text, parsed with pydotplus
    and written to ``<time.time()>DT.pdf`` in the current working directory.

    Args:
        anamolySample: sequence of feature rows labelled 1.
        normalSample: sequence of feature rows labelled 0.
        name: feature names forwarded to ``export_graphviz``.
    """
    # Build a fresh list. The previous code aliased ``anamolySample`` and
    # extended it in place, so the caller's list grew on every call.
    data = list(anamolySample) + list(normalSample)
    target = [1] * len(anamolySample) + [0] * len(normalSample)
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(data, target)
    dot_data = tree.export_graphviz(clf, out_file=None, feature_names=name,
                                    filled=True, special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    s = str(time.time())
    graph.write_pdf(s + "DT.pdf")
# NOTE(review): this snippet appears to have been truncated during extraction.
# The signature below lists ``len(anamolySample)`` as a parameter, which is not
# valid Python, and ``target``, ``data`` and ``normalSample`` are used without
# being defined in the visible lines. Restore it from the upstream project
# before use.
def analyseReasonWithDecisonTree(anamolySample,len(anamolySample)):
target.append(1)
# ``data.append(...)`` with the result re-assigned: presumably ``data`` is a
# pandas DataFrame rather than a list -- TODO confirm.
data = data.append(normalSample)
for i in range(0,len(normalSample)):
target.append(0)
# Python 2 print statement.
print len(data)
clf = tree.DecisionTreeClassifier()
clf = clf.fit(data, target)
# NOTE(review): no ``out_file=None`` is passed here; whether a DOT string is
# returned depends on the scikit-learn version -- confirm.
dot_data = tree.export_graphviz(clf,special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
# The PDF name is the current timestamp followed by "DT.pdf".
s = str(time.time())
graph.write_pdf(s+"DT.pdf")
# NOTE(review): this snippet appears to have been truncated during extraction.
# The first ``for`` line has no trailing colon and iterates ``range(0, target)``
# where ``target`` is a list, and ``clf`` is never defined in the visible
# lines. Restore it from the upstream project before use.
def analyseReasonWithDecisonTree(anamolySample,normalSample):
data = anamolySample
target = []
for i in range(0, target)
# Collect the column labels of ``data`` (``data.columns`` suggests a pandas
# DataFrame -- TODO confirm).
name = []
for i in data.columns:
name.append(i)
# NOTE(review): ``name`` is built above but not passed to the export call.
dot_data = tree.export_graphviz(clf,special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
# The PDF name is the current timestamp followed by "DT.pdf".
s = str(time.time())
graph.write_pdf(s+"DT.pdf")
# NOTE(review): truncated snippet. The ``def`` line has no trailing colon and
# ``dot_data`` is never defined in the visible lines; the body that built the
# classifier and the DOT text is missing. Restore it from the upstream project
# before use.
def analyseReasonWithDecisonTree(anamolySample,special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
# The PDF name is the current timestamp followed by "DT.pdf".
s = str(time.time())
graph.write_pdf(s+"DT.pdf")
# NOTE(review): truncated snippet. The ``def`` line has no trailing colon and
# ``dot_data`` is never defined in the visible lines; the body that built the
# classifier and the DOT text is missing. Restore it from the upstream project
# before use.
def analyseReasonWithDecisonTree(anamolySample,special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
# The PDF name is the current timestamp followed by "DT.pdf".
s = str(time.time())
graph.write_pdf(s+"DT.pdf")
def train(self, training_set, training_target, fea_index):
    """Train an entropy-based decision tree, render it to PDF and persist it.

    Writes ``output/tree-vis.pdf`` (the rendered tree) and ``output/CART.pkl``
    (the fitted classifier, via ``joblib.dump``).

    Args:
        training_set: feature rows used for fitting.
        training_target: labels for ``training_set``; their string forms become
            the class names shown in the rendered tree.
        fea_index: indices into the module-level ``attr_list`` selecting the
            feature names shown in the rendered tree.
    """
    clf = tree.DecisionTreeClassifier(criterion="entropy", min_samples_split=30,
                                      class_weight="balanced")
    clf = clf.fit(training_set, training_target)
    class_names = np.unique([str(i) for i in training_target])
    feature_names = [attr_list[i] for i in fea_index]
    # Request the DOT text explicitly. Without ``out_file=None`` older
    # scikit-learn releases write to "tree.dot" and return None, which
    # pydotplus cannot parse.
    dot_data = tree.export_graphviz(clf,
                                    out_file=None,
                                    feature_names=feature_names,
                                    class_names=class_names,
                                    filled=True, rounded=True,
                                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("output/tree-vis.pdf")
    joblib.dump(clf, 'output/CART.pkl')
def visualize_tree(clf, feature_names, class_names, output_file,
                   method='pdf'):
    """Render a fitted decision tree with pydotplus and return the graph.

    Args:
        clf: fitted tree estimator to export.
        feature_names: feature names shown in the rendered nodes.
        class_names: class names shown in the rendered nodes.
        output_file: path without extension; ".pdf" is appended when
            ``method == 'pdf'``.
        method: 'pdf' writes ``output_file + ".pdf"``; 'inline' builds an
            ``Image`` from the PNG bytes; any other value only returns the graph.

    Returns:
        The pydotplus graph built from the exported DOT text.
    """
    dot_data = StringIO()
    # Use the caller-supplied names. The previous code ignored both parameters
    # and read a global ``iris`` dataset instead.
    tree.export_graphviz(clf, out_file=dot_data,
                         feature_names=feature_names,
                         class_names=class_names,
                         filled=True,
                         special_characters=True,
                         impurity=False)
    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
    if method == 'pdf':
        graph.write_pdf(output_file + ".pdf")
    elif method == 'inline':
        # NOTE(review): the Image object is created and discarded, as in the
        # original; callers wanting inline display presumably need to build it
        # from the returned graph themselves -- confirm.
        Image(graph.create_png())
    return graph
# An example using the iris dataset
def visualize_tree(tree, feature_name, dot_file):
    """Create a PNG rendering of a tree using the graphviz ``dot`` program.

    tree -- fitted scikit-learn decision tree.
    feature_name -- list of feature names.
    dot_file -- DOT file name and path; the PNG is written next to it with the
        extension replaced by ".png".

    Exits the interpreter with an error message if ``dot`` cannot be run.
    """
    import os  # local import: only needed for the extension swap below

    # Write the DOT text to the file that is rendered afterwards. The previous
    # code always wrote "tree.dot", whatever ``dot_file`` was.
    with open(dot_file, 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_name)
    # Swap only the extension; ``str.replace('dot', 'png')`` also rewrote any
    # other "dot" occurring in the path.
    dt_png = os.path.splitext(dot_file)[0] + '.png'
    command = ["dot", "-Tpng", dot_file, "-o", dt_png]
    try:
        subprocess.check_call(command)
    except Exception as e:
        print(e)
        exit("Could not run dot,ie graphviz,to "
             "produce visualization")
def test_friedman_mse_in_graphviz():
    """Check that exported node labels mention the ``friedman_mse`` criterion.

    Exports a ``DecisionTreeRegressor`` and each member tree of a small
    ``GradientBoostingClassifier`` into one buffer, then asserts that every
    bracketed node label containing "samples" also contains "friedman_mse".
    Relies on module-level ``X`` and ``y`` fixtures.
    """
    clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0)
    clf.fit(X, y)
    dot_data = StringIO()
    export_graphviz(clf, out_file=dot_data)

    # The garbled original passed ``y`` positionally after a keyword argument
    # (a syntax error) and never fitted the boosting model.
    clf = GradientBoostingClassifier(n_estimators=2, random_state=0)
    clf.fit(X, y)
    for estimator in clf.estimators_:
        export_graphviz(estimator[0], out_file=dot_data)

    # Raw string so the regex escapes are not treated as string escapes.
    for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()):
        assert_in("friedman_mse", finding.group())
def scikitExportDecisionTree2Dot (fname):
    """Write the module-level ``DT_MODEL`` decision tree to ``fname`` in DOT format.

    Raises:
        AssertionError: if ``DT_MODEL`` is None.
    """
    assert(DT_MODEL is not None)
    # ``with`` closes the file even when the export raises.
    with open(fname, "w") as fdot:
        tree.export_graphviz(DT_MODEL, out_file=fdot)
def use_tree(X_data,y_data):
    """Fit and return a depth-3, entropy-based ``DecisionTreeClassifier``.

    The classifier uses ``max_features='sqrt'`` and ``random_state=0``.
    """
    settings = dict(criterion='entropy', max_features='sqrt',
                    max_depth=3, random_state=0)
    classifier = DecisionTreeClassifier(**settings)
    classifier.fit(X_data, y_data)
    return classifier
# Plotting ("hua tu") -- currently disabled:
# X_comined = np.vstack((X_data,X_valid))
# y_comined = np.hstack((y_data,y_valid))
# plot_decision_regions(X_comined,y_comined,classifier=tree,test_idx=range(105,150))
# plt.show()
# export_graphviz(tree,out_file='tree.dot',feature_names = ['petal length','petal width'])
def constructModel(corpus, classList, features, modelOutput):
    """
    Trains a bagging classifier on the test corpus.
    Args:
        corpus: A list of lists, containing the GC content, coverage, and class number.
        classList: A list of class names.
        features: List of variables used by each contig.
        modelOutput: Location to save model as GraphViz DOT, or False to save no model.
    Returns:
        classifier: A fitted BaggingClassifier trained on 70% of the corpus.
    """
    # ``sorted`` gives the ordering the original obtained with ``corpus.sort()``
    # without reordering the caller's list.
    rows = sorted(corpus)
    X = [item[:-1] for item in rows]  # all but the last item
    Y = [item[-1] for item in rows]   # only the last item
    X_train, X_test, Y_train, Y_test = mscv.train_test_split(X, Y, test_size=0.3, random_state=0)
    # TODO: implement classifier testing and comparison (plain decision tree,
    # random forest, AdaBoost, gradient boosting). Only the bagging classifier
    # is used for now, as per the paper.
    baggingClassifier = ensemble.BaggingClassifier()
    baggingClassifier = baggingClassifier.fit(X_train, Y_train)
    click.echo("Bagging classifier built,score is %s out of 1.00" % baggingClassifier.score(X_test, Y_test))
    if modelOutput:
        with open(modelOutput, 'w') as dotfile:
            # export_graphviz only accepts a single decision tree, so the
            # ensemble itself cannot be passed to it. Export every member tree
            # instead, one graph after another in the same file, labelling each
            # tree's inputs through the feature indices the ensemble recorded.
            members = zip(baggingClassifier.estimators_,
                          baggingClassifier.estimators_features_)
            for member, columns in members:
                tree.export_graphviz(member, out_file=dotfile,
                                     feature_names=[features[i] for i in columns],
                                     class_names=classList, filled=True,
                                     special_characters=True)
    return baggingClassifier
def export_model(self, IDcol):
    """Export this model under the name 'decision_tree'.

    Delegates to ``self.export_model_base``. Per the original comment, that
    writes the model file and creates a submission with the model index, to be
    used later for building an ensemble.
    """
    model_name = 'decision_tree'
    self.export_model_base(IDcol, model_name)
## UNDER DEVELOPMENT CODE FOR PRINTING TREES
# def get_tree(self):
# return self.alg.tree_
# Print the tree in visual format
# Inputs:
# export_pdf - if True,a pdf will be exported with the
# filename as specified in pdf_name argument
# pdf_name - name of the pdf file if export_pdf is True
# def printTree(self,export_pdf=True,file_name="Decision_Tree.pdf"):
# dot_data = StringIO()
# export_graphviz(
# self.alg,out_file=dot_data,feature_names=self.predictors,
# filled=True,rounded=True,special_characters=True)
# export_graphviz(
# self.alg,out_file='data.dot',
# filled=True,special_characters=True
# )
# graph = pydot.graph_from_dot_data(dot_data.getvalue())
# if export_pdf:
# graph.write_pdf(file_name)
# return graph
#####################################################################
##### RANDOM FOREST
#####################################################################
def save_decision_tree(treePath, model, fold_idx, featNames):
    """Export ``model`` as ``fold<fold_idx>.dot`` inside the ``treePath`` directory.

    The directory is created when missing. Node values are exported as
    proportions and classes are named absent/mild/moderate/severe.

    Args:
        treePath: output directory, with or without a trailing separator.
        model: fitted decision tree to export.
        fold_idx: fold number used in the file name.
        featNames: feature names shown in the exported tree.
    """
    if not os.path.exists(treePath):
        os.makedirs(treePath)
    # os.path.join keeps the file inside the directory even when ``treePath``
    # has no trailing separator. Plain concatenation produced
    # "<treePath>fold0.dot" beside the directory in that case.
    dot_path = os.path.join(treePath, 'fold' + str(fold_idx) + '.dot')
    export_graphviz(model, out_file=dot_path, feature_names=featNames,
                    class_names=["absent", "mild", "moderate", "severe"],
                    proportion=True)
def classify(y, x, test_y, test_x):
    """Fit a depth-4 tree on three-way labels derived from ``y`` and print summary stats.

    ``y`` is bucketed into 1 (> 0.02), -1 (< -0.02) and 0 (otherwise). The tree
    is exported to ``D:\\Cache\\tree.txt``, post-processed by
    ``factor_exchange`` and displayed by ``disp_tree``. The module globals
    ``left``, ``right``, ``feature`` and ``threshold`` are overwritten with the
    fitted tree's arrays.

    NOTE(review): ``test_y`` is overwritten with ``y`` and ``test_x`` is never
    read, so the printed statistics describe the training data -- confirm this
    is intended.
    """
    global data_df, factor_name, left, right, feature, ratio, threshold
    bucketed = np.zeros(len(y))
    bucketed[y > 0.02] = 1
    bucketed[y < -0.02] = -1
    # Each leaf must hold at least 5% of the samples.
    leaf_floor = int(0.05 * len(y))
    clf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=leaf_floor)
    clf.fit(x, bucketed)
    predicted = clf.predict(x)
    fname = "D:\\Cache\\tree.txt"
    test_y = y
    with open(fname, 'w') as dot_handle:
        tree.export_graphviz(clf, out_file=dot_handle)
    # The file is closed before factor_exchange reads it.
    factor_exchange(factor_name, fname)
    fitted = clf.tree_
    left, right = fitted.children_left, fitted.children_right
    feature, threshold = fitted.feature, fitted.threshold
    disp_tree()
    # (The original comment at this point was lost to mis-encoding.)
    print("mean income is:", str(np.average(test_y)),
          "\nwin ratio is: ", str(np.sum(test_y > 0) / len(test_y)))
    print("after training\n"
          "mean class_1 is: ", str(np.average(test_y[predicted > 0])), str(np.sum(test_y[predicted > 0] > 0) / np.sum(predicted > 0)),
          "\ntotal class_1 is:", str(np.sum(np.sum(predicted > 0))),
          "\nmean class_0 is: ", str(np.average(test_y[predicted < 0])))
def DecisionTreeModel(self, dummy_x, dummy_y):
    """Return an entropy-criterion ``DecisionTreeClassifier`` fitted on the given data."""
    model = DecisionTreeClassifier(criterion='entropy')
    model.fit(dummy_x, dummy_y)
    return model
# with open('dt_information_gain.dot','w') as f:
# f = export_graphviz(clf,feature_names=vec.get_feature_names(),out_file=f)
def create_graphviz_file(self, file_name):
    """Render ``self.clf`` to ``file_name + ".pdf"`` and print a confirmation.

    Feature and class labels come from ``self.feature_names`` and
    ``self.target_names``.
    """
    export_options = dict(
        out_file=None,
        feature_names=self.feature_names,
        class_names=self.target_names,
        filled=True,
        special_characters=True,
    )
    dot_source = tree.export_graphviz(self.clf, **export_options)
    rendered = pdp.graph_from_dot_data(dot_source)
    rendered.write_pdf(file_name + ".pdf")
    print("Decision graph created")
def decisionTree(X,y,attributeNames,classNames,fileName,s="",X_train=None,y_train=None, X_test=None, y_test=None):
    """Fit a Gini decision tree, export it to ``fileName`` and return the error rate.

    When any of the four train/test arguments is missing, the tree is both
    trained and evaluated on the full ``X``/``y``.

    Args:
        X, y: full data set and labels.
        attributeNames: feature names written into the exported tree.
        classNames: accepted for interface compatibility; not used here.
        fileName: path of the DOT file to write.
        s: label printed before training.
        X_train, y_train, X_test, y_test: optional explicit split.

    Returns:
        Fraction of test samples whose prediction falls on the other side of
        0.5 from the true label.
    """
    print("Doing decision tree for: ")
    print(s)
    if X_train is None or X_test is None or y_train is None or y_test is None:
        X_train = X
        X_test = X
        y_train = y
        y_test = y
    # Fit classification tree, Gini split criterion, min_samples_split as pruning
    dtc = tree.DecisionTreeClassifier(criterion='gini', min_samples_split=100)
    dtc = dtc.fit(X_train, y_train)
    # Export tree graph for visualization purposes:
    # (note: you can use i.e. Graphviz application to visualize the file)
    out = tree.export_graphviz(dtc, out_file=fileName, feature_names=attributeNames)
    # Current scikit-learn returns None when ``out_file`` is given, so the
    # unconditional ``out.close()`` raised AttributeError. Presumably older
    # releases returned the file object, so close it only if one came back.
    if hasattr(out, 'close'):
        out.close()
    # Predict the whole test set in one call; per-row prediction on 1-D input
    # is rejected by recent scikit-learn releases.
    predictions = dtc.predict(X_test)
    correct = 0
    wrong = 0
    for i, x_class in enumerate(predictions):
        if (x_class < 0.5 and y_test[i] < 0.5) or (x_class > 0.5 and y_test[i] > 0.5):
            correct += 1
        else:
            wrong += 1
    # ``double`` is not a Python builtin; ``float`` is the equivalent.
    rate = float(wrong) / float(correct + wrong)
    print(rate)
    print('\n')
    return rate
def train_predictor(df, markov_blanket, p_train=0.6):
    """Fit a small decision tree on the Markov-blanket columns and report its scores.

    The first ``p_train`` fraction of rows is used for training and the rest
    for out-of-sample checking; the target column is ``"TAR10"``. Accuracy and
    confusion matrices for both parts are printed, and the tree is exported to
    ``D:\\MLmaster\\Tree.dot``.

    Args:
        df: data frame holding the feature columns and ``"TAR10"``.
        markov_blanket: iterable of feature column names.
        p_train: fraction of rows (from the top) used for training.

    Returns:
        The ``RandomForestClassifier`` created at the top of the function.
        NOTE(review): it is never fitted here -- confirm callers expect that.
    """
    rf = RandomForestClassifier(n_estimators=5)
    clf1 = tree.DecisionTreeClassifier(max_leaf_nodes=10, class_weight=None)
    x = df[list(markov_blanket)].values
    y = df["TAR10"].values
    n_samples = x.shape[0]
    n_train = int(np.round(p_train * n_samples))
    xt, yt = x[:n_train, :], y[:n_train]   # in-sample part
    xc, yc = x[n_train:, :], y[n_train:]   # out-of-sample part
    ynames = ["lateral", "alcista"]
    xnames = list(markov_blanket)
    clf1.fit(xt, yt)
    sys.stdout.write("Result INS is {}\n".format(clf1.score(xt, yt)))
    sys.stdout.write("Result OOS is {}\n".format(clf1.score(xc, yc)))
    scores = confusion_matrix(yt, clf1.predict(xt), labels=[0, 1])
    # The garbled original lost ``labels=[0,`` here, leaving a syntax error.
    scores2 = confusion_matrix(yc, clf1.predict(xc), labels=[0, 1])
    print(scores)
    print(scores2)
    # Raw string: same path value, without relying on unrecognised escapes.
    tree.export_graphviz(clf1, out_file=r'D:\MLmaster\Tree.dot',
                         class_names=ynames, feature_names=xnames)
    return rf
def iris_demo():
    """Fit a default decision tree on the iris data set and render it via ``visualize_tree``."""
    iris = load_iris()
    dot_file = 'tree.dot'
    # Per the original (partly mis-encoded) comment: iris.data is 150x4 and
    # iris.target holds the class labels 0, 1, 2 (150x1).
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(iris.data, iris.target)
    tree.export_graphviz(clf, out_file=dot_file)
    visualize_tree(clf, iris.feature_names, dot_file)
    # Alternative rendering through pydot (disabled):
    # (graph,) = pydot.graph_from_dot_file('tree.dot')
    # graph.write_png('somefile.png')
def loan_demo():
    """Fit a default decision tree on the loan data and render it via ``visualize_tree``.

    The data comes from ``get_loan_data_lh()``; the DOT file is ``loan.dot``.
    """
    dt = tree.DecisionTreeClassifier()
    X, Y = get_loan_data_lh()
    dt = dt.fit(X, Y)
    dot_file = 'loan.dot'
    tree.export_graphviz(dt, out_file=dot_file)
    feature_names = ['age', 'has work', 'own house', 'loan level']
    # Pass the feature names, matching the three-argument call made in
    # ``iris_demo``. The previous two-argument call left them unused.
    visualize_tree(dt, feature_names, dot_file)
def test_graphviz_errors():
    """Check that ``export_graphviz`` raises IndexError for empty name lists.

    Relies on module-level ``X`` and ``y`` fixtures.
    """
    # Check for errors of export_graphviz
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2)
    clf.fit(X, y)

    # Check feature_names error
    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, feature_names=[])

    # Check class_names error (the garbled original dropped the callable and
    # its positional arguments from this call)
    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, class_names=[])
def classifyTree(Xtr, ytr, Xte, yte, splitCriterion="gini", maxDepth=0, visualizeTree=False):
    """ Classifies data using CART

    Args:
        Xtr, ytr: training samples and labels.
        Xte, yte: test samples and ground-truth labels.
        splitCriterion: split criterion handed to the tree.
        maxDepth: maximum tree depth; 0 is treated as "no limit".
        visualizeTree: when true, the fitted tree is also written to a PDF
            named ``tree_<timestamp>.pdf``.

    Returns:
        ``(accuracyRate, timing, predicted)``. On failure the error is logged
        and the defaults ``(0.0, 0.0, [])`` are returned.
    """
    # Defaults are bound before the ``try`` so the final ``return`` cannot hit
    # an unbound ``predicted`` when training fails.
    accuracyRate, probabilities, timing, predicted = 0.0, [], 0.0, []
    try:
        # Perform classification. scikit-learn rejects max_depth=0, so the
        # default of 0 is mapped to None (unbounded depth).
        cartClassifier = tree.DecisionTreeClassifier(criterion=splitCriterion, max_depth=maxDepth or None)
        startTime = time.time()
        prettyPrint("Training a CART tree for classification using \"%s\" and maximum depth of %s" % (splitCriterion, maxDepth), "debug")
        cartClassifier.fit(numpy.array(Xtr), numpy.array(ytr))
        prettyPrint("Submitting the test samples", "debug")
        predicted = cartClassifier.predict(Xte)
        endTime = time.time()
        # Compare the predicted and ground truth
        accuracyRate = round(metrics.accuracy_score(predicted, yte), 2)
        # Probability estimates are collected but not returned (as before)
        probs = cartClassifier.predict_proba(Xte)
        probabilities.append(probs)
        timing = endTime - startTime  # Keep track of performance
        if visualizeTree:
            # Visualize the tree
            dot_data = StringIO()
            tree.export_graphviz(cartClassifier, out_file=dot_data)
            graph = pydot.graph_from_dot_data(dot_data.getvalue())
            # Newer pydot releases return a list of graphs.
            if isinstance(graph, (list, tuple)):
                graph = graph[0]
            # Build the name once so the log reports the file actually written
            # (the log used to say "tritonTree_..." while "tree_..." was saved).
            pdfName = "tree_%s.pdf" % getTimestamp()
            prettyPrint("Saving learned CART to \"%s\"" % pdfName, "debug")
            graph.write_pdf(pdfName)
    except Exception as e:
        prettyPrint("Error encountered in \"classifyTree\": %s" % e, "error")
    return accuracyRate, timing, predicted
def visualize_tree(clf, outname, headers):
    """Render a fitted decision tree to a PDF file.

    Args:
        clf: fitted tree estimator to export.
        outname: path of the PDF to write.
        headers: iterable of feature names shown in the rendered nodes.
    """
    from sklearn.externals.six import StringIO
    import pydot
    dot_data = StringIO()
    # Send the DOT text into the buffer. The previous call omitted ``out_file``
    # so the buffer stayed empty and the graph was built from "".
    tree.export_graphviz(clf, out_file=dot_data, feature_names=list(headers))
    graph = pydot.graph_from_dot_data(dot_data.getvalue().decode('latin1').encode('utf8'))
    # Newer pydot releases return a list of graphs.
    if isinstance(graph, (list, tuple)):
        graph = graph[0]
    graph.write_pdf(outname)
def decision_tree_classifier(all_feature_data):
    """Fit a default decision tree, print its training accuracy and export it.

    Args:
        all_feature_data: pair whose first element holds the feature rows and
            whose second element holds the labels.

    Returns:
        The fitted classifier. The tree is also written to ``cityscapes.dot``.
    """
    features = np.asarray(all_feature_data[0])
    label = np.asarray(all_feature_data[1])
    data = features[:, :]
    # Default hyper-parameters; the original kept an explicit, commented-out
    # configuration (gini, best splitter, max_depth=5, ...) as an alternative.
    clf = DecisionTreeClassifier()
    fit_clf = clf.fit(data, label)
    predicted = fit_clf.predict(data)
    hits = np.sum(predicted == label)
    accuracy = float(hits) / len(label)
    print("Training accuracy is " + str(accuracy))
    with open("cityscapes.dot", 'w') as dot_handle:
        tree.export_graphviz(clf, out_file=dot_handle)
    # PDF rendering through pydotplus and 10-fold cross validation were
    # present only as commented-out code in the original.
    return fit_clf
def makePrediction(para,rawData,totalNumRows,labels):
    """Train a decision tree on the leading rows and evaluate it on the rest.

    The first ``floor(totalNumRows * para['trainingSetPercent'])`` rows are
    used for training. Label 1 is treated as "failure" and label 0 as
    "success" when computing precision, recall and F-measure.

    Args:
        para: dict providing ``'trainingSetPercent'``.
        rawData: feature rows, ordered.
        totalNumRows: number of rows used to size the training split.
        labels: labels aligned with ``rawData``.

    Returns:
        ``(predictFailure, sameFailureNum, precision, recall, F_measure)``.
        The three metrics are 0.0 when no failure was predicted correctly.
    """
    traingSetSize = int(math.floor(totalNumRows * para['trainingSetPercent']))
    print('%d instances are selected as training dataset!' % traingSetSize)
    trainX = np.array(rawData[0:traingSetSize])
    trainY = np.array(labels[0:traingSetSize])
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(trainX, trainY)

    testingX = rawData[traingSetSize:]
    testingY = labels[traingSetSize:]
    prediction = list(clf.predict(testingX))
    if len(prediction) != len(testingY):
        print ('prediction and testingY have different length and SOMEWHERE WRONG!')

    sameLabelNum = 0     # predictions that match the true label
    sameFailureNum = 0   # matching predictions whose label is 1 (failure)
    for predicted, actual in zip(prediction, testingY):
        if predicted == actual:
            sameLabelNum += 1
            if predicted == 1:
                sameFailureNum += 1
    accuracy = float(sameLabelNum) / len(testingY)
    print ('accuracy is %.5f:' % accuracy)

    predictSuccess = sum(1 for item in prediction if item == 0)
    predictFailure = sum(1 for item in prediction if item == 1)
    testSuccess = sum(1 for tt in testingY if tt == 0)
    testFailure = sum(1 for tt in testingY if tt == 1)
    print(predictSuccess,predictFailure,testSuccess,testFailure,sameFailureNum)

    # Bind the metrics up front: the ``return`` used to raise NameError
    # whenever no failure was predicted correctly.
    precision = recall = F_measure = 0.0
    if sameFailureNum == 0:
        print ('precision is 0 and recall is 0')
    else:
        precision = float(sameFailureNum) / (predictFailure)
        print('precision is %.5f' % precision)
        recall = float(sameFailureNum) / (testFailure)
        print('recall is %.5f' % recall)
        F_measure = 2 * precision * recall / (precision + recall)
        print('F_measure is %.5f' % F_measure)
    return predictFailure,sameFailureNum,precision,recall,F_measure
def do_training(self, speech_corpus, text_corpus):
    """Train a decision tree on features dumped from ``speech_corpus`` and save it.

    Does nothing when ``self.model`` is already set. Otherwise the vectorisers
    and classifier are pickled to ``self.model_file``, a DOT rendering is
    written to ``self.model_file + '.dot'`` and ``self.verify`` is called to
    reload the trained model.

    Args:
        speech_corpus: iterable of utterances exposing ``dump_features``.
        text_corpus: not used by this method.
    """
    if self.model:  ## if already trained...
        return

    ## 1) get data:
    #### [Added dump_features method to Utterance class, use that: ]
    x_data = []
    y_data = []
    for utterance in speech_corpus:
        utt_feats = utterance.dump_features(self.target_nodes,
                                            self.context_list, return_dict=True)
        for example in utt_feats:
            assert 'response' in example,example
            # Split each example into its target and its remaining features.
            y_data.append({'response': example['response']})
            del example['response']
            x_data.append(example)

    ## Handle categorical features (strings) but keep numerical ones
    ## as they are:
    x_vectoriser = DictVectorizer()
    x_data = x_vectoriser.fit_transform(x_data).toarray()
    y_vectoriser = DictVectorizer()
    y_data = y_vectoriser.fit_transform(y_data).toarray()

    ## 2) train classifier:
    model = tree.DecisionTreeClassifier(min_samples_leaf=self.min_samples_leaf)
    model.fit(x_data, y_data)
    print('\n Trained classifier: ')
    print(model)
    print('\n Trained x vectoriser:')
    print(x_vectoriser)
    print('Feature names:')
    print(x_vectoriser.get_feature_names())
    print('\n Trained y vectoriser:')
    print(y_vectoriser)
    print('Feature names:')
    print(y_vectoriser.get_feature_names())

    ## 3) Save classifier by pickling (``with`` closes the file on error too):
    with open(self.model_file, 'wb') as output:
        pickle.dump([x_vectoriser, y_vectoriser, model], output)

    ## Write ASCII tree representation (which can be plotted):
    tree.export_graphviz(model, out_file=self.model_file + '.dot',
                         feature_names=x_vectoriser.get_feature_names())
    self.verify(self.voice_resources)  # ## reload -- get self.model etc
def classifyTreeKFold(X, y, kFold=2, visualizeTree=False, splitCriterion="gini", maxDepth=0):
    """ Classifies data using CART and K-Fold cross validation

    Args:
        X, y: samples and labels, indexable by position.
        kFold: number of folds.
        visualizeTree: when true, each fold's tree is written to
            ``tritonTree_<fold>.pdf``.
        splitCriterion: split criterion handed to the tree. The body already
            referenced this name although the signature lacked it, so it is
            added as a trailing keyword argument.
        maxDepth: maximum tree depth; 0 is treated as "no limit". Added for
            the same reason as ``splitCriterion``.

    Returns:
        ``(accuracyRates, timings, groundTruthLabels, predictedLabels)``, one
        entry per fold.
        NOTE(review): on failure the function logs the error and returns
        ``([], [])`` -- a 2-tuple, unlike the 4-tuple above. Left unchanged
        because callers may depend on it; confirm.
    """
    try:
        groundTruthLabels, predictedLabels = [], []
        accuracyRates = []  # Meant to hold the accuracy rates
        probabilities = []
        timings = []
        kFoldValidator = KFold(n=len(X), n_folds=kFold, shuffle=False)
        currentFold = 1
        for trainingIndices, testIndices in kFoldValidator:
            # Prepare fresh training and testing datasets for this fold
            trainingDataset = [X[trIndex] for trIndex in trainingIndices]
            trainingLabels = [y[trIndex] for trIndex in trainingIndices]
            testDataset = [X[teIndex] for teIndex in testIndices]
            testLabels = [y[teIndex] for teIndex in testIndices]
            # Perform classification. scikit-learn rejects max_depth=0, so 0
            # is mapped to None (unbounded depth).
            startTime = time.time()
            cartClassifier = tree.DecisionTreeClassifier(criterion=splitCriterion, max_depth=maxDepth or None)
            prettyPrint("Training a CART tree for classification using \"%s\" and maximum depth of %s" % (splitCriterion, maxDepth), "debug")
            cartClassifier.fit(numpy.array(trainingDataset), numpy.array(trainingLabels))
            prettyPrint("Submitting the test samples", "debug")
            predicted = cartClassifier.predict(testDataset)
            endTime = time.time()
            # Add that to the groundTruthLabels and predictedLabels matrices
            groundTruthLabels.append(testLabels)
            predictedLabels.append(predicted)
            # Compare the predicted and ground truth and append result to list
            accuracyRates.append(round(metrics.accuracy_score(predicted, testLabels), 2))
            # Probability estimates are collected but not returned (as before)
            probs = cartClassifier.predict_proba(testDataset)
            probabilities.append(probs)
            timings.append(endTime - startTime)  # Keep track of performance
            if visualizeTree:
                # Visualize the tree
                dot_data = StringIO()
                tree.export_graphviz(cartClassifier, out_file=dot_data)
                graph = pydot.graph_from_dot_data(dot_data.getvalue())
                # Newer pydot releases return a list of graphs.
                if isinstance(graph, (list, tuple)):
                    graph = graph[0]
                prettyPrint("Saving learned CART to \"tritonTree_%s.pdf\"" % currentFold, "debug")
                graph.write_pdf("tritonTree_%s.pdf" % currentFold)
            currentFold += 1
    except Exception as e:
        prettyPrint("Error encountered in \"classifyTreeKFold\": %s" % e, "error")
        return [], []
    return accuracyRates, timings, groundTruthLabels, predictedLabels
def train_tree_classifer(features, labels, model_output_path):
    """
    train_tree_classifer will train a DecisionTree and write it out to a pdf file
    features: 2D array of each input feature for each sample
    labels: array of string labels classifying each sample
    model_output_path: path for storing the trained tree model

    The best tree found by the grid search is rendered to ``best_tree.pdf``;
    the confusion matrix and classification report for the held-out 20% are
    printed.
    """
    # Save 20% of data for performance evaluation. The garbled original did
    # not pass ``labels`` and unpacked only three values, leaving ``X_test``
    # undefined.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        features, labels, test_size=0.2)
    param = [
        {
            "max_depth": [None, 10, 100, 1000, 10000]
        }
    ]
    dtree = tree.DecisionTreeClassifier(random_state=0)
    # 10-fold cross validation; folds and parameter sets are trained in
    # parallel (n_jobs=20). The class is ``GridSearchCV``; the lowercase
    # spelling does not exist.
    clf = grid_search.GridSearchCV(dtree, param,
                                   cv=10, n_jobs=20, verbose=3)
    clf.fit(X_train, y_train)
    # NOTE(review): the model is saved only when ``model_output_path`` already
    # exists -- confirm this is the intended check.
    if os.path.exists(model_output_path):
        joblib.dump(clf.best_estimator_, model_output_path)
    else:
        print("Cannot save trained tree model to {0}.".format(model_output_path))
    dot_data = tree.export_graphviz(clf.best_estimator_, out_file=None)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf('best_tree.pdf')
    print("\nBest parameters set:")
    print(clf.best_params_)
    y_predict = clf.predict(X_test)
    labels = sorted(list(set(labels)))
    print("\nConfusion matrix:")
    print("Labels: {0}\n".format(",".join(labels)))
    print(confusion_matrix(y_test, y_predict, labels=labels))
    print("\nClassification report:")
    print(classification_report(y_test, y_predict))
# NOTE(review): this snippet appears to have been garbled during extraction.
# The ``data=input_data[:,`` line never closes its bracket, and the final
# ``clf.fit(data, out_file=f)`` passes an ``out_file`` keyword with an
# undefined ``f``. Restore it from the upstream project before use.
def decision_tree_manual_classifier(all_feature_data):
input_data=np.asarray(all_feature_data[0])
label=np.asarray(all_feature_data[1])
# Append each row's label to its feature list, one combined row per sample.
data_for_manual_tree=[]
for row_index in range(len(all_feature_data[0])):
current_row=all_feature_data[0][row_index]+[all_feature_data[1][row_index]]
data_for_manual_tree.append(current_row)
# # splitting rule
# set1,set2 = divideset(data_for_manual_tree,14)
# # print(set1)
# print(uniquecounts(set1))
# print("")
# # print(set2)
# print(uniquecounts(set2))
#
# print entropy(set1)
# print entropy(set2)
# print entropy(data_for_manual_tree)
# Build the hand-written tree with ``buildtree``; note that the local name
# ``tree`` hides any module of the same name inside this function.
tree = buildtree(data_for_manual_tree)
data=input_data[:,
# class_weight=None
# )
# Classify every row with the hand-built tree; the predicted label is not
# stored or returned in the visible lines.
for row_index in range(len(all_feature_data[0])):
to_be_predicted_data=all_feature_data[0][row_index]
predicted_label=classify(to_be_predicted_data,tree)
clf = DecisionTreeClassifier()
fit_clf=clf.fit(data, out_file=f)
return fit_clf
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。