Point Cloud Library (PCL)  1.9.1
decision_tree_trainer.h
1 /*
2  * Software License Agreement (BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (c) 2010-2011, Willow Garage, Inc.
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * * Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * * Redistributions in binary form must reproduce the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer in the documentation and/or other materials provided
18  * with the distribution.
19  * * Neither the name of Willow Garage, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #ifndef PCL_ML_DT_DECISION_TREE_TRAINER_H_
39 #define PCL_ML_DT_DECISION_TREE_TRAINER_H_
40 
41 #include <pcl/common/common.h>
42 
43 #include <pcl/ml/dt/decision_tree.h>
44 #include <pcl/ml/feature_handler.h>
45 #include <pcl/ml/stats_estimator.h>
46 #include <pcl/ml/dt/decision_tree_data_provider.h>
47 
48 #include <vector>
49 
50 namespace pcl
51 {
52 
53  /** \brief Trainer for decision trees. */
54  template <
55  class FeatureType,
56  class DataSet,
57  class LabelType,
58  class ExampleIndex,
59  class NodeType >
60  class PCL_EXPORTS DecisionTreeTrainer
61  {
62 
63  public:
64 
65  /** \brief Constructor. */
67  /** \brief Destructor. */
68  virtual
70 
71  /** \brief Sets the feature handler used to create and evaluate features.
72  * \param[in] feature_handler The feature handler.
73  */
74  inline void
76  {
77  feature_handler_ = &feature_handler;
78  }
79 
80  /** \brief Sets the object for estimating the statistics for tree nodes.
81  * \param[in] stats_estimator The statistics estimator.
82  */
83  inline void
85  {
86  stats_estimator_ = &stats_estimator;
87  }
88 
89  /** \brief Sets the maximum depth of the learned tree.
90  * \param[in] max_tree_depth Maximum depth of the learned tree.
91  */
92  inline void
93  setMaxTreeDepth (const size_t max_tree_depth)
94  {
95  max_tree_depth_ = max_tree_depth;
96  }
97 
98  /** \brief Sets the number of features used to find optimal decision features.
99  * \param[in] num_of_features The number of features.
100  */
101  inline void
102  setNumOfFeatures (const size_t num_of_features)
103  {
104  num_of_features_ = num_of_features;
105  }
106 
107  /** \brief Sets the number of thresholds tested for finding the optimal decision threshold on the feature responses.
108  * \param[in] num_of_threshold The number of thresholds.
109  */
110  inline void
111  setNumOfThresholds (const size_t num_of_threshold)
112  {
113  num_of_thresholds_ = num_of_threshold;
114  }
115 
116  /** \brief Sets the input data set used for training.
117  * \param[in] data_set The data set used for training.
118  */
119  inline void
120  setTrainingDataSet (DataSet & data_set)
121  {
122  data_set_ = data_set;
123  }
124 
125  /** \brief Example indices that specify the data used for training.
126  * \param[in] examples The examples.
127  */
128  inline void
129  setExamples (std::vector<ExampleIndex> & examples)
130  {
131  examples_ = examples;
132  }
133 
134  /** \brief Sets the label data corresponding to the example data.
135  * \param[in] label_data The label data.
136  */
137  inline void
138  setLabelData (std::vector<LabelType> & label_data)
139  {
140  label_data_ = label_data;
141  }
142 
143  /** \brief Sets the minimum number of examples to continue growing a tree.
144  * \param[in] n Number of examples
145  */
146  inline void
148  {
149  min_examples_for_split_ = n;
150  }
151 
152  /** \brief Specify the thresholds to be used when evaluating features.
153  * \param[in] thres The threshold values.
154  */
155  void
156  setThresholds (std::vector<float> & thres)
157  {
158  thresholds_ = thres;
159  }
160 
161  /** \brief Specify the data provider.
162  * \param[in] dtdp The data provider that should implement getDatasetAndLabels(...) function
163  */
164  void
166  {
167  decision_tree_trainer_data_provider_ = dtdp;
168  }
169 
170  /** \brief Specify if the features are randomly generated at each split node.
171  * \param[in] b Do it or not.
172  */
173  void
175  {
176  random_features_at_split_node_ = b;
177  }
178 
179  /** \brief Trains a decision tree using the set training data and settings.
180  * \param[out] tree Destination for the trained tree.
181  */
182  void
183  train (DecisionTree<NodeType> & tree);
184 
185  protected:
186 
187  /** \brief Trains a decision tree node from the specified features, label data, and examples.
188  * \param[in] features The feature pool used for training.
189  * \param[in] examples The examples used for training.
190  * \param[in] label_data The label data corresponding to the examples.
191  * \param[in] max_depth The maximum depth of the remaining tree.
192  * \param[out] node The resulting node.
193  */
194  void
195  trainDecisionTreeNode (std::vector<FeatureType> & features,
196  std::vector<ExampleIndex> & examples,
197  std::vector<LabelType> & label_data,
198  size_t max_depth,
199  NodeType & node);
200 
201  /** \brief Creates uniformely distrebuted thresholds over the range of the supplied values.
202  * \param[in] num_of_thresholds The number of thresholds to create.
203  * \param[in] values The values for estimating the expected value range.
204  * \param[out] thresholds The resulting thresholds.
205  */
206  static void
207  createThresholdsUniform (const size_t num_of_thresholds,
208  std::vector<float> & values,
209  std::vector<float> & thresholds);
210 
211  private:
212 
213  /** \brief Maximum depth of the learned tree. */
214  size_t max_tree_depth_;
215  /** \brief Number of features used to find optimal decision features. */
216  size_t num_of_features_;
217  /** \brief Number of thresholds. */
218  size_t num_of_thresholds_;
219 
220  /** \brief FeatureHandler instance, responsible for creating and evaluating features. */
222  /** \brief StatsEstimator instance, responsible for gathering stats about a node. */
224 
225  /** \brief The training data set. */
226  DataSet data_set_;
227  /** \brief The label data. */
228  std::vector<LabelType> label_data_;
229  /** \brief The example data. */
230  std::vector<ExampleIndex> examples_;
231 
232  /** \brief Minimum number of examples to split a node. */
233  size_t min_examples_for_split_;
234  /** \brief Thresholds to be used instead of generating uniform distributed thresholds. */
235  std::vector<float> thresholds_;
236  /** \brief The data provider which is called before training a specific tree, if pointer is NULL, then data_set_ is used. */
237  boost::shared_ptr<pcl::DecisionTreeTrainerDataProvider<FeatureType, DataSet, LabelType, ExampleIndex, NodeType> > decision_tree_trainer_data_provider_;
238  /** \brief If true, random features are generated at each node, otherwise, at start of training the tree */
239  bool random_features_at_split_node_;
240  };
241 
242 }
243 
244 #include <pcl/ml/impl/dt/decision_tree_trainer.hpp>
245 
246 #endif
pcl
This file defines compatibility wrappers for low level I/O functions.
Definition: convolution.h:45
pcl::DecisionTreeTrainer::setRandomFeaturesAtSplitNode
void setRandomFeaturesAtSplitNode(bool b)
Specify if the features are randomly generated at each split node.
Definition: decision_tree_trainer.h:174
common.h
pcl::DecisionTreeTrainer::setMinExamplesForSplit
void setMinExamplesForSplit(size_t n)
Sets the minimum number of examples to continue growing a tree.
Definition: decision_tree_trainer.h:147
pcl::DecisionTreeTrainer::setMaxTreeDepth
void setMaxTreeDepth(const size_t max_tree_depth)
Sets the maximum depth of the learned tree.
Definition: decision_tree_trainer.h:93
pcl::DecisionTreeTrainer::setLabelData
void setLabelData(std::vector< LabelType > &label_data)
Sets the label data corresponding to the example data.
Definition: decision_tree_trainer.h:138
pcl::DecisionTree
Class representing a decision tree.
Definition: decision_tree.h:51
pcl::FeatureHandler
Utility class interface which is used for creating and evaluating features.
Definition: feature_handler.h:54
pcl::DecisionTreeTrainer::setNumOfFeatures
void setNumOfFeatures(const size_t num_of_features)
Sets the number of features used to find optimal decision features.
Definition: decision_tree_trainer.h:102
pcl::DecisionTreeTrainer
Trainer for decision trees.
Definition: decision_tree_trainer.h:60
pcl::DecisionTreeTrainerDataProvider
Definition: decision_tree_data_provider.h:46
pcl::DecisionTreeTrainer::setExamples
void setExamples(std::vector< ExampleIndex > &examples)
Example indices that specify the data used for training.
Definition: decision_tree_trainer.h:129
pcl::DecisionTreeTrainer::setNumOfThresholds
void setNumOfThresholds(const size_t num_of_threshold)
Sets the number of thresholds tested for finding the optimal decision threshold on the feature responses.
Definition: decision_tree_trainer.h:111
pcl::DecisionTreeTrainer::setFeatureHandler
void setFeatureHandler(pcl::FeatureHandler< FeatureType, DataSet, ExampleIndex > &feature_handler)
Sets the feature handler used to create and evaluate features.
Definition: decision_tree_trainer.h:75
pcl::DecisionTreeTrainer::setTrainingDataSet
void setTrainingDataSet(DataSet &data_set)
Sets the input data set used for training.
Definition: decision_tree_trainer.h:120
pcl::DecisionTreeTrainer::setThresholds
void setThresholds(std::vector< float > &thres)
Specify the thresholds to be used when evaluating features.
Definition: decision_tree_trainer.h:156
pcl::StatsEstimator< LabelType, NodeType, DataSet, ExampleIndex >
pcl::DecisionTreeTrainer::setStatsEstimator
void setStatsEstimator(pcl::StatsEstimator< LabelType, NodeType, DataSet, ExampleIndex > &stats_estimator)
Sets the object for estimating the statistics for tree nodes.
Definition: decision_tree_trainer.h:84
pcl::DecisionTreeTrainer::setDecisionTreeDataProvider
void setDecisionTreeDataProvider(boost::shared_ptr< pcl::DecisionTreeTrainerDataProvider< FeatureType, DataSet, LabelType, ExampleIndex, NodeType > > &dtdp)
Specify the data provider.
Definition: decision_tree_trainer.h:165