Inductive data mining: Automatic generation of decision trees from data for QSAR modelling and process historical data analysis

Created by W.Langdon from gp-bibliography.bib Revision:1.3872

% Book chapter (pp. 581--586) presenting the GPTree, YAdapt and GPForest
% decision-tree induction methods; DOI is stored bare, without a resolver prefix.
@InCollection{Ma2008581,
  author =       "Ma, Chao Y and Buontempo, Frances V and Wang, Xue Z",
  title =        "Inductive data mining: Automatic generation of
                 decision trees from data for {QSAR} modelling and process
                 historical data analysis",
  editor =       "Bertrand Braunschweig and Xavier Joulia",
  booktitle =    "18th European Symposium on Computer Aided Process
                 Engineering",
  publisher =    "Elsevier",
  year =         "2008",
  volume =       "25",
  pages =        "581--586",
  series =       "Computer Aided Chemical Engineering",
  ISSN =         "1570-7946",
  DOI =          "10.1016/S1570-7946(08)80102-2",
  URL =          "http://www.sciencedirect.com/science/article/B8G5G-4TK2DGX-3M/2/2d0cbf83807000db928a8f08986360cf",
  keywords =     "genetic algorithms, genetic programming, inductive
                 data mining, decision trees, QSAR, process historical
                 data analysis",
  abstract =     "A new inductive data mining method for automatic
                 generation of decision trees from data (GPTree) is
                 presented. Compared with other decision tree induction
                 techniques that are based upon recursive partitioning
                 employing greedy searches to choose the best splitting
                 attribute and value at each node therefore will
                 necessarily miss regions of the search space, GPTree
                 can overcome the problem. In addition, the approach is
                 extended to a new method (YAdapt) that models the
                 original continuous endpoint by adaptively finding
                 suitable ranges to describe the endpoints during the
                 tree induction process, removing the need for
                 discretization prior to tree induction and allowing the
                 ordinal nature of the endpoint to be taken into account
                 in the models built. A strategy for further improving
                 the predictive performance for previously unseen data
                 is investigated that uses multiple decisions trees,
                 i.e., a decision forest, and a majority voting strategy
                 to give a prediction (GPForest). The methods were
                 applied to QSAR (quantitative structure--activity
                 relationships) modeling for eco-toxicity prediction of
                 chemicals and to the analysis of a historical database
                 for a wastewater treatment plant.",
  notes =        "See \cite{Ma20091602}",
}

Genetic Programming entries for Cai-Yun Ma, Frances V Buontempo, and Xue Zhong Wang

Citations