Accurate and interpretable nanoSAR models from genetic programming-based decision tree construction approaches

Created by W. Langdon from gp-bibliography.bib, Revision: 1.3973

% Journal article record. DOI is stored bare (no resolver prefix); the single
% URL field points at the White Rose repository copy.
@Article{Oksel:2016:Nanotoxicology,
  author =       "Ceyda Oksel and David A. Winkler and Cai Y. Ma and
                 Terry Wilkins and Xue Z. Wang",
  title =        "Accurate and interpretable {nanoSAR} models from
                 genetic programming-based decision tree construction
                 approaches",
  journal =      "Nanotoxicology",
  year =         "2016",
  volume =       "10",
  number =       "7",
  pages =        "1001--1012",
  keywords =     "genetic algorithms, genetic programming",
  publisher =    "Taylor \& Francis",
  ISSN =         "1743-5390",
  bibsource =    "OAI-PMH server at eprints.whiterose.ac.uk",
  oai =          "oai:eprints.whiterose.ac.uk:96571",
  URL =          "http://eprints.whiterose.ac.uk/96571/",
  DOI =          "10.3109/17435390.2016.1161857",
  size =         "12 pages",
  abstract =     "The number of engineered nanomaterials (ENMs) being
                 exploited commercially is growing rapidly, due to the
                 novel properties they exhibit. Clearly, it is important
                 to understand and minimise any risks to health or the
                 environment posed by the presence of ENMs. Data-driven
                 models that decode the relationships between the
                 biological activities of ENMs and their physicochemical
                 characteristics provide an attractive means of
                 maximising the value of scarce and expensive
                 experimental data. Although such structure-activity
                 relationship (SAR) methods have become very useful
                 tools for modelling nanotoxicity endpoints (nanoSAR),
                 they have limited robustness and predictivity and, most
                 importantly, interpretation of the models they generate
                 is often very difficult. New computational modelling
                 tools or new ways of using existing tools are required
                 to model the relatively sparse and sometimes lower
                 quality data on the biological effects of ENMs. The
                 most commonly used SAR modelling methods work best with
                 large data sets, are not particularly good at feature
                 selection, can be relatively opaque to interpretation,
                 and may not account for nonlinearity in the
                 structure-property relationships. To overcome these
                 limitations, we describe the application of a novel
                 algorithm, a genetic programming-based decision tree
                 construction tool (GPTree) to nanoSAR modelling. We
                 demonstrate the use of GPTree in the construction of
                 accurate and interpretable nanoSAR models by applying
                 it to four diverse literature datasets. We describe the
                 algorithm and compare model results across the four
                 studies. We show that GPTree generates models with
                 accuracies equivalent to or superior to those of prior
                 modelling studies on the same datasets. GPTree is a
                 robust, automatic method for generation of accurate
                 nanoSAR models with important advantages that it works
                 with small datasets, automatically selects descriptors,
                 and provides significantly improved interpretability of
                 models.",
  notes =        "Supplementary material available online",
}

Genetic Programming entries for Ceyda Oksel, David A Winkler, Cai Y Ma, Terry Wilkins, Xue Zhong Wang

Citations