Operator equalisation, bloat and overfitting: a study on human oral bioavailability prediction

Created by W.Langdon from gp-bibliography.bib Revision:1.4420

  author =       "Sara Silva and Leonardo Vanneschi",
  title =        "Operator equalisation, bloat and overfitting: a study
                 on human oral bioavailability prediction",
  booktitle =    "GECCO '09: Proceedings of the 11th Annual conference
                 on Genetic and evolutionary computation",
  year =         "2009",
  editor =       "Guenther Raidl and Franz Rothlauf and
                 Giovanni Squillero and Rolf Drechsler and Thomas Stuetzle and
                 Mauro Birattari and Clare Bates Congdon and
                 Martin Middendorf and Christian Blum and Carlos Cotta and
                 Peter Bosman and Joern Grahl and Joshua Knowles and
                 David Corne and Hans-Georg Beyer and Ken Stanley and
                 Julian F. Miller and Jano {van Hemert} and
                 Tom Lenaerts and Marc Ebner and Jaume Bacardit and
                 Michael O'Neill and Massimiliano {Di Penta} and Benjamin Doerr and
                 Thomas Jansen and Riccardo Poli and Enrique Alba",
  pages =        "1115--1122",
  address =      "Montreal",
  publisher =    "ACM",
  publisher_address = "New York, NY, USA",
  month =        "8-12 " # jul,
  organisation = "SigEvo",
  keywords =     "genetic algorithms, genetic programming",
  isbn13 =       "978-1-60558-325-9",
  bibsource =    "DBLP, http://dblp.uni-trier.de",
  DOI =          "10.1145/1569901.1570051",
  abstract =     "Operator equalisation was recently proposed as a new
                 bloat control technique for genetic programming. By
                 controlling the distribution of program lengths inside
                 the population, it can bias the search towards smaller
                 or larger programs. In this paper we propose a new
                 implementation of operator equalisation and compare it
                 to a previous version, using a hard real-world
                 regression problem where bloat and overfitting are
                 major issues. The results show that both
                 implementations of operator equalisation are completely
                 bloat-free, producing smaller individuals than standard
                 genetic programming, without compromising the
                 generalization ability. We also show that the new
                 implementation of operator equalisation is more
                 efficient and exhibits a more predictable and reliable
                 behavior than the previous version. We advance some
                 arguable ideas regarding the relationship between bloat
                 and overfitting, and support them with our results.",
  notes =        "GECCO-2009 A joint meeting of the eighteenth
                 international conference on genetic algorithms
                 (ICGA-2009) and the fourteenth annual genetic
                 programming conference (GP-2009).

                 ACM Order Number 910092.",

Genetic Programming entries for Sara Silva Leonardo Vanneschi