Evaluation of Mutual Information and Genetic Programming for Feature Selection in QSAR

Created by W. Langdon from gp-bibliography.bib Revision: 1.3872

% Journal article entry. The DOI field holds the bare DOI (no "doi:" or
% resolver prefix) so that DOI-aware styles can build the link themselves.
% The non-standard "notes" field is kept as-is; standard styles ignore
% field names they do not know.
@Article{venkatraman:2004:CIM,
  author =       "Venkatraman, Vishwesh and Dalby, Andrew Rowland and
                 Yang, Zheng Rong",
  title =        "Evaluation of Mutual Information and Genetic
                 Programming for Feature Selection in {QSAR}",
  journal =      "Journal of Chemical Information and Modeling",
  year =         "2004",
  volume =       "44",
  number =       "5",
  pages =        "1686--1692",
  keywords =     "genetic algorithms, genetic programming",
  DOI =          "10.1021/ci049933v",
  abstract =     "Feature selection is a key step in Quantitative
                 Structure Activity Relationship (QSAR) analysis. Chance
                 correlations and multicollinearity are two major
                 problems often encountered when attempting to find
                 generalised QSAR models for use in drug design. Optimal
                 QSAR models require an objective variable relevance
                 analysis step for producing robust classifiers with low
                 complexity and good predictive accuracy. Genetic
                 algorithms coupled with information theoretic
                 approaches such as mutual information have been used to
                 find near-optimal solutions to such multicriteria
                 optimisation problems. In this paper, we describe a
                 novel approach for analyzing QSAR data based on these
                 methods. Our experiments with the Thrombin dataset,
                 previously studied as part of the KDD (Knowledge
                 Discovery and Data Mining) Cup 2001 demonstrate the
                 feasibility of this approach. It has been found that it
                 is important to take into account the data
                 distribution, the rule {"}interestingness{"}, and the
                 need to look at more invariant and monotonic measures
                 of feature selection.",
  notes =        "http://pubs.acs.org/journals/jcisd8/index.html
                 American Chemical Society S0095-2338(04)09933-0

                 School of Biological Sciences, University of Exeter,
                 Exeter EX4 4QF, Great Britain and School of Engineering
                 and Computer Science, University of Exeter, Exeter EX4
                 4QF, Great Britain

                 PMID: 15446827",
}

Genetic Programming entries for Vishwesh Venkatraman, Andrew Rowland Dalby, Zheng Rong Yang

Citations