An Evolutionary Algorithm Approach for Feature Generation from Sequence Data and Its Application to DNA Splice Site Prediction

Created by W. Langdon from gp-bibliography.bib Revision: 1.3872

@comment{Journal article by Kamath et al., IEEE/ACM TCBB 9(5), 2012.
  The DOI field holds the bare identifier (no "doi:" or resolver prefix),
  and the acronym DNA is brace-protected in the title so that
  sentence-casing bibliography styles keep it in upper case.}
@Article{Kamath:2012:cbb,
  author =       "Kamath, Uday and Compton, Jack and
                 Islamaj-Dogan, Rezarta and {De Jong}, Kenneth A. and
                 Shehu, Amarda",
  title =        "An Evolutionary Algorithm Approach for Feature
                 Generation from Sequence Data and Its Application to
                 {DNA} Splice Site Prediction",
  journal =      "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  year =         "2012",
  volume =       "9",
  number =       "5",
  pages =        "1387--1398",
  month =        sep # "/" # oct,
  keywords =     "genetic algorithms, genetic programming, Evolutionary
                 computation, feature extraction and construction,
                 classifier design and evaluation, data mining, DNA
                 splice sites",
  ISSN =         "1545-5963",
  DOI =          "10.1109/TCBB.2012.53",
  size =         "12 pages",
  abstract =     "Associating functional information with biological
                 sequences remains a challenge for machine learning
                 methods. The performance of these methods often depends
                 on deriving predictive features from the sequences
                 sought to be classified. Feature generation is a
                 difficult problem, as the connection between the
                 sequence features and the sought property is not known
                 a priori. It is often the task of domain experts or
                 exhaustive feature enumeration techniques to generate a
                 few features whose predictive power is then tested in
                 the context of classification. This paper proposes an
                 evolutionary algorithm to effectively explore a large
                 feature space and generate predictive features from
                 sequence data. The effectiveness of the algorithm is
                 demonstrated on an important component of the
                 gene-finding problem, DNA splice site prediction. This
                 application is chosen due to the complexity of the
                 features needed to obtain high classification accuracy
                 and precision. Our results test the effectiveness of
                 the obtained features in the context of classification
                 by Support Vector Machines and show significant
                 improvement in accuracy and precision over
                 state-of-the-art approaches.",
  notes =        "Entered for 2011 HUMIES GECCO 2011
                 http://www.genetic-programming.org/combined.php

                 Also known as \cite{6185531}",
}

Genetic Programming entries for Uday Kamath, Jack Compton, Rezarta Islamaj-Dogan, Kenneth De Jong, and Amarda Shehu

Citations