Impact of Imputation of Missing Values on Genetic Programming based Multiple Feature Construction for Classification

Created by W.Langdon from gp-bibliography.bib Revision:1.3872

% Conference paper (IEEE CEC 2015). The DOI field holds the bare identifier, with no resolver or scheme prefix.
@InProceedings{Tran:2015:CEC,
  author =       "Cao Truong Tran and Peter Andreae and Mengjie Zhang",
  title =        "Impact of Imputation of Missing Values on Genetic
                 Programming based Multiple Feature Construction for
                 Classification",
  booktitle =    "Proceedings of 2015 IEEE Congress on Evolutionary
                 Computation (CEC 2015)",
  year =         "2015",
  editor =       "Yadahiko Murata",
  pages =        "2398--2405",
  address =      "Sendai, Japan",
  month =        "25-28 " # may,
  publisher =    "IEEE Press",
  keywords =     "genetic algorithms, genetic programming",
  DOI =          "10.1109/CEC.2015.7257182",
  abstract =     "Missing values are a common problem in many real world
                 databases. A common way to cope with this problem is to
                 use imputation methods to fill missing values with
                 plausible values. Genetic programming-based multiple
                 feature construction (GPMFC) is a filter approach to
                 multiple feature construction for classifiers using
                 Genetic programming. The GPMFC algorithm has been
                 demonstrated to improve classification performance in
                 decision tree and rule-based classifiers for complete
                 data, but it has not been tested on imputed data. This
                 paper studies the effect of GPMFC on classification
                 accuracy with imputed data and how the choice of
                 different imputation methods (mean imputation, hot deck
                 imputation, Knn imputation, EM imputation and MICE
                 imputation) affects classifiers using constructed
                 features. Results show that GPMFC improves
                 classification performance for datasets with a small
                 amount of missing values. The combination of GPMFC and
                 MICE imputation, in most cases, enhances classification
                 performance for datasets with varying amounts of
                 missing values and obtains the best classification
                 accuracy.",
  notes =        "0950 hrs 15225 CEC2015",
}

Genetic Programming entries for Cao Truong Tran, Peter Andreae, Mengjie Zhang

Citations