Active Learning Genetic programming for record deduplication

Created by W. Langdon from gp-bibliography.bib Revision: 1.3973

@Comment{
  Conference paper from CEC 2010 (part of WCCI 2010, per the notes field).
  The DOI field holds the bare identifier only, with no "doi:" or resolver
  prefix, so that bibliography styles can build the link themselves.
  The day range in the month field uses "--" (en dash) as for any numeric
  range. "notes" and "isbn13" are non-standard fields and are kept as-is.
}
@InProceedings{deFreitas:2010:cec,
  author =       "Junio {de Freitas} and Gisele L. Pappa and 
                 Altigran S. {da Silva} and Marcos A. Goncalves and Edleno Moura and 
                 Adriano Veloso and Alberto H. F. Laender and 
                 Moises G. {de Carvalho}",
  title =        "Active Learning Genetic programming for record
                 deduplication",
  booktitle =    "IEEE Congress on Evolutionary Computation (CEC 2010)",
  year =         "2010",
  address =      "Barcelona, Spain",
  month =        "18--23 " # jul,
  publisher =    "IEEE Press",
  keywords =     "genetic algorithms, genetic programming",
  isbn13 =       "978-1-4244-6910-9",
  URL =          "http://www.dcc.ufmg.br/~adrianov/papers/CEC10/cec10.pdf",
  abstract =     "The great majority of genetic programming (GP)
                 algorithms that deal with the classification problem
                 follow a supervised approach, i.e., they consider that
                 all fitness cases available to evaluate their models
                 are labelled. However, in certain application domains,
                 a lot of human effort is required to label training
                 data, and methods following a semi-supervised approach
                 might be more appropriate. This is because they
                 significantly reduce the time required for data
                 labelling while maintaining acceptable accuracy rates.
                 This paper presents the Active Learning GP (AGP), a
                 semi-supervised GP, and instantiates it for the data
                 deduplication problem. AGP uses an active learning
                 approach in which a committee of multi-attribute
                 functions votes for classifying record pairs as
                 duplicates or not. When the committee majority voting
                 is not enough to predict the class of the data pairs, a
                 user is called to solve the conflict. The method was
                 applied to three datasets and compared to two other
                 deduplication methods. Results show that AGP guarantees
                 the quality of the deduplication while reducing the
                 number of labeled examples needed.",
  DOI =          "10.1109/CEC.2010.5586104",
  notes =        "WCCI 2010. Also known as \cite{5586104}",
}

Genetic Programming entries for Junio de Freitas, Gisele L Pappa, Altigran S da Silva, Marcos Andre Goncalves, Edleno Silva de Moura, Adriano Veloso, Alberto H F Laender, Moises G de Carvalho

Citations