Automatic Selection of Training Examples for a Record Deduplication Method Based on Genetic Programming

Created by W. Langdon from gp-bibliography.bib, revision 1.3872

@Comment{Journal article entry. The size, bibdate, bibsource, abstract and
  notes fields are annotation fields outside the classic article field set;
  classic BibTeX styles silently ignore fields they do not know.}
@Article{Goncalves:2010:JIDM,
  title =        "Automatic Selection of Training Examples for a Record
                 Deduplication Method Based on Genetic Programming",
  author =       "Goncalves, Gabriel Silva and
                 {de Carvalho}, Moises G. and
                 Laender, Alberto H. F. and
                 Goncalves, Marcos Andre",
  journal =      "Journal of Information and Data Management",
  year =         "2010",
  number =       "2",
  volume =       "1",
  pages =        "213--228",
  month =        jun,
  keywords =     "genetic algorithms, genetic programming, replica
                 identification, artificial intelligence",
  ISSN =         "2178-7107",
  URL =          "http://seer.lcc.ufmg.br/index.php/jidm/article/view/59",
  size =         "16 pages",
  bibdate =      "2010-11-03",
  bibsource =    "DBLP,
                 http://dblp.uni-trier.de/db/journals/jidm/jidm1.html#GoncalvesCLG10",
  abstract =     "Recently, machine learning techniques have been used
                 to solve the record deduplication problem. However,
                 these techniques require examples, manually generated
                 in most cases, for training purposes. This hinders the
                 use of such techniques because of the cost required to
                 create the set of examples. In this article, we propose
                 an approach based on a deterministic technique to
                 automatically suggest training examples for a
                 deduplication method based on genetic programming. Our
                 experiments with synthetic datasets show that, by using
                 only 15\% of the examples suggested by our
                 approach, it is possible to achieve results in terms of
                 F1 that are equivalent to those obtained when using all
                 the examples, leading to savings in training time of up
                 to 85\%.",
  notes =        "An official publication of the Brazilian Computer
                 Society Special Interest Group on Databases",
}

Genetic Programming entries for Gabriel Silva Goncalves, Moises G de Carvalho, Alberto H F Laender, Marcos Andre Goncalves

Citations