Learning Expressive Linkage Rules using Genetic Programming

Created by W.Langdon from gp-bibliography.bib Revision:1.4420

  author =       "Robert Isele and Christian Bizer",
  title =        "Learning Expressive Linkage Rules using Genetic
                 Programming",
  journal =      "Proceedings of the VLDB Endowment",
  year =         "2012",
  volume =       "5",
  number =       "11",
  pages =        "1638--1649",
  month =        jul,
  editor =       "Ahmet Sacan and Nesime Tatbul",
  keywords =     "genetic algorithms, genetic programming, VLDB",
  URL =          "http://vldb.org/pvldb/vol5/p1638_robertisele_vldb2012.pdf",
  URL =          "http://www.vldb.org/pvldb/vol5.html",
  URL =          "http://arxiv.org/abs/1208.0291",
  URL =          "http://arxiv.org/pdf/1208.0291v1",
  size =         "12 pages",
  abstract =     "A central problem in data integration and data
                 cleansing is to find entities in different data sources
                 that describe the same real-world object. Many existing
                 methods for identifying such entities rely on explicit
                 linkage rules which specify the conditions that
                 entities must fulfil in order to be considered to
                 describe the same real-world object. In this paper, we
                 present the GenLink algorithm for learning expressive
                 linkage rules from a set of existing reference links
                 using genetic programming. The algorithm is capable of
                 generating linkage rules which select discriminative
                 properties for comparison, apply chains of data
                 transformations to normalise property values, choose
                 appropriate distance measures and thresholds and
                 combine the results of multiple comparisons using
                 non-linear aggregation functions. Our experiments show
                 that the GenLink algorithm outperforms the
                 state-of-the-art genetic programming approach to
                 learning linkage rules recently presented by Carvalho
                 et al. and is capable of learning linkage rules which
                 achieve a similar accuracy as human written rules for
                 the same problem.",
  notes =        "Articles from this volume were invited to present
                 their results at The 38th International Conference on
                 Very Large Data Bases, August 27th--31st 2012, Istanbul,
                 Turkey.",
  bibsource =    "OAI-PMH server at export.arxiv.org",
  oai =          "oai:arXiv.org:1208.0291",

Genetic Programming entries for Robert Isele Christian Bizer