Automatic Meaning Discovery Using Google

  author =       "Rudi Cilibrasi and Paul M. B. Vitanyi",
  title =        "Automatic Meaning Discovery Using {Google}",
  year =         "2005",
  number =       "cs.CL/0412098",
  month =        "15 " # mar,
  note =         "v2",
  keywords =     "genetic algorithms, genetic programming, randomised
                 hill-climbing, SVM, support vector machines, complearn,
                 Computation and Language, Artificial Intelligence,
                 Databases, Information Retrieval, Learning",
  URL =          "https://arxiv.org/abs/cs/0412098",
  abstract =     "We have found a method to automatically extract the
                 meaning of words and phrases from the world-wide-web
                 using Google page counts. The approach is novel in its
                 unrestricted problem domain, simplicity of
                 implementation, and manifestly ontological
                 underpinnings. The world-wide-web is the largest
                 database on earth, and the latent semantic context
                 information entered by millions of independent users
                 averages out to provide automatic meaning of useful
                 quality. We demonstrate positive correlations,
                 evidencing an underlying semantic structure, in both
                 numerical symbol notations and number-name words in a
                 variety of natural languages and contexts. Next, we
                 demonstrate the ability to distinguish between colours
                 and numbers, and to distinguish between 17th century
                 Dutch painters; the ability to understand electrical
                 terms, religious terms, and emergency incidents; we
                 conduct a massive experiment in understanding WordNet
                 categories; and finally we demonstrate the ability to
                 do a simple automatic English-Spanish translation.",
  notes =        "ACM-class: I.2.4; I.2.7

                 Date (v1): Tue, 21 Dec 2004 16:05:36 GMT (127kb,S) Date
                 (revised v2): Tue, 15 Mar 2005 16:53:43 GMT

                 cited by \cite{graham-rowe:2005:complearn}",
  size =         "31 pages",

