Created by W.Langdon from gp-bibliography.bib Revision:1.2031
@InProceedings{Gabel:2010:FSE,
author = "Mark Gabel and Zhendong Su",
title = "A Study of the Uniqueness of Source Code",
booktitle = "Proceedings of the eighteenth ACM SIGSOFT
international symposium on Foundations of software
engineering",
year = "2010",
pages = "147--156",
address = "Santa Fe, New Mexico, USA",
month = "7--11 " # nov,
publisher = "ACM",
acmid = "1882315",
keywords = "genetic algorithms, genetic programming, large scale
study, software uniqueness, source code",
isbn13 = "978-1-60558-791-2",
URL = "http://www.cs.ucdavis.edu/~su/publications/fse10.pdf",
doi = "10.1145/1882291.1882315",
size = "10 pages",
abstract = "This paper presents the results of the first study of
the uniqueness of source code. We define the uniqueness
of a unit of source code with respect to the entire
body of written software, which we approximate with a
corpus of 420 million lines of source code. Our
high-level methodology consists of examining a
collection of 6,000 software projects and measuring the
degree to which each project can be `assembled' solely
from portions of this corpus, thus providing a precise
measure of `uniqueness' that we call syntactic
redundancy. We parametrised our study over a variety of
variables, the most important of which being the level
of granularity at which we view source code. Our suite
of experiments together consumed approximately four
months of CPU time, providing quantitative answers to
the following questions: at what levels of granularity
is software unique, and at a given level of
granularity, how unique is software? While we believe
these questions to be of intrinsic interest, we discuss
possible applications to genetic programming and
developer productivity tools.",
notes = "Brief mention of GP and how their results apply to GP.
C,C++,Java. n-grams.
p147 'Singularity in software engineering's future'.
p149 'syntactically redundant' p152 'striking
similarity' between 30 current sourceforge projects.
p155 Almost all small code fragments have been written
many times (Small means 'approximately one to seven
lines of source code'). Cites Jiang and Su ISSTA 2009,
\cite{koza:book} and \cite{Weimer:2009:ICES}.
FSE '10, Gabel:2010:SUS:1882291.1882315",
}
Genetic Programming entries for Mark Gabel Zhendong Su