Created by W.Langdon from gp-bibliography.bib Revision:1.2031
GECCO 2009 two page poster paper. The doi field holds the bare DOI with no resolver prefix, and URL holds the raw link to the PDF.
@InProceedings{langdon:2009:gecco,
  author =       "William B. Langdon and J. Rowsell and A. P. Harrison",
  title =        "Creating Regular Expressions as {mRNA} Motifs with
                 {GP} to Predict Human Exon Splitting",
  booktitle =    "GECCO '09: Proceedings of the 11th Annual conference
                 on Genetic and evolutionary computation",
  year =         "2009",
  editor =       "Guenther Raidl and Franz Rothlauf and
                 Giovanni Squillero and Rolf Drechsler and Thomas Stuetzle and
                 Mauro Birattari and Clare Bates Congdon and
                 Martin Middendorf and Christian Blum and Carlos Cotta and
                 Peter Bosman and Joern Grahl and Joshua Knowles and
                 David Corne and Hans-Georg Beyer and Ken Stanley and
                 Julian F. Miller and Jano {van Hemert} and
                 Tom Lenaerts and Marc Ebner and Jaume Bacardit and
                 Michael O'Neill and Massimiliano {Di Penta} and Benjamin Doerr and
                 Thomas Jansen and Riccardo Poli and Enrique Alba",
  pages =        "1789--1790",
  address =      "Montreal",
  month =        "8-12 " # jul,
  organisation = "SIGEVO",
  publisher =    "ACM",
  publisher_address = "New York, NY, USA",
  keywords =     "genetic algorithms, genetic programming, Poster, Gene
                 expression and regulation, alternative splicing,
                 Microarray analysis, Integration of genetic programming
                 into bioinformatics, Biological interpretation of
                 computer generated motifs, Bioinformatics, Affymetrix
                 GeneChip, strongly typed genetic programming, grammar,
                 regular expression, Alternative splicing of Homosapiens
                 exons, HDONA",
  isbn13 =       "978-1-60558-325-9",
  URL =          "http://www.cs.ucl.ac.uk/staff/W.Langdon/ftp/papers/langdon_2009_gecco.pdf",
  doi =          "10.1145/1569901.1570162",
  size =         "2 pages",
  abstract =     "RNAnet \cite{CES-486}
                 http://bioinformatics.essex.ac.uk/users/wlangdon/rnanet/
                 allows the user to calculate correlations of gene
                 expression, both between genes and between components
                 within genes. We investigate all of Ensembl
                 http://www.ensembl.org and find all the Homo Sapiens
                 exons for which there are sufficient robust Affymetrix
                 HG-U133 Plus 2 GeneChip probes. Calculating correlation
                 between mRNA probe measurements for the same exon shows
                 many exons whose components are consistently up
                 regulated and down regulated. However we identify other
                 Ensembl exons where sub-regions within them are self
                 consistent but these transcript blocks are not well
                 correlated with other blocks in the same exon. We
                 suggest many current Ensembl exon definitions are
                 incomplete.
                 Secondly, having identified exon with substructure we
                 use machine learning to try and identify patterns in
                 the DNA sequence lying between blocks of high
                 correlation which might yield biological or
                 technological explanations. A Backus-Naur form (BNF)
                 context-free grammar constrains strongly typed genetic
                 programming (STGP) to evolve biological motifs in the
                 form of regular expressions (RE) (e.g. TCTTT) which
                 classify gene exons with potential alternative mRNA
                 expression from those without. We show biological
                 patterns can be data mined by a GP written in gawk and
                 using egrep from NCBI's GEO
                 http://www.ncbi.nlm.nih.gov/geo/ database. The
                 automatically produced DNA motifs suggest that
                 alternative polyadenylation is not responsible. (Full
                 version in TR-09-02
                 \cite{langdon:2009:TR-09-02}.)
                 Blocky exons can be found in
                 http://bioinformatics.essex.ac.uk/users/wlangdon/tr-09-02.tar.gz",
  notes =        "t03p220. Longer version in
                 \cite{langdon:2009:TR-09-02}.
                 GECCO-2009 A joint meeting of the eighteenth
                 international conference on genetic algorithms
                 (ICGA-2009) and the fourteenth annual genetic
                 programming conference (GP-2009).
                 ACM Order Number 910092. Also known as
                 \cite{DBLP:conf/gecco/LangdonRH09}",
}
Genetic Programming entries for William B Langdon, J Rowsell, A P Harrison