bibli.bib

@article{Aspremont2007Direct,
  author = {d'Aspremont, A. and El Ghaoui, L. and Jordan, M. I. and Lanckriet,
	G. R. G.},
  title = {A Direct Formulation for Sparse {PCA} Using Semidefinite Programming},
  journal = {SIAM Review},
  year = {2007},
  volume = {49},
  number = {3},
  pages = {434--448},
  doi = {10.1137/050645506},
  url = {http://dx.doi.org/10.1137/050645506},
  owner = {jp},
  timestamp = {2013.01.07}
}

@article{Veer2008Enabling,
  author = {{van 't Veer}, L. J. and Bernards, R.},
  title = {Enabling personalized cancer medicine through analysis of gene-expression
	patterns},
  journal = {Nature},
  year = {2008},
  volume = {452},
  pages = {564--570},
  number = {7187},
  month = apr,
  abstract = {Therapies for patients with cancer have changed gradually over the
	past decade, moving away from the administration of broadly acting
	cytotoxic drugs towards the use of more-specific therapies that are
	targeted to each tumour. To facilitate this shift, tests need to
	be developed to identify those individuals who require therapy and
	those who are most likely to benefit from certain therapies. In particular,
	tests that predict the clinical outcome for patients on the basis
	of the genes expressed by their tumours are likely to increasingly
	affect patient management, heralding a new era of personalized medicine.},
  doi = {10.1038/nature06915},
  pdf = {../local/Veer2008Enabling.pdf},
  file = {Veer2008Enabling.pdf:Veer2008Enabling.pdf:PDF},
  institution = {Agendia BV, Louwesweg 6, 1066 EC Amsterdam, The Netherlands.},
  keywords = {csbcbook, csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature06915},
  pmid = {18385730},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1038/nature06915}
}

@article{Tayrac2009Simultaneous,
  author = {{de Tayrac}, M. and L{\^e}, S. and Aubry, M. and Mosser, J. and Husson,
	F.},
  title = {Simultaneous analysis of distinct {Omics} data sets with integration
	of biological knowledge: {Multiple Factor Analysis} approach},
  journal = {BMC Genomics},
  year = {2009},
  volume = {10},
  pages = {32},
  abstract = {Genomic analysis will greatly benefit from considering in a global
	way various sources of molecular data with the related biological
	knowledge. It is thus of great importance to provide useful integrative
	approaches dedicated to ease the interpretation of microarray data. Here,
	we introduce a data-mining approach, Multiple Factor Analysis (MFA),
	to combine multiple data sets and to add formalized knowledge. MFA
	is used to jointly analyse the structure emerging from genomic and
	transcriptomic data sets. The common structures are underlined and
	graphical outputs are provided such that biological meaning becomes
	easily retrievable. Gene Ontology terms are used to build gene modules
	that are superimposed on the experimentally interpreted plots. Functional
	interpretations are then supported by a step-by-step sequence of
	graphical representations. When applied to genomic and transcriptomic
	data and associated Gene Ontology annotations, our method prioritize
	the biological processes linked to the experimental settings. Furthermore,
	it reduces the time and effort to analyze large amounts of 'Omics'
	data.},
  doi = {10.1186/1471-2164-10-32},
  institution = {CNRS UMR 6061, Universit{\'e} de Rennes 1, IFR 140, Facult{\'e} de M{\'e}decine,
	CS 34317, 35043 Rennes, France. marie.de-tayrac@univ-rennes1.fr},
  keywords = {Animals; Comparative Genomic Hybridization; Factor Analysis, Statistical;
	Gene Expression Profiling, methods; Genomics, methods; Glioma, genetics;
	Humans; Mice; Models, Biological; Oligonucleotide Array Sequence
	Analysis, methods},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2164-10-32},
  pmid = {19154582},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1186/1471-2164-10-32}
}

@article{Consortium2010map,
  author = {{1000 Genomes Project Consortium}},
  title = {A map of human genome variation from population-scale sequencing},
  journal = {Nature},
  year = {2010},
  volume = {467},
  pages = {1061--1073},
  number = {7319},
  month = oct,
  abstract = {The 1000 Genomes Project aims to provide a deep characterization of
	human genome sequence variation as a foundation for investigating
	the relationship between genotype and phenotype. Here we present
	results of the pilot phase of the project, designed to develop and
	compare different strategies for genome-wide sequencing with high-throughput
	platforms. We undertook three projects: low-coverage whole-genome
	sequencing of 179 individuals from four populations; high-coverage
	sequencing of two mother-father-child trios; and exon-targeted sequencing
	of 697 individuals from seven populations. We describe the location,
	allele frequency and local haplotype structure of approximately 15
	million single nucleotide polymorphisms, 1 million short insertions
	and deletions, and 20,000 structural variants, most of which were
	previously undescribed. We show that, because we have catalogued
	the vast majority of common variation, over 95\% of the currently
	accessible variants found in any individual are present in this data
	set. On average, each person is found to carry approximately 250
	to 300 loss-of-function variants in annotated genes and 50 to 100
	variants previously implicated in inherited disorders. We demonstrate
	how these results can be used to inform association and functional
	studies. From the two trios, we directly estimate the rate of de
	novo germline base substitution mutations to be approximately $10^{-8}$
	per base pair per generation. We explore the data with regard to
	signatures of natural selection, and identify a marked reduction
	of genetic variation in the neighbourhood of genes, due to selection
	at linked sites. These methods and public data will support the next
	phase of human genetic research.},
  doi = {10.1038/nature09534},
  keywords = {Calibration; Chromosomes, Human, Y, genetics; Computational Biology;
	DNA Mutational Analysis; DNA, Mitochondrial, genetics; Evolution,
	Molecular; Female; Genetic Association Studies; Genetic Variation,
	genetics; Genetics, Population, methods; Genome, Human, genetics;
	Genome-Wide Association Study; Genomics, methods; Genotype; Haplotypes,
	genetics; Humans; Male; Mutation, genetics; Pilot Projects; Polymorphism,
	Single Nucleotide, genetics; Recombination, Genetic, genetics; Sample
	Size; Selection, Genetic, genetics; Sequence Alignment; Sequence
	Analysis, DNA, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nature09534},
  pmid = {20981092},
  timestamp = {2012.02.24},
  url = {http://dx.doi.org/10.1038/nature09534}
}

@article{Razzak1992Applications,
  author = {A-Razzak, M. and Glen, R. C.},
  title = {Applications of rule-induction in the derivation of quantitative
	structure-activity relationships},
  journal = {J. Comput. Aided Mol. Des.},
  year = {1992},
  volume = {6},
  pages = {349--383},
  number = {4},
  month = aug,
  abstract = {Recently, methods have been developed in the field of Artificial Intelligence
	(AI), specifically in the expert systems area using rule-induction,
	designed to extract rules from data. We have applied these methods
	to the analysis of molecular series with the objective of generating
	rules which are predictive and reliable. The input to rule-induction
	consists of a number of examples with known outcomes (a training
	set) and the output is a tree-structured series of rules. Unlike
	most other analysis methods, the results of the analysis are in the
	form of simple statements which can be easily interpreted. These
	are readily applied to new data giving both a classification and
	a probability of correctness. Rule-induction has been applied to
	in-house generated and published QSAR datasets and the methodology,
	application and results of these analyses are discussed. The results
	imply that in some cases it would be advantageous to use rule-induction
	as a complementary technique in addition to conventional statistical
	and pattern-recognition methods.},
  keywords = {Algorithms, Anticonvulsants, Antimalarials, Artificial Intelligence,
	Cardiotonic Agents, Software, Structure-Activity Relationship, gamma-Aminobutyric
	Acid},
  owner = {mahe},
  pmid = {1403028},
  timestamp = {2006.09.06}
}

@article{Abeel2010Robust,
  author = {Abeel, T. and Helleputte, T. and Van de Peer, Y. and Dupont, P. and
	Saeys, Y.},
  title = {Robust biomarker identification for cancer diagnosis with ensemble
	feature selection methods},
  journal = {Bioinformatics},
  year = {2010},
  volume = {26},
  pages = {392--398},
  number = {3},
  month = feb,
  abstract = {MOTIVATION: Biomarker discovery is an important topic in biomedical
	applications of computational biology, including applications such
	as gene and SNP selection from high dimensional data. Surprisingly,
	the stability with respect to sampling variation or robustness of
	such selection processes has received attention only recently. However,
	robustness of biomarkers is an important issue, as it may greatly
	influence subsequent biological validations. In addition, a more
	robust set of markers may strengthen the confidence of an expert
	in the results of a selection method. RESULTS: Our first contribution
	is a general framework for the analysis of the robustness of a biomarker
	selection algorithm. Secondly, we conducted a large-scale analysis
	of the recently introduced concept of ensemble feature selection,
	where multiple feature selections are combined in order to increase
	the robustness of the final set of selected features. We focus on
	selection methods that are embedded in the estimation of support
	vector machines (SVMs). SVMs are powerful classification models that
	have shown state-of-the-art performance on several diagnosis and
	prognosis tasks on biological data. Their feature selection extensions
	also offered good results for gene selection tasks. We show that
	the robustness of SVMs for biomarker discovery can be substantially
	increased by using ensemble feature selection techniques, while at
	the same time improving upon classification performances. The proposed
	methodology is evaluated on four microarray data sets showing increases
	of up to almost 30\% in robustness of the selected biomarkers, along
	with an improvement of about 15\% in classification performance.
	The stability improvement with ensemble methods is particularly noticeable
	for small signature sizes (a few tens of genes), which is most relevant
	for the design of a diagnosis or prognosis model from a gene signature.
	CONTACT: yvan.saeys@psb.ugent.be.},
  doi = {10.1093/bioinformatics/btp630},
  pdf = {../local/Abeel2009Robust.pdf},
  file = {Abeel2009Robust.pdf:Abeel2009Robust.pdf:PDF},
  institution = {Department of Plant Systems Biology, VIB, Technologiepark 927, 9052
	Gent, Belgium.},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {jp},
  pii = {btp630},
  pmid = {19942583},
  timestamp = {2010.01.09},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp630}
}

@article{Abernethy2008new,
  author = {Abernethy, J. and Bach, F. and Evgeniou, T. and Vert, J.-P.},
  title = {A new approach to collaborative filtering: operator estimation with
	spectral regularization},
  journal = {J. Mach. Learn. Res.},
  publisher = {JMLR.org},
  issn = {1532-4435},
  year = {2009},
  volume = {10},
  pages = {803--826}
}

@techreport{Abernethy2008New-techreport,
  author = {Abernethy, J. and Bach, F. and Evgeniou, T. and Vert, J.-P.},
  title = {A New Approach to Collaborative Filtering: Operator Estimation with
	Spectral Regularization},
  institution = {HAL},
  number = {00250231},
  year = {2008},
  timestamp = {2008.03.31}
}

@techreport{Abernethy2006Low-rank,
  author = {Abernethy, J. and Bach, F. and Evgeniou, T. and Vert, J.-P.},
  title = {Low-rank matrix factorization with attributes},
  institution = {arXiv},
  year = {2006},
  number = {cs/0611124},
  eprint = {cs/0611124},
  archiveprefix = {arXiv},
  owner = {jp},
  timestamp = {2008.06.06}
}

@article{Abernethy2008Eliciting,
  author = {Abernethy, J. and Evgeniou, T. and Toubia, O. and Vert, J.-P.},
  title = {Eliciting consumer preferences using robust adaptive choice questionnaires},
  journal = {IEEE Trans. Knowl. Data Eng.},
  year = {2008},
  volume = {20},
  number = {2},
  pages = {145--155},
  doi = {10.1109/TKDE.2007.190632},
  url = {http://dx.doi.org/10.1109/TKDE.2007.190632},
  abstract = {We propose a framework for designing adaptive choice-based conjoint
	questionnaires that are robust to response error. It is developed
	based on a combination of experimental design and statistical learning
	theory principles. We implement and test a specific case of this
	framework using Regularization Networks. We also formalize within
	this framework the polyhedral methods recently proposed in marketing.
	We use simulations as well as an online market research experiment
	with 500 participants to compare the proposed method to benchmark
	methods. Both experiments show that the proposed adaptive questionnaires
	outperform existing ones in most cases. This work also indicates
	the potential of using machine learning methods in marketing.},
  pdf = {../local/Abernethy2008Eliciting.pdf},
  file = {Abernethy2008Eliciting.pdf:Abernethy2008Eliciting.pdf:PDF},
  owner = {jp},
  timestamp = {2009.02.26}
}

@techreport{Abernethy2004optimization,
  author = {Abernethy, J. and Evgeniou, T. and Vert, J.-P.},
  title = {An optimization framework for adaptive conjoint questionnaire design},
  year = {2004},
  institution = {INSEAD},
  owner = {vert}
}

@article{Abraham2010Prediction,
  author = {Abraham, Gad and Kowalczyk, Adam and Loi, Sherene and Haviv, Izhak
	and Zobel, Justin},
  title = {Prediction of breast cancer prognosis using gene set statistics provides
	signature stability and biological context},
  journal = {BMC Bioinformatics},
  year = {2010},
  volume = {11},
  number = {1},
  pages = {277},
  doi = {10.1186/1471-2105-11-277},
  url = {http://dx.doi.org/10.1186/1471-2105-11-277},
  pdf = {../local/Abraham2010Prediction.pdf},
  file = {Abraham2010Prediction.pdf:Abraham2010Prediction.pdf:PDF},
  owner = {jp},
  timestamp = {2011.01.13}
}

@article{Abrahamian2003Efficient,
  author = {E. Abrahamian and P. C. Fox and L. Naerum and I. T. Christensen and
	H. Th{\o}gersen and R. D. Clark},
  title = {Efficient generation, storage, and manipulation of fully flexible
	pharmacophore multiplets and their use in {3-D} similarity searching},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2003},
  volume = {43},
  pages = {458--468},
  number = {2},
  abstract = {Pharmacophore triplets and quartets have been used by many groups
	in recent years, primarily as a tool for molecular diversity analysis.
	In most cases, slow processing speeds and the very large size of
	the bitsets generated have forced researchers to compromise in terms
	of how such multiplets were stored, manipulated, and compared, e.g.,
	by using simple unions to represent multiplets for sets of molecules.
	Here we report using bitmaps in place of bitsets to reduce storage
	demands and to improve processing speed. Here, a bitset is taken
	to mean a fully enumerated string of zeros and ones, from which a
	compressed bitmap is obtained by replacing uniform blocks ("runs")
	of digits in the bitset with a pair of values identifying the content
	and length of the block (run-length encoding compression). High-resolution
	multiplets involving four features are enabled by using 64 bit executables
	to create and manipulate bitmaps, which "connect" to the 32 bit executables
	used for database access and feature identification via an extensible
	mark-up language (XML) data stream. The encoding system used supports
	simple pairs, triplets, and quartets; multiplets in which a privileged
	substructure is used as an anchor point; and augmented multiplets
	in which an additional vertex is added to represent a contingent
	feature such as a hydrogen bond extension point linked to a complementary
	feature (e.g., a donor or an acceptor atom) in a base pair or triplet.
	It can readily be extended to larger, more complex multiplets as
	well. Database searching is one particular potential application
	for this technology. Consensus bitmaps built up from active ligands
	identified in preliminary screening can be used to generate hypothesis
	bitmaps, a process which includes allowance for differential weighting
	to allow greater emphasis to be placed on bits arising from multiplets
	expected to be particularly discriminating. Such hypothesis bitmaps
	are shown to be useful queries for database searching, successfully
	retrieving active compounds across a range of structural classes
	from a corporate database. The current implementation allows multiconformer
	bitmaps to be obtained from pregenerated conformations or by random
	perturbation on-the-fly. The latter application involves random sampling
	of the full range of conformations not precluded by steric clashes,
	which limits the usefulness of classical fingerprint similarity measures.
	A new measure of similarity, The Stochastic Cosine, is introduced
	here to address this need. This new similarity measure uses the average
	number of bits common to independently drawn conformer sets to normalize
	the cosine coefficient. Its use frees the user from having to ensure
	strict comparability of starting conformations and having to use
	fixed torsional increments, thereby allowing fully flexible characterization
	of pharmacophoric patterns.},
  doi = {10.1021/ci025595r},
  pdf = {../local/Abrahamian2003Efficient.pdf},
  file = {Abrahamian2003Efficient.pdf:Abrahamian2003Efficient.pdf:PDF},
  owner = {mahe},
  pmid = {12653509},
  timestamp = {2006.08.22},
  url = {http://dx.doi.org/10.1021/ci025595r}
}

@article{Abramovich2006Adapting,
  author = {Abramovich, F. and Benjamini, Y. and Donoho, D. L. and Johnstone,
	I. M.},
  title = {Adapting to unknown sparsity by controlling the false discovery rate},
  journal = {Ann. Stat.},
  year = {2006},
  volume = {34},
  number = {2},
  pages = {584--653},
  doi = {10.1214/009053606000000074},
  url = {http://dx.doi.org/10.1214/009053606000000074},
  pdf = {../local/Abramovich2006Adapting.pdf},
  file = {Abramovich2006Adapting.pdf:Abramovich2006Adapting.pdf:PDF},
  owner = {jp},
  timestamp = {2008.12.09}
}

@article{Achard2001XML,
  author = {F. Achard and G. Vaysseix and E. Barillot},
  title = {{XML}, bioinformatics and data integration},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {115--125},
  number = {2},
  month = feb,
  abstract = {Motivation: The eXtensible Markup Language (XML) is an emerging standard
	for structuring documents, notably for the World Wide Web. In this
	paper, the authors present XML and examine its use as a data language
	for bioinformatics. In particular, XML is compared to other languages,
	and some of the potential uses of XML in bioinformatics applications
	are presented. The authors propose to adopt XML for data interchange
	between databases and other sources of data. Finally the discussion
	is illustrated by a test case of a pedigree data model in XML. Contact:
	Emmanuel.Barillot@infobiogen.fr},
  institution = {CRI Infobiogen, 523 place des terrasses de l'agora, 91000 Evry, France.},
  keywords = {Computational Biology; Humans; Information Storage and Retrieval;
	Internet; Programming Languages},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pmid = {11238067},
  timestamp = {2011.06.01}
}

@article{Adie2005Speeding,
  author = {Adie, E. A. and Adams, R. R. and Evans, K. L. and Porteous, D. J.
	and Pickard, B. S.},
  title = {Speeding disease gene discovery by sequence based candidate prioritization},
  journal = {BMC Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {55},
  doi = {10.1186/1471-2105-6-55},
  url = {http://dx.doi.org/10.1186/1471-2105-6-55},
  pii = {1471-2105-6-55},
  pmid = {15766383},
  abstract = {BACKGROUND: Regions of interest identified through genetic linkage
	studies regularly exceed 30 centimorgans in size and can contain
	hundreds of genes. Traditionally this number is reduced by matching
	functional annotation to knowledge of the disease or phenotype in
	question. However, here we show that disease genes share patterns
	of sequence-based features that can provide a good basis for automatic
	prioritization of candidates by machine learning. RESULTS: We examined
	a variety of sequence-based features and found that for many of them
	there are significant differences between the sets of genes known
	to be involved in human hereditary disease and those not known to
	be involved in disease. We have created an automatic classifier called
	PROSPECTR based on those features using the alternating decision
	tree algorithm which ranks genes in the order of likelihood of involvement
	in disease. On average, PROSPECTR enriches lists for disease genes
	two-fold 77\% of the time, five-fold 37\% of the time and twenty-fold
	11\% of the time. CONCLUSION: PROSPECTR is a simple and effective
	way to identify genes involved in Mendelian and oligogenic disorders.
	It performs markedly better than the single existing sequence-based
	classifier on novel data. PROSPECTR could save investigators looking
	at large regions of interest time and effort by prioritizing positional
	candidate genes for mutation detection and case-control association
	studies.},
  institution = {Medical Genetics Section, Department of Medical Sciences, The University
	of Edinburgh, Edinburgh, UK. euan.adie@ed.ac.uk},
  pdf = {../local/Adie2005Speeding.pdf},
  file = {Adie2005Speeding.pdf:Adie2005Speeding.pdf:PDF},
  owner = {jp},
  timestamp = {2009.03.18}
}

@article{Aebersold2003Mass,
  author = {Aebersold, R. and Mann, M.},
  title = {Mass spectrometry-based proteomics},
  journal = {Nature},
  year = {2003},
  volume = {422},
  pages = {198--207},
  number = {6928},
  month = mar,
  abstract = {Recent successes illustrate the role of mass spectrometry-based proteomics
	as an indispensable tool for molecular and cellular biology and for
	the emerging field of systems biology. These include the study
	of protein-protein interactions via affinity-based isolations on
	a small and proteome-wide scale, the mapping of numerous organelles,
	the concurrent description of the malaria parasite genome and proteome,
	and the generation of quantitative protein profiles from diverse
	species. The ability of mass spectrometry to identify and, increasingly,
	to precisely quantify thousands of proteins from complex samples
	can be expected to impact broadly on biology and medicine.},
  comment = {A good ref for the detection of protein-protein interactions by coimmunoprecipitation
	followed by mass spectrometry},
  doi = {10.1038/nature01511},
  pdf = {../local/Aebersold2003Mass.pdf},
  file = {Aebersold2003Mass.pdf:Aebersold2003Mass.pdf:PDF},
  keywords = {bio},
  owner = {vert},
  url = {http://dx.doi.org/10.1038/nature01511}
}

@article{Aerts2006Gene,
  author = {Aerts, S. and Lambrechts, D. and Maity, S. and Van Loo, P. and Coessens,
	B. and De Smet, F. and Tranchevent, L.-C. and De Moor, B. and Marynen,
	P. and Hassan, B. and Carmeliet, P. and Moreau, Y.},
  title = {Gene prioritization through genomic data fusion},
  journal = {Nat. Biotechnol.},
  year = {2006},
  volume = {24},
  pages = {537--544},
  number = {5},
  month = may,
  abstract = {The identification of genes involved in health and disease remains
	a challenge. We describe a bioinformatics approach, together with
	a freely accessible, interactive and flexible software termed Endeavour,
	to prioritize candidate genes underlying biological processes or
	diseases, based on their similarity to known genes involved in these
	phenomena. Unlike previous approaches, ours generates distinct prioritizations
	for multiple heterogeneous data sources, which are then integrated,
	or fused, into a global ranking using order statistics. In addition,
	it offers the flexibility of including additional data sources. Validation
	of our approach revealed it was able to efficiently prioritize 627
	genes in disease data sets and 76 genes in biological pathway sets,
	identify candidates of 16 mono- or polygenic diseases, and discover
	regulatory genes of myeloid differentiation. Furthermore, the approach
	identified a novel gene involved in craniofacial development from
	a 2-Mb chromosomal region, deleted in some patients with DiGeorge-like
	birth defects. The approach described here offers an alternative
	integrative method for gene discovery.},
  doi = {10.1038/nbt1203},
  pdf = {../local/Aerts2006Gene.pdf},
  file = {Aerts2006Gene.pdf:Aerts2006Gene.pdf:PDF},
  institution = {Department of Human Genetics, Flanders Interuniversity Institute for
	Biotechnology (VIB), University of Leuven, Herestraat 49, bus 602,
	3000 Leuven, Belgium. stein.aerts@med.kuleuven.be},
  owner = {jp},
  pii = {nbt1203},
  pmid = {16680138},
  timestamp = {2009.03.18},
  url = {http://dx.doi.org/10.1038/nbt1203}
}

@inproceedings{Agarwal2009Ranking,
  author = {Agarwal, S. and Sengupta, S.},
  title = {Ranking Genes By Relevance to a Disease},
  year = {2009},
  booktitle = {Proceedings of the 8th Annual International Conference on Computational
	Systems Bioinformatics},
  timestamp = {2010.10.01},
  owner = {mordelet}
}

@article{Aguda2001Chaos,
  author = {Aguda, B. D.},
  title = {Kick-starting the cell cycle: From growth-factor stimulation to initiation
	of {DNA} replication},
  journal = {Chaos},
  year = {2001},
  volume = {11},
  pages = {269--276},
  number = {1},
  abstract = {The essential genes, proteins and associated regulatory networks involved
	in the entry into the mammalian cell cycle are identified, from activation
	of growth-factor receptors to intracellular signal transduction pathways
	that impinge on the cell cycle machinery and ultimately on the initiation
	of DNA replication. Signaling pathways mediated by the oncoproteins
	Ras and Myc induce the activation of cyclin-dependent kinases CDK4
	and CDK2, and the assembly and firing of pre-replication complexes
	require a collaboration among E2F, CDK2, and Cdc7 kinase. A proposed
	core mechanism of the restriction point, the major checkpoint prior
	to commitment to DNA synthesis, involves cyclin E/CDK2, the phosphatase
	Cdc25A, and the CDK inhibitor p27Kip1. (c) 2001 American Institute
	of Physics.}
}

@article{Aguda1999Oncogene,
  author = {Aguda, B. D.},
  title = {Instabilities in phosphorylation-dephosphorylation cascades and cell
	cycle checkpoints},
  journal = {Oncogene},
  year = {1999},
  volume = {18},
  pages = {2846--2851},
  number = {18},
  abstract = {The G2-M checkpoint in the cell cycle is identified with a set of
	phosphorylation-dephosphorylation (PD) cycles involving Cdc25 and
	the maturation-promoting factor (MPF); these PD cycles are coupled
	in a way that generates an instability. This instability arises out
	of a transcritical bifurcation which could be exploited by the G2
	DNA damage checkpoint pathway in order to arrest or delay entry into
	mitosis. The coupling between PD cycles involving Wee1 and MPF does
	not lead to an instability and therefore Wee1 may not be a crucial
	target of the checkpoint pathway. A set of PD cycles exhibiting transcritical
	bifurcation also possesses the integrative ability of a checkpoint
	for 'checking' that prerequisites are satisfied prior to the next
	cell cycle event. Such a set of coupled PD cycles is suggested to
	be a core mechanism of cell cycle checkpoints.},
  keywords = {csbcbook}
}

@article{Aguda1999PNAS,
  author = {Aguda, B. D.},
  title = {A quantitative analysis of the kinetics of the {G$_2$} {DNA} damage checkpoint
	system},
  journal = {Proc Natl Acad Sci U S A},
  year = {1999},
  volume = {96},
  pages = {11352--11357},
  number = {20},
  abstract = {A detailed model of the G(2) DNA damage checkpoint (G2DDC) system
	is presented that includes complex regulatory networks of the mitotic
	kinase Cdc2, phosphatase Cdc25, Wee1 kinase, and damage signal transduction
	pathways involving Chk1 and p53. Assumptions on the kinetic equations
	of the G2DDC are made, and computer simulations are carried out to
	demonstrate how the various subsystems operate to delay or arrest
	cell cycle progression. The detailed model could be used to explain
	various experiments relevant to G2DDC reported recently, including
	the nuclear export of 14-3-3-bound Cdc25, the down-regulation of
	cyclin B1 expression by p53, the effect of Chk1 and p53 on Cdc25
	levels, and Wee1 degradation. It also is shown that, under certain
	conditions, p53 is necessary to sustain a G(2) arrest.},
  keywords = {csbcbook}
}

@article{Aguda2003CellCycle,
  author = {Aguda, B. D. and Algar, C. K.},
  title = {A structural analysis of the qualitative networks regulating the
	cell cycle and apoptosis},
  journal = {Cell Cycle},
  year = {2003},
  volume = {2},
  pages = {538--544},
  number = {6},
  abstract = {This paper proposes an integration and modular organization of the
	complex regulatory networks involved in the mammalian cell cycle,
	apoptosis, and related intracellular signaling cascades. A common
	node linking the cell cycle and apoptosis permits the possibility
	of coordinate control between the initiation of these two cellular
	processes. From this node, pathways emanate that lead to the activation
	of cyclin-dependent kinases (in the cell cycle) and caspases (in
	apoptosis). Computer simulations are carried out to demonstrate that
	the proposed network architecture and certain module-module interactions
	can account for the experimentally observed sequence of cellular
	events (quiescence, cell cycle, and apoptosis) as the transcriptional
	activities of E2F-1 and c-Myc are increased. Despite the lack of
	quantitative kinetic data on most of the pathways, it is demonstrated
	that there can be meaningful conclusions regarding system stability
	that arise from the topology of the network. It is shown that only
	cycles in the network graph determine stability. Thus, several positive
	and negative feedback loops are identified from a literature review
	of the major pathways involved in the initiation of the cell cycle
	and of apoptosis.},
  keywords = {csbcbook}
}

@article{Aguda2007PLOSCompBiol,
  author = {Aguda, B. D. and Goryachev, A. B.},
  title = {From pathways databases to network models of switching behavior},
  journal = {PLoS Comput Biol},
  year = {2007},
  volume = {3},
  pages = {1674--1678},
  number = {9},
  keywords = {csbcbook}
}

@article{Aguda1999CellProl,
  author = {Aguda, B. D. and Tang, Y.},
  title = {The kinetic origins of the restriction point in the mammalian cell
	cycle},
  journal = {Cell Prolif},
  year = {1999},
  volume = {32},
  pages = {321--335},
  number = {5},
  abstract = {A detailed model mechanism for the G1/S transition in the mammalian
	cell cycle is presented and analysed by computer simulation to investigate
	whether the kinetic origins of the restriction point (R-point) can
	be identified. The R-point occurs in mid-to-late G1 phase and marks
	the transition between mitogen-dependent to mitogen-independent progression
	of the cell cycle. For purposes of computer simulations, the R-point
	is defined as the first point in time after mitosis where cutting
	off mitogen stimulation does not prevent the cell reaching the threshold
	activity of cyclin-E/cdk2 required for entry into S phase. The key
	components of the network that generate a dynamic switching behaviour
	associated with the R-point include a positive feedback loop between
	cyclin-E/cdk2 and Cdc25A, along with the mutually negative interaction
	between the cdk inhibitor p27Kip1 and cyclin-E/cdk2. Simulations
	of the passage through the R-point were carried out and the factors
	affecting the position of the R-point in G1 are determined. The detailed
	model also shows various points in the network where the activation
	of cyclin-E/cdk2 can be initiated with or without the involvement
	of the retinoblastoma protein.},
  keywords = {csbcbook}
}

@article{Aguilera2008Genome,
  author = {Aguilera, A. and G{\'o}mez-Gonz{\'a}lez, B.},
  title = {Genome instability: a mechanistic view of its causes and consequences.},
  journal = {Nat. Rev. Genet.},
  year = {2008},
  volume = {9},
  pages = {204--217},
  number = {3},
  month = {Mar},
  abstract = {Genomic instability in the form of mutations and chromosome rearrangements
	is usually associated with pathological disorders, and yet it is
	also crucial for evolution. Two types of elements have a key role
	in instability leading to rearrangements: those that act in trans
	to prevent instability--among them are replication, repair and S-phase
	checkpoint factors--and those that act in cis--chromosomal hotspots
	of instability such as fragile sites and highly transcribed DNA sequences.
	Taking these elements as a guide, we review the causes and consequences
	of instability with the aim of providing a mechanistic perspective
	on the origin of genomic instability.},
  doi = {10.1038/nrg2268},
  pdf = {../local/Aguilera2008Genome.pdf},
  file = {Aguilera2008Genome.pdf:Aguilera2008Genome.pdf:PDF},
  institution = {Centro Andaluz de Biologia Molecular y Medicina Regenerativa CABIMER,
	Universidad de Sevilla-CSIC, Avd. Am{\'e}rico Vespucio s/n, 41092 Sevilla,
	Spain. aguilo@us.es},
  keywords = {csbcbook},
  owner = {jp},
  pii = {nrg2268},
  pmid = {18227811},
  timestamp = {2009.10.08},
  url = {http://dx.doi.org/10.1038/nrg2268}
}

@article{Aires-de-Sousa2005Prediction,
  author = {Aires-de-Sousa, J. and Gasteiger, J.},
  title = {Prediction of enantiomeric excess in a combinatorial library of catalytic
	enantioselective reactions.},
  journal = {J {C}omb {C}hem},
  year = {2005},
  volume = {7},
  pages = {298--301},
  number = {2},
  abstract = {A quantitative structure-enantioselectivity relationship was established
	for a combinatorial library of enantioselective reactions performed
	by addition of diethyl zinc to benzaldehyde. {C}hiral catalysts and
	additives were encoded by their chirality codes and presented as
	input to neural networks. {T}he networks were trained to predict
	the enantiomeric excess. {W}ith independent test sets, predictions
	of enantiomeric excess could be made with an average error as low
	as 6\% ee. {M}ultilinear regression, perceptrons, and support vector
	machines were also evaluated as modeling tools. {T}he method is of
	interest for the computer-aided design of combinatorial libraries
	involving chiral compounds or enantioselective reactions. {T}his
	is the first example of a quantitative structure-property relationship
	based on chirality codes.},
  doi = {10.1021/cc049961q},
  pdf = {../local/Aires-de-Sousa2005Prediction.pdf},
  file = {Aires-de-Sousa2005Prediction.pdf:local/Aires-de-Sousa2005Prediction.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1021/cc049961q}
}

@article{Aizerman1964Theoretical,
  author    = {Aizerman, M. A. and Braverman, E. M. and Rozono{\'e}r, L. I.},
  title     = {Theoretical foundations of the potential function method in pattern
	recognition learning},
  journal   = {Automation and {R}emote {C}ontrol},
  volume    = {25},
  pages     = {821--837},
  year      = {1964},
  owner     = {vert},
  timestamp = {2006.02.02}
}

@inproceedings{Akaike1973Information,
  author = {Hirotugu Akaike},
  title = {Information theory and an extension of the maximum likelihood principle},
  booktitle = {Proc. of the 2nd Int. Symp. on Information Theory},
  year = {1973},
  editor = {Petrov, B. N. and Csaki, F.},
  pages = {267--281},
  abstract = {The problem of estimating the dimensionality of a model occurs in
	various forms in applied statistics. There is estimating the number
	of factor in factor analysis, estimating the degree of a polynomial
	describing the data, selecting the variables to},
  keywords = {conf, Akaike Information Criterion, criteria, AIC, model, modelling,
	parameters, complexity, overfitting, c1973, c197x, c19xx}
}

@article{Akutsu2000Algorithms,
  author    = {T. Akutsu and S. Miyano and S. Kuhara},
  title     = {{A}lgorithms for identifying {B}oolean networks and related biological
	networks based on matrix multiplication and fingerprint function},
  journal   = {J. Comput. Biol.},
  volume    = {7},
  number    = {3-4},
  pages     = {331--343},
  year      = {2000},
  doi       = {10.1089/106652700750050817},
  url       = {http://dx.doi.org/10.1089/106652700750050817},
  pmid      = {11108466},
  pdf       = {../local/Akutsu2000Algorithms.pdf},
  file      = {Akutsu2000Algorithms.pdf:Akutsu2000Algorithms.pdf:PDF},
  timestamp = {2008.02.04},
  abstract  = {Due to the recent progress of the DNA microarray technology, a large
	number of gene expression profile data are being produced. How to
	analyze gene expression data is an important topic in computational
	molecular biology. Several studies have been done using the Boolean
	network as a model of a genetic network. This paper proposes efficient
	algorithms for identifying Boolean networks of bounded indegree and
	related biological networks, where identification of a Boolean network
	can be formalized as a problem of identifying many Boolean functions
	simultaneously. For the identification of a Boolean network, an O(mnD+1)
	time naive algorithm and a simple O (mnD) time algorithm are known,
	where n denotes the number of nodes, m denotes the number of examples,
	and D denotes the maximum in degree. This paper presents an improved
	O(momega-2nD + mnD+omega-3) time Monte-Carlo type randomized algorithm,
	where omega is the exponent of matrix multiplication (currently,
	omega < 2.376). The algorithm is obtained by combining fast matrix
	multiplication with the randomized fingerprint function for string
	matching. Although the algorithm and its analysis are simple, the
	result is nontrivial and the technique can be applied to several
	related problems.}
}

@article{Akutsu2000Inferring,
  author  = {T. Akutsu and S. Miyano and S. Kuhara},
  title   = {Inferring qualitative relations in genetic networks and metabolic
	pathways},
  journal = {Bioinformatics},
  volume  = {16},
  number  = {8},
  pages   = {727--734},
  year    = {2000},
  url     = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/16/8/727},
  pdf     = {../local/Akutsu2000Inferring.pdf},
  file    = {Akutsu2000Inferring.pdf:local/Akutsu2000Inferring.pdf:PDF},
  subject = {bionet}
}

@article{Ala2008Prediction,
  author      = {Ala, U. and Piro, R.M. and Grassi, E. and Damasco, C. and Silengo,
	L. and Oti, M. and Provero, P. and Di Cunto, F.},
  title       = {Prediction of human disease genes by human-mouse conserved coexpression
	analysis.},
  journal     = {PLoS Comput. Biol.},
  volume      = {4},
  number      = {3},
  pages       = {e1000043},
  month       = {Mar},
  year        = {2008},
  doi         = {10.1371/journal.pcbi.1000043},
  url         = {http://dx.doi.org/10.1371/journal.pcbi.1000043},
  pmid        = {18369433},
  institution = {Molecular Biotechnology Center, Department of Genetics, Biology and
	Biochemistry, University of Turin, Turin, Italy.},
  keywords    = {Algorithms; Animals; Biological Markers; Chromosome Mapping; Conserved
	Sequence; Diagnosis, Computer-Assisted; Gene Expression Profiling;
	Genetic Diseases, Inborn; Genetic Predisposition to Disease; Humans;
	Mice; Proteome},
  owner       = {mordelet},
  timestamp   = {2010.09.28},
  abstract    = {BACKGROUND: Even in the post-genomic era, the identification of candidate
	genes within loci associated with human genetic diseases is a very
	demanding task, because the critical region may typically contain
	hundreds of positional candidates. Since genes implicated in similar
	phenotypes tend to share very similar expression profiles, high throughput
	gene expression data may represent a very important resource to identify
	the best candidates for sequencing. However, so far, gene coexpression
	has not been used very successfully to prioritize positional candidates.
	METHODOLOGY/PRINCIPAL FINDINGS: We show that it is possible to reliably
	identify disease-relevant relationships among genes from massive
	microarray datasets by concentrating only on genes sharing similar
	expression profiles in both human and mouse. Moreover, we show systematically
	that the integration of human-mouse conserved coexpression with a
	phenotype similarity map allows the efficient identification of disease
	genes in large genomic regions. Finally, using this approach on 850
	OMIM loci characterized by an unknown molecular basis, we propose
	high-probability candidates for 81 genetic diseases. CONCLUSION:
	Our results demonstrate that conserved coexpression, even at the
	human-mouse phylogenetic distance, represents a very strong criterion
	to predict disease-relevant relationships among human genes.}
}

@article{Albeck2008PLOSBiol,
  author   = {Albeck, John G. and Burke, John M. and Spencer, Sabrina L. and Lauffenburger,
	Douglas A. and Sorger, Peter K.},
  title    = {Modeling a Snap-Action, Variable-Delay Switch Controlling Extrinsic
	Cell Death},
  journal  = {PLoS Biol},
  volume   = {6},
  number   = {12},
  pages    = {e299},
  year     = {2008},
  abstract = {A combination of single-cell experiments and mathematical modeling
	reveals the mechanisms underlying all-or-none caspase activation
	during receptor-induced apoptosis.}
}

@article{Albert2008SCBM,
  author   = {Albert, I. and Thakar, J. and Li, S. and Zhang, R. and Albert, R.},
  title    = {Boolean network simulations for life scientists},
  journal  = {Source Code Biol Med},
  volume   = {3},
  pages    = {16},
  year     = {2008},
  abstract = {ABSTRACT: Modern life sciences research increasingly relies on computational
	solutions, from large scale data analyses to theoretical modeling.
	Within the theoretical models Boolean networks occupy an increasing
	role as they are eminently suited at mapping biological observations
	and hypotheses into a mathematical formalism. The conceptual underpinnings
	of Boolean modeling are very accessible even without a background
	in quantitative sciences, yet it allows life scientists to describe
	and explore a wide range of surprisingly complex phenomena. In this
	paper we provide a clear overview of the concepts used in Boolean
	simulations, present a software library that can perform these simulations
	based on simple text inputs and give three case studies. The large
	scale simulations in these case studies demonstrate the Boolean paradigms
	and their applicability as well as the advanced features and complex
	use cases that our software package allows. Our software is distributed
	via a liberal Open Source license and is freely accessible from http://booleannet.googlecode.com.}
}

@article{Albert2002Statistical,
  author = {Albert, R. and Barab{\'a}si, A.-L.},
  title = {Statistical mechanics of complex networks},
  journal = {Rev. {M}od. {P}hys.},
  year = {2002},
  volume = {74},
  pages = {47--97},
  pdf = {../local/albe02.pdf},
  file = {albe02.pdf:local/albe02.pdf:PDF},
  subject = {compnet},
  url = {http://www.nd.edu/~networks/PDF/rmp.pdf}
}

@article{Albert2000Attack,
  author  = {Albert, R. and Jeong, H. and Barab{\'a}si, A.-L.},
  title   = {Attack and error tolerance in complex networks},
  journal = {Nature},
  volume  = {406},
  pages   = {378--381},
  year    = {2000},
  url     = {http://www.nd.edu/~networks/Papers/nature_attack.pdf},
  pdf     = {../local/albe00.pdf},
  file    = {albe00.pdf:local/albe00.pdf:PDF},
  subject = {compnet}
}

@article{Albert1999Diameter,
  author  = {Albert, R. and Jeong, H. and Barab{\'a}si, A.-L.},
  title   = {Diameter of the {W}orld-{W}ide {W}eb},
  journal = {Nature},
  volume  = {401},
  pages   = {130--131},
  year    = {1999},
  url     = {http://www.nd.edu/~networks/Papers/401130A0.pdf},
  pdf     = {../local/albe99.pdf},
  file    = {albe99.pdf:local/albe99.pdf:PDF},
  subject = {compnet}
}

@book{Alberts2002Molecular,
  title = {Molecular Biology of the Cell},
  publisher = {Garland Science, Taylor \& Francis Group, LLC},
  year = {2002},
  author = {Alberts, B. and Johnson, A. and Lewis, J. and Raff, M. and Roberts,
	K. and Walter, P.},
  edition = {Fourth},
  annote = {Fourth Edition},
  keywords = {csbcbook}
}

@article{Albertson2003Chromosome,
  author      = {Albertson, D. G. and Collins, C. and McCormick, F. and Gray, J. W.},
  title       = {Chromosome aberrations in solid tumors},
  journal     = {Nat. Genet.},
  volume      = {34},
  number      = {4},
  pages       = {369--376},
  month       = {Aug},
  year        = {2003},
  doi         = {10.1038/ng1215},
  url         = {http://dx.doi.org/10.1038/ng1215},
  pii         = {ng1215},
  pmid        = {12923544},
  pdf         = {../local/Albertson2003Chromosome.pdf},
  file        = {Albertson2003Chromosome.pdf:Albertson2003Chromosome.pdf:PDF},
  institution = {Cancer Research Institute, University of California San Francisco,
	San Francisco, California 94143-0808, USA. albertson@cc.ucsf.edu},
  keywords    = {csbcbook},
  owner       = {jp},
  timestamp   = {2009.10.08},
  abstract    = {Chromosome aberrations in human solid tumors are hallmarks of gene
	deregulation and genome instability. This review summarizes current
	knowledge regarding aberrations, discusses their functional importance,
	suggests mechanisms by which aberrations may form during cancer progression
	and provides examples of clinical advances that have come from studies
	of chromosome aberrations.}
}

@article{Albertson2003Genomic,
  author = {Albertson, D. G. and Pinkel, D.},
  title = {Genomic microarrays in human genetic disease and cancer},
  journal = {Hum. Mol. Genet.},
  year = {2003},
  volume = {12},
  pages = {R145--R152},
  number = {Spec No 2},
  month = {Oct},
  abstract = {Alterations in the genome that lead to changes in DNA sequence copy
	number are a characteristic of solid tumors and are found in association
	with developmental abnormalities and/or mental retardation. Comparative
	genomic hybridization (CGH) can be used to detect and map these changes.
	Recent improvements in the resolution and sensitivity of CGH have
	been possible through implementation of microarray-based CGH (array
	CGH). Here we discuss the performance characteristics of different
	array platforms and review some of the recent applications of array
	CGH in cancer and medical genetics.},
  doi = {10.1093/hmg/ddg261},
  pdf = {../local/Albertson2003Genomic.pdf},
  file = {Albertson2003Genomic.pdf:Albertson2003Genomic.pdf:PDF},
  institution = {Department of Laboratory Medicine, University of California San Francisco,
	San Francisco, CA 94143-0808, USA. albertson@cc.ucsf.edu},
  keywords = {csbcbook},
  owner = {jp},
  pii = {ddg261},
  pmid = {12915456},
  timestamp = {2009.10.08},
  url = {http://dx.doi.org/10.1093/hmg/ddg261}
}

@inproceedings{Aldea2007Image,
  author = {Aldea, E. and Atif, J. and Bloch, I.},
  title = {Image classification using marginalized kernels for graphs},
  booktitle = {Graph-Based Representations in Pattern Recognition},
  year = {2007},
  volume = {4538},
  series = {Lecture Notes in Computer Science},
  pages = {103--113},
  publisher = {Springer Berlin / Heidelberg},
  abstract = {We propose in this article an image classification technique based
	on kernel methods and graphs. Our work explores the possibility of
	applying marginalized kernels to image processing. In machine learning,
	performant algorithms have been developed for data organized as real
	valued arrays; these algorithms are used for various purposes like
	classification or regression. However, they are inappropriate for
	direct use on complex data sets. Our work consists of two distinct
	parts. In the first one we model the images by graphs to be able
	to represent their structural properties and inherent attributes.
	In the second one, we use kernel functions to project the graphs
	in a mathematical space that allows the use of performant classification
	algorithms. Experiments are performed on medical images acquired
	with various modalities and concerning different parts of the body.},
  doi = {10.1007/978-3-540-72903-7_10},
  pdf = {../local/Aldea2007Image.pdf},
  file = {Aldea2007Image.pdf:local/Aldea2007Image.pdf:PDF},
  keywords = {image},
  timestamp = {2008.07.29},
  url = {http://dx.doi.org/10.1007/978-3-540-72903-7_10}
}

@article{Alexandersson2003SLAM,
  author = {Alexandersson, M. and Cawley, S. and Pachter, L.},
  title = {{SLAM}: cross-species gene finding and alignment with a generalized
	pair hidden {Markov} model.},
  journal = {Genome {R}es.},
  year = {2003},
  volume = {13},
  pages = {496--502},
  number = {3},
  month = {Mar},
  abstract = {Comparative-based gene recognition is driven by the principle that
	conserved regions between related organisms are more likely than
	divergent regions to be coding. {W}e describe a probabilistic framework
	for gene structure and alignment that can be used to simultaneously
	find both the gene structure and alignment of two syntenic genomic
	regions. {A} key feature of the method is the ability to enhance
	gene predictions by finding the best alignment between two syntenic
	sequences, while at the same time finding biologically meaningful
	alignments that preserve the correspondence between coding exons.
	{O}ur probabilistic framework is the generalized pair hidden {M}arkov
	model, a hybrid of (1). generalized hidden {M}arkov models, which
	have been used previously for gene finding, and (2). pair hidden
	{M}arkov models, which have applications to sequence alignment. {W}e
	have built a gene finding and alignment program called {SLAM}, which
	aligns and identifies complete exon/intron structures of genes in
	two related but unannotated sequences of {DNA}. {SLAM} is able to
	reliably predict gene structures for any suitably related pair of
	organisms, most notably with fewer false-positive predictions compared
	to previous methods (examples are provided for {H}omo sapiens/{M}us
	musculus and {P}lasmodium falciparum/{P}lasmodium vivax comparisons).
	{A}ccuracy is obtained by distinguishing conserved noncoding sequence
	({CNS}) from conserved coding sequence. {CNS} annotation is a novel
	feature of {SLAM} and may be useful for the annotation of {UTR}s,
	regulatory elements, and other noncoding features.},
  doi = {10.1101/gr.424203},
  pdf = {../local/Alexandersson2003SLAM.pdf},
  file = {Alexandersson2003SLAM.pdf:local/Alexandersson2003SLAM.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  pmid = {12618381},
  timestamp = {2006.01.18},
  url = {http://dx.doi.org/10.1101/gr.424203}
}

@article{Algoet1994strong,
  author = {Algoet, P.H.},
  title = {The strong law of large numbers for sequential decisions under uncertainty},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1994},
  volume = {40},
  pages = {609--633},
  number = {3},
  month = {May},
  abstract = {Combines optimization and ergodic theory to characterize the optimum
	long-run average performance that can be asymptotically attained
	by nonanticipating sequential decisions. {L}et $\{X_t\}$ be a stationary
	ergodic process, and suppose an action $b_t$ must be selected in a space
	$\mathcal{B}$ with knowledge of the $t$-past $(X_0, \ldots, X_{t-1})$ at the
	beginning of every period $t \geq 0$. {A}ction $b_t$ will incur a loss
	$l(b_t, X_t)$ at the end of period $t$ when the random variable $X_t$
	is revealed. {T}he author proves under mild integrability conditions
	that the optimum strategy is to select actions that minimize the
	conditional expected loss given the currently available information
	at each step. {T}he minimum long-run average loss per decision can
	be approached arbitrarily closely by strategies that are finite-order
	{M}arkov, and under certain continuity conditions, it is equal to
	the minimum expected loss given the infinite past. {I}f the loss
	$l(b, x)$ is bounded and continuous and if the space $\mathcal{B}$ is compact,
	then the minimum can be asymptotically attained, even if the distribution
	of the process $\{X_t\}$ is unknown a priori and must be learned from
	experience.},
  doi = {10.1109/18.335876},
  pdf = {../local/Algoet1994strong.pdf},
  file = {Algoet1994strong.pdf:local/Algoet1994strong.pdf:PDF},
  keywords = {information-theory},
  owner = {vert},
  url = {http://dx.doi.org/10.1109/18.335876}
}

@inproceedings{Aliferis2002Machine,
  author = {Aliferis, C.F. and Hardin, D.P. and Massion, P.},
  title = {Machine {L}earning {M}odels {F}or {L}ung {C}ancer {C}lassification
	{U}sing {A}rray {C}omparative {G}enomic {H}ybridization},
  booktitle = {Proceedings of the 2002 {A}merican {M}edical {I}nformatics {A}ssociation
	({AMIA}) {A}nnual {S}ymposium},
  year = {2002},
  pages = {7--11},
  abstract = {Array {CGH} is a recently introduced technology that measures changes
	in the gene copy number of hundreds of genes in a single experiment.
	{T}he primary goal of this study was to develop machine learning
	models that classify non-small {L}ung {C}ancers according to histopathology
	types and to compare several machine learning methods in this learning
	task. {DNA} from tumors of 37 patients (21 squamous carcinomas, and
	16 adenocarcinomas) were extracted and hybridized onto a 452 {BAC}
	clone array. {T}he following algorithms were used: {KNN}, {D}ecision
	{T}ree {I}nduction, {S}upport {V}ector {M}achines and {F}eed-{F}orward
	{N}eural {N}etworks. {P}erformance was measured via leave-one-out
	classification accuracy. {T}he best multi-gene model found had a
	leave-one-out accuracy of 89.2\%. {D}ecision {T}rees performed poorer
	than the other methods in this learning task and dataset. {W}e conclude
	that gene copy numbers as measured by array {CGH} are, collectively,
	an excellent indicator of histological subtype. {S}everal interesting
	research directions are discussed.},
  pdf = {../local/Aliferis2002Machine.pdf},
  file = {Aliferis2002Machine.pdf:local/Aliferis2002Machine.pdf:PDF},
  keywords = {biosvm microarray, cgh},
  owner = {jeanphilippevert}
}

@article{Alizadeh2000Distinct,
  author      = {Alizadeh, A. A. and Eisen, M. B. and Davis, R. E. and Ma, C. and
	Lossos, I. S. and Rosenwald, A. and Boldrick, J. C. and Sabet, H.
	and Tran, T. and Yu, X. and Powell, J. I. and Yang, L. and Marti,
	G. E. and Moore, T. and Hudson, J. and Lu, L. and Lewis, D. B. and
	Tibshirani, R. and Sherlock, G. and Chan, W. C. and Greiner, T. C.
	and Weisenburger, D. D. and Armitage, J. O. and Warnke, R. and Levy,
	R. and Wilson, W. and Grever, M. R. and Byrd, J. C. and Botstein,
	D. and Brown, P. O. and Staudt, L. M.},
  title       = {Distinct types of diffuse large {B}-cell lymphoma identified by gene
	expression profiling},
  journal     = {Nature},
  volume      = {403},
  number      = {6769},
  pages       = {503--511},
  month       = {Feb},
  year        = {2000},
  doi         = {10.1038/35000501},
  url         = {http://dx.doi.org/10.1038/35000501},
  pmid        = {10676951},
  pdf         = {../local/Alizadeh2000Distinct.pdf},
  file        = {Alizadeh2000Distinct.pdf:local/Alizadeh2000Distinct.pdf:PDF},
  institution = {Department of Biochemistry, Stanford University School of Medicine,
	California 94305, USA.},
  keywords    = {csbcbook},
  owner       = {jp},
  timestamp   = {2008.11.15},
  abstract    = {Diffuse large B-cell lymphoma (DLBCL), the most common subtype of
	non-Hodgkin's lymphoma, is clinically heterogeneous: 40\% of patients
	respond well to current therapy and have prolonged survival, whereas
	the remainder succumb to the disease. We proposed that this variability
	in natural history reflects unrecognized molecular heterogeneity
	in the tumours. Using DNA microarrays, we have conducted a systematic
	characterization of gene expression in B-cell malignancies. Here
	we show that there is diversity in gene expression among the tumours
	of DLBCL patients, apparently reflecting the variation in tumour
	proliferation rate, host response and differentiation state of the
	tumour. We identified two molecularly distinct forms of DLBCL which
	had gene expression patterns indicative of different stages of B-cell
	differentiation. One type expressed genes characteristic of germinal
	centre B cells ('germinal centre B-like DLBCL'); the second type
	expressed genes normally induced during in vitro activation of peripheral
	blood B cells ('activated B-like DLBCL'). Patients with germinal
	centre B-like DLBCL had a significantly better overall survival than
	those with activated B-like DLBCL. The molecular classification of
	tumours on the basis of gene expression can thus identify previously
	undetected and clinically significant subtypes of cancer.}
}

@article{Alkan2009Personalized,
  author      = {Can Alkan and Jeffrey M Kidd and Tomas Marques-Bonet and Gozde Aksay
	and Francesca Antonacci and Fereydoun Hormozdiari and Jacob O Kitzman
	and Carl Baker and Maika Malig and Onur Mutlu and S. Cenk Sahinalp
	and Richard A Gibbs and Evan E Eichler},
  title       = {Personalized copy number and segmental duplication maps using next-generation
	sequencing.},
  journal     = {Nat. Genet.},
  volume      = {41},
  number      = {10},
  pages       = {1061--1067},
  month       = {Oct},
  year        = {2009},
  doi         = {10.1038/ng.437},
  url         = {http://dx.doi.org/10.1038/ng.437},
  pii         = {ng.437},
  pmid        = {19718026},
  pdf         = {../local/Alkan2009Personalized.pdf},
  file        = {Alkan2009Personalized.pdf:Alkan2009Personalized.pdf:PDF},
  institution = {Department of Genome Sciences, University of Washington School of
	Medicine, Seattle, Washington, USA.},
  keywords    = {ngs},
  owner       = {jp},
  timestamp   = {2009.10.09},
  abstract    = {Despite their importance in gene innovation and phenotypic variation,
	duplicated regions have remained largely intractable owing to difficulties
	in accurately resolving their structure, copy number and sequence
	content. We present an algorithm (mrFAST) to comprehensively map
	next-generation sequence reads, which allows for the prediction of
	absolute copy-number variation of duplicated segments and genes.
	We examine three human genomes and experimentally validate genome-wide
	copy number differences. We estimate that, on average, 73-87 genes
	vary in copy number between any two individuals and find that these
	genic differences overwhelmingly correspond to segmental duplications
	(odds ratio = 135; P < 2.2 x 10(-16)). Our method can distinguish
	between different copies of highly identical genes, providing a more
	accurate assessment of gene content and insight into functional constraint
	without the limitations of array-based technology.}
}

@book{Allgoweri1990Numerical,
  title = {Numerical continuation methods},
  publisher = {Springer},
  year = {1990},
  author = {Allgower, E. L. and Georg, K.},
  isbn = {3-540-12760-7}
}

@article{Almohamad1993linear,
  author = {Almohamad, H.A. and Duffuaa, S.O.},
  title = {A linear programming approach for the weighted graph matching problem},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {1993},
  volume = {15},
  pages = {522--525},
  number = {5},
  month = {May},
  doi = {10.1109/34.211474},
  pdf = {../local/Almohamad1993linear.pdf},
  file = {Almohamad1993linear.pdf:Almohamad1993linear.pdf:PDF},
  owner = {jp},
  timestamp = {2008.10.05},
  url = {http://dx.doi.org/10.1109/34.211474}
}

@article{Alon1998Finding,
  author    = {Alon, N. and Krivelevich, M. and Sudakov, B.},
  title     = {Finding a large hidden clique in a random graph},
  journal   = {Random Struct. Algorithm.},
  volume    = {13},
  pages     = {457--466},
  year      = {1998},
  doi       = {10.1002/(SICI)1098-2418(199810/12)13:3/4<457::AID-RSA14>3.0.CO;2-W},
  url       = {http://dx.doi.org/10.1002/(SICI)1098-2418(199810/12)13:3/4<457::AID-RSA14>3.0.CO;2-W},
  pdf       = {../local/Alon1998Finding.pdf},
  file      = {Alon1998Finding.pdf:Alon1998Finding.pdf:PDF},
  owner     = {jp},
  timestamp = {2013.01.07}
}

@article{Alon2007Network,
  author      = {Alon, U.},
  title       = {Network motifs: theory and experimental approaches.},
  journal     = {Nat Rev Genet},
  volume      = {8},
  number      = {6},
  pages       = {450--461},
  month       = {Jun},
  year        = {2007},
  doi         = {10.1038/nrg2102},
  url         = {http://dx.doi.org/10.1038/nrg2102},
  pii         = {nrg2102},
  pmid        = {17510665},
  pdf         = {../local/Alon2007Network.pdf},
  file        = {Alon2007Network.pdf:Alon2007Network.pdf:PDF},
  institution = {Department of Molecular Cell Biology, Weizmann Institute of Science,
	Rehovot 76100, Israel. urialon@weizmann.ac.il},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {Andrei Zinovyev},
  timestamp   = {2011.04.08},
  abstract    = {Transcription regulation networks control the expression of genes.
	The transcription networks of well-studied microorganisms appear
	to be made up of a small set of recurring regulation patterns, called
	network motifs. The same network motifs have recently been found
	in diverse organisms from bacteria to humans, suggesting that they
	serve as basic building blocks of transcription networks. Here I
	review network motifs and their functions, with an emphasis on experimental
	studies. Network motifs in other biological networks are also mentioned,
	including signalling and neuronal networks.}
}

@article{Alter2000Singular,
  author      = {Alter, O. and Brown, P. O. and Botstein, D.},
  title       = {Singular value decomposition for genome-wide expression data processing
	and modeling.},
  journal     = {Proc Natl Acad Sci U S A},
  volume      = {97},
  number      = {18},
  pages       = {10101--10106},
  month       = {Aug},
  year        = {2000},
  doi         = {10.1073/pnas.97.18.10101},
  url         = {http://dx.doi.org/10.1073/pnas.97.18.10101},
  pii         = {97/18/10101},
  pmid        = {10963673},
  pdf         = {../local/Alter2000Singular.pdf},
  file        = {Alter2000Singular.pdf:Alter2000Singular.pdf:PDF},
  institution = {Departments of Genetics and Biochemistry, Stanford University, Stanford,
	CA 94305, USA. orly@genome.stanford.edu},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2011.12.30},
  abstract    = {We describe the use of singular value decomposition in transforming
	genome-wide expression data from genes x arrays space to reduced
	diagonalized "eigengenes" x "eigenarrays" space, where the eigengenes
	(or eigenarrays) are unique orthonormal superpositions of the genes
	(or arrays). Normalizing the data by filtering out the eigengenes
	(and eigenarrays) that are inferred to represent noise or experimental
	artifacts enables meaningful comparison of the expression of different
	genes across different arrays in different experiments. Sorting the
	data according to the eigengenes and eigenarrays gives a global picture
	of the dynamics of gene expression, in which individual genes and
	arrays appear to be classified into groups of similar regulation
	and function, or similar cellular state and biological phenotype,
	respectively. After normalization and sorting, the significant eigengenes
	and eigenarrays can be associated with observed genome-wide effects
	of regulators, or with measured samples, in which these regulators
	are overactive or underactive, respectively.}
}

@article{Altschul1990basic,
  author  = {S.F. Altschul and W.~Gish and W.~Miller and E.W. Myers and D.J. Lipman},
  title   = {A basic local alignment search tool},
  journal = {J. {M}ol. {B}iol.},
  volume  = {215},
  pages   = {403--410},
  year    = {1990}
}

@article{Altschul1997Gapped,
  author = {S.F. Altschul and T.L. Madden and A.A. Sch{\"a}ffer and J. Zhang and
	Z. Zhang and W. Miller and D.J. Lipman},
  title = {Gapped {BLAST} and {PSI}-{BLAST}: {A} new generation of protein database
	search programs},
  journal = {Nucleic {A}cids {R}esearch},
  year = {1997},
  volume = {25},
  pages = {3389--3402},
  number = {17},
  pdf = {../local/alts97.pdf},
  file = {alts97.pdf:local/alts97.pdf:PDF},
  subject = {biocasp},
  url = {http://nar.oupjournals.org/cgi/reprint/25/17/3389.pdf}
}

@inproceedings{Altun2003Large,
  author = {Altun, Y. and Hofmann, T.},
  title = {Large {M}argin {M}ethods for {L}abel {S}equence {L}earning},
  booktitle = {8th {E}uropean {C}onference on {S}peech {C}ommunication and {T}echnology
	({E}uro{S}peech)},
  year = {2003},
  abstract = {Label sequence learning is the problem of inferring a state sequence
	from an observation sequence, where the state sequence may encode
	a labeling, annotation or segmentation of the sequence. {I}n this
	paper we give an overview of discriminative methods developed for
	this problem. {S}pecial emphasis is put on large margin methods by
	generalizing multiclass {S}upport {V}ector {M}achines and {A}da{B}oost
	to the case of label sequences. {A}n experimental evaluation demonstrates
	the advantages over classical approaches like {H}idden {M}arkov {M}odels
	and the competitiveness with methods like {C}onditional {R}andom
	{F}ields.},
  pdf = {../local/Altun2003Large.pdf},
  file = {Altun2003Large.pdf:local/Altun2003Large.pdf:PDF},
  keywords = {conditional-random-field},
  owner = {vert}
}

@inproceedings{Altun2004Gaussian,
  author = {Altun, Y. and Hofmann, T. and Smola, A. J.},
  title = {Gaussian process classification for segmenting and annotating sequences},
  booktitle = {Proceedings of the Twenty-First International Conference on Machine
	Learning ({ICML})},
  year = {2004},
  publisher = {ACM Press},
  abstract = {Many real-world classification tasks involve the prediction of multiple,
	inter-dependent class labels. {A} prototypical case of this sort
	deals with prediction of a sequence of labels for a sequence of observations.
	{S}uch problems arise naturally in the context of annotating and
	segmenting observation sequences. {T}his paper generalizes {G}aussian
	{P}rocess classification to predict multiple labels by taking dependencies
	between neighboring labels into account. {O}ur approach is motivated
	by the desire to retain rigorous probabilistic semantics, while overcoming
	limitations of parametric methods like {C}onditional {R}andom {F}ields,
	which exhibit conceptual and computational difficulties in high-dimensional
	input spaces. {E}xperiments on named entity recognition and pitch
	accent prediction tasks demonstrate the competitiveness of our approach.},
  doi = {10.1145/1015330.1015433},
  pdf = {../local/Altun2004Gaussian.pdf},
  file = {Altun2004Gaussian.pdf:local/Altun2004Gaussian.pdf:PDF},
  isbn = {1-58113-828-5},
  keywords = {conditional-random-field},
  location = {Banff, Alberta, Canada},
  url = {http://doi.acm.org/10.1145/1015330.1015433}
}

@inproceedings{Altun2004Exponential,
  author = {Altun, Y. and Smola, A. and Hofmann, T.},
  title = {Exponential {F}amilies for {C}onditional {R}andom {F}ields},
  booktitle = {20th {C}onference on {U}ncertainty in {A}rtificial {I}ntelligence},
  year = {2004},
  pdf = {../local/Altun2004Exponential.pdf},
  file = {Altun2004Exponential.pdf:local/Altun2004Exponential.pdf:PDF},
  keywords = {conditional-random-field},
  owner = {vert}
}

@article{Amaral2000Classes,
  author = {Amaral, L. A. N. and Scala, A. and Barth{\'e}l{\'e}my, M. and
	Stanley, H. E.},
  title = {Classes of small-world networks},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2000},
  volume = {97},
  pages = {11149--11152},
  number = {21},
  doi = {10.1073/pnas.200327197},
  pdf = {../local/amar00.pdf},
  file = {amar00.pdf:local/amar00.pdf:PDF},
  subject = {compnet},
  url = {http://www.pnas.org/cgi/content/full/97/21/11149}
}

@article{Amari2001Information,
  author = {Amari, S.-I.},
  title = {Information geometry on hierarchy of probability distributions},
  journal = {{IEEE} Trans. Inform. Theory},
  year = {2001},
  volume = {47},
  pages = {1701--1711},
  number = {5},
  month = jul,
  pdf = {../local/amar01.pdf},
  file = {amar01.pdf:local/amar01.pdf:PDF},
  subject = {stat},
  url = {http://www.islab.brain.riken.go.jp/~amari/pub/IGHI.ps.gz}
}

@article{Amari1998Natural,
  author = {Amari, S.-I.},
  title = {Natural {G}radient {W}orks {E}fficiently in {L}earning},
  journal = {Neural {C}omputation},
  year = {1998},
  volume = {10},
  pages = {251--276},
  number = {2},
  pdf = {../local/amar98.pdf},
  file = {amar98.pdf:local/amar98.pdf:PDF},
  subject = {ml},
  url = {http://www.islab.brain.riken.go.jp/amari/pub/am45-2.ps.gz}
}

@book{Amari2001Methods,
  author = {Amari, S.-I. and Nagaoka, H.},
  title = {Methods of Information Geometry},
  publisher = {American Mathematical Society},
  series = {Translations of Mathematical Monographs},
  volume = {191},
  year = {2001}
}

@article{Amari1999Improving,
  author = {Amari, S.-I. and Wu, S.},
  title = {Improving support vector machine classifiers by modifying kernel
	functions},
  journal = {Neural {N}etworks},
  year = {1999},
  volume = {12},
  pages = {783--789},
  number = {6},
  month = jul,
  abstract = {We propose a method of modifying a kernel function to improve the
	performance of a support vector machine classifier. {T}his is based
	on the structure of the {R}iemannian geometry induced by the kernel
	function. {T}he idea is to enlarge the spatial resolution around
	the separating boundary surface, by a conformal mapping, such that
	the separability between classes is increased. {E}xamples are given
	specifically for modifying {G}aussian {R}adial {B}asis {F}unction
	kernels. {S}imulation results for both artificial and real data show
	remarkable improvement of generalization errors, supporting our idea.},
  pdf = {../local/amar99.pdf},
  file = {amar99.pdf:local/amar99.pdf:PDF},
  subject = {kernel},
  url = {http://www.islab.brain.riken.go.jp/wusi/GKSVM.ps}
}

@article{Amarzguioui2004algorithm,
  author = {Amarzguioui, M. and Prydz, H.},
  title = {An algorithm for selection of functional si{RNA} sequences.},
  journal = {Biochem. {B}iophys. {R}es. {C}ommun.},
  year = {2004},
  volume = {316},
  pages = {1050--1058},
  number = {4},
  month = apr,
  abstract = {Randomly designed si{RNA} targeting different positions within the
	same m{RNA} display widely differing activities. {W}e have performed
	a statistical analysis of 46 si{RNA}, identifying various features
	of the 19bp duplex that correlate significantly with functionality
	at the 70\% knockdown level and verified these results against an
	independent data set of 34 si{RNA} recently reported by others. {F}eatures
	that consistently correlated positively with functionality across
	the two data sets included an asymmetry in the stability of the duplex
	ends (measured as the {A}/{U} differential of the three terminal
	basepairs at either end of the duplex) and the motifs {S}1, {A}6,
	and {W}19. {T}he presence of the motifs {U}1 or {G}19 was associated
	with lack of functionality. {A} selection algorithm based on these
	findings strongly differentiated between the two functional groups
	of si{RNA} in both data sets and proved highly effective when used
	to design si{RNA} targeting new endogenous human genes.},
  doi = {10.1016/j.bbrc.2004.02.157},
  keywords = {sirna},
  pii = {S0006291X04004425},
  url = {http://dx.doi.org/10.1016/j.bbrc.2004.02.157}
}

@article{Amato2006multi-step,
  author = {Amato, R. and Ciaramella, A. and Deniskina, N. and Del Mondo, C. and
	di Bernardo, D. and Donalek, C. and Longo, G. and Mangano, G. and
	Miele, G. and Raiconi, G. and Staiano, A. and Tagliaferri, R.},
  title = {A multi-step approach to time series analysis and gene expression
	clustering.},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {589--596},
  number = {5},
  month = mar,
  abstract = {MOTIVATION: The huge growth in gene expression data calls for the
	implementation of automatic tools for data processing and interpretation.
	RESULTS: We present a new and comprehensive machine learning data
	mining framework consisting in a non-linear PCA neural network for
	feature extraction, and probabilistic principal surfaces combined
	with an agglomerative approach based on Negentropy aimed at clustering
	gene microarray data. The method, which provides a user-friendly
	visualization interface, can work on noisy data with missing points
	and represents an automatic procedure to get, with no a priori assumptions,
	the number of clusters present in the data. Cell-cycle dataset and
	a detailed analysis confirm the biological nature of the most significant
	clusters. AVAILABILITY: The software described here is a subpackage
	part of the ASTRONEURAL package and is available upon request from
	the corresponding author. SUPPLEMENTARY INFORMATION: Supplementary
	data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btk026},
  institution = {Dipartimento di Scienze Fisiche, University of Naples Federico II,
	Naples, Italy.},
  owner = {fantine},
  pii = {btk026},
  pmid = {16397005},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/bioinformatics/btk026}
}

@inproceedings{Ambauen2003Graph,
  author = {Ambauen, R. and Fischer, S. and Bunke, H.},
  title = {Graph Edit Distance with Node Splitting and Merging, and Its Application
	to Diatom Identification},
  booktitle = {{GbRPR}},
  year = {2003},
  series = {Lecture Notes in Computer Science},
  volume = {2726},
  pages = {95--106},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=2726{\&}spage=95}
}

@article{Ambroise2002Selection,
  author = {Ambroise, C. and McLachlan, G. J.},
  title = {Selection bias in gene extraction on the basis of microarray gene-expression
	data},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2002},
  volume = {99},
  pages = {6562--6566},
  number = {10},
  abstract = {In the context of cancer diagnosis and treatment, we consider the
	problem of constructing an accurate prediction rule on the basis
	of a relatively small number of tumor tissue samples of known type
	containing the expression data on very many (possibly thousands)
	genes. {R}ecently, results have been presented in the literature
	suggesting that it is possible to construct a prediction rule from
	only a few genes such that it has a negligible prediction error rate.
	{H}owever, in these results the test error or the leave-one-out cross-validated
	error is calculated without allowance for the selection bias. {T}here
	is no allowance because the rule is either tested on tissue samples
	that were used in the first instance to select the genes being used
	in the rule or because the cross-validation of the rule is not external
	to the selection process; that is, gene selection is not performed
	in training the rule at each stage of the cross-validation process.
	{W}e describe how in practice the selection bias can be assessed
	and corrected for by either performing a cross-validation or applying
	the bootstrap external to the selection process. {W}e recommend using
	10-fold rather than leave-one-out cross-validation, and concerning
	the bootstrap, we suggest using the so-called .632+ bootstrap error
	estimate designed to handle overfitted prediction rules. {U}sing
	two published data sets, we demonstrate that when correction is made
	for the selection bias, the cross-validated error is no longer zero
	for a subset of only a few genes.},
  pdf = {../local/Ambroise2002Selection.pdf},
  file = {Ambroise2002Selection.pdf:local/Ambroise2002Selection.pdf:PDF},
  keywords = {featureselection biosvm},
  owner = {jeanphilippevert},
  url = {http://www.pnas.org/cgi/content/abstract/99/10/6562}
}

@article{Ameres2007Molecular,
  author = {Ameres, Stefan Ludwig and Martinez, Javier and Schroeder, Ren{\'e}e},
  title = {Molecular basis for target {RNA} recognition and cleavage by human
	{RISC}.},
  journal = {Cell},
  year = {2007},
  volume = {130},
  pages = {101--112},
  number = {1},
  month = jul,
  abstract = {The RNA-Induced Silencing Complex (RISC) is a ribonucleoprotein particle
	composed of a single-stranded short interfering RNA (siRNA) and an
	endonucleolytically active Argonaute protein, capable of cleaving
	mRNAs complementary to the siRNA. The mechanism by which RISC cleaves
	a target RNA is well understood, however it remains enigmatic how
	RISC finds its target RNA. Here, we show, both in vitro and in vivo,
	that the accessibility of the target site correlates directly with
	the efficiency of cleavage, demonstrating that RISC is unable to
	unfold structured RNA. In the course of target recognition, RISC
	transiently contacts single-stranded RNA nonspecifically and promotes
	siRNA-target RNA annealing. Furthermore, the 5' part of the siRNA
	within RISC creates a thermodynamic threshold that determines the
	stable association of RISC and the target RNA. We therefore provide
	mechanistic insights by revealing features of RISC and target RNAs
	that are crucial to achieve efficiency and specificity in RNA interference.},
  doi = {10.1016/j.cell.2007.04.037},
  pdf = {../local/Ameres2007Molecular.pdf},
  file = {Ameres2007Molecular.pdf:Ameres2007Molecular.pdf:PDF},
  institution = {Max F. Perutz Laboratories, University of Vienna, Vienna, Austria.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0092-8674(07)00583-1},
  pmid = {17632058},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1016/j.cell.2007.04.037}
}

@inproceedings{Amit2007Uncovering,
  author = {Amit, Yonatan and Fink, Michael and Srebro, Nathan and Ullman, Shimon},
  title = {Uncovering shared structures in multiclass classification},
  booktitle = {{ICML} '07: Proceedings of the 24th international conference on Machine
	learning},
  year = {2007},
  pages = {17--24},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1273496.1273499},
  isbn = {978-1-59593-793-3},
  location = {Corvallis, Oregon}
}

@article{Anderson2003new,
  author = {Anderson, D. C. and Li, W. and Payan, D. G. and Noble, W. S.},
  title = {A new algorithm for the evaluation of shotgun peptide sequencing
	in proteomics: support vector machine classification of peptide {{MS}/{MS}}
	spectra and {SEQUEST} scores.},
  journal = {J {P}roteome {R}es},
  year = {2003},
  volume = {2},
  pages = {137--146},
  number = {2},
  abstract = {Shotgun tandem mass spectrometry-based peptide sequencing using programs
	such as {SEQUEST} allows high-throughput identification of peptides,
	which in turn allows the identification of corresponding proteins.
	{W}e have applied a machine learning algorithm, called the support
	vector machine, to discriminate between correctly and incorrectly
	identified peptides using {SEQUEST} output. {E}ach peptide was characterized
	by {SEQUEST}-calculated features such as delta {C}n and {X}corr,
	measurements such as precursor ion current and mass, and additional
	calculated parameters such as the fraction of matched {MS}/{MS} peaks.
	{T}he trained {SVM} classifier performed significantly better than
	previous cutoff-based methods at separating positive from negative
	peptides. {P}ositive and negative peptides were more readily distinguished
	in training set data acquired on a {QTOF}, compared to an ion trap
	mass spectrometer. {T}he use of 13 features, including four new parameters,
	significantly improved the separation between positive and negative
	peptides. {U}se of the support vector machine and these additional
	parameters resulted in a more accurate interpretation of peptide
	{MS}/{MS} spectra and is an important step toward automated interpretation
	of peptide tandem mass spectrometry data in proteomics.},
  pdf = {../local/Anderson2003new.pdf},
  file = {Anderson2003new.pdf:local/Anderson2003new.pdf:PDF},
  keywords = {biosvm proteomics},
  owner = {jeanphilippevert}
}

@article{Andersson2007Multivariate,
  author = {Andersson, C. D. and Thysell, E. and Lindstr{\"o}m, A. and Bylesj{\"o}, M.
	and Raubacher, F. and Linusson, A.},
  title = {A multivariate approach to investigate docking parameters' effects
	on docking performance.},
  journal = {J. Chem. Inform. Model.},
  year = {2007},
  volume = {47},
  pages = {1673--1687},
  number = {4},
  abstract = {Increasingly powerful docking programs for analyzing and estimating
	the strength of protein-ligand interactions have been developed in
	recent decades, and they are now valuable tools in drug discovery.
	Software used to perform dockings relies on a number of parameters
	that affect various steps in the docking procedure. However, identifying
	the best choices of the settings for these parameters is often challenging.
	Therefore, the settings of the parameters are quite often left at
	their default values, even though scientists with long experience
	with a specific docking tool know that modifying certain parameters
	can improve the results. In the study presented here, we have used
	statistical experimental design and subsequent regression based on
	root-mean-square deviation values using partial least-square projections
	to latent structures (PLS) to scrutinize the effects of different
	parameters on the docking performance of two software packages: FRED
	and GOLD. Protein-ligand complexes with a high level of ligand diversity
	were selected from the PDBbind database for the study, using principal
	component analysis based on 1D and 2D descriptors, and space-filling
	design. The PLS models showed quantitative relationships between
	the docking parameters and the ability of the programs to reproduce
	the ligand crystallographic conformation. The PLS models also revealed
	which of the parameters and what parameter settings were important
	for the docking performance of the two programs. Furthermore, the
	variation in docking results obtained with specific parameter settings
	for different protein-ligand complexes in the diverse set examined
	indicates that there is great potential for optimizing the parameter
	settings for selected sets of proteins.},
  doi = {10.1021/ci6005596},
  institution = {Department of Chemistry, Ume{\aa} University, SE-901 87 Ume{\aa}, Sweden.},
  owner = {bricehoffmann},
  pmid = {17559207},
  timestamp = {2009.02.13},
  url = {http://dx.doi.org/10.1021/ci6005596}
}

@article{Ando2005A,
  author = {Ando, Rie Kubota and Zhang, Tong},
  title = {A framework for learning predictive structures from multiple tasks
	and unlabeled data},
  journal = {Journal of Machine Learning Research},
  year = {2005},
  volume = {6},
  pages = {1817--1853}
}

@article{Andrea1991Applications,
  author = {Andrea, T. A. and Kalayeh, H.},
  title = {Applications of neural networks in quantitative structure-activity
	relationships of dihydrofolate reductase inhibitors.},
  journal = {J. Med. Chem.},
  year = {1991},
  volume = {34},
  pages = {2824--2836},
  number = {9},
  month = sep,
  abstract = {Back propagation neural networks is a new technology useful for modeling
	nonlinear functions of several variables. This paper explores their
	applications in the field of quantitative structure-activity relationships.
	In particular, their ability to fit biological activity surfaces,
	predict activity, and determine the "functional forms" of its dependence
	on physical properties is compared to well-established methods in
	the field. A dataset of 256 5-phenyl-3,4-diamino-6,6-dimethyldihydrotriazines
	that inhibit dihydrofolate reductase enzyme is used as a basis for
	comparison. It is found that neural networks lead to enhanced surface
	fits and predictions relative to standard regression methods. Moreover,
	they circumvent the need for ad hoc indicator variables, which account
	for a significant part of the variance in linear regression models.
	Additionally, they lead to the elucidation of nonlinear and "cross-products"
	effects that correspond to trade-offs between physical properties
	in their effect on biological activity. This is the first demonstration
	of the latter two findings. On the other hand, due to the complexity
	of the resulting models, an understanding of the local, but not the
	global, structure-activity relationships is possible. The latter
	must await further developments. Furthermore, the longer computational
	time required to train the networks is somewhat inconveniencing,
	although not restrictive.},
  keywords = {Animals, Carcinoma 256, Cultured, Experimental, Folic Acid Antagonists,
	Leukemia, Neural Pathways, Neurons, Regression Analysis, Structure-Activity
	Relationship, Tumor Cells, Walker, 1895302},
  owner = {mahe},
  pmid = {1895302},
  timestamp = {2006.09.06}
}

@inproceedings{Andrews2002Multiple,
  author = {Andrews, S. and Hofmann, T. and Tsochantaridis, I.},
  title = {Multiple {I}nstance {L}earning with {G}eneralized {S}upport {V}ector
	{M}achines},
  booktitle = {Proceedings of the {E}ighteenth {N}ational {C}onference on {A}rtificial
	{I}ntelligence},
  year = {2002},
  pages = {943--944},
  publisher = {American Association for Artificial Intelligence},
  keywords = {kernel-theory},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@article{Anguita2003Quantum,
  author = {Anguita, Davide and Ridella, Sandro and Rivieccio, Fabio and
	Zunino, Rodolfo},
  title = {Quantum optimization for training support vector machines.},
  journal = {Neural {N}etw.},
  year = {2003},
  volume = {16},
  pages = {763--770},
  number = {5-6},
  abstract = {Refined concepts, such as {R}ademacher estimates of model complexity
	and nonlinear criteria for weighting empirical classification errors,
	represent recent and promising approaches to characterize the generalization
	ability of {S}upport {V}ector {M}achines ({SVM}s). {T}he advantages
	of those techniques lie in both improving the {SVM} representation
	ability and yielding tighter generalization bounds. {O}n the other
	hand, they often make {Q}uadratic-{P}rogramming algorithms no longer
	applicable, and {SVM} training cannot benefit from efficient, specialized
	optimization techniques. {T}he paper considers the application of
	{Q}uantum {C}omputing to solve the problem of effective {SVM} training,
	especially in the case of digital implementations. {T}he presented
	research compares the behavioral aspects of conventional and enhanced
	{SVM}s; experiments in both a synthetic and real-world problems support
	the theoretical analysis. {A}t the same time, the related differences
	between {Q}uadratic-{P}rogramming and {Q}uantum-based optimization
	techniques are considered.},
  doi = {10.1016/S0893-6080(03)00087-X},
  pdf = {../local/Anguita2003Quantum.pdf},
  file = {Anguita2003Quantum.pdf:local/Anguita2003Quantum.pdf:PDF},
  pii = {S089360800300087X},
  url = {http://dx.doi.org/10.1016/S0893-6080(03)00087-X}
}

@article{Anstreicheir2001new,
  author = {Anstreicher, K. M. and Brixius, N. W.},
  title = {A New Bound for the Quadratic Assignment Problem Based on Convex
	Quadratic Programming},
  journal = {Math. Program.},
  volume = {89},
  number = {3},
  pages = {341--357},
  year = {2001}
}

@article{Antes2006DynaPred,
  author = {Antes, Iris and Siu, Shirley W. I. and Lengauer, Thomas},
  title = {{DynaPred}: a structure and sequence based method for the prediction
	of {MHC} class {I} binding peptide sequences and conformations.},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {e16--e24},
  number = {14},
  month = jul,
  abstract = {MOTIVATION: The binding of endogenous antigenic peptides to MHC class
	I molecules is an important step during the immunologic response
	of a host against a pathogen. Thus, various sequence- and structure-based
	prediction methods have been proposed for this purpose. The sequence-based
	methods are computationally efficient, but are hampered by the need
	of sufficient experimental data and do not provide a structural interpretation
	of their results. The structural methods are data-independent, but
	are quite time-consuming and thus not suited for screening of whole
	genomes. Here, we present a new method, which performs sequence-based
	prediction by incorporating information obtained from molecular modeling.
	This allows us to perform large databases screening and to provide
	structural information of the results. RESULTS: We developed a SVM-trained,
	quantitative matrix-based method for the prediction of MHC class
	I binding peptides, in which the features of the scoring matrix are
	energy terms retrieved from molecular dynamics simulations. At the
	same time we used the equilibrated structures obtained from the same
	simulations in a simple and efficient docking procedure. Our method
	consists of two steps: First, we predict potential binders from sequence
	data alone and second, we construct protein-peptide complexes for
	the predicted binders. So far, we tested our approach on the HLA-A0201
	allele. We constructed two prediction models, using local, position-dependent
	(DynaPred(POS)) and global, position-independent (DynaPred) features.
	The former model outperformed the two sequence-based methods used
	in our evaluation; the latter shows a much higher generalizability
	towards other alleles than the position-dependent models. The constructed
	peptide structures can be refined within seconds to structures with
	an average backbone RMSD of 1.53 A from the corresponding experimental
	structures.},
  doi = {10.1093/bioinformatics/btl216},
  keywords = {immunoinformatics},
  pii = {22/14/e16},
  pmid = {16873467},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl216}
}

@article{Aoki2005score,
  author = {Aoki, K. F. and Mamitsuka, H. and Akutsu, T. and Kanehisa, M.},
  title = {A score matrix to reveal the hidden links in glycans},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1457--1463},
  number = {8},
  month = apr,
  abstract = {M{OTIVATION}: {G}lycans are the third major class of biomolecules
	following {DNA} and proteins. {T}hey are extremely vital for the
	functioning of multicellular organisms. {H}owever, comparing the
	fast development of sequence analysis techniques, informatics work
	on glycans have a long way to go. {A}lignment algorithms for glycan
	tree structures are one of the foremost concerns. {I}n addition,
	the statistical analysis of these algorithms in terms of biological
	significance needs to be addressed. {RESULTS}: {W}e developed a tree-structure
	alignment algorithm for glycans and performed a statistical analysis
	of these alignment scores such that biologically interesting features
	could be captured into a score matrix for glycans. {W}e generated
	our score matrix in a manner similar to {BLOSUM}, but with slight
	variations to accomodate our glycan data, including the incorporation
	of linkage information. {W}e verified the effectiveness of our new
	glycan score matrix by illustrating how well the resulting score
	matrix entries correspond with biological knowledge. {F}uture work
	for even better improvements with the use of a variety of score matrices
	for different subclasses of glycans due to their complexity is also
	discussed. {CONTACT}: mami@kuicr.kyoto-u.ac.jp {SUPPLEMENTARY} {INFORMATION}:
	{T}he glycan score matrix can be downloaded from http://kanehisa.kuicr.kyoto-u.ac.jp/{P}aper/kcam/glycan{M}atrix0.1.txt.},
  doi = {10.1093/bioinformatics/bti193},
  keywords = {glycans},
  pii = {bti193},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti193}
}

@article{Aoki2004KCaM,
  author = {Aoki, K. F. and Yamaguchi, A. and Ueda, N. and Akutsu, T. and Mamitsuka,
	H. and Goto, S. and Kanehisa, M.},
  title = {K{C}a{M} ({KEGG} {C}arbohydrate {M}atcher): a software tool for analyzing
	the structures of carbohydrate sugar chains.},
  journal = {Nucleic {A}cids {R}es.},
  year = {2004},
  volume = {32},
  pages = {W267--W272},
  number = {Web Server issue},
  month = jul,
  abstract = {K{C}a{M} ({KEGG} {C}arbohydrate {M}atcher) is a tool for the analysis
	of carbohydrate sugar chains, or glycans. {I}t consists of a web-based
	graphical user interface that allows users to enter glycans easily
	with the mouse. {T}he glycan structure is then transformed into our
	{KCF} ({KEGG} {C}hemical {F}unction) file format and sent to our
	program which implements an efficient tree-structure alignment algorithm,
	similar to sequence alignment algorithms but for branched tree structures.
	{U}sers can also retrieve glycan tree structures in {KCF} format
	from their local computers for visualization over the web. {T}he
	tree-matching algorithm provides several options for performing different
	types of tree-matching procedures on glycans. {T}hese options consist
	of whether to incorporate gaps in a match, whether to take the linkage
	information into consideration and local versus global alignment.
	{T}he results of this program are returned as a list of glycan structures
	in order of similarity based on these options. {T}he actual alignment
	can be viewed graphically, and the annotation information can also
	be viewed easily since all this information is linked with {KEGG}'s
	comprehensive suite of genomic data. {A}nalogously to {BLAST}, users
	are thus able to compare glycan structures of interest with glycans
	from different glycan databases using a variety of tree-alignment
	options. {KC}a{M} is currently available at http://glycan.genome.ad.jp.},
  doi = {10.1093/nar/gkh473},
  keywords = {glycans},
  pii = {32/suppl_2/W267},
  url = {http://dx.doi.org/10.1093/nar/gkh473}
}

@article{Aouba2007Les,
  author = {Aouba, A. and P{\'e}quignot, F. and Le Toullec, A. and Jougla, E.},
  title = {Les causes m{\'e}dicales de d{\'e}c{\`e}s en {France} en 2004 et leur
	{\'e}volution 1980-2004},
  journal = {Bulletin {\'e}pid{\'e}miologique hebdomadaire},
  year = {2007},
  volume = {35-36},
  pages = {308--314},
  pdf = {../local/Aouba2007Les.pdf},
  file = {Aouba2007Les.pdf:Aouba2007Les.pdf:PDF},
  keywords = {csbcbook},
  url = {http://www.invs.sante.fr/beh/2007/35_36/beh_35_36_2007.pdf}
}

@article{Aoyama1990Neural,
  author = {Aoyama, T. and Suzuki, Y. and Ichikawa, H.},
  title = {Neural networks applied to quantitative structure-activity relationship
	analysis.},
  journal = {J. Med. Chem.},
  year = {1990},
  volume = {33},
  pages = {2583--2590},
  number = {9},
  month = sep,
  abstract = {An application of the neural network to quantitative structure-activity
	relationship (QSAR) analysis has been studied. The new method was
	compared with the linear multiregression analysis in various ways.
	It was found that the neural network can be a potential tool in the
	routine work of QSAR analysis. The mathematical relationship of operation
	between the neural network and the multiregression analysis was described.
	It was shown that the neural network can exceed the level of the
	linear multiregression analysis.},
  keywords = {Animals, Azirines, Benzodiazepines, Carbazilquinone, Comparative Study,
	Nerve Net, Nervous System, Regression Analysis, Structure-Activity
	Relationship, 2202830},
  owner = {mahe},
  pmid = {2202830},
  timestamp = {2006.09.07}
}

@article{Aphinyanaphongs2005Text,
  author = {Aphinyanaphongs, Yindalon and Tsamardinos, Ioannis and Statnikov, Alexander
	and Hardin, Douglas and Aliferis, Constantin F.},
  title = {Text categorization models for high-quality article retrieval in
	internal medicine.},
  journal = {J. {A}m. {M}ed. {I}nform. {A}ssoc.},
  year = {2005},
  volume = {12},
  pages = {207--216},
  number = {2},
  abstract = {O{BJECTIVE} {F}inding the best scientific evidence that applies to
	a patient problem is becoming exceedingly difficult due to the exponential
	growth of medical publications. {T}he objective of this study was
	to apply machine learning techniques to automatically identify high-quality,
	content-specific articles for one time period in internal medicine
	and compare their performance with previous {B}oolean-based {P}ub{M}ed
	clinical query filters of {H}aynes et al. {DESIGN} {T}he selection
	criteria of the {ACP} {J}ournal {C}lub for articles in internal medicine
	were the basis for identifying high-quality articles in the areas
	of etiology, prognosis, diagnosis, and treatment. {N}aive {B}ayes,
	a specialized {A}da{B}oost algorithm, and linear and polynomial support
	vector machines were applied to identify these articles. {MEASUREMENTS}
	{T}he machine learning models were compared in each category with
	each other and with the clinical query filters using area under the
	receiver operating characteristic curves, 11-point average recall
	precision, and a sensitivity/specificity match method. {RESULTS}
	{I}n most categories, the data-induced models have better or comparable
	sensitivity, specificity, and precision than the clinical query filters.
	{T}he polynomial support vector machine models perform the best among
	all learning methods in ranking the articles as evaluated by area
	under the receiver operating curve and 11-point average recall precision.
	{CONCLUSION} {T}his research shows that, using machine learning methods,
	it is possible to automatically build models for retrieving high-quality,
	content-specific articles using inclusion or citation by the {ACP}
	{J}ournal {C}lub as a gold standard in a given time period in internal
	medicine that perform better than the 1994 {P}ub{M}ed clinical query
	filters.},
  doi = {10.1197/jamia.M1641},
  pdf = {../local/Aphinyanaphongs2005Text.pdf},
  file = {Aphinyanaphongs2005Text.pdf:local/Aphinyanaphongs2005Text.pdf:PDF},
  keywords = {biosvm nlp},
  pii = {M1641},
  url = {http://dx.doi.org/10.1197/jamia.M1641}
}

@book{Applegate06Traveling,
  title = {The Traveling Salesman Problem: A Computational Study},
  publisher = {Princeton University Press},
  year = {2007},
  author = {Applegate, D. L. and Bixby, R. E. and Chv{\'a}tal, V. and Cook, W. J.},
  series = {Princeton Series in Applied Mathematics},
  month = jan,
  abstract = {This book presents the latest findings on one of the most intensely
	investigated subjects in computational mathematics--the traveling
	salesman problem. It sounds simple enough: given a set of cities
	and the cost of travel between each pair of them, the problem challenges
	you to find the cheapest route by which to visit all the cities and
	return home to where you began. Though seemingly modest, this exercise
	has inspired studies by mathematicians, chemists, and physicists.
	Teachers use it in the classroom. It has practical applications in
	genetics, telecommunications, and neuroscience.
	
	The authors of this book are the same pioneers who for nearly two
	decades have led the investigation into the traveling salesman problem.
	They have derived solutions to almost eighty-six thousand cities,
	yet a general solution to the problem has yet to be discovered. Here
	they describe the method and computer code they used to solve a broad
	range of large-scale problems, and along the way they demonstrate
	the interplay of applied mathematics with increasingly powerful computing
	platforms. They also give the fascinating history of the problem--how
	it developed, and why it continues to intrigue us.},
  citeulike-article-id = {3991575},
  howpublished = {Hardcover},
  isbn = {0691129932},
  posted-at = {2009-02-01 17:26:20},
  priority = {2}
}

@misc{Concorde:website,
  author = {Applegate, D. L. and Bixby, R. E. and Chv{\'a}tal, V. and Cook, W. J.},
  title = {{Concorde} {TSP} solver},
  howpublished = {\url{http://www.tsp.gatech.edu/concorde.html}},
  year = {2005}
}

@article{Arakawa2003Application,
  author = {M. Arakawa and K. Hasegawa and K. Funatsu},
  title = {Application of the novel molecular alignment method using the {Hopfield}
	{Neural} {Network} to {3D-QSAR}},
  journal = {J Chem Inf Comput Sci},
  year = {2003},
  volume = {43},
  pages = {1396--1402},
  number = {5},
  abstract = {Recently, we investigated and proposed the novel molecular alignment
	method with the Hopfield Neural Network (HNN). Molecules are represented
	by four kinds of chemical properties (hydrophobic group, hydrogen-bonding
	acceptor, hydrogen-bonding donor, and hydrogen-bonding donor/acceptor),
	and then those properties between two molecules correspond to each
	other using HNN. The 12 pairs of enzyme-inhibitors were used for
	validation, and our method could successfully reproduce the real
	molecular alignments obtained from X-ray crystallography. In this
	paper, we apply the molecular alignment method to three-dimensional
	quantitative structure-activity relationship (3D-QSAR) analysis.
	The two data sets (human epidermal growth factor receptor-2 inhibitors
	and cyclooxygenase-2 inhibitors) were investigated to validate our
	method. As a result, the robust and predictive 3D-QSAR models were
	successfully obtained in both data sets.},
  doi = {10.1021/ci030005q},
  keywords = {Chemical, Cyclooxygenase 2, Cyclooxygenase 2 Inhibitors, Cyclooxygenase
	Inhibitors, Databases, Enzyme Inhibitors, Epidermal Growth Factor,
	Factual, Humans, Isoenzymes, Membrane Proteins, Models, Molecular,
	Neural Networks (Computer), Prostaglandin-Endoperoxide Synthases,
	Quantitative Structure-Activity Relationship, Receptor},
  owner = {mahe},
  pmid = {14502472},
  timestamp = {2006.08.22},
  url = {http://dx.doi.org/10.1021/ci030005q}
}

@article{Aranda2010IntAct,
  author = {B. Aranda and P. Achuthan and Y. Alam-Faruque and I. Armean and A.
	Bridge and C. Derow and M. Feuermann and A. T. Ghanbarian and S.
	Kerrien and J. Khadake and J. Kerssemakers and C. Leroy and M. Menden
	and M. Michaut and L. Montecchi-Palazzi and S. N. Neuhauser and S.
	Orchard and V. Perreau and B. Roechert and K. van Eijk and H. Hermjakob},
  title = {The {IntAct} molecular interaction database in 2010},
  journal = {Nucleic Acids Res},
  year = {2010},
  volume = {38},
  pages = {D525--D531},
  number = {Database issue},
  month = jan,
  abstract = {IntAct is an open-source, open data molecular interaction database
	and toolkit. Data is abstracted from the literature or from direct
	data depositions by expert curators following a deep annotation model
	providing a high level of detail. As of September 2009, IntAct contains
	over 200.000 curated binary interaction evidences. In response to
	the growing data volume and user requests, IntAct now provides a
	two-tiered view of the interaction data. The search interface allows
	the user to iteratively develop complex queries, exploiting the detailed
	annotation with hierarchical controlled vocabularies. Results are
	provided at any stage in a simplified, tabular view. Specialized
	views then allows 'zooming in' on the full annotation of interactions,
	interactors and their properties. IntAct source code and data are
	freely available at http://www.ebi.ac.uk/intact.},
  doi = {10.1093/nar/gkp878},
  institution = {EMBL Outstation, European Bioinformatics Institute, Wellcome Trust
	Genome Campus Hinxton, Cambridge CB10 1SD, UK.},
  keywords = {Animals; Computational Biology; Databases, Genetic; Databases, Protein;
	False Positive Reactions; Humans; Information Storage and Retrieval;
	Internet; Programming Languages; Protein Interaction Mapping; Protein
	Structure, Tertiary; Proteins; Software; User-Computer Interface;
	Vocabulary, Controlled},
  owner = {fantine},
  pii = {gkp878},
  pmid = {19850723},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/nar/gkp878}
}

@inproceedings{Argyriou2007Multi-task,
  author = {Argyriou, A. and Evgeniou, T. and Pontil, M.},
  title = {Multi-task feature learning},
  booktitle = {Adv. Neural. Inform. Process Syst. 19},
  year = {2007},
  editor = {Sch{\"o}lkopf, B. and Platt, J. and Hoffman, T.},
  pages = {41--48},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  timestamp = {2007.10.22}
}

@article{Argyriou2008Convex,
  author = {Argyriou, A. and Evgeniou, T. and Pontil, M.},
  title = {Convex multi-task feature learning},
  journal = {Mach. Learn.},
  year = {2008},
  volume = {73},
  pages = {243--272},
  number = {3},
  owner = {jp},
  timestamp = {2008.07.09}
}

@article{Argyriou2012Sparse,
  author = {Argyriou, A. and Foygel, R. and Srebro, N.},
  title = {Sparse Prediction with the k-Overlap Norm},
  journal = {arXiv preprint arXiv:1204.5043},
  year = {2012},
  eprint = {1204.5043},
  archiveprefix = {arXiv},
  url = {http://arxiv.org/abs/1204.5043}
}

@article{Argyriou2008When,
  author = {Andreas Argyriou and Charles A. Micchelli and Massimiliano Pontil},
  title = {When is there a representer theorem? Vector versus matrix regularizers},
  journal = {CoRR},
  year = {2008},
  volume = {abs/0809.1590},
  eprint = {0809.1590},
  archiveprefix = {arXiv},
  url = {http://arxiv.org/abs/0809.1590},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://arxiv.org/abs/0809.1590}
}

@incollection{Argyriou2008A,
  author = {Andreas Argyriou and Charles A. Micchelli and Massimiliano Pontil
	and Yiming Ying},
  title = {A Spectral Regularization Framework for Multi-Task Structure Learning},
  booktitle = {Advances in Neural Information Processing Systems 20},
  publisher = {MIT Press},
  year = {2008},
  editor = {J. C. Platt and D. Koller and Y. Singer and S. Roweis},
  pages = {25--32},
  address = {Cambridge, MA}
}

@article{Arimoto2005Development,
  author = {Rieko Arimoto and Madhu-Ashni Prasad and Eric M Gifford},
  title = {Development of {CYP3A4} inhibition models: comparisons of machine-learning
	techniques and molecular descriptors},
  journal = {J Biomol Screen},
  year = {2005},
  volume = {10},
  pages = {197--205},
  number = {3},
  month = apr,
  abstract = {Computational models of cytochrome {P}450 3{A}4 inhibition were developed
	based on high-throughput screening data for 4470 proprietary compounds.
	{M}ultiple models differentiating inhibitors ({IC}(50) <3 micro{M})
	and noninhibitors were generated using various machine-learning algorithms
	(recursive partitioning [{RP}], {B}ayesian classifier, logistic regression,
	k-nearest-neighbor, and support vector machine [{SVM}]) with structural
	fingerprints and topological indices. {N}ineteen models were evaluated
	by internal 10-fold cross-validation and also by an independent test
	set. {T}hree most predictive models, {B}arnard {C}hemical {I}nformation
	({BCI})-fingerprint/{SVM}, {MDL}-keyset/{SVM}, and topological indices/{RP},
	correctly classified 249, 248, and 236 compounds of 291 noninhibitors
	and 135, 137, and 147 compounds of 179 inhibitors in the validation
	set. {T}heir overall accuracies were 82\%, 82\%, and 81\%, respectively.
	{I}nvestigating applicability of the {BCI}/{SVM} model found a strong
	correlation between the predictive performance and the structural
	similarity to the training set. {U}sing {T}animoto similarity index
	as a confidence measurement for the predictions, the limitation of
	the extrapolation was 0.7 in the case of the {BCI}/{SVM} model. {T}aking
	consensus of the 3 best models yielded a further improvement in predictive
	capability, kappa = 0.65 and accuracy = 83\%. {T}he consensus model
	could also be tuned to minimize either false positives or false negatives
	depending on the emphasis of the screening.},
  doi = {10.1177/1087057104274091},
  keywords = {biosvm chemoinformatics},
  pii = {10/3/197},
  url = {http://dx.doi.org/10.1177/1087057104274091}
}

@article{Arkin1997test,
  author   = {A. Arkin and P. Shen and J. Ross},
  title    = {A Test Case of Correlation Metric Construction of a Reaction Pathway
	from Measurements},
  journal  = {Science},
  year     = {1997},
  volume   = {277},
  number   = {5330},
  pages    = {1275--1279},
  abstract = {A method for the prediction of the interactions within complex reaction
	networks from experimentally measured time series of the concentration
	of the species composing the system has been tested experimentally
	on the first few steps of the glycolytic pathway. The reconstituted
	reaction system, containing eight enzymes and 14 metabolic intermediates,
	was kept away from equilibrium in a continuous-flow, stirred-tank
	reactor. Input concentrations of adenosine monophosphate and citrate
	were externally varied over time, and their concentrations in the
	reactor and the response of eight other species were measured. Multidimensional
	scaling analysis and heuristic algorithms applied to two-species
	time-lagged correlation functions derived from the time series yielded
	a diagram from which the interactions among all of the species could
	be deduced. The diagram predicts essential features of the known
	reaction network in regard to chemical reactions and interactions
	among the measured species. The approach is applicable to many complex
	reaction systems.},
  keywords = {reconstruction, kinetic, metabolism, analysis, multidimensional scaling,
	Correlation Metric Construction },
  url      = {http://www.sciencemag.org/cgi/reprint/277/5330/1275.pdf}
}

@misc{Armstrong1999review,
  author       = {J. W. Armstrong},
  title        = {A review of high-throughput screening approaches for drug discovery},
  year         = {1999},
  howpublished = {Application note},
  owner        = {mahe},
  timestamp    = {2006.09.05}
}

@article{Arodz2005Pattern,
  author = {Tomasz Arod{\'z} and Marcin Kurdziel and Erik O D Sevre and David
	A Yuen},
  title = {Pattern recognition techniques for automatic detection of suspicious-looking
	anomalies in mammograms},
  journal = {Comput. Methods Programs Biomed.},
  year = {2005},
  volume = {79},
  pages = {135--149},
  number = {2},
  month = aug,
  abstract = {We have employed two pattern recognition methods used commonly for
	face recognition in order to analyse digital mammograms. {T}he methods
	are based on novel classification schemes, the {A}da{B}oost and the
	support vector machines ({SVM}). {A} number of tests have been carried
	out to evaluate the accuracy of these two algorithms under different
	circumstances. {R}esults for the {A}da{B}oost classifier method are
	promising, especially for classifying mass-type lesions. {I}n the
	best case the algorithm achieved accuracy of 76\% for all lesion
	types and 90\% for masses only. {T}he {SVM} based algorithm did not
	perform as well. {I}n order to achieve a higher accuracy for this
	method, we should choose image features that are better suited for
	analysing digital mammograms than the currently used ones.},
  doi = {10.1016/j.cmpb.2005.03.009},
  pdf = {../local/Arodz2005Pattern.pdf},
  file = {Arodz2005Pattern.pdf:local/Arodz2005Pattern.pdf:PDF},
  keywords = {biosvm image},
  pii = {S0169-2607(05)00083-0},
  url = {http://dx.doi.org/10.1016/j.cmpb.2005.03.009}
}

@article{Aronov2005Predictive,
  author = {Aronov, A. M.},
  title = {Predictive in silico modeling for {hERG} channel blockers},
  journal = {Drug Discov. Today},
  year = {2005},
  volume = {10},
  pages = {149--155},
  number = {2},
  month = jan,
  abstract = {hERG-mediated sudden death as a side effect of non-antiarrhythmic
	drugs has been receiving increased regulatory attention. Perhaps
	owing to the unique shape of the ligand-binding site and its hydrophobic
	character, the hERG channel has been shown to interact with pharmaceuticals
	of widely varying structure. Several in silico approaches have attempted
	to predict hERG channel blockade. Some of these approaches are aimed
	primarily at filtering out potential hERG blockers in the context
	of virtual libraries, others involve understanding structure-activity
	relationships governing hERG-drug interactions. This review summarizes
	the most recent efforts in this emerging field.},
  doi = {10.1016/S1359-6446(04)03278-7},
  pdf = {../local/Aronov2005Predictive.pdf},
  file = {Aronov2005Predictive.pdf:Aronov2005Predictive.pdf:PDF},
  keywords = {chemoinformatics herg},
  pii = {S1359644604032787},
  pmid = {15718164},
  timestamp = {2007.02.03},
  url = {http://dx.doi.org/10.1016/S1359-6446(04)03278-7}
}

@article{Aronszajn1950Theory,
  author = {Aronszajn, N.},
  title = {Theory of reproducing kernels},
  journal = {Trans. Am. Math. Soc.},
  year = {1950},
  volume = {68},
  pages = {337--404},
  pdf = {../local/Aronszajn1950Theory.pdf},
  file = {Aronszajn1950Theory.pdf:local/Aronszajn1950Theory.pdf:PDF},
  keywords = {kernel-theory},
  subject = {kernelml}
}

@article{Asefa2005Support,
  author = {Tirusew Asefa and Mariush Kemblowski and Gilberto Urroz and Mac McKee},
  title = {Support vector machines ({SVMs}) for monitoring network design},
  journal = {Ground Water},
  year = {2005},
  volume = {43},
  pages = {413--422},
  number = {3},
  abstract = {In this paper we present a hydrologic application of a new statistical
	learning methodology called support vector machines ({SVM}s). {SVM}s
	are based on minimization of a bound on the generalized error (risk)
	model, rather than just the mean square error over a training set.
	{D}ue to {M}ercer's conditions on the kernels, the corresponding
	optimization problems are convex and hence have no local minima.
	{I}n this paper, {SVM}s are illustratively used to reproduce the
	behavior of {M}onte {C}arlo-based flow and transport models that
	are in turn used in the design of a ground water contamination detection
	monitoring system. {T}he traditional approach, which is based on
	solving transient transport equations for each new configuration
	of a conductivity field, is too time consuming in practical applications.
	{T}hus, there is a need to capture the behavior of the transport
	phenomenon in random media in a relatively simple manner. {T}he objective
	of the exercise is to maximize the probability of detecting contaminants
	that exceed some regulatory standard before they reach a compliance
	boundary, while minimizing cost (i.e., number of monitoring wells).
	{A}pplication of the method at a generic site showed a rather promising
	performance, which leads us to believe that {SVM}s could be successfully
	employed in other areas of hydrology. {T}he {SVM} was trained using
	510 monitoring configuration samples generated from 200 {M}onte {C}arlo
	flow and transport realizations. {T}he best configurations of well
	networks selected by the {SVM} were identical with the ones obtained
	from the physical model, but the reliabilities provided by the respective
	networks differ slightly.},
  doi = {10.1111/j.1745-6584.2005.0050.x},
  pdf = {../local/Asefa2005Support.pdf},
  file = {Asefa2005Support.pdf:local/Asefa2005Support.pdf:PDF},
  keywords = {Adult, Aged, Aging, Algorithms, Apoptosis, Artificial Intelligence,
	Automated, Computer-Assisted, Female, Foot, Gait, Gene Expression
	Profiling, Humans, Image Interpretation, Male, Neoplasms, Non-U.S.
	Gov't, Oligonucleotide Array Sequence Analysis, Pattern Recognition,
	Polymerase Chain Reaction, Proteins, Reproducibility of Results,
	Research Support, Sensitivity and Specificity, Subcellular Fractions,
	Unknown Primary, 15882333},
  pii = {GWAT50},
  url = {http://dx.doi.org/10.1111/j.1745-6584.2005.0050.x}
}

@article{Ashburner2000Gene,
  author = {M. Ashburner and C. A. Ball and J. A. Blake and D. Botstein and H.
	Butler and J. M. Cherry and A. P. Davis and K. Dolinski and S. S.
	Dwight and J. T. Eppig and M. A. Harris and D. P. Hill and L. Issel-Tarver
	and A. Kasarskis and S. Lewis and J. C. Matese and J. E. Richardson
	and M. Ringwald and G. M. Rubin and G. Sherlock},
  title = {Gene ontology: tool for the unification of biology},
  journal = {Nat Genet},
  year = {2000},
  volume = {25},
  pages = {25--29},
  number = {1},
  month = may,
  note = {The Gene Ontology Consortium},
  doi = {10.1038/75556},
  institution = {Department of Genetics, Stanford University School of Medicine, California,
	USA. cherry@stanford.edu},
  keywords = {Animals; Computer Communication Networks; Databases, Factual; Eukaryotic
	Cells; Genes; Humans; Metaphysics; Mice; Molecular Biology; Sequence
	Analysis, DNA; Terminology as Topic},
  owner = {fantine},
  pmid = {10802651},
  timestamp = {2010.10.25},
  url = {http://dx.doi.org/10.1038/75556}
}

@article{Atalay2005Implicit,
  author = {Atalay, V. and Cetin-Atalay, R.},
  title = {Implicit motif distribution based hybrid computational kernel for
	sequence classification},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1429--1436},
  number = {8},
  month = apr,
  abstract = {{MOTIVATION}: {W}e designed a general computational kernel for classification
	problems that require specific motif extraction and search from sequences.
	{I}nstead of searching for explicit motifs, our approach finds the
	distribution of implicit motifs and uses as a feature for classification.
	{I}mplicit motif distribution approach may be used as modus operandi
	for bioinformatics problems that require specific motif extraction
	and search, which is otherwise computationally prohibitive. {RESULTS}:
	{A} system named {P}2{SL} that infer protein subcellular targeting
	was developed through this computational kernel. {T}argeting-signal
	was modeled by the distribution of subsequence occurrences (implicit
	motifs) using self-organizing maps. {T}he boundaries among the classes
	were then determined with a set of support vector machines. {P}2{SL}
	hybrid computational system achieved approximately 81\% of prediction
	accuracy rate over {ER} targeted, cytosolic, mitochondrial and nuclear
	protein localization classes. {P}2{SL} additionally offers the distribution
	potential of proteins among localization classes, which is particularly
	important for proteins, shuttle between nucleus and cytosol. {AVAILABILITY}:
	http://staff.vbi.vt.edu/volkan/p2sl and http://www.i-cancer.fen.bilkent.edu.tr/p2sl
	{CONTACT}: rengul@bilkent.edu.tr.},
  doi = {10.1093/bioinformatics/bti212},
  pdf = {../local/Atalay2005Implicit.pdf},
  file = {Atalay2005Implicit.pdf:local/Atalay2005Implicit.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti212}
}

@article{Attwood2003PRINTS,
  author = {T. K. Attwood and P. Bradley and D. R. Flower and A. Gaulton and
	N. Maudling and A. L. Mitchell and G. Moulton and A. Nordle and K.
	Paine and P. Taylor and A. Uddin and C. Zygouri},
  title = {{PRINTS} and its automatic supplement, {prePRINTS}},
  journal = {Nucleic Acids Res.},
  year = {2003},
  volume = {31},
  pages = {400--402},
  number = {1},
  month = jan,
  abstract = {The PRINTS database houses a collection of protein fingerprints. These
	may be used to assign uncharacterised sequences to known families
	and hence to infer tentative functions. The September 2002 release
	(version 36.0) includes 1800 fingerprints, encoding approximately
	11 000 motifs, covering a range of globular and membrane proteins,
	modular polypeptides and so on. In addition to its continued steady
	growth, we report here the development of an automatic supplement,
	prePRINTS, designed to increase the coverage of the resource and
	reduce some of the manual burdens inherent in its maintenance. The
	databases are accessible for interrogation and searching at http://www.bioinf.man.ac.uk/dbbrowser/PRINTS/.},
  keywords = {Amino Acid Motifs; Animals; Automation; Conserved Sequence; Databases,
	Protein; Proteins; Software},
  owner = {laurent},
  pmid = {12520033},
  timestamp = {2007.09.22}
}

@article{Auer2010Statistical,
  author = {Auer, P. L. and Doerge, R. W.},
  title = {Statistical design and analysis of {RNA} sequencing data},
  journal = {Genetics},
  year = {2010},
  volume = {185},
  pages = {405--416},
  number = {2},
  month = jun,
  abstract = {Next-generation sequencing technologies are quickly becoming the preferred
	approach for characterizing and quantifying entire genomes. Even
	though data produced from these technologies are proving to be the
	most informative of any thus far, very little attention has been
	paid to fundamental design aspects of data collection and analysis,
	namely sampling, randomization, replication, and blocking. We discuss
	these concepts in an RNA sequencing framework. Using simulations
	we demonstrate the benefits of collecting replicated RNA sequencing
	data according to well known statistical designs that partition the
	sources of biological and technical variation. Examples of these
	designs and their corresponding models are presented with the goal
	of testing differential expression.},
  doi = {10.1534/genetics.110.114983},
  institution = {Department of Statistics, Purdue University, West Lafayette, Indiana
	47907, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {genetics.110.114983},
  pmid = {20439781},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1534/genetics.110.114983}
}

@article{Auliac2008Evolutionary,
  author = {C{\'e}dric Auliac and Vincent Frouin and Xavier Gidrol and Florence
	d'Alch{\'e}-Buc},
  title = {Evolutionary approaches for the reverse-engineering of gene regulatory
	networks: A study on a biologically realistic dataset},
  journal = {BMC Bioinformatics},
  year = {2008},
  volume = {9},
  pages = {91},
  doi = {10.1186/1471-2105-9-91},
  url = {http://dx.doi.org/10.1186/1471-2105-9-91},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://dx.doi.org/10.1186/1471-2105-9-91}
}

@article{Avidan2004Support,
  author = {Shai Avidan},
  title = {Support vector tracking},
  journal = {{IEEE} Trans Pattern Anal Mach Intell},
  year = {2004},
  volume = {26},
  pages = {1064--1072},
  number = {8},
  month = aug,
  abstract = {Support {V}ector {T}racking ({SVT}) integrates the {S}upport {V}ector
	{M}achine ({SVM}) classifier into an optic-flow-based tracker. {I}nstead
	of minimizing an intensity difference function between successive
	frames, {SVT} maximizes the {SVM} classification score. {T}o account
	for large motions between successive frames, we build pyramids from
	the support vectors and use a coarse-to-fine approach in the classification
	stage. {W}e show results of using {SVT} for vehicle tracking in image
	sequences.},
  doi = {10.1109/TPAMI.2004.53},
  pdf = {../local/Avidan2004Support.pdf},
  file = {Avidan2004Support.pdf:local/Avidan2004Support.pdf:PDF},
  url = {http://dx.doi.org/10.1109/TPAMI.2004.53}
}

@article{Avlani2007Critical,
  author = {Avlani, V. A. and Gregory, K. J. and Morton, C. J. and Parker, M.
	W. and Sexton, P. M. and Christopoulos, A.},
  title = {Critical role for the second extracellular loop in the binding of
	both orthosteric and allosteric {G} protein-coupled receptor ligands},
  journal = {J. Biol. Chem.},
  year = {2007},
  volume = {282},
  pages = {25677--25686},
  number = {35},
  month = aug,
  abstract = {The second extracellular (E2) loop of G protein-coupled receptors
	(GPCRs) plays an essential but poorly understood role in the binding
	of non-peptidic small molecules. We have utilized both orthosteric
	ligands and allosteric modulators of the M2 muscarinic acetylcholine
	receptor, a prototypical Family A GPCR, to probe possible E2 loop
	binding dynamics. We developed a homology model based on the crystal
	structure of bovine rhodopsin and predicted novel cysteine substitutions
	that should dramatically reduce E2 loop flexibility via disulfide
	bond formation and significantly inhibit the binding of both types
	of ligands. This prediction was validated experimentally using radioligand
	binding, dissociation kinetics, and cell-based functional assays.
	The results argue for a flexible "gatekeeper" role of the E2 loop
	in the binding of both allosteric and orthosteric GPCR ligands.},
  doi = {10.1074/jbc.M702311200},
  keywords = {chemogenomics},
  owner = {laurent},
  pii = {M702311200},
  pmid = {17591774},
  timestamp = {2008.07.21},
  url = {http://dx.doi.org/10.1074/jbc.M702311200}
}

@article{Azencott2007One,
  author = {Azencott, C.-A. and Ksikes, A. and Swamidass, S. J. and Chen, J.
	H. and Ralaivola, L. and Baldi, P.},
  title = {One- to four-dimensional kernels for virtual screening and the prediction
	of physical, chemical, and biological properties},
  journal = {J. Chem. Inform. Model.},
  year = {2007},
  volume = {47},
  pages = {965--974},
  number = {3},
  abstract = {Many chemoinformatics applications, including high-throughput virtual
	screening, benefit from being able to rapidly predict the physical,
	chemical, and biological properties of small molecules to screen
	large repositories and identify suitable candidates. When training
	sets are available, machine learning methods provide an effective
	alternative to ab initio methods for these predictions. Here, we
	leverage rich molecular representations including 1D SMILES strings,
	2D graphs of bonds, and 3D coordinates to derive efficient machine
	learning kernels to address regression problems. We further expand
	the library of available spectral kernels for small molecules developed
	for classification problems to include 2.5D surface and 3D kernels
	using Delaunay tetrahedrization and other techniques from computational
	geometry, 3D pharmacophore kernels, and 3.5D or 4D kernels capable
	of taking into account multiple molecular configurations, such as
	conformers. The kernels are comprehensively tested using cross-validation
	and redundancy-reduction methods on regression problems using several
	available data sets to predict boiling points, melting points, aqueous
	solubility, octanol/water partition coefficients, and biological
	activity with state-of-the art results. When sufficient training
	data are available, 2D spectral kernels in general tend to yield
	the best and most robust results, better than state-of-the art. On
	data sets containing thousands of molecules, the kernels achieve
	a squared correlation coefficient of 0.91 for aqueous solubility
	prediction and 0.94 for octanol/water partition coefficient prediction.
	Averaging over conformations improves the performance of kernels
	based on the three-dimensional structure of molecules, especially
	on challenging data sets. Kernel predictors for aqueous solubility
	(kSOL), LogP (kLOGP), and melting point (kMELT) are available over
	the Web through: http://cdb.ics.uci.edu.},
  doi = {10.1021/ci600397p},
  pdf = {../local/Azencott2007One.pdf},
  file = {Azencott2007One.pdf:Azencott2007One.pdf:PDF},
  owner = {pmahe},
  pmid = {17338509},
  timestamp = {2007.07.13},
  url = {http://dx.doi.org/10.1021/ci600397p}
}

@book{Bohm2003Protein-ligand,
  title = {Protein-ligand interactions},
  publisher = {Wiley},
  year = {2003},
  author = {H.-J. B{\"o}hm and G. Schneider and R. Mannhold and H. Kubinyi and
	G. Folkers},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.02.03}
}

@techreport{Babur2004,
  author = {Babur, O. and Demir, E. and Ayaz, A. and Dogrusoz, U. and Sakarya, O.},
  title = {Pathway activity inference using microarray data},
  institution = {Bilkent Center for Bioinformatics (BCBI)},
  year = {2004}
}

@inproceedings{Bach2009Exploring,
  author    = {Bach, F.},
  title     = {Exploring large feature spaces with hierarchical multiple kernel
	learning},
  booktitle = {Adv. Neural. Inform. Process Syst.},
  volume    = {21},
  year      = {2009},
  pdf       = {../local/Bach2009Exploring.pdf},
  file      = {Bach2009Exploring.pdf:Bach2009Exploring.pdf:PDF},
  owner     = {jp},
  timestamp = {2009.01.25}
}

@article{Bach2008Consistencya,
  author = {Bach, F.},
  title = {Consistency of the group lasso and multiple kernel learning},
  journal = {J. Mach. Learn. Res.},
  year = {2008},
  volume = {9},
  pages = {1179--1225},
  abstract = {We consider the least-square regression problem with regularization
	by a block l1-norm, that is, a sum of Euclidean norms over spaces
	of dimensions larger than one. This problem, referred to as the group
	Lasso, extends the usual regularization by the l1-norm where all
	spaces have dimension one, where it is commonly referred to as the
	Lasso. In this paper, we study the asymptotic group selection consistency
	of the group Lasso. We derive necessary and sufficient conditions
	for the consistency of group Lasso under practical assumptions, such
	as model misspecification. When the linear predictors and Euclidean
	norms are replaced by functions and reproducing kernel Hilbert norms,
	the problem is usually referred to as multiple kernel learning and
	is commonly used for learning from heterogeneous data sources and
	for non linear variable selection. Using tools from functional analysis,
	and in particular covariance operators, we extend the consistency
	results to this infinite dimensional case and also propose an adaptive
	scheme to obtain a consistent model estimate, even when the necessary
	condition required for the non adaptive scheme is not satisfied.},
  pdf = {../local/Bach2008Consistencya.pdf},
  file = {Bach2008Consistencya.pdf:Bach2008Consistencya.pdf:PDF},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2008.12.21},
  url = {http://jmlr.csail.mit.edu/papers/v9/bach08b.html}
}

@article{Bach2011Optimization,
  author  = {Bach, F. and Jenatton, R. and Mairal, J. and Obozinski, G.},
  title   = {Optimization with sparsity-inducing penalties},
  journal = {Foundations and Trends{\textregistered} in Machine Learning},
  year    = {2011},
  volume  = {4},
  number  = {1},
  pages   = {1--106},
  doi     = {10.1561/2200000015},
  url     = {http://dx.doi.org/10.1561/2200000015},
  pdf     = {../local/Bach2011Optimization.pdf},
  file    = {Bach2011Optimization.pdf:Bach2011Optimization.pdf:PDF}
}

@article{Bach2011Structured,
  author = {Bach, F. and Jenatton, R. and Mairal, J. and Obozinski, G.},
  title = {Structured sparsity through convex optimization},
  journal = {arXiv preprint arXiv:1109.2397},
  year = {2011},
  eprint = {1109.2397},
  archiveprefix = {arXiv},
  url = {http://arxiv.org/abs/1109.2397}
}

@article{Bach2002Kernel,
  author = {Bach, F. R. and Jordan, M. I.},
  title = {Kernel independent component analysis},
  journal = {J. Mach. Learn. Res.},
  year = {2002},
  volume = {3},
  pages = {1--48},
  pdf = {../local/Bach2002Kernel.pdf},
  file = {Bach2002Kernel.pdf:local/Bach2002Kernel.pdf:PDF},
  html = {http://www.ai.mit.edu/projects/jmlr/papers/volume3/bshouty02a/abstract.html},
  subject = {kernel},
  url = {http://jmlr.csail.mit.edu/papers/v3/bach02a.html}
}

@inproceedings{Bach2008Bolasso,
  author = {Bach, F. R.},
  title = {Bolasso: model consistent {Lasso} estimation through the bootstrap},
  booktitle = {Proceedings of the 25th international conference on Machine learning},
  year = {2008},
  editor = {William W. Cohen and Andrew McCallum and Sam T. Roweis},
  volume = {308},
  series = {ACM International Conference Proceeding Series},
  pages = {33--40},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1390156.1390161},
  isbn = {978-1-60558-205-4},
  location = {Helsinki, Finland}
}

@article{Bach2008Consistency,
  author = {Bach, F. R.},
  title = {Consistency of trace norm minimization},
  journal = {J. Mach. Learn. Res.},
  year = {2008},
  volume = {9},
  pages = {1019--1048},
  abstract = {Regularization by the sum of singular values, also referred to as
	the trace norm, is a popular technique for estimating low rank rectangular
	matrices. In this paper, we extend some of the consistency results
	of the Lasso to provide necessary and sufficient conditions for rank
	consistency of trace norm minimization with the square loss. We also
	provide an adaptive version that is rank consistent even when the
	necessary condition for the non adaptive version is not fulfilled.},
  pdf = {../local/Bach2008Consistency.pdf},
  file = {Bach2008Consistency.pdf:Bach2008Consistency.pdf:PDF},
  url = {http://jmlr.csail.mit.edu/papers/volume9/bach08a/bach08a.pdf}
}

@inproceedings{Bach2005Predictive,
  author = {Bach, F. R. and Jordan, M. I.},
  title = {Predictive low-rank decomposition for kernel methods},
  booktitle = {ICML '05: Proceedings of the 22nd international conference on Machine
	learning},
  year = {2005},
  pages = {33--40},
  address = {New York, NY, USA},
  publisher = {ACM},
  abstract = {Low-rank matrix decompositions are essential tools in the application
	of kernel methods to large-scale learning problems. These decompositions
	have generally been treated as black boxes---the decomposition of
	the kernel matrix that they deliver is independent of the specific
	learning task at hand---and this is a potentially significant source
	of inefficiency. In this paper, we present an algorithm that can
	exploit side information (e.g., classification labels, regression
	responses) in the computation of low-rank decompositions for kernel
	matrices. Our algorithm has the same favorable scaling as state-of-the-art
	methods such as incomplete Cholesky decomposition---it is linear
	in the number of data points and quadratic in the rank of the approximation.
	We present simulation results that show that our algorithm yields
	decompositions of significantly smaller rank than those found by
	incomplete Cholesky decomposition.},
  doi = {10.1145/1102351.1102356}
}

@inproceedings{Bach2003Learning,
  author    = {Francis R. Bach and Michael I. Jordan},
  title     = {Learning Spectral Clustering},
  booktitle = {Advances in Neural Information Processing Systems 16},
  publisher = {MIT Press},
  year      = {2003}
}

@techreport{Bach2004Fast,
  author = {Bach, F. R. and Lanckriet, G. and Jordan, M. I.},
  title = {Fast kernel learning using sequential minimal optimization},
  institution = {Computer Science Division, UC Berkeley},
  year = {2004},
  number = {UCB/CSD-04-1307},
  month = feb,
  pdf = {../local/Bach2004Fast.pdf},
  file = {Bach2004Fast.pdf:Bach2004Fast.pdf:PDF},
  owner = {jp},
  timestamp = {2009.01.05}
}

@inproceedings{Bach2004Multiple,
  author = {Bach, F. R. and Lanckriet, G. R. G. and Jordan, M. I.},
  title = {Multiple Kernel Learning, Conic Duality, and the {SMO} Algorithm},
  booktitle = {Proceedings of the Twenty-First International Conference on Machine
	Learning},
  year = {2004},
  pages = {6},
  address = {New York, NY, USA},
  publisher = {ACM},
  abstract = {While classical kernel-based classifiers are based on a single kernel,
	in practice it is often desirable to base classifiers on combinations
	of multiple kernels. Lanckriet et al. (2004) considered conic combinations
	of kernel matrices for the support vector machine (SVM), and showed
	that the optimization of the coefficients of such a combination reduces
	to a convex optimization problem known as a quadratically-constrained
	quadratic program (QCQP). Unfortunately, current convex optimization
	toolboxes can solve this problem only for a small number of kernels
	and a small number of data points; moreover, the sequential minimal
	optimization (SMO) techniques that are essential in large-scale implementations
	of the SVM cannot be applied because the cost function is non-differentiable.
	We propose a novel dual formulation of the QCQP as a second-order
	cone programming problem, and show how to exploit the technique of
	Moreau-Yosida regularization to yield a formulation to which SMO
	techniques can be applied. We present experimental results that show
	that our SMO-based algorithm is significantly more efficient than
	the general-purpose interior point methods available in current optimization
	toolboxes.},
  doi = {10.1145/1015330.1015424},
  pdf = {../local/Bach2004Multiple.pdf},
  file = {Bach2004Multiple.pdf:Bach2004Multiple.pdf:PDF}
}

@inproceedings{Bach2005Computing,
  author = {F. R. Bach and R. Thibaux and M. I. Jordan},
  title = {Computing Regularization Paths for Learning Multiple Kernels},
  booktitle = {Advances in Neural Information Processing Systems 17},
  year = {2005},
  editor = {Saul, L. K. and Weiss, Y. and Bottou, L.},
  pages = {73--80},
  address = {Cambridge, MA},
  publisher = {MIT Press}
}

@article{Bacilieri2006Ligand,
  author = {Magdalena Bacilieri and Stefano Moro},
  title = {Ligand-based drug design methodologies in drug discovery process:
	an overview.},
  journal = {Curr Drug Discov Technol},
  year = {2006},
  volume = {3},
  pages = {155--165},
  number = {3},
  month = sep,
  abstract = {Ligand-based drug design represents an important research field in
	the drug discovery and optimisation process. This review provides
	an overview about the theoretical background of the quantitative
	structure activity relationship (QSAR) models.},
  institution = {Molecular Modeling Section, Dipartimento di Scienze Farmaceutiche,
	Universit{\`a} di Padova, Via Marzolo 5, I-35131 Padova, Italy.},
  keywords = {Drug Design; Ligands; Models, Theoretical; Molecular Structure; Pharmaceutical
	Preparations, chemistry/metabolism; Quantitative Structure-Activity
	Relationship; Technology, Pharmaceutical, methods},
  owner = {bricehoffmann},
  pmid = {17311561},
  timestamp = {2009.02.13}
}

@article{Bader2003BIND,
  author = {Bader, G. D. and Betel, D. and Hogue, C. W. V.},
  title = {{BIND}: the {Biomolecular Interaction Network Database}.},
  journal = {Nucleic Acids Res},
  year = {2003},
  volume = {31},
  pages = {248--250},
  number = {1},
  month = jan,
  abstract = {The Biomolecular Interaction Network Database (BIND: http://bind.ca)
	archives biomolecular interaction, complex and pathway information.
	A web-based system is available to query, view and submit records.
	BIND continues to grow with the addition of individual submissions
	as well as interaction data from the PDB and a number of large-scale
	interaction and complex mapping experiments using yeast two hybrid,
	mass spectrometry, genetic interactions and phage display. We have
	developed a new graphical analysis tool that provides users with
	a view of the domain composition of proteins in interaction and complex
	records to help relate functional domains to protein interactions.
	An interaction network clustering tool has also been developed to
	help focus on regions of interest. Continued input from users has
	helped further mature the BIND data specification, which now includes
	the ability to store detailed information about genetic interactions.
	The BIND data specification is available as ASN.1 and XML DTD.},
  institution = {Department of Biochemistry, Samuel Lunenfeld Research Institute,
	University of Toronto, Toronto M5G 1X5, Canada.},
  owner = {fantine},
  pmid = {12519993},
  timestamp = {2010.10.21}
}

@article{Baek2008impact,
  author = {Daehyun Baek and Judit Vill{\'e}n and Chanseok Shin and Fernando D Camargo
	and Steven P Gygi and David P Bartel},
  title = {The impact of {microRNAs} on protein output.},
  journal = {Nature},
  year = {2008},
  volume = {455},
  pages = {64--71},
  number = {7209},
  month = sep,
  abstract = {MicroRNAs are endogenous approximately 23-nucleotide RNAs that can
	pair to sites in the messenger RNAs of protein-coding genes to downregulate
	the expression from these messages. MicroRNAs are known to influence
	the evolution and stability of many mRNAs, but their global impact
	on protein output had not been examined. Here we use quantitative
	mass spectrometry to measure the response of thousands of proteins
	after introducing microRNAs into cultured cells and after deleting
	mir-223 in mouse neutrophils. The identities of the responsive proteins
	indicate that targeting is primarily through seed-matched sites located
	within favourable predicted contexts in 3' untranslated regions.
	Hundreds of genes were directly repressed, albeit each to a modest
	degree, by individual microRNAs. Although some targets were repressed
	without detectable changes in mRNA levels, those translationally
	repressed by more than a third also displayed detectable mRNA destabilization,
	and, for the more highly repressed targets, mRNA destabilization
	usually comprised the major component of repression. The impact of
	microRNAs on the proteome indicated that for most interactions microRNAs
	act as rheostats to make fine-scale adjustments to protein output.},
  doi = {10.1038/nature07242},
  pdf = {../local/Baek2008impact.pdf},
  file = {Baek2008impact.pdf:Baek2008impact.pdf:PDF},
  institution = {Whitehead Institute for Biomedical Research, 9 Cambridge Center,
	Cambridge, Massachusetts 02142, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature07242},
  pmid = {18668037},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1038/nature07242}
}

@article{Bagci2008PLOS1,
  author   = {Bagci, E. Z. and Vodovotz, Y. and Billiar, T. R. and Ermentrout,
	B. and Bahar, I.},
  title    = {Computational insights on the competing effects of nitric oxide in
	regulating apoptosis},
  journal  = {PLoS One},
  volume   = {3},
  number   = {5},
  pages    = {e2249},
  year     = {2008},
  keywords = {csbcbook},
  abstract = {Despite the establishment of the important role of nitric oxide (NO)
	on apoptosis, a molecular-level understanding of the origin of its
	dichotomous pro- and anti-apoptotic effects has been elusive. We
	propose a new mathematical model for simulating the effects of nitric
	oxide (NO) on apoptosis. The new model integrates mitochondria-dependent
	apoptotic pathways with NO-related reactions, to gain insights into
	the regulatory effect of the reactive NO species N(2)O(3), non-heme
	iron nitrosyl species (FeL(n)NO), and peroxynitrite (ONOO(-)). The
	biochemical pathways of apoptosis coupled with NO-related reactions
	are described by ordinary differential equations using mass-action
	kinetics. In the absence of NO, the model predicts either cell survival
	or apoptosis (a bistable behavior) with shifts in the onset time
	of apoptotic response depending on the strength of extracellular
	stimuli. Computations demonstrate that the relative concentrations
	of anti- and pro-apoptotic reactive NO species, and their interplay
	with glutathione, determine the net anti- or pro-apoptotic effects
	at long time points. Interestingly, transient effects on apoptosis
	are also observed in these simulations, the duration of which may
	reach up to hours, despite the eventual convergence to an anti-apoptotic
	state. Our computations point to the importance of precise timing
	of NO production and external stimulation in determining the eventual
	pro- or anti-apoptotic role of NO.}
}

@article{Bagci2006BiophysJ,
  author = {Bagci, E. Z. and Vodovotz, Y. and Billiar, T. R. and Ermentrout,
	G. B. and Bahar, I.},
  title = {Bistability in apoptosis: roles of {Bax}, {Bcl-2}, and mitochondrial
	permeability transition pores},
  journal = {Biophys J},
  year = {2006},
  volume = {90},
  pages = {1546--1559},
  number = {5},
  abstract = {We propose a mathematical model for mitochondria-dependent apoptosis,
	in which kinetic cooperativity in formation of the apoptosome is
	a key element ensuring bistability. We examine the role of Bax and
	Bcl-2 synthesis and degradation rates, as well as the number of mitochondrial
	permeability transition pores (MPTPs), on the cell response to apoptotic
	stimuli. Our analysis suggests that cooperative apoptosome formation
	is a mechanism for inducing bistability, much more robust than that
	induced by other mechanisms, such as inhibition of caspase-3 by the
	inhibitor of apoptosis (IAP). Simulations predict a pathological
	state in which cells will exhibit a monostable cell survival if Bax
	degradation rate is above a threshold value, or if Bax expression
	rate is below a threshold value. Otherwise, cell death or survival
	occur depending on initial caspase-3 levels. We show that high expression
	rates of Bcl-2 can counteract the effects of Bax. Our simulations
	also demonstrate a monostable (pathological) apoptotic response if
	the number of MPTPs exceeds a threshold value. This study supports
	our contention, based on mathematical modeling, that cooperativity
	in apoptosome formation is critically important for determining the
	healthy responses to apoptotic stimuli, and helps define the roles
	of Bax, Bcl-2, and MPTP vis-a-vis apoptosome formation.},
  keywords = {csbcbook}
}

@article{Bagga2005Quantitative,
  author = {Harmohina Bagga and David S Greenfield and William J Feuer},
  title = {Quantitative assessment of atypical birefringence images using scanning
	laser polarimetry with variable corneal compensation.},
  journal = {Am J Ophthalmol},
  year = {2005},
  volume = {139},
  pages = {437--446},
  number = {3},
  month = mar,
  abstract = {P{URPOSE}: {T}o define the clinical characteristics of atypical birefringence
	images and to describe a quantitative method for their identification.
	{DESIGN}: {P}rospective, comparative, clinical observational study.
	{METHODS}: {N}ormal and glaucomatous eyes underwent complete examination,
	standard automated perimetry, scanning laser polarimetry with variable
	corneal compensation ({GD}x-{VCC}), and optical coherence tomography
	({OCT}) of the macula, peripapillary retinal nerve fiber layer ({RNFL}),
	and optic disk. {E}yes were classified into two groups: normal birefringence
	pattern ({NBP}) and atypical birefringence pattern ({ABP}). {C}linical,
	functional, and structural characteristics were assessed separately.
	{A} multiple logistic regression model was used to predict eyes with
	{ABP} on the basis of a quantitative scan score generated by a support
	vector machine ({SVM}) with {GD}x-{VCC}. {RESULTS}: {S}ixty-five
	eyes of 65 patients were enrolled. {ABP} images were observed in
	5 of 20 (25\%) normal eyes and 23 of 45 (51\%) glaucomatous eyes.
	{C}ompared with eyes with {NBP}, glaucomatous eyes with {ABP} demonstrated
	significantly lower {SVM} scores ({P} < .0001, < 0.0001, 0.008, 0.03,
	and 0.03, respectively) and greater temporal, mean, inferior, and
	nasal {RNFL} thickness using {GD}x-{VCC}; and a weaker correlation
	with {OCT} generated {RNFL} thickness ({R}(2) = .75 vs .27). {ABP}
	images were significantly correlated with older age ({R}(2) = .16,
	{P} = .001). {T}he {SVM} score was the only significant ({P} < .0001)
	predictor of {ABP} images and provided high discriminating power
	between eyes with {NBP} and {ABP} (area under the receiver operator
	characteristic curve = 0.98). {CONCLUSIONS}: {ABP} images exist in
	a subset of normal and glaucomatous eyes, are associated with older
	patient age, and produce an artifactual increase in {RNFL} thickness
	using {GD}x-{VCC}. {T}he {SVM} score is highly predictive of {ABP}
	images.},
  doi = {10.1016/j.ajo.2004.10.019},
  pdf = {../local/Bagga2005Quantitative.pdf},
  file = {Bagga2005Quantitative.pdf:local/Bagga2005Quantitative.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under
	Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain
	Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic
	Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic,
	Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis,
	Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male,
	Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical
	Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence,
	P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15767051},
  pii = {S0002-9394(04)01265-6},
  pmid = {15767051},
  url = {http://dx.doi.org/10.1016/j.ajo.2004.10.019}
}

@article{Bagirov2003New,
  author = {A. M. Bagirov and B. Ferguson and S. Ivkovic and G. Saunders and
	J. Yearwood},
  title = {New algorithms for multi-class cancer diagnosis using tumor gene
	expression signatures.},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1800--1807},
  number = {14},
  month = sep,
  abstract = {M{OTIVATION}: {T}he increasing use of {DNA} microarray-based tumor
	gene expression profiles for cancer diagnosis requires mathematical
	methods with high accuracy for solving clustering, feature selection
	and classification problems of gene expression data. {RESULTS}: {N}ew
	algorithms are developed for solving clustering, feature selection
	and classification problems of gene expression data. {T}he clustering
	algorithm is based on optimization techniques and allows the calculation
	of clusters step-by-step. {T}his approach allows us to find as many
	clusters as a data set contains with respect to some tolerance. {F}eature
	selection is crucial for a gene expression database. {O}ur feature
	selection algorithm is based on calculating overlaps of different
	genes. {T}he database used, contains over 16 000 genes and this number
	is considerably reduced by feature selection. {W}e propose a classification
	algorithm where each tissue sample is considered as the center of
	a cluster which is a ball. {T}he results of numerical experiments
	confirm that the classification algorithm in combination with the
	feature selection algorithm perform slightly better than the published
	results for multi-class classifiers based on support vector machines
	for this data set. {AVAILABILITY}: {A}vailable on request from the
	authors.},
  pdf = {../local/Bagirov2003New.pdf},
  file = {Bagirov2003New.pdf:local/Bagirov2003New.pdf:PDF},
  keywords = {Algorithms, Amino Acid Sequence, Anion Exchange Resins, Antigen-Antibody
	Complex, Artificial Intelligence, Automated, Automatic Data Processing,
	Biological, Blood Cells, Chemical, Chromatography, Cluster Analysis,
	Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted,
	DNA, Data Interpretation, Databases, Decision Making, Decision Trees,
	Diffusion Magnetic Resonance Imaging, English Abstract, Epitopes,
	Expert Systems, Factual, Fuzzy Logic, Gene Expression Profiling,
	Gene Expression Regulation, Gene Targeting, Genetic, Genome, Histocompatibility
	Antigens Class I, Humans, Image Interpretation, Image Processing,
	In Vitro, Indicators and Reagents, Information Storage and Retrieval,
	Ion Exchange, Least-Squares Analysis, Liver Cirrhosis, Magnetic Resonance
	Imaging, Male, Models, Molecular Sequence Data, Neoplasms, Neoplastic,
	Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonl, Nucleic
	Acid Conformation, Oligonucleotide Array Sequence Analysis, P.H.S.,
	Pattern Recognition, Pro, Prostatic Neoplasms, Protein, Protein Binding,
	Protein Interaction Mapping, Proteins, Quantitative Structure-Activity
	Relationship, RNA, ROC Curve, Reproducibility of Results, Research
	Support, Sensitivity and Specificity, Sequence Alignment, Sequence
	Analysis, Severity of Illness Index, Statistical, Structure-Activity
	Relationship, Subtraction Technique, T-Lymphocyte, Transcription
	Factors, Transfer, Treatment Outcome, Tumor Markers, U.S. Gov't,
	User-Computer Interface, inear Dynamics, teome, 14512351},
  pmid = {14512351},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/19/14/1800}
}

@article{Bagos2005Evaluation,
  author = {Pantelis G Bagos and Theodore D Liakopoulos and Stavros J Hamodrakas},
  title = {Evaluation of methods for predicting the topology of beta-barrel
	outer membrane proteins and a consensus prediction method.},
  journal = {BMC Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {7},
  number = {1},
  month = jan,
  abstract = {B{ACKGROUND}: {P}rediction of the transmembrane strands and topology
	of beta-barrel outer membrane proteins is of interest in current
	bioinformatics research. {S}everal methods have been applied so far
	for this task, utilizing different algorithmic techniques and a number
	of freely available predictors exist. {T}he methods can be grossly
	divided to those based on {H}idden {M}arkov {M}odels ({HMM}s), on
	{N}eural {N}etworks ({NN}s) and on {S}upport {V}ector {M}achines
	({SVM}s). {I}n this work, we compare the different available methods
	for topology prediction of beta-barrel outer membrane proteins. {W}e
	evaluate their performance on a non-redundant dataset of 20 beta-barrel
	outer membrane proteins of gram-negative bacteria, with structures
	known at atomic resolution. {A}lso, we describe, for the first time,
	an effective way to combine the individual predictors, at will, to
	a single consensus prediction method. {RESULTS}: {W}e assess the
	statistical significance of the performance of each prediction scheme
	and conclude that {H}idden {M}arkov {M}odel based methods, {HMM}-{B}2{TMR},
	{P}rof{TMB} and {PRED}-{TMBB}, are currently the best predictors,
	according to either the per-residue accuracy, the segments overlap
	measure ({SOV}) or the total number of proteins with correctly predicted
	topologies in the test set. {F}urthermore, we show that the available
	predictors perform better when only transmembrane beta-barrel domains
	are used for prediction, rather than the precursor full-length sequences,
	even though the {HMM}-based predictors are not influenced significantly.
	{T}he consensus prediction method performs significantly better than
	each individual available predictor, since it increases the accuracy
	up to 4\% regarding {SOV} and up to 15\% in correctly predicted topologies.
	{CONCLUSIONS}: {T}he consensus prediction method described in this
	work, optimizes the predicted topology with a dynamic programming
	algorithm and is implemented in a web-based application freely available
	to non-commercial users at http://bioinformatics.biol.uoa.gr/{C}on{BBPRED}.},
  doi = {10.1186/1471-2105-6-7},
  pdf = {../local/Bagos2005Evaluation.pdf},
  file = {Bagos2005Evaluation.pdf:local/Bagos2005Evaluation.pdf:PDF},
  keywords = {Algorithms, Cell Nucleus, Cytoplasm, Databases, Genetic Vectors, Humans,
	Internet, Mitochondria, Models, Non-U.S. Gov't, Peptides, Protein,
	Proteins, Proteomics, Reproducibility of Results, Research Support,
	Software, Theoretical, 15647112},
  pii = {1471-2105-6-7},
  pmid = {15647112},
  url = {http://dx.doi.org/10.1186/1471-2105-6-7}
}

@article{Bailey1994Fitting,
  author      = {Bailey, T. L. and Elkan, C.},
  title       = {Fitting a mixture model by expectation maximization to discover motifs
	in biopolymers.},
  journal     = {Proc Int Conf Intell Syst Mol Biol},
  volume      = {2},
  pages       = {28--36},
  year        = {1994},
  pmid        = {7584402},
  url         = {http://view.ncbi.nlm.nih.gov/pubmed/7584402},
  institution = {Department of Computer Science and Engineering, University of California
	at San Diego, La Jolla 92093-0114, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2010.02.21},
  abstract    = {The algorithm described in this paper discovers one or more motifs
	in a collection of DNA or protein sequences by using the technique
	of expectation maximization to fit a two-component finite mixture
	model to the set of sequences. Multiple motifs are found by fitting
	a mixture model to the data, probabilistically erasing the occurrences
	of the motif thus found, and repeating the process to find successive
	motifs. The algorithm requires only a set of unaligned sequences
	and a number specifying the width of the motifs as input. It returns
	a model of each motif and a threshold which together can be used
	as a Bayes-optimal classifier for searching for occurrences of the
	motif in other databases. The algorithm estimates how many times
	each motif occurs in each sequence in the dataset and outputs an
	alignment of the occurrences of the motif. The algorithm is capable
	of discovering several different motifs with differing numbers of
	occurrences in a single dataset.}
}

@article{Bailey2006MEME,
  author = {Bailey, Timothy L. and Williams, Nadya and Misleh, Chris and Li,
	Wilfred W.},
  title = {{MEME}: discovering and analyzing {DNA} and protein sequence motifs},
  journal = {Nucl. Acids Res.},
  year = {2006},
  volume = {34},
  pages = {W369--W373},
  number = {suppl\_2},
  month = jul,
  abstract = {MEME (Multiple EM for Motif Elicitation) is one of the most widely
	used tools for searching for novel `signals' in sets of biological
	sequences. Applications include the discovery of new transcription
	factor binding sites and protein domains. MEME works by searching
	for repeated, ungapped sequence patterns that occur in the DNA or
	protein sequences provided by the user. Users can perform MEME searches
	via the web server hosted by the National Biomedical Computation
	Resource (http://meme.nbcr.net) and several mirror sites. Through
	the same web server, users can also access the Motif Alignment and
	Search Tool to search sequence databases for matches to motifs encoded
	in several popular formats. By clicking on buttons in the MEME output,
	users can compare the motifs discovered in their input sequences
	with databases of known motifs, search sequence databases for matches
	to the motifs and display the motifs in various formats. This article
	describes the freely accessible web server and its architecture,
	and discusses ways to use MEME effectively to find new sequence patterns
	in biological sequences and analyze their significance.},
  institution = {Institute of Molecular Bioscience, The University of Queensland,
	St Lucia, QLD 4072, Australia. t.bailey@imb.uq.edu.au},
  doi = {10.1093/nar/gkl198},
  issn = {1362-4962},
  keywords = {motif-identification, sequence-pattern-recognition, software},
  posted-at = {2009-09-18 19:38:23},
  priority = {2},
  url = {http://dx.doi.org/10.1093/nar/gkl198}
}

@inproceedings{Baird1993Document,
  author    = {Baird, H.},
  title     = {Document image defect models and their uses},
  booktitle = {Proceedings of the Second International Conference on Document Analysis
	and Recognition ICDAR-93},
  year      = {1993},
  owner     = {jp},
  timestamp = {2008.12.22},
  pdf       = {../local/Baird1993Document.pdf},
  file      = {Baird1993Document.pdf:Baird1993Document.pdf:PDF}
}

@article{Bajorath2002Integration,
  author = {J. Bajorath},
  title = {Integration of virtual and high-throughput screening.},
  journal = {Nat Rev Drug Discov},
  year = {2002},
  volume = {1},
  pages = {882--894},
  number = {11},
  month = nov,
  abstract = {High-throughput and virtual screening are important components of
	modern drug discovery research. Typically, these screening technologies
	are considered distinct approaches, as one is experimental and the
	other is theoretical in nature. However, given their similar tasks
	and goals, these approaches are much more complementary to each other
	than often thought. Various statistical, informatics and filtering
	methods have recently been introduced to foster the integration of
	experimental and in silico screening and maximize their output in
	drug discovery. Although many of these ideas and efforts have not
	yet proceeded much beyond the conceptual level, there are several
	success stories and good indications that early-stage drug discovery
	will benefit greatly from a more unified and knowledge-based approach
	to biological screening, despite the many technical advances towards
	even higher throughput that are made in the screening arena.},
  doi = {10.1038/nrd941},
  keywords = {Animals, Cluster Analysis, Computer Simulation, DNA Fingerprinting,
	Drug Design, Drug Evaluation, Humans, Pharmaceutical, Preclinical,
	Quantitative Structure-Activity Relationship, Structure-Activity
	Relationship, Technology, 12415248},
  owner = {mahe},
  pii = {nrd941},
  pmid = {12415248},
  timestamp = {2006.08.15},
  url = {http://dx.doi.org/10.1038/nrd941}
}

@article{Bajorath2001Selected,
  author = {Bajorath, J.},
  title = {Selected concepts and investigations in compound classification,
	molecular descriptor analysis, and virtual screening.},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2001},
  volume = {41},
  pages = {233--245},
  number = {2},
  keywords = {chemoinformatics},
  owner = {vert},
  pmid = {11277704},
  timestamp = {2006.01.19}
}

@article{Baker2010Next,
  author = {Baker, Monya},
  title = {Next-generation sequencing: adjusting to data overload},
  journal = {Nature Methods},
  year = {2010},
  volume = {7},
  pages = {495--499},
  number = {7},
  doi = {10.1038/nmeth0710-495},
  owner = {philippe},
  timestamp = {2010.07.27}
}

@article{Bakker2003Task,
  author = {Bakker, B. and Heskes, T.},
  title = {Task clustering and gating for {Bayesian} multitask learning},
  journal = {J. Mach. Learn. Res.},
  year = {2003},
  volume = {4},
  pages = {83--99},
  address = {Cambridge, MA, USA},
  issn = {1533-7928},
  publisher = {MIT Press}
}

@article{Balakin2002Property-based,
  author    = {Balakin, K. V. and Tkachenko, S. E. and Lang, S. A. and Okun, I.
	and Ivashchenko, A. A. and Savchuk, N. P.},
  title     = {Property-based design of {GPCR}-targeted library.},
  journal   = {J. Chem. Inf. Comput. Sci.},
  volume    = {42},
  number    = {6},
  pages     = {1332--1342},
  year      = {2002},
  pii       = {ci025538y},
  pmid      = {12444729},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2007.09.22},
  abstract  = {The design of a GPCR-targeted library, based on a scoring scheme for
	the classification of molecules into "GPCR-ligand-like" and "non-GPCR-ligand-like",
	is outlined. The methodology is a valuable tool that can aid in the
	selection and prioritization of potential GPCR ligands for bioscreening
	from large collections of compounds. It is based on the distillation
	of knowledge from large databases of GPCR and non-GPCR active agents.
	The method employed a set of descriptors for encoding the molecular
	structures and by training of a neural network for classifying the
	molecules. The molecular requirements were profiled and validated
	by using available databases of GPCR- and non-GPCR-active agents
	[5736 diverse GPCR-active molecules and 7506 diverse non-GPCR-active
	molecules from the Ensemble Database (Prous Science, 2002)]. The
	method enables efficient qualification or disqualification of a molecule
	as a potential GPCR ligand and represents a useful tool for constraining
	the size of GPCR-targeted libraries that will help speed up the development
	of new GPCR-active drugs.}
}

@article{Balasubramanian2002isomap,
  author = {Balasubramanian, M. and Schwartz, E. L.},
  title = {The {Isomap} algorithm and topological stability},
  journal = {Science},
  year = {2002},
  volume = {295},
  pages = {7a},
  number = {5552},
  month = {Jan},
  doi = {10.1126/science.295.5552.7a},
  pdf = {../local/Balasubramanian2002isomap.pdf},
  file = {Balasubramanian2002isomap.pdf:local/Balasubramanian2002isomap.pdf:PDF},
  keywords = {dimred},
  pii = {295/5552/7a},
  url = {http://dx.doi.org/10.1126/science.295.5552.7a}
}

@book{Baldi2001Bioinformatcs,
  title = {Bioinformatics: the machine learning approach},
  publisher = {MIT Press},
  year = {2001},
  author = {Baldi, P. and Brunak, S.},
  owner = {jp},
  timestamp = {2010.10.12}
}

@article{Baldi1999Exploiting,
  author = {Baldi, P. and Brunak, S. and Frasconi, P. and Soda, G. and Pollastri,
	G.},
  title = {Exploiting the past and the future in protein secondary structure
	prediction},
  journal = {Bioinformatics},
  year = {1999},
  volume = {15},
  pages = {937--946},
  number = {11},
  pdf = {../local/bald99.pdf},
  file = {bald99.pdf:local/bald99.pdf:PDF},
  subject = {biocasp},
  url = {http://bioinformatics.oupjournals.org/cgi/reprint/15/11/937.pdf}
}

@article{Baldi1994Hidden,
  author = {Baldi, P. and Chauvin, Y. and Hunkapiller, T. and Mc{C}lure, M. A.},
  title = {Hidden {M}arkov models of biological primary sequence information},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {1994},
  volume = {91},
  pages = {1053--1063},
  number = {3},
  pdf = {../local/Baldi1994Hidden.pdf},
  file = {Baldi1994Hidden.pdf:local/Baldi1994Hidden.pdf:PDF}
}

@article{Ballesteros2001G,
  author = {Ballesteros, J. and Palczewski, K.},
  title = {G protein-coupled receptor drug discovery: implications from the
	crystal structure of rhodopsin.},
  journal = {Curr. Opin. Drug Discov. Devel.},
  year = {2001},
  volume = {4},
  number = {5},
  pages = {561--574},
  month = {Sep},
  pmid = {12825452},
  keywords = {Amino Acid Sequence; Animals; Crystallography, X-Ray; Drug Design;
	GTP-Binding Proteins; Humans; Models, Molecular; Molecular Sequence
	Data; Receptors, Drug; Rhodopsin},
  owner = {laurent},
  timestamp = {2007.09.22},
  abstract = {G protein-coupled receptors (GPCRs) are a functionally diverse group
	of membrane proteins that play a critical role in signal transduction.
	Because of the lack of a high-resolution structure, the heptahelical
	transmembrane bundle within the N-terminal extracellular and C-terminal
	intracellular region of these receptors has initially been modeled
	based on the high-resolution structure of bacterial retinal-binding
	protein, bacteriorhodopsin. However, the low-resolution structure
	of rhodopsin, a prototypical GPCR, revealed that there is a minor
	relationship between GPCRs and bacteriorhodopsins. The high-resolution
	crystal structure of the rhodopsin ground state and further refinements
	of the model provide the first structural information about the entire
	organization of the polypeptide chain and post-translational moieties.
	These studies provide a structural template for Family 1 GPCRs that
	has the potential to significantly improve structure-based approaches
	to GPCR drug discovery.}
}

@article{Balmain2003genetics,
  author = {Balmain, A. and Gray, J. and Ponder, B.},
  title = {The genetics and genomics of cancer},
  journal = {Nat. {G}enet.},
  year = {2003},
  volume = {33},
  pages = {238--244},
  abstract = {The past decade has seen great strides in our understanding of the
	genetic basis of human disease. {A}rguably, the most profound impact
	has been in the area of cancer genetics, where the explosion of genomic
	sequence and molecular profiling data has illustrated the complexity
	of human malignancies. {I}n a tumor cell, dozens of different genes
	may be aberrant in structure or copy number, and hundreds or thousands
	of genes may be differentially expressed. {A} number of familial
	cancer genes with high-penetrance mutations have been identified,
	but the contribution of low-penetrance genetic variants or polymorphisms
	to the risk of sporadic cancer development remains unclear. {S}tudies
	of the complex somatic genetic events that take place in the emerging
	cancer cell may aid the search for the more elusive germline variants
	that confer increased susceptibility. {I}nsights into the molecular
	pathogenesis of cancer have provided new strategies for treatment,
	but a deeper understanding of this disease will require new statistical
	and computational approaches for analysis of the genetic and signaling
	networks that orchestrate individual cancer susceptibility and tumor
	behavior.},
  doi = {10.1038/ng1107},
  pdf = {../local/Balmain2003genetics.pdf},
  file = {Balmain2003genetics.pdf:local/Balmain2003genetics.pdf:PDF},
  url = {http://dx.doi.org/10.1038/ng1107}
}

@article{Bandyopadhyay2006Systematic,
  author = {Bandyopadhyay, S. and Sharan, R. and Ideker, T.},
  title = {Systematic identification of functional orthologs based on protein
	network comparison},
  journal = {Genome Res.},
  year = {2006},
  volume = {16},
  number = {3},
  pages = {428--435},
  month = {Mar},
  doi = {10.1101/gr.4526006},
  url = {http://dx.doi.org/10.1101/gr.4526006},
  pmid = {16510899},
  pii = {16/3/428},
  pdf = {../local/Bandyopadhyay2006Systematic.pdf},
  file = {Bandyopadhyay2006Systematic.pdf:local/Bandyopadhyay2006Systematic.pdf:PDF},
  institution = {Program in Bioinformatics, University of California at San Diego,
	La Jolla, California 92093, USA.},
  owner = {jp},
  timestamp = {2008.10.02},
  abstract = {Annotating protein function across species is an important task that
	is often complicated by the presence of large paralogous gene families.
	Here, we report a novel strategy for identifying functionally related
	proteins that supplements sequence-based comparisons with information
	on conserved protein-protein interactions. First, the protein interaction
	networks of two species are aligned by assigning proteins to sequence
	homology clusters using the Inparanoid algorithm. Next, probabilistic
	inference is performed on the aligned networks to identify pairs
	of proteins, one from each species, that are likely to retain the
	same function based on conservation of their interacting partners.
	Applying this method to Drosophila melanogaster and Saccharomyces
	cerevisiae, we analyze 121 cases for which functional orthology assignment
	is ambiguous when sequence similarity is used alone. In 61 of these
	cases, the network supports a different protein pair than that favored
	by sequence comparisons. These results suggest that network analysis
	can be used to provide a key source of information for refining sequence-based
	homology searches.}
}

@article{Banerjee2008Model,
  author = {Banerjee, O. and El Ghaoui, L. and d'Aspremont, A.},
  title = {Model Selection Through Sparse Maximum Likelihood Estimation for
	Multivariate {Gaussian} or Binary Data},
  journal = {J. Mach. Learn. Res.},
  year = {2008},
  volume = {9},
  pages = {485--516},
  pdf = {../local/Banerjee2008Model.pdf},
  file = {Banerjee2008Model.pdf:Banerjee2008Model.pdf:PDF},
  owner = {jp},
  timestamp = {2012.03.20},
  url = {http://jmlr.csail.mit.edu/papers/volume9/banerjee08a/banerjee08a.pdf}
}

@article{Bansal2007How,
  author = {Bansal, M. and Belcastro, V. and Ambesi-Impiombato, A. and di Bernardo,
	D.},
  title = {How to infer gene networks from expression profiles},
  journal = {Mol. Syst. Biol.},
  year = {2007},
  volume = {3},
  pages = {78},
  doi = {10.1038/msb4100120},
  url = {http://dx.doi.org/10.1038/msb4100120},
  pmid = {17299415},
  pii = {msb4100120},
  pdf = {../local/Bansal2007How.pdf},
  file = {Bansal2007How.pdf:Bansal2007How.pdf:PDF},
  institution = {Telethon Institute of Genetics and Medicine, Via P Castellino, Naples,
	Italy.},
  owner = {fantine},
  timestamp = {2008.02.07},
  abstract = {Inferring, or 'reverse-engineering', gene networks can be defined
	as the process of identifying gene interactions from experimental
	data through computational analysis. Gene expression data from microarrays
	are typically used for this purpose. Here we compared different reverse-engineering
	algorithms for which ready-to-use software was available and that
	had been tested on experimental data sets. We show that reverse-engineering
	algorithms are indeed able to correctly infer regulatory interactions
	among genes, at least when one performs perturbation experiments
	complying with the algorithm requirements. These algorithms are superior
	to classic clustering algorithms for the purpose of finding regulatory
	interactions among genes, and, although further improvements are
	needed, have reached a discreet performance for being practically
	useful.}
}

@article{Bansal2006Inference,
  author = {Bansal, M. and Della Gatta, G. and di Bernardo, D.},
  title = {Inference of gene regulatory networks and compound mode of action
	from time course gene expression profiles},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {815--822},
  number = {7},
  month = {Apr},
  abstract = {MOTIVATION: Time series expression experiments are an increasingly
	popular method for studying a wide range of biological systems. Here
	we developed an algorithm that can infer the local network of gene-gene
	interactions surrounding a gene of interest. This is achieved by
	a perturbation of the gene of interest and subsequently measuring
	the gene expression profiles at multiple time points. We applied
	this algorithm to computer simulated data and to experimental data
	on a nine gene network in Escherichia coli. RESULTS: In this paper
	we show that it is possible to recover the gene regulatory network
	from a time series data of gene expression following a perturbation
	to the cell. We show this both on simulated data and on a nine gene
	subnetwork part of the DNA-damage response pathway (SOS pathway)
	in the bacteria E. coli. CONTACT: dibernardo@tigem.it SUPPLEMENTARY
	INFORMATION: Supplementary data are available at http://dibernardo.tigem.it},
  doi = {10.1093/bioinformatics/btl003},
  institution = {Telethon Institute of Genetics and Medicine, Via P. Castellino 111,
	80131 Naples, Italy.},
  owner = {fantine},
  pii = {btl003},
  pmid = {16418235},
  timestamp = {2008.01.30},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl003}
}

@article{Bantscheff2007Quantitative,
  author = {Bantscheff, Marcus and Schirle, Markus and Sweetman, Gavain and
	Rick, Jens and Kuster, Bernhard},
  title = {Quantitative mass spectrometry in proteomics: a critical review.},
  journal = {Anal Bioanal Chem},
  year = {2007},
  volume = {389},
  number = {4},
  pages = {1017--1031},
  month = {Oct},
  doi = {10.1007/s00216-007-1486-6},
  url = {http://dx.doi.org/10.1007/s00216-007-1486-6},
  pmid = {17668192},
  institution = {Cellzome AG, Meyerhofstrasse 1, 69254, Heidelberg, Germany.},
  keywords = {Automatic Data Processing; Isotope Labeling; Mass Spectrometry; Peptides;
	Proteins; Proteome; Proteomics; Reference Standards},
  owner = {phupe},
  timestamp = {2010.08.13},
  abstract = {The quantification of differences between two or more physiological
	states of a biological system is among the most important but also
	most challenging technical tasks in proteomics. In addition to the
	classical methods of differential protein gel or blot staining by
	dyes and fluorophores, mass-spectrometry-based quantification methods
	have gained increasing popularity over the past five years. Most
	of these methods employ differential stable isotope labeling to create
	a specific mass tag that can be recognized by a mass spectrometer
	and at the same time provide the basis for quantification. These
	mass tags can be introduced into proteins or peptides (i) metabolically,
	(ii) by chemical means, (iii) enzymatically, or (iv) provided by
	spiked synthetic peptide standards. In contrast, label-free quantification
	approaches aim to correlate the mass spectrometric signal of intact
	proteolytic peptides or the number of peptide sequencing events with
	the relative or absolute protein quantity directly. In this review,
	we critically examine the more commonly used quantitative mass spectrometry
	methods for their individual merits and discuss challenges in arriving
	at meaningful interpretations of quantitative proteomic data.}
}

@article{Bao2005Identifying,
  author = {Lei Bao},
  title = {Identifying genes related to chemosensitivity using support vector
	machine.},
  journal = {Methods {M}ol {M}ed},
  year = {2005},
  volume = {111},
  pages = {233--240},
  abstract = {In an effort to identify genes involved in chemosensitivity and to
	evaluate the functional relationships between genes and anticancer
	drugs acting by the same mechanism, a supervised machine learning
	approach called support vector machine ({SVM}) is used to associate
	genes with any of five predefined anticancer drug mechanistic categories.
	{T}he drug activity profiles are used as training examples to train
	the {SVM} and then the gene expression profiles are used as test
	examples to predict their associated mechanistic categories. {T}his
	method of correlating drugs and genes provides a strategy for finding
	novel biologically significant relationships for molecular pharmacology.},
  keywords = {biosvm},
  pii = {1-59259-889-7:233}
}

@article{Bao2005Prediction,
  author = {Lei Bao and Yan Cui},
  title = {Prediction of the phenotypic effects of non-synonymous single nucleotide
	polymorphisms using structural and evolutionary information.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2185--2190},
  number = {10},
  month = {May},
  abstract = {M{OTIVATION}: {T}here has been great expectation that the knowledge
	of an individual's genotype will provide a basis for assessing susceptibility
	to diseases and designing individualized therapy. {N}on-synonymous
	single nucleotide polymorphisms (ns{SNP}s) that lead to an amino
	acid change in the protein product are of particular interest because
	they account for nearly half of the known genetic variations related
	to human inherited diseases. {T}o facilitate the identification of
	disease-associated ns{SNP}s from a large number of neutral ns{SNP}s,
	it is important to develop computational tools to predict the phenotypic
	effects of ns{SNP}s. {RESULTS}: {W}e prepared a training set based
	on the variant phenotypic annotation of the {S}wiss-{P}rot database
	and focused our analysis on ns{SNP}s having homologous 3{D} structures.
	{S}tructural environment parameters derived from the 3{D} homologous
	structure as well as evolutionary information derived from the multiple
	sequence alignment were used as predictors. {T}wo machine learning
	methods, support vector machine and random forest, were trained and
	evaluated. {W}e compared the performance of our method with that
	of the {SIFT} algorithm, which is one of the best predictive methods
	to date. {A}n unbiased evaluation study shows that for ns{SNP}s with
	sufficient evolutionary information (with not <10 homologous sequences),
	the performance of our method is comparable with the {SIFT} algorithm,
	while for ns{SNP}s with insufficient evolutionary information (<10
	homologous sequences), our method outperforms the {SIFT} algorithm
	significantly. {T}hese findings indicate that incorporating structural
	information is critical to achieving good prediction accuracy when
	sufficient evolutionary information is not available. {AVAILABILITY}:
	{T}he codes and curated dataset are available at http://compbio.utmem.edu/snp/dataset/},
  doi = {10.1093/bioinformatics/bti365},
  pdf = {../local/Bao2005Prediction.pdf},
  file = {Bao2005Prediction.pdf:local/Bao2005Prediction.pdf:PDF},
  keywords = {biosvm},
  pii = {bti365},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti365}
}

@article{Bao2002Identifying,
  author = {Bao, L. and Sun, Z.},
  title = {Identifying genes related to drug anticancer mechanisms using support
	vector machine},
  journal = {F{EBS} {L}ett.},
  year = {2002},
  volume = {521},
  pages = {109--114},
  url = {http://www.elsevier.com/febs/402/19/42/article.html},
  pdf = {../local/bao02.pdf},
  file = {bao02.pdf:local/bao02.pdf:PDF},
  keywords = {biosvm microarray},
  subject = {biokernel},
  abstract = {In an effort to identify genes related to the cell line chemosensitivity
	and to evaluate the functional relationships between genes and anticancer
	drugs acting by the same mechanism, a supervised machine learning
	approach called support vector machine was used to label genes into
	any of the five predefined anticancer drug mechanistic categories.
	{A}mong dozens of unequivocally categorized genes, many were known
	to be causally related to the drug mechanisms. {F}or example, a few
	genes were found to be involved in the biological process triggered
	by the drugs (e.g. {DNA} polymerase epsilon was the direct target
	for the drugs from {DNA} antimetabolites category). {DNA} repair-related
	genes were found to be enriched for about eight-fold in the resulting
	gene set relative to the entire gene set. {S}ome uncharacterized
	transcripts might be of interest in future studies. {T}his method
	of correlating the drugs and genes provides a strategy for finding
	novel biologically significant relationships for molecular pharmacology.}
}

@article{Barabasi1999Emergence,
  author = {Barab{\'a}si, A.-L. and Albert, R.},
  title = {Emergence of scaling in random networks},
  journal = {Science},
  year = {1999},
  volume = {286},
  pages = {509--512},
  number = {5439},
  abstract = {Systems as diverse as genetic networks or the World Wide Web are best
	described as networks with complex topology. A common property of
	many large networks is that the vertex connectivities follow a scale-free
	power-law distribution. This feature was found to be a consequence
	of two generic mechanisms: (i) networks expand continuously by the
	addition of new vertices, and (ii) new vertices attach preferentially
	to sites that are already well connected. A model based on these
	two ingredients reproduces the observed stationary scale-free distributions,
	which indicates that the development of large networks is governed
	by robust self-organizing phenomena that go beyond the particulars
	of the individual systems.},
  doi = {10.1126/science.286.5439.509},
  pdf = {../local/Barabasi1999Emergence.pdf},
  file = {Barabasi1999Emergence.pdf:Barabasi1999Emergence.pdf:PDF},
  subject = {bionet},
  url = {http://www.sciencemag.org/cgi/reprint/286/5439/509.pdf}
}

@unpublished{Barabasi2001Deterministic,
  author = {Barab{\'a}si, A.-L. and Ravasz, E.},
  title = {Deterministic scale-free networks},
  note = {E-print cond-mat/0107419},
  year = {2001},
  archiveprefix = {arXiv},
  eprint = {cond-mat/0107419},
  pdf = {../local/bara01.pdf},
  file = {bara01.pdf:local/bara01.pdf:PDF},
  subject = {compnet},
  url = {http://arxiv.org/abs/cond-mat/0107419}
}

@article{Baranger1971Matrices,
  author = {Baranger, J. and Duc-Jacquet, M.},
  title = {Matrices tridiagonales sym{\'e}triques et matrices factorisables},
  journal = {Revue fran{\c{c}}aise d'informatique et de recherche op{\'e}rationnelle,
	s{\'e}rie rouge},
  year = {1971},
  volume = {5},
  pages = {61--66},
  number = {3},
  pdf = {../local/Baranger1971Matrices.pdf},
  file = {Baranger1971Matrices.pdf:Baranger1971Matrices.pdf:PDF},
  owner = {jp},
  timestamp = {2010.08.03},
  url = {http://www.numdam.org/item?id=M2AN_1971__5_3_61_0}
}

@article{Barash2010Deciphering,
  author = {Barash, Yoseph and Calarco, John A and Gao, Weijun and Pan, Qun and
	Wang, Xinchen and Shai, Ofer and Blencowe, Benjamin J and Frey, Brendan J},
  title = {Deciphering the splicing code.},
  journal = {Nature},
  year = {2010},
  volume = {465},
  number = {7294},
  pages = {53--59},
  month = {May},
  doi = {10.1038/nature09000},
  url = {http://dx.doi.org/10.1038/nature09000},
  pmid = {20445623},
  pii = {nature09000},
  institution = {Biomedical Engineering, Department of Electrical and Computer Engineering,
	University of Toronto, 10 King's College Road, Toronto M5S 3G4, Canada.},
  keywords = {Alternative Splicing, genetics; Animals; Gene Expression Regulation;
	Gene Silencing; Genetic Code, genetics; Humans; Mice; Models, Genetic;
	RNA, Messenger, metabolism; Reproducibility of Results},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  timestamp = {2011.06.04},
  abstract = {Alternative splicing has a crucial role in the generation of biological
	complexity, and its misregulation is often involved in human disease.
	Here we describe the assembly of a 'splicing code', which uses combinations
	of hundreds of RNA features to predict tissue-dependent changes in
	alternative splicing for thousands of exons. The code determines
	new classes of splicing patterns, identifies distinct regulatory
	programs in different tissues, and identifies mutation-verified regulatory
	sequences. Widespread regulatory strategies are revealed, including
	the use of unexpectedly large combinations of features, the establishment
	of low exon inclusion levels that are overcome by features in specific
	tissues, the appearance of features deeper into introns than previously
	appreciated, and the modulation of splice variant levels by transcript
	structure characteristics. The code detected a class of exons whose
	inclusion silences expression in adult tissues by activating nonsense-mediated
	messenger RNA decay, but whose exclusion promotes expression during
	embryogenesis. The code facilitates the discovery and detailed characterization
	of regulated alternative splicing events on a genome-wide scale.}
}

@article{Baraud2009Gaussian,
  author = {Yannick Baraud and Christophe Giraud and Sylvie Huet},
  title = {Gaussian model selection with an unknown variance},
  journal = {Annals of Statistics},
  year = {2009},
  volume = {37},
  pages = {630--672},
  number = {2},
  doi = {10.1214/07-AOS573},
  url = {http://dx.doi.org/10.1214/07-AOS573}
}

@article{Barkai1997Robustness,
  author = {Barkai, N. and Leibler, S.},
  title = {Robustness in simple biochemical networks.},
  journal = {Nature},
  year = {1997},
  volume = {387},
  number = {6636},
  pages = {913--917},
  month = {Jun},
  doi = {10.1038/43199},
  url = {http://dx.doi.org/10.1038/43199},
  pmid = {9202124},
  pdf = {../local/Barkai1997Robustness.pdf},
  file = {Barkai1997Robustness.pdf:Barkai1997Robustness.pdf:PDF},
  institution = {Department of Physics, Princeton University, New Jersey 08544, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  timestamp = {2013.01.25},
  abstract = {Cells use complex networks of interacting molecular components to
	transfer and process information. These "computational devices of
	living cells" are responsible for many important cellular processes,
	including cell-cycle regulation and signal transduction. Here we
	address the issue of the sensitivity of the networks to variations
	in their biochemical parameters. We propose a mechanism for robust
	adaptation in simple signal transduction networks. We show that this
	mechanism applies in particular to bacterial chemotaxis. This is
	demonstrated within a quantitative model which explains, in a unified
	way, many aspects of chemotaxis, including proper responses to chemical
	gradients. The adaptation property is a consequence of the network's
	connectivity and does not require the 'fine-tuning' of parameters.
	We argue that the key properties of biochemical networks should be
	robust in order to ensure their proper functioning.}
}

@article{Barretina2012Cancer,
  author = {Barretina, Jordi and Caponigro, Giordano and Stransky, Nicolas and
	Venkatesan, Kavitha and Margolin, Adam A. and Kim, Sungjoon and Wilson,
	Christopher J. and Leh{\'a}r, Joseph and Kryukov, Gregory V. and Sonkin,
	Dmitriy and Reddy, Anupama and Liu, Manway and Murray, Lauren and
	Berger, Michael F. and Monahan, John E. and Morais, Paula and Meltzer,
	Jodi and Korejwa, Adam and Jan{\'e}-Valbuena, Judit and Mapa, Felipa
	A. and Thibault, Joseph and Bric-Furlong, Eva and Raman, Pichai and
	Shipway, Aaron and Engels, Ingo H. and Cheng, Jill and Yu, Guoying
	K. and Yu, Jianjun and Aspesi, Jr, Peter and {de Silva}, Melanie
	and Jagtap, Kalpana and Jones, Michael D. and Wang, Li and Hatton,
	Charles and Palescandolo, Emanuele and Gupta, Supriya and Mahan,
	Scott and Sougnez, Carrie and Onofrio, Robert C. and Liefeld, Ted
	and MacConaill, Laura and Winckler, Wendy and Reich, Michael and
	Li, Nanxin and Mesirov, Jill P. and Gabriel, Stacey B. and Getz,
	Gad and Ardlie, Kristin and Chan, Vivien and Myer, Vic E. and Weber,
	Barbara L. and Porter, Jeff and Warmuth, Markus and Finan, Peter
	and Harris, Jennifer L. and Meyerson, Matthew and Golub, Todd R.
	and Morrissey, Michael P. and Sellers, William R. and Schlegel, Robert
	and Garraway, Levi A.},
  title = {The Cancer Cell Line Encyclopedia enables predictive modelling of
	anticancer drug sensitivity.},
  journal = {Nature},
  year = {2012},
  volume = {483},
  pages = {603--607},
  number = {7391},
  month = {Mar},
  abstract = {The systematic translation of cancer genomic data into knowledge of
	tumour biology and therapeutic possibilities remains challenging.
	Such efforts should be greatly aided by robust preclinical model
	systems that reflect the genomic diversity of human cancers and for
	which detailed genetic and pharmacological annotation is available.
	Here we describe the Cancer Cell Line Encyclopedia (CCLE): a compilation
	of gene expression, chromosomal copy number and massively parallel
	sequencing data from 947 human cancer cell lines. When coupled with
	pharmacological profiles for 24 anticancer drugs across 479 of the
	cell lines, this collection allowed identification of genetic, lineage,
	and gene-expression-based predictors of drug sensitivity. In addition
	to known predictors, we found that plasma cell lineage correlated
	with sensitivity to IGF1 receptor inhibitors; AHR expression was
	associated with MEK inhibitor efficacy in NRAS-mutant lines; and
	SLFN11 expression predicted sensitivity to topoisomerase inhibitors.
	Together, our results indicate that large, annotated cell-line collections
	may help to enable preclinical stratification schemata for anticancer
	agents. The generation of genetic predictions of drug response in
	the preclinical setting and their incorporation into cancer clinical
	trial design could speed the emergence of 'personalized' therapeutic
	regimens.},
  doi = {10.1038/nature11003},
  pdf = {../local/Barretina2012Cancer.pdf},
  file = {Barretina2012Cancer.pdf:Barretina2012Cancer.pdf:PDF},
  institution = {The Broad Institute of Harvard and MIT, Cambridge, Massachusetts
	02142, USA.},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {nature11003},
  pmid = {22460905},
  timestamp = {2012.04.06},
  url = {http://dx.doi.org/10.1038/nature11003}
}

@article{Barrett2011NCBI,
  author = {Barrett, T. and Troup, D. B. and Wilhite, S. E. and Ledoux, P. and
	Evangelista, C. and Kim, I. F. and Tomashevsky, M. and Marshall, K. A.
	and Phillippy, K. H. and Sherman, P. M. and others},
  title = {{NCBI} {GEO}: archive for functional genomics data sets---10 years on},
  journal = {Nucleic Acids Research},
  year = {2011},
  volume = {39},
  pages = {D1005--D1010},
  number = {suppl 1},
  publisher = {Oxford Univ Press}
}

@article{Barrett2009NCBI,
  author = {Barrett, T. and Troup, D. B. and Wilhite, S. E. and Ledoux, P. and
	Rudnev, D. and Evangelista, C. and Kim, I. F. and Soboleva, A. and
	Tomashevsky, M. and Marshall, K. A. and others},
  title = {{NCBI} {GEO}: archive for high-throughput functional genomic data},
  journal = {Nucleic Acids Research},
  year = {2009},
  volume = {37},
  pages = {D885--D890},
  number = {suppl 1},
  publisher = {Oxford Univ Press}
}

@article{Barron1993Universal,
  author = {Barron, A. R.},
  title = {Universal approximation bounds for superpositions of a sigmoidal
	function},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1993},
  volume = {39},
  pages = {930--945},
  number = {3},
  month = {May},
  abstract = {Approximation properties of a class of artificial neural networks
	are established. {I}t is shown that feedforward networks with one
	layer of sigmoidal nonlinearities achieve integrated squared error
	of order {O} (1/n), where n is the number of nodes. {T}he approximated
	function is assumed to have a bound on the first moment of the magnitude
	distribution of the {F}ourier transform. {T}he nonlinear parameters
	associated with the sigmoidal nodes, as well as the parameters of
	linear combination, are adjusted in the approximation. {I}n contrast,
	it is shown that for series expansions with n terms, in which only
	the parameters of linear combination are adjusted, the integrated
	squared approximation error cannot be made smaller than order $1/n^{2/d}$
	uniformly for functions satisfying the same smoothness assumption,
	where d is the dimension of the input to the function. {F}or the
	class of functions examined, the approximation rate and the parsimony
	of the parameterization of the networks are shown to be advantageous
	in high-dimensional settings },
  pdf = {../local/Barron1993Universal.pdf},
  file = {Barron1993Universal.pdf:local/Barron1993Universal.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Barron1991Minimum,
  author = {Barron, A. R. and Cover, T. M.},
  title = {Minimum complexity density estimation},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1991},
  volume = {37},
  pages = {1034--1054},
  number = {4},
  month = {Jul},
  abstract = {The authors introduce an index of resolvability that is proved to
	bound the rate of convergence of minimum complexity density estimators
	as well as the information-theoretic redundancy of the corresponding
	total description length. {T}he results on the index of resolvability
	demonstrate the statistical effectiveness of the minimum description-length
	principle as a method of inference. {T}he minimum complexity estimator
	converges to true density nearly as fast as an estimator based on
	prior knowledge of the true subclass of densities. {I}nterpretations
	and basic properties of minimum complexity estimators are discussed.
	{S}ome regression and classification problems that can be examined
	from the minimum description-length framework are considered },
  pdf = {../local/Barron1991Minimum.pdf},
  file = {Barron1991Minimum.pdf:local/Barron1991Minimum.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Barron1988bound,
  author = {Barron, A. R. and Cover, T. M.},
  title = {A bound on the financial value of information},
  journal = {IEEE Trans. Inform. Theory},
  year = {1988},
  volume = {34},
  pages = {1097--1100},
  number = {5},
  month = {Sep},
  abstract = {It is shown that each bit of information at most doubles the resulting
	wealth in the general stock-market setup. {T}his information bound
	on the growth of wealth is actually attained for certain probability
	distributions on the market investigated by {J}. {K}elly (1956).
	{T}he bound is shown to be a special case of the result that the
	increase in exponential growth of wealth achieved with true knowledge
	of the stock market distribution {F} over that achieved with incorrect
	knowledge {G} is bounded above by the entropy of {F} relative to
	{G} },
  pdf = {../local/Barron1988bound.pdf},
  file = {Barron1988bound.pdf:local/Barron1988bound.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Barron1992Distribution,
  author = {Barron, A. R. and Gy{\"o}rfi, L. and van der Meulen, E. C.},
  title = {Distribution estimation consistent in total variation and in two
	types of information divergence},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1992},
  volume = {38},
  pages = {1437--1454},
  number = {5},
  month = {Sep},
  abstract = {The problem of the nonparametric estimation of a probability distribution
	is considered from three viewpoints: the consistency in total variation,
	the consistency in information divergence, and consistency in reversed-order
	information divergence. {T}hese types of consistencies are relatively
	strong criteria of convergence, and a probability distribution cannot
	be consistently estimated in either type of convergence without any
	restrictions on the class of probability distributions allowed. {H}istogram-based
	estimators of distribution are presented which, under certain conditions,
	converge in total variation, in information divergence, and in reversed-order
	information divergence to the unknown probability distribution. {S}ome
	a priori information about the true probability distribution is assumed
	in each case. {A}s the concept of consistency in information divergence
	is stronger than that of convergence in total variation, additional
	assumptions are imposed in the cases of informational divergences},
  pdf = {../local/Barron1992Distribution.pdf},
  file = {Barron1992Distribution.pdf:local/Barron1992Distribution.pdf:PDF},
  owner = {vert}
}

@article{Barron1998minimum,
  author = {Barron, A. and Rissanen, J. and Yu, Bin},
  title = {The minimum description length principle in coding and modeling},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1998},
  volume = {44},
  pages = {2743--2760},
  number = {6},
  month = {Oct},
  abstract = {We review the principles of minimum description length and stochastic
	complexity as used in data compression and statistical modeling.
	{S}tochastic complexity is formulated as the solution to optimum
	universal coding problems extending {S}hannon's basic source coding
	theorem. {T}he normalized maximized likelihood, mixture, and predictive
	codings are each shown to achieve the stochastic complexity to within
	asymptotically vanishing terms. {W}e assess the performance of the
	minimum description length criterion both from the vantage point
	of quality of data compression and accuracy of statistical inference.
	{C}ontext tree modeling, density estimation, and model selection
	in {G}aussian linear regression serve as examples },
  pdf = {../local/Barron1998minimum.pdf},
  file = {Barron1998minimum.pdf:local/Barron1998minimum.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Barry2005Significance,
  author = {Barry, W. T. and Nobel, A. B. and Wright, F. A.},
  title = {Significance analysis of functional categories in gene expression
	studies: a structured permutation approach},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1943--1949},
  number = {9},
  month = {May},
  abstract = {MOTIVATION: In high-throughput genomic and proteomic experiments,
	investigators monitor expression across a set of experimental conditions.
	To gain an understanding of broader biological phenomena, researchers
	have until recently been limited to post hoc analyses of significant
	gene lists. METHOD: We describe a general framework, significance
	analysis of function and expression (SAFE), for conducting valid
	tests of gene categories ab initio. SAFE is a two-stage, permutation-based
	method that can be applied to various experimental designs, accounts
	for the unknown correlation among genes and enables permutation-based
	estimation of error rates. RESULTS: The utility and flexibility of
	SAFE is illustrated with a microarray dataset of human lung carcinomas
	and gene categories based on Gene Ontology and the Protein Family
	database. Significant gene categories were observed in comparisons
	of (1) tumor versus normal tissue, (2) multiple tumor subtypes and
	(3) survival times. AVAILABILITY: Code to implement SAFE in the statistical
	package R is available from the authors. SUPPLEMENTARY INFORMATION:
	http://www.bios.unc.edu/~fwright/SAFE.},
  doi = {10.1093/bioinformatics/bti260},
  institution = {Department of Biostatistics, University of North Carolina at Chapel
	Hill, 27599-7420, USA.},
  owner = {jp},
  pii = {bti260},
  pmid = {15647293},
  timestamp = {2008.12.05},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti260}
}

@article{Bartel2004MicroRNAs,
  author = {Bartel, David P},
  title = {MicroRNAs: genomics, biogenesis, mechanism, and function.},
  journal = {Cell},
  volume = {116},
  number = {2},
  pages = {281--297},
  month = {Jan},
  year = {2004},
  pmid = {14744438},
  pii = {S0092867404000455},
  abstract = {MicroRNAs (miRNAs) are endogenous approximately 22 nt RNAs that can
	play important regulatory roles in animals and plants by targeting
	mRNAs for cleavage or translational repression. Although they escaped
	notice until relatively recently, miRNAs comprise one of the more
	abundant classes of gene regulatory molecules in multicellular organisms
	and likely influence the output of many protein-coding genes.},
  keywords = {sirna},
  institution = {Whitehead Institute for Biomedical Research, 9 Cambridge Center,
	Cambridge, MA 02142, USA. dbartel@wi.mit.edu},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Bartel2004MicroRNAs.pdf},
  file = {Bartel2004MicroRNAs.pdf:Bartel2004MicroRNAs.pdf:PDF},
  owner = {jp},
  timestamp = {2009.10.28}
}

@techreport{Bartlett2003Convexity,
  author = {Bartlett, P. L. and Jordan, M. I. and McAuliffe, J. D.},
  title = {Convexity, classification and risk bounds},
  institution = {UC Berkeley Statistics},
  year = {2003},
  number = {638},
  pdf = {../local/Bartlett2003Convexity.pdf},
  file = {Bartlett2003Convexity.pdf:local/Bartlett2003Convexity.pdf:PDF},
  owner = {jeanphilippevert}
}

@article{Bartlett2005Local,
  author = {Bartlett, P. L. and Bousquet, O. and Mendelson, S.},
  title = {Local {R}ademacher {C}omplexities},
  journal = {Ann. {S}tat.},
  year = {2005},
  volume = {33},
  pages = {1497--1537},
  number = {4},
  doi = {10.1214/009053605000000282},
  pdf = {../local/Bartlett2005Local.pdf},
  file = {Bartlett2005Local.pdf:local/Bartlett2005Local.pdf:PDF},
  url = {http://dx.doi.org/10.1214/009053605000000282}
}

@article{Bartlett2002Rademacher,
  author = {Bartlett, P. L. and Mendelson, S.},
  title = {Rademacher and {Gaussian} complexities: risk bounds and structural
	results},
  journal = {J. Mach. Learn. Res.},
  volume = {3},
  pages = {463--482},
  year = {2002},
  url = {http://jmlr.csail.mit.edu/papers/volume3/bartlett02a/bartlett02a.pdf},
  pdf = {../local/Bartlett2002Rademacher.pdf},
  file = {Bartlett2002Rademacher.pdf:Bartlett2002Rademacher.pdf:PDF},
  owner = {jp},
  timestamp = {2012.04.15}
}

@article{Bartlett2007Sparseness,
  author = {Bartlett, P. L. and Tewari, A.},
  title = {Sparseness vs Estimating Conditional Probabilities: Some Asymptotic
	Results},
  journal = {J. Mach. Learn. Res.},
  volume = {8},
  pages = {775--790},
  year = {2007},
  url = {http://jmlr.csail.mit.edu/papers/v8/bartlett07a.html},
  keywords = {PUlearning},
  abstract = {One of the nice properties of kernel classifiers such as SVMs is that
	they often produce sparse solutions. However, the decision functions
	of these classifiers cannot always be used to estimate the conditional
	probability of the class label. We investigate the relationship between
	these two properties and show that these are intimately related:
	sparseness does not occur when the conditional probabilities can
	be unambiguously estimated. We consider a family of convex loss functions
	and derive sharp asymptotic results for the fraction of data that
	becomes support vectors. This enables us to characterize the exact
	trade-off between sparseness and the ability to estimate conditional
	probabilities for these loss functions.},
  owner = {jp},
  timestamp = {2010.02.01}
}

@comment{Bartlett2004Sparseness: NOTE(review) LNCS volume 3120 is the COLT 2004 proceedings, so the series name is stored in "series" and the proceedings title in "booktitle"; confirm the exact proceedings title against the publisher record.}
@inproceedings{Bartlett2004Sparseness,
  author = {Bartlett, P. L. and Tewari, A.},
  title = {Sparseness vs estimating conditional probabilities: {S}ome asymptotic
	results},
  booktitle = {Learning Theory: 17th Annual Conference on Learning Theory, {COLT} 2004},
  year = {2004},
  volume = {3120},
  series = {Lecture {N}otes in {C}omputer {S}cience},
  pages = {564--578},
  publisher = {Springer}
}

@article{Basak1988Determining,
  author = {Basak, S. C. and Magnuson, V. R. and Niemi, G. J. and Regal, R. R.},
  title = {Determining {S}tructural {S}imilarity of {C}hemicals {U}sing {G}raph
	{T}heoretic {I}ndices},
  journal = {Discrete {A}ppl. {M}ath.},
  year = {1988},
  volume = {19},
  pages = {17--44},
  owner = {mahe},
  timestamp = {2006.09.01}
}

@article{Bashir2008Evaluation,
  author = {Bashir, Ali and Volik, Stanislav and Collins, Colin and Bafna, Vineet
	and Raphael, Benjamin J},
  title = {Evaluation of paired-end sequencing strategies for detection of genome
	rearrangements in cancer.},
  journal = {PLoS Comput. Biol.},
  volume = {4},
  number = {4},
  pages = {e1000051},
  month = {Apr},
  year = {2008},
  doi = {10.1371/journal.pcbi.1000051},
  url = {http://dx.doi.org/10.1371/journal.pcbi.1000051},
  pmid = {18404202},
  abstract = {Paired-end sequencing is emerging as a key technique for assessing
	genome rearrangements and structural variation on a genome-wide scale.
	This technique is particularly useful for detecting copy-neutral
	rearrangements, such as inversions and translocations, which are
	common in cancer and can produce novel fusion genes. We address the
	question of how much sequencing is required to detect rearrangement
	breakpoints and to localize them precisely using both theoretical
	models and simulation. We derive a formula for the probability that
	a fusion gene exists in a cancer genome given a collection of paired-end
	sequences from this genome. We use this formula to compute fusion
	gene probabilities in several breast cancer samples, and we find
	that we are able to accurately predict fusion genes in these samples
	with a relatively small number of fragments of large size. We further
	demonstrate how the ability to detect fusion genes depends on the
	distribution of gene lengths, and we evaluate how different parameters
	of a sequencing strategy impact breakpoint detection, breakpoint
	localization, and fusion gene detection, even in the presence of
	errors that suggest false rearrangements. These results will be useful
	in calibrating future cancer sequencing efforts, particularly large-scale
	studies of many cancer genomes that are enabled by next-generation
	sequencing technologies.},
  keywords = {ngs},
  institution = {Bioinformatics Graduate Program, University of California San Diego,
	San Diego, California, United States of America. abashir@ucsd.edu},
  pdf = {../local/Bashir2008Evaluation.pdf},
  file = {Bashir2008Evaluation.pdf:Bashir2008Evaluation.pdf:PDF},
  owner = {jp},
  timestamp = {2009.10.09}
}

@comment{Basseville1993Detection: NOTE(review) the second author is Igor V. Nikiforov; the word "Information" in the previous publisher value appears to be a fragment of the series name. Confirm against the book's title page.}
@book{Basseville1993Detection,
  author = {Basseville, M. and Nikiforov, I. V.},
  title = {Detection of abrupt changes: theory and application},
  publisher = {Prentice Hall},
  year = {1993},
  series = {Information and System Sciences Series},
  pdf = {../local/Basseville1993Detection.pdf},
  file = {Basseville1993Detection.pdf:Basseville1993Detection.pdf:PDF},
  owner = {jp},
  timestamp = {2010.06.02}
}

@article{Baudat2000Generalized,
  author = {G. Baudat and F. Anouar},
  title = {Generalized discriminant analysis using a kernel approach.},
  journal = {Neural Comput.},
  year = {2000},
  volume = {12},
  pages = {2385--2404},
  number = {10},
  month = {Oct},
  abstract = {We present a new method that we call generalized discriminant analysis
	({GDA}) to deal with nonlinear discriminant analysis using kernel
	function operator. {T}he underlying theory is close to the support
	vector machines ({SVM}) insofar as the {GDA} method provides a mapping
	of the input vectors into high-dimensional feature space. {I}n the
	transformed space, linear properties make it easy to extend and generalize
	the classical linear discriminant analysis ({LDA}) to nonlinear discriminant
	analysis. {T}he formulation is expressed as an eigenvalue problem
	resolution. {U}sing a different kernel, one can cover a wide class
	of nonlinearities. {F}or both simulated data and alternate kernels,
	we give classification results, as well as the shape of the decision
	function. {T}he results are confirmed using real data to perform
	seed classification.},
  doi = {10.1162/089976600300014980},
  pdf = {../local/Baudat2000Generalized.pdf},
  file = {Baudat2000Generalized.pdf:Baudat2000Generalized.pdf:PDF},
  url = {http://dx.doi.org/10.1162/089976600300014980}
}

@article{Bauknecht1996Locating,
  author = {Bauknecht, H. and Zell, A. and Bayer, H. and Levi, P. and Wagener, M.
	and Sadowski, J. and Gasteiger, J.},
  title = {{L}ocating biologically active compounds in medium-sized heterogeneous
	datasets by topological autocorrelation vectors: dopamine and benzodiazepine
	agonists.},
  journal = {J Chem Inf Comput Sci},
  volume = {36},
  number = {6},
  pages = {1205--1213},
  year = {1996},
  pmid = {8941996},
  abstract = {Electronic properties located on the atoms of a molecule such as partial
	atomic charges as well as electronegativity and polarizability values
	are encoded by an autocorrelation vector accounting for the constitution
	of a molecule. This encoding procedure is able to distinguish between
	compounds being dopamine agonists and those being benzodiazepine
	receptor agonists even after projection into a two-dimensional self-organizing
	network. The two types of compounds can still be distinguished if
	they are buried in a dataset of 8323 compounds of a chemical supplier
	catalog comprising a wide structural variety. The maps obtained by
	this sequence of events, calculation of empirical physicochemical
	effects, encoding in a topological autocorrelation vector, and projection
	by a self-organizing neural network, can thus be used for searching
	for structural similarity, and, in particular, for finding new lead
	structures with biological activity.},
  keywords = {Animals, Chemical, Chemistry, Databases, Dopamine Agonists, Drug Design,
	Electrochemistry, Factual, GABA-A, Logistic Models, Models, Molecular
	Structure, Neural Networks (Computer), Non-U.S. Gov't, Phenols, Physical,
	Receptors, Research Support, Structure-Activity Relationship, Tetrahymena
	pyriformis, 8941996},
  owner = {mahe},
  timestamp = {2006.08.17}
}

@article{Baulcombe1996RNA,
  author = {Baulcombe, D. C.},
  title = {{RNA} as a target and an initiator of post-transcriptional gene silencing
	in transgenic plants.},
  journal = {Plant Mol. Biol.},
  year = {1996},
  volume = {32},
  pages = {79--88},
  number = {1--2},
  month = {Oct},
  abstract = {Post-transcriptional gene silencing in transgenic plants is the manifestation
	of a mechanism that suppresses RNA accumulation in a sequence-specific
	manner. The target RNA species may be the products of transgenes,
	endogenous plant genes or viral RNAs. For an RNA to be a target it
	is necessary only that it has sequence homology to the sense RNA
	product of the transgene. There are three current hypotheses to account
	for the mechanism of post transcriptional gene silencing. These models
	all require production of an antisense RNA of the RNA targets to
	account for the specificity of the mechanism. There could be either
	direct transcription of the antisense RNA from the transgene, antisense
	RNA produced in response to over expression of the transgene or antisense
	RNA produced in response to the production of an aberrant sense RNA
	product of the transgene. To determine which of these models is correct
	it will be necessary to find out whether transgene methylation, which
	is frequently associated with the potential of transgenes to confer
	post-transcriptional gene silencing, is a cause or a consequence
	of the process.},
  doi = {10.1007/BF00039378},
  pdf = {../local/Baulcombe1996RNA.pdf},
  file = {Baulcombe1996RNA.pdf:Baulcombe1996RNA.pdf:PDF},
  owner = {vert},
  pmid = {8980475},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1007/BF00039378}
}

@article{Baumgartner2004Supervised,
  author = {Baumgartner, C. and Bohm, C. and Baumgartner, D. and Marini, G. and
	Weinberger, K. and Olgemoller, B. and Liebl, B. and Roscher, A. A.},
  title = {Supervised machine learning techniques for the classification of
	metabolic disorders in newborns},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {2985--2996},
  number = {17},
  abstract = {Motivation: {D}uring the {B}avarian newborn screening programme all
	newborns have been tested for about 20 inherited metabolic disorders.
	{O}wing to the amount and complexity of the generated experimental
	data, machine learning techniques provide a promising approach to
	investigate novel patterns in high-dimensional metabolic data which
	form the source for constructing classification rules with high discriminatory
	power. {R}esults: {S}ix machine learning techniques have been investigated
	for their classification accuracy focusing on two metabolic disorders,
	phenylketonuria ({PKU}) and medium-chain acyl-{C}o{A} dehydrogenase
	deficiency ({MCADD}). {L}ogistic regression analysis led to superior
	classification rules (sensitivity >96.8\%, specificity >99.98\%) compared
	to all investigated algorithms. {I}ncluding novel constellations
	of metabolites into the models, the positive predictive value could
	be strongly increased ({PKU} 71.9\% versus 16.2\%, {MCADD} 88.4\% versus
	54.6\% compared to the established diagnostic markers). {O}ur results
	clearly prove that the mined data confirm the known and indicate
	some novel metabolic patterns which may contribute to a better understanding
	of newborn metabolism. {A}vailability: {WEKA} machine learning package:
	www.cs.waikato.ac.nz/~ml/weka and statistical software package {ADE}-4:
	http://pbil.univ-lyon1.fr/{ADE}-4},
  doi = {10.1093/bioinformatics/bth343},
  pdf = {../local/Baumgartner2004Supervised.pdf},
  file = {Baumgartner2004Supervised.pdf:local/Baumgartner2004Supervised.pdf:PDF},
  keywords = {biosvm proteomics},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/17/2985}
}

@article{Baumgartner2004Unsupervised,
  author = {R. Baumgartner and R. Somorjai and C. Bowman and T. C. Sorrell and
	C. E. Mountford and U. Himmelreich},
  title = {Unsupervised feature dimension reduction for classification of {MR}
	spectra.},
  journal = {Magn {R}eson {I}maging},
  year = {2004},
  volume = {22},
  pages = {251--256},
  number = {2},
  month = {Feb},
  abstract = {We present an unsupervised feature dimension reduction method for
	the classification of magnetic resonance spectra. {T}he technique
	preserves spectral information, important for disease profiling.
	{W}e propose to use this technique as a preprocessing step for computationally
	demanding wrapper-based feature subset selection. {W}e show that
	the classification accuracy on an independent test set can be sustained
	while achieving considerable feature reduction. {O}ur method is applicable
	to other classification techniques, such as neural networks, support
	vector machines, etc.},
  doi = {10.1016/j.mri.2003.08.033},
  pdf = {../local/Baumgartner2004Unsupervised.pdf},
  file = {Baumgartner2004Unsupervised.pdf:local/Baumgartner2004Unsupervised.pdf:PDF},
  keywords = {Algorithms, Ambergris, Candida, Candida albicans, Combinatorial Chemistry
	Techniques, Eye Enucleation, Humans, Magnetic Resonance Spectroscopy,
	Melanoma, Models, Molecular, Molecular Conformation, Non-U.S. Gov't,
	Odors, P.H.S., Perfume, Predictive Value of Tests, Prognosis, Prospective
	Studies, Quantitative Structure-Activity Relationship, Research Support,
	U.S. Gov't, Uveal Neoplasms, 15010118},
  pii = {S0730725X03003503},
  url = {http://dx.doi.org/10.1016/j.mri.2003.08.033}
}

@article{Baxter2000Model,
  author = {Jonathan Baxter},
  title = {A Model of Inductive Bias Learning},
  journal = {Journal of Artificial Intelligence Research},
  year = {2000},
  volume = {12},
  pages = {149--198},
  url = {http://citeseer.ist.psu.edu/article/baxter00model.html}
}

@comment{Baxter1997A: NOTE(review) this paper appeared in the journal Machine Learning, volume 28, issue 1, so it is recorded as an article rather than a proceedings contribution; confirm volume and issue against the publisher record.}
@article{Baxter1997A,
  author = {Jonathan Baxter},
  title = {A {B}ayesian/information theoretic model of learning to learn via multiple
	task sampling},
  journal = {Machine Learning},
  year = {1997},
  volume = {28},
  pages = {7--39},
  number = {1}
}

@inproceedings{Baxter1996Bayesian/information,
  author = {Jonathan Baxter},
  title = {A {B}ayesian/information theoretic model of bias learning},
  booktitle = {COLT '96: Proceedings of the ninth annual conference on Computational
	learning theory},
  year = {1996},
  pages = {77--88},
  address = {New York, NY, USA},
  publisher = {ACM Press},
  doi = {10.1145/238061.238071},
  isbn = {0-89791-811-8},
  location = {Desenzano del Garda, Italy}
}

@inproceedings{Baxter1996Learning,
  author = {Baxter, Jonathan},
  title = {Learning Model Bias},
  booktitle = {Advances in Neural Information Processing Systems},
  pages = {169--175},
  publisher = {MIT Press},
  year = {1996}
}

@article{Bazzani2001SVM,
  author = {A. Bazzani and A. Bevilacqua and D. Bollini and R. Brancaccio and
	R. Campanini and N. Lanconelli and A. Riccardi and D. Romani},
  title = {An {SVM} classifier to separate false signals from microcalcifications
	in digital mammograms.},
  journal = {Phys {M}ed {B}iol},
  year = {2001},
  volume = {46},
  pages = {1651--1663},
  number = {6},
  month = {Jun},
  abstract = {In this paper we investigate the feasibility of using an {SVM} (support
	vector machine) classifier in our automatic system for the detection
	of clustered microcalcifications in digital mammograms. {SVM} is
	a technique for pattern recognition which relies on the statistical
	learning theory. {I}t minimizes a function of two terms: the number
	of misclassified vectors of the training set and a term regarding
	the generalization classifier capability. {W}e compare the {SVM}
	classifier with an {MLP} (multi-layer perceptron) in the false-positive
	reduction phase of our detection scheme: a detected signal is considered
	either microcalcification or false signal, according to the value
	of a set of its features. {T}he {SVM} classifier gets slightly better
	results than the {MLP} one ({A}z value of 0.963 against 0.958) in
	the presence of a high number of training data; the improvement becomes
	much more evident ({A}z value of 0.952 against 0.918) in training
	sets of reduced size. {F}inally, the setting of the {SVM} classifier
	is much easier than the {MLP} one.},
  doi = {10.1088/0031-9155/46/6/305},
  pdf = {../local/Bazzani2001SVM.pdf},
  file = {Bazzani2001SVM.pdf:local/Bazzani2001SVM.pdf:PDF},
  keywords = {biosvm image},
  url = {http://dx.doi.org/10.1088/0031-9155/46/6/305}
}

@article{Beal2005Bayesian,
  author = {Beal, M. J. and Falciani, F. and Ghahramani, Z. and Rangel, C. and
	Wild, D. L.},
  title = {A {Bayesian} approach to reconstructing genetic regulatory networks
	with hidden factors.},
  journal = {Bioinformatics},
  volume = {21},
  number = {3},
  pages = {349--356},
  month = {Feb},
  year = {2005},
  doi = {10.1093/bioinformatics/bti014},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti014},
  pmid = {15353451},
  pii = {bti014},
  abstract = {M{OTIVATION}: {W}e have used state-space models ({SSM}s) to reverse
	engineer transcriptional networks from highly replicated gene expression
	profiling time series data obtained from a well-established model
	of {T} cell activation. {SSM}s are a class of dynamic {B}ayesian
	networks in which the observed measurements depend on some hidden
	state variables that evolve according to {M}arkovian dynamics. {T}hese
	hidden variables can capture effects that cannot be directly measured
	in a gene expression profiling experiment, for example: genes that
	have not been included in the microarray, levels of regulatory proteins,
	the effects of m{RNA} and protein degradation, etc. {RESULTS}: {W}e
	have approached the problem of inferring the model structure of these
	state-space models using both classical and {B}ayesian methods. {I}n
	our previous work, a bootstrap procedure was used to derive classical
	confidence intervals for parameters representing 'gene-gene' interactions
	over time. {I}n this article, variational approximations are used
	to perform the analogous model selection task in the {B}ayesian context.
	{C}ertain interactions are present in both the classical and the
	{B}ayesian analyses of these regulatory networks. {T}he resulting
	models place {J}un{B} and {J}un{D} at the centre of the mechanisms
	that control apoptosis and proliferation. {T}hese mechanisms are
	key for clonal expansion and for controlling the long term behavior
	(e.g. programmed cell death) of these cells. {AVAILABILITY}: {S}upplementary
	data is available at http://public.kgi.edu/wild/index.htm and {M}atlab
	source code for variational {B}ayesian learning of {SSM}s is available
	at http://www.cse.ebuffalo.edu/faculty/mbeal/software.html.},
  keywords = {biogm},
  pdf = {../local/Beal2005Bayesian.pdf},
  file = {Beal2005Bayesian.pdf:local/Beal2005Bayesian.pdf:PDF},
  owner = {vert},
  timestamp = {2006.01.18}
}

@article{Beck2009fast,
  author = {Beck, A. and Teboulle, M.},
  title = {A fast iterative shrinkage-thresholding algorithm for linear inverse
	problems},
  journal = {SIAM J. Img. Sci.},
  volume = {2},
  number = {1},
  pages = {183--202},
  year = {2009},
  doi = {10.1137/080716542},
  url = {http://dx.doi.org/10.1137/080716542},
  pdf = {../local/Beck2009fast.pdf},
  file = {Beck2009fast.pdf:Beck2009fast.pdf:PDF},
  owner = {jp},
  timestamp = {2010.10.14}
}

@article{Becker2004G,
  author = {Becker, O. M. and Marantz, Y. and Shacham, S. and Inbal, B. and Heifetz,
	A. and Kalid, O. and Bar-Haim, S. and Warshaviak, D. and Fichman,
	M. and Noiman, S.},
  title = {G protein-coupled receptors: in silico drug discovery in {3D}},
  journal = {Proc. Natl. Acad. Sci. USA},
  volume = {101},
  number = {31},
  pages = {11304--11309},
  month = {Aug},
  year = {2004},
  doi = {10.1073/pnas.0401862101},
  url = {http://dx.doi.org/10.1073/pnas.0401862101},
  pmid = {15277683},
  pii = {0401862101},
  abstract = {The application of structure-based in silico methods to drug discovery
	is still considered a major challenge, especially when the x-ray
	structure of the target protein is unknown. Such is the case with
	human G protein-coupled receptors (GPCRs), one of the most important
	families of drug targets, where in the absence of x-ray structures,
	one has to rely on in silico 3D models. We report repeated success
	in using ab initio in silico GPCR models, generated by the predict
	method, for blind in silico screening when applied to a set of five
	different GPCR drug targets. More than 100,000 compounds were typically
	screened in silico for each target, leading to a selection of <100
	"virtual hit" compounds to be tested in the lab. In vitro binding
	assays of the selected compounds confirm high hit rates, of 12-21\%
	(full dose-response curves, Ki < 5 microM). In most cases, the best
	hit was a novel compound (New Chemical Entity) in the 1- to 100-nM
	range, with very promising pharmacological properties, as measured
	by a variety of in vitro and in vivo assays. These assays validated
	the quality of the hits as lead compounds for drug discovery. The
	results demonstrate the usefulness and robustness of ab initio in
	silico 3D models and of in silico screening for GPCR drug discovery.},
  keywords = {chemogenomics},
  owner = {laurent},
  timestamp = {2008.03.27}
}

@article{Beer2004Predicting,
  author = {Beer, M. A. and Tavazoie, S.},
  title = {Predicting gene expression from sequence},
  journal = {Cell},
  volume = {117},
  pages = {185--198},
  year = {2004},
  pdf = {../local/Beer2004Predicting.pdf},
  file = {Beer2004Predicting.pdf:Beer2004Predicting.pdf:PDF},
  owner = {jp},
  timestamp = {2009.01.05}
}

@article{Beerenwinkel2003Methods,
  author = {Beerenwinkel, N. and Lengauer, T. and Daumer, M. and Kaiser, R. and
	Walter, H. and Korn, K. and Hoffmann, D. and Selbig, J.},
  title = {Methods for optimizing antiviral combination therapies},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {i16--i25},
  number = {Suppl. 1},
  abstract = {Motivation: {D}espite some progress with antiretroviral combination
	therapies, therapeutic success in the management of {HIV}-infected
	patients is limited. {T}he evolution of drug-resistant genetic variants
	in response to therapy plays a key role in treatment failure and
	finding a new potent drug combination after therapy failure is considered
	challenging. {R}esults: {T}o estimate the activity of a drug combination
	against a particular viral strain, we develop a scoring function
	whose independent variables describe a set of antiviral agents and
	viral {DNA} sequences coding for the molecular targets of the respective
	drugs. {T}he construction of this activity score involves (1) predicting
	phenotypic drug resistance from genotypes for each drug individually,
	(2) probabilistic modeling of predicted resistance values and integration
	into a score for drug combinations, and (3) searching through the
	mutational neighborhood of the considered strain in order to estimate
	activity on nearby mutants. {F}or a clinical data set, we determine
	the optimal search depth and show that the scoring scheme is predictive
	of therapeutic outcome. {P}roperties of the activity score and applications
	are discussed. {C}ontact: beerenwinkel@mpi-sb.mpg.de {K}eywords:
	{HIV}, antiretroviral therapy, drug resistance, {SVM} regression,
	therapy optimization, sequence space search.},
  pdf = {../local/Beerenwinkel2003Methods.pdf},
  file = {Beerenwinkel2003Methods.pdf:local/Beerenwinkel2003Methods.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/suppl_1/i16}
}

@comment{Beerenwinkel2001Geno2pheno: NOTE(review) the 2001 issues of IEEE Intelligent Systems belong to volume 16 (issue 6 here); confirm against the publisher record for the DOI below.}
@article{Beerenwinkel2001Geno2pheno,
  author = {Beerenwinkel, N. and Schmidt, B. and Walter, H. and Kaiser, R. and
	Lengauer, T. and Hoffmann, D. and Korn, K. and Selbig, J.},
  title = {{G}eno2pheno: {I}nterpreting {G}enotypic {HIV} {D}rug {R}esistance
	{T}ests},
  journal = {I{EEE} {I}ntelligent {S}ystems},
  year = {2001},
  volume = {16},
  pages = {35--41},
  number = {6},
  abstract = {Rapid accumulation of resistance mutations in the genome of the human
	immunodeficiency virus ({HIV}) plays a central role in drug treatment
	failure in infected patients. {T}he authors have developed geno2pheno,
	an intelligent system that uses the information encoded in the viral
	genomic sequence to predict resistance or susceptibility of the virus
	to 13 antiretroviral agents. {T}o predict phenotypic drug resistance
	from genotype, they applied two machine learning techniques: decision
	trees and linear support vector machines. {T}hese techniques performed
	learning on more than 400 genotype-phenotype pairs for each drug.
	{T}he authors compared the generalization performance of the two
	families of models in leave-one-out experiments. {E}xcept for three
	drugs, all error estimates ranged between 7.25 and 15.5 percent.
	{S}upport vector machines performed slightly better for most drugs,
	but knowledge extraction was easier for decision trees. {G}eno2pheno
	is freely available at http://cartan.gmd.de/geno2pheno.html.},
  doi = {10.1109/5254.972080},
  pdf = {../local/Beerenwinkel2001Geno2pheno.pdf},
  file = {Beerenwinkel2001Geno2pheno.pdf:local/Beerenwinkel2001Geno2pheno.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1109/5254.972080}
}

@article{Beers2006Array-CGH,
  author = {van Beers, E. and Nederlof, P.},
  title = {Array-{CGH} and breast cancer},
  journal = {Breast Cancer Research},
  year = {2006},
  volume = {8},
  pages = {210},
  number = {3},
  abstract = {The introduction of comparative genomic hybridization (CGH) in 1992
	opened new avenues in genomic investigation; in particular, it advanced
	analysis of solid tumours, including breast cancer, because it obviated
	the need to culture cells before their chromosomes could be analyzed.
	The current generation of CGH analysis uses ordered arrays of genomic
	DNA sequences and is therefore referred to as array-CGH or matrix-CGH.
	It was introduced in 1998, and further increased the potential of
	CGH to provide insight into the fundamental processes of chromosomal
	instability and cancer. This review provides a critical evaluation
	of the data published on array-CGH and breast cancer, and discusses
	some of its expected future value and developments.},
  doi = {10.1186/bcr1510},
  pdf = {../local/Beers2006Array-CGH.pdf},
  file = {Beers2006Array-CGH.pdf:Beers2006Array-CGH.pdf:PDF},
  issn = {1465-5411},
  keywords = {breastcancer, cgh},
  owner = {jp},
  pmid = {16817944},
  timestamp = {2008.12.08},
  url = {http://breast-cancer-research.com/content/8/3/210}
}

@article{Begg2005machine,
  author = {R. Begg and J. Kamruzzaman},
  title = {A machine learning approach for automated recognition of movement
	patterns using basic, kinetic and kinematic gait data.},
  journal = {J {B}iomech},
  year = {2005},
  volume = {38},
  pages = {401--408},
  number = {3},
  month = mar,
  abstract = {This paper investigated application of a machine learning approach
	({S}upport vector machine, {SVM}) for the automatic recognition of
	gait changes due to ageing using three types of gait measures: basic
	temporal/spatial, kinetic and kinematic. {T}he gaits of 12 young
	and 12 elderly participants were recorded and analysed using a synchronized
	{PEAK} motion analysis system and a force platform during normal
	walking. {A}ltogether, 24 gait features describing the three types
	of gait characteristics were extracted for developing gait recognition
	models and later testing of generalization performance. {T}est results
	indicated an overall accuracy of 91.7\% by the {SVM} in its capacity
	to distinguish the two gait patterns. {T}he classification ability
	of the {SVM} was found to be unaffected across six kernel functions
	(linear, polynomial, radial basis, exponential radial basis, multi-layer
	perceptron and spline). {G}ait recognition rate improved when features
	were selected from different gait data type. {A} feature selection
	algorithm demonstrated that as little as three gait features, one
	selected from each data type, could effectively distinguish the age
	groups with 100\% accuracy. {T}hese results demonstrate considerable
	potential in applying {SVM}s in gait classification for many applications.},
  doi = {10.1016/j.jbiomech.2004.05.002},
  pdf = {../local/Begg2005machine.pdf},
  file = {Begg2005machine.pdf:local/Begg2005machine.pdf:PDF},
  pii = {S0021929004002258},
  url = {http://dx.doi.org/10.1016/j.jbiomech.2004.05.002}
}

@article{Begg2005Support,
  author = {Rezaul K Begg and Marimuthu Palaniswami and Brendan Owen},
  title = {Support vector machines for automated gait classification.},
  journal = {I{EEE} {T}rans {B}iomed {E}ng},
  year = {2005},
  volume = {52},
  pages = {828--838},
  number = {5},
  month = may,
  abstract = {Ageing influences gait patterns causing constant threats to control
	of locomotor balance. {A}utomated recognition of gait changes has
	many advantages including, early identification of at-risk gait and
	monitoring the progress of treatment outcomes. {I}n this paper, we
	apply an artificial intelligence technique [support vector machines
	({SVM})] for the automatic recognition of young-old gait types from
	their respective gait-patterns. {M}inimum foot clearance ({MFC})
	data of 30 young and 28 elderly participants were analyzed using
	a {PEAK}-2{D} motion analysis system during a 20-min continuous walk
	on a treadmill at self-selected walking speed. {G}ait features extracted
	from individual {MFC} histogram-plot and {P}oincar{\'e}-plot images
	were used to train the {SVM}. {C}ross-validation test results indicate
	that the generalization performance of the {SVM} was on average 83.3\%
	(+/-2.9) to recognize young and elderly gait patterns, compared to
	a neural network's accuracy of 75.0+/-5.0\%. {A} "hill-climbing"
	feature selection algorithm demonstrated that a small subset (3-5)
	of gait features extracted from {MFC} plots could differentiate the
	gait patterns with 90\% accuracy. {P}erformance of the gait classifier
	was evaluated using areas under the receiver operating characteristic
	plots. {I}mproved performance of the classifier was evident when
	trained with reduced number of selected good features and with radial
	basis function kernel. {T}hese results suggest that {SVM}s can function
	as an efficient gait classifier for recognition of young and elderly
	gait patterns, and has the potential for wider applications in gait
	identification for falls-risk minimization in the elderly.},
  doi = {10.1109/TBME.2005.845241},
  pdf = {../local/Begg2005Support.pdf},
  file = {Begg2005Support.pdf:local/Begg2005Support.pdf:PDF},
  keywords = {Adult, Aged, Aging, Algorithms, Apoptosis, Artificial Intelligence,
	Automated, Computer-Assisted, Female, Foot, Gait, Gene Expression
	Profiling, Humans, Image Interpretation, Male, Neoplasms, Non-U.S.
	Gov't, Oligonucleotide Array Sequence Analysis, Pattern Recognition,
	Polymerase Chain Reaction, Proteins, Reproducibility of Results,
	Research Support, Sensitivity and Specificity, Subcellular Fractions,
	Unknown Primary, 15887532},
  url = {http://dx.doi.org/10.1109/TBME.2005.845241}
}

@unpublished{Behr2011Simultaneous,
  author = {Behr, J. and Bohnert, R. and Kahles, A. and R{\"a}tsch, G.},
  title = {Simultaneous {RNA}-seq-based transcript inference and quantification
	using mixed integer programming},
  note = {NIPS Machine Learning in Computational Biology Workshop, Sierra Nevada.},
  year = {2011},
  owner = {jp},
  timestamp = {2012.03.06}
}

@article{Behre2008Structural,
  author = {Behre, J. and Wilhelm, T. and {von Kamp}, A. and Ruppin, E. and Schuster,
	S.},
  title = {Structural robustness of metabolic networks with respect to multiple
	knockouts.},
  journal = {J Theor Biol},
  year = {2008},
  volume = {252},
  pages = {433--441},
  number = {3},
  month = jun,
  abstract = {We present a generalised framework for analysing structural robustness
	of metabolic networks, based on the concept of elementary flux modes
	(EFMs). Extending our earlier study on single knockouts [Wilhelm,
	T., Behre, J., Schuster, S., 2004. Analysis of structural robustness
	of metabolic networks. IEE Proc. Syst. Biol. 1(1), 114-120], we are
	now considering the general case of double and multiple knockouts.
	The robustness measures are based on the ratio of the number of remaining
	EFMs after knockout vs. the number of EFMs in the unperturbed situation,
	averaged over all combinations of knockouts. With the help of simple
	examples we demonstrate that consideration of multiple knockouts
	yields additional information going beyond single-knockout results.
	It is proven that the robustness score decreases as the knockout
	depth increases. We apply our extended framework to metabolic networks
	representing amino acid anabolism in Escherichia coli and human hepatocytes,
	and the central metabolism in human erythrocytes. Moreover, in the
	E. coli model the two subnetworks synthesising amino acids that are
	essential and those that are non-essential for humans are studied
	separately. The results are discussed from an evolutionary viewpoint.
	We find that E. coli has the most robust metabolism of all the cell
	types studied here. Considering only the subnetwork of the synthesis
	of non-essential amino acids, E. coli and the human hepatocyte show
	about the same robustness.},
  doi = {10.1016/j.jtbi.2007.09.043},
  pdf = {../local/Behre2008Structural.pdf},
  file = {Behre2008Structural.pdf:Behre2008Structural.pdf:PDF},
  institution = {Faculty of Biology and Pharmaceutics, Section of Bioinformatics,
	Friedrich Schiller University Jena, Ernst-Abbe-Platz 2, D-07743 Jena,
	Germany. jbehre@minet.uni-jena.de},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0022-5193(07)00472-9},
  pmid = {18023456},
  timestamp = {2013.01.25},
  url = {http://dx.doi.org/10.1016/j.jtbi.2007.09.043}
}

@article{Bejerano2001Variations,
  author = {Bejerano, G. and Yona, G.},
  title = {Variations on probabilistic suffix trees: statistical modeling and
	prediction of protein families},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {23--43},
  number = {1},
  pdf = {../local/Bejerano2001Variations.pdf},
  file = {Bejerano2001Variations.pdf:local/Bejerano2001Variations.pdf:PDF},
  owner = {vert},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/17/1/23}
}

@inproceedings{Bejerano1999Modeling,
  author = {Bejerano, G. and Yona, G.},
  title = {Modeling protein families using probabilistic suffix trees},
  booktitle = {Proceedings of {RECOMB} 1999},
  year = {1999},
  pages = {15--24},
  publisher = {ACM Press},
  pdf = {../local/Bejerano1999Modeling.pdf},
  file = {Bejerano1999Modeling.pdf:local/Bejerano1999Modeling.pdf:PDF},
  owner = {vert}
}

@article{Belkin2003Laplacian,
  author = {Belkin, M. and Niyogi, P.},
  title = {Laplacian {E}igenmaps for {D}imensionality {R}eduction and {D}ata
	{R}epresentation},
  journal = {Neural {C}omput.},
  year = {2003},
  volume = {15},
  pages = {1373--1396},
  number = {6},
  abstract = {One of the central problems in machine learning and pattern recognition
	is to develop appropriate representations for complex data. {W}e
	consider the problem of constructing a representation for data lying
	on a low-dimensional manifold embedded in a high-dimensional space.
	{D}rawing on the correspondence between the graph {L}aplacian, the
	{L}aplace {B}eltrami operator on the manifold, and the connections
	to the heat equation, we propose a geometrically motivated algorithm
	for representing the high-dimensional data. {T}he algorithm provides
	a computationally efficient approach to nonlinear dimensionality
	reduction that has locality-preserving properties and a natural connection
	to clustering. {S}ome potential applications and illustrative examples
	are discussed.},
  pdf = {../local/1373.pdf},
  file = {1373.pdf:http\://neco.mitpress.org/cgi/reprint/15/6/1373.pdf:PDF},
  keywords = {dimred},
  url = {http://neco.mitpress.org/cgi/content/abstract/15/6/1373}
}

@article{Bellman1959adaptive,
  author = {Bellman, R. and Kalaba, R.},
  title = {On adaptive control processes},
  journal = {IRE Transactions on Automatic Control},
  year = {1959},
  volume = {4},
  pages = {1--9},
  number = {2},
  publisher = {IEEE}
}

@article{Belongie02Shape,
  author = {Belongie, S. and Malik, J. and Puzicha, J.},
  title = {Shape matching and object recognition using shape contexts},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {2002},
  volume = {24},
  number = {4},
  pages = {509--522},
  publisher = {IEEE},
  doi = {10.1109/34.993558},
  url = {http://dx.doi.org/10.1109/34.993558},
  pdf = {../local/Belongie02Shape.pdf},
  file = {Belongie02Shape.pdf:Belongie02Shape.pdf:PDF},
  abstract = {We present a novel approach to measuring similarity between shapes
	and exploit it for object recognition. In our framework, the measurement
	of similarity is preceded by: (1) solving for correspondences between
	points on the two shapes; (2) using the correspondences to estimate
	an aligning transform. In order to solve the correspondence problem,
	we attach a descriptor, the shape context, to each point. The shape
	context at a reference point captures the distribution of the remaining
	points relative to it, thus offering a globally discriminative characterization.
	Corresponding points on two similar shapes will have similar shape
	contexts, enabling us to solve for correspondences as an optimal
	assignment problem. Given the point correspondences, we estimate
	the transformation that best aligns the two shapes; regularized thin-plate
	splines provide a flexible class of transformation maps for this
	purpose. The dissimilarity between the two shapes is computed as
	a sum of matching errors between corresponding points, together with
	a term measuring the magnitude of the aligning transform. We treat
	recognition in a nearest-neighbor classification framework as the
	problem of finding the stored prototype shape that is maximally similar
	to that in the image. Results are presented for silhouettes, trademarks,
	handwritten digits, and the COIL data set}
}

@inproceedings{Ben-David2003Exploiting,
  author = {S. Ben-David and R. Schuller},
  title = {Exploiting task relatedness for multiple task learning},
  booktitle = {Proceedings of the Sixteenth Annual Conference on Learning Theory
	({COLT} 2003)},
  year = {2003},
  text = {Shai Ben-David and Reba Schuller. Exploiting task relatedness for
	multiple task learning. In Proc. of the Sixteenth Annual Conference
	on Learning Theory COLT 2003.},
  url = {http://citeseer.ist.psu.edu/ben-david03exploiting.html}
}

@article{Ben-Dor2000Tissue,
  author = {Ben-Dor, A. and Bruhn, L. and Friedman, N. and Nachman, I. and Schummer,
	M. and Yakhini, Z.},
  title = {Tissue Classification with Gene Expression Profiles},
  journal = {J. Comput. Biol.},
  year = {2000},
  volume = {7},
  pages = {559--583},
  number = {3-4},
  abstract = {Constantly improving gene expression profiling technologies are expected
	to provide understanding and insight into cancer-related cellular
	processes. {G}ene expression data is also expected to significantly
	aid in the development of efficient cancer diagnosis and classification
	platforms. {I}n this work we examine three sets of gene expression
	data measured across sets of tumor(s) and normal clinical samples:
	{T}he first set consists of 2,000 genes, measured in 62 epithelial
	colon samples ({A}lon et al., 1999). {T}he second consists of approximately
	equal to 100,000 clones, measured in 32 ovarian samples (unpublished
	extension of data set described in {S}chummer et al. (1999)). {T}he
	third set consists of approximately equal to 7,100 genes, measured
	in 72 bone marrow and peripheral blood samples ({G}olub et al, 1999).
	{W}e examine the use of scoring methods, measuring separation of
	tissue type (e.g., tumors from normals) using individual gene expression
	levels. {T}hese are then coupled with high-dimensional classification
	methods to assess the classification power of complete expression
	profiles. {W}e present results of performing leave-one-out cross
	validation ({LOOCV}) experiments on the three data sets, employing
	nearest neighbor classifier, {SVM} ({C}ortes and {V}apnik, 1995),
	{A}da{B}oost ({F}reund and {S}chapire, 1997) and a novel clustering-based
	classification technique. {A}s tumor samples can differ from normal
	samples in their cell-type composition, we also perform {LOOCV} experiments
	using appropriately modified sets of genes, attempting to eliminate
	the resulting bias. {W}e demonstrate success rate of at least 90\%
	in tumor versus normal classification, using sets of selected genes,
	with, as well as without, cellular-contamination-related members.
	{T}hese results are insensitive to the exact selection mechanism,
	over a certain range.},
  pdf = {../local/Ben-Dor2000Tissue.pdf},
  file = {Ben-Dor2000Tissue.pdf:local/Ben-Dor2000Tissue.pdf:PDF},
  keywords = {biosvm, microarray},
  owner = {jeanphilippevert},
  url = {http://www.liebertonline.com/doi/abs/10.1089/106652700750050943}
}

@article{Ben-Elazar2013Spatial,
  author = {Ben-Elazar, S. and Yakhini, Z. and Yanai, I.},
  title = {Spatial localization of co-regulated genes exceeds genomic gene clustering
	in the {Saccharomyces} cerevisiae genome.},
  journal = {Nucleic Acids Res},
  year = {2013},
  volume = {41},
  pages = {2191--2201},
  number = {4},
  month = feb,
  abstract = {While it has been long recognized that genes are not randomly positioned
	along the genome, the degree to which its 3D structure influences
	the arrangement of genes has remained elusive. In particular, several
	lines of evidence suggest that actively transcribed genes are spatially
	co-localized, forming transcription factories; however, a generalized
	systematic test has hitherto not been described. Here we reveal transcription
	factories using a rigorous definition of genomic structure based
	on Saccharomyces cerevisiae chromosome conformation capture data,
	coupled with an experimental design controlling for the primary gene
	order. We develop a data-driven method for the interpolation and
	the embedding of such datasets and introduce statistics that enable
	the comparison of the spatial and genomic densities of genes. Combining
	these, we report evidence that co-regulated genes are clustered in
	space, beyond their observed clustering in the context of gene order
	along the genome and show this phenomenon is significant for 64 out
	of 117 transcription factors. Furthermore, we show that those transcription
	factors with high spatially co-localized targets are expressed higher
	than those whose targets are not spatially clustered. Collectively,
	our results support the notion that, at a given time, the physical
	density of genes is intimately related to regulatory activity.},
  doi = {10.1093/nar/gks1360},
  pdf = {../local/Ben-Elazar2013Spatial.pdf},
  file = {Ben-Elazar2013Spatial.pdf:Ben-Elazar2013Spatial.pdf:PDF},
  institution = {Department of Biology, Technion - Israel Institute of Technology,
	Haifa, Israel, Department of Computer Science, Technion - Israel
	Institute of Technology, Haifa, Israel and Agilent Laboratories,
	Tel Aviv, Israel.},
  keywords = {ngs, hic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gks1360},
  pmid = {23303780},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1093/nar/gks1360}
}

@article{Ben-Hur2003Remote,
  author = {Ben-Hur, A. and Brutlag, D.},
  title = {Remote homology detection: a motif based approach},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {i26--i33},
  number = {Suppl. 1},
  abstract = {Motivation: {R}emote homology detection is the problem of detecting
	homology in cases of low sequence similarity. {I}t is a hard computational
	problem with no approach that works well in all cases. {R}esults:
	{W}e present a method for detecting remote homology that is based
	on the presence of discrete sequence motifs. {T}he motif content
	of a pair of sequences is used to define a similarity that is used
	as a kernel for a {S}upport {V}ector {M}achine ({SVM}) classifier.
	{W}e test the method on two remote homology detection tasks: prediction
	of a previously unseen {SCOP} family and prediction of an enzyme
	class given other enzymes that have a similar function on other substrates.
	{W}e find that it performs significantly better than an {SVM} method
	that uses {BLAST} or {S}mith-{W}aterman similarity scores as features.
	{A}vailability: {T}he software is available from the authors upon
	request.},
  pdf = {../local/Ben-Hur2003Remote.pdf},
  file = {Ben-Hur2003Remote.pdf:local/Ben-Hur2003Remote.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/suppl_1/i26}
}

@article{Ben-Hur2001Support,
  author = {Ben-Hur, A. and Horn, D. and Siegelmann, H. T. and Vapnik, V.},
  title = {Support {V}ector {C}lustering},
  journal = {J. {M}ach. {L}earn. {R}es.},
  year = {2001},
  volume = {2},
  pages = {125--137},
  pdf = {../local/benh01.pdf},
  file = {benh01.pdf:local/benh01.pdf:PDF},
  subject = {kernel},
  url = {http://www.ai.mit.edu/projects/jmlr/papers/volume2/horn01a/rev1/horn01a1r.pdf}
}

@article{Ben-Hur2006Choosing,
  author = {Ben-Hur, A. and Noble, W. S.},
  title = {Choosing negative examples for the prediction of protein-protein
	interactions.},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {S2},
  number = {Suppl. 1},
  abstract = {The protein-protein interaction networks of even well-studied model
	organisms are sketchy at best, highlighting the continued need for
	computational methods to help direct experimentalists in the search
	for novel interactions. This need has prompted the development of
	a number of methods for predicting protein-protein interactions based
	on various sources of data and methodologies. The common method for
	choosing negative examples for training a predictor of protein-protein
	interactions is based on annotations of cellular localization, and
	the observation that pairs of proteins that have different localization
	patterns are unlikely to interact. While this method leads to high
	quality sets of non-interacting proteins, we find that this choice
	can lead to biased estimates of prediction accuracy, because the
	constraints placed on the distribution of the negative examples makes
	the task easier. The effects of this bias are demonstrated in the
	context of both sequence-based and non-sequence based features used
	for predicting protein-protein interactions.},
  doi = {10.1186/1471-2105-7-S1-S2},
  institution = {CO, USA. asa@cs.colostate.edu},
  owner = {jp},
  pii = {1471-2105-7-S1-S2},
  pmid = {16723005},
  timestamp = {2008.06.01},
  url = {http://dx.doi.org/10.1186/1471-2105-7-S1-S2}
}

@article{Ben-Hur2005Kernel,
  author = {Ben-Hur, A. and Noble, W. S.},
  title = {Kernel methods for predicting protein-protein interactions.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {i38--i46},
  number = {Suppl. 1},
  month = jun,
  abstract = {M{OTIVATION}: {D}espite advances in high-throughput methods for discovering
	protein-protein interactions, the interaction networks of even well-studied
	model organisms are sketchy at best, highlighting the continued need
	for computational methods to help direct experimentalists in the
	search for novel interactions. {RESULTS}: {W}e present a kernel method
	for predicting protein-protein interactions using a combination of
	data sources, including protein sequences, {G}ene {O}ntology annotations,
	local properties of the network, and homologous interactions in other
	species. {W}hereas protein kernels proposed in the literature provide
	a similarity between single proteins, prediction of interactions
	requires a kernel between pairs of proteins. {W}e propose a pairwise
	kernel that converts a kernel between single proteins into a kernel
	between pairs of proteins, and we illustrate the kernel's effectiveness
	in conjunction with a support vector machine classifier. {F}urthermore,
	we obtain improved performance by combining several sequence-based
	kernels based on k-mer frequency, motif and domain content and by
	further augmenting the pairwise sequence kernel with features that
	are based on other sources of data. {W}e apply our method to predict
	physical interactions in yeast using data from the {BIND} database.
	{A}t a false positive rate of 1\% the classifier retrieves close
	to 80\% of a set of trusted interactions. {W}e thus demonstrate the
	ability of our method to make accurate predictions despite the sizeable
	fraction of false positives that are known to exist in interaction
	databases. {AVAILABILITY}: {T}he classification experiments were
	performed using {P}y{ML} available at http://pyml.sourceforge.net.
	{D}ata are available at: http://noble.gs.washington.edu/proj/sppi
	{CONTACT}: asa@gs.washington.edu.},
  doi = {10.1093/bioinformatics/bti1016},
  pdf = {../local/Ben-Hur2005Kernel.pdf},
  file = {Ben-Hur2005Kernel.pdf:local/Ben-Hur2005Kernel.pdf:PDF},
  keywords = {biosvm},
  pii = {21/suppl_1/i38},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1016}
}

@article{Bengio2004Learning,
  author = {Bengio, Y. and Delalleau, O. and Le Roux, N. and Paiement, J.-F.
	and Vincent, P. and Ouimet, M.},
  title = {Learning eigenfunctions links spectral embedding and kernel {PCA}.},
  journal = {Neural {C}omput.},
  year = {2004},
  volume = {16},
  pages = {2197--2219},
  number = {10},
  month = oct,
  abstract = {In this letter, we show a direct relation between spectral embedding
	methods and kernel principal components analysis and how both are
	special cases of a more general learning problem: learning the principal
	eigenfunctions of an operator defined from a kernel and the unknown
	data-generating density. {W}hereas spectral embedding methods provided
	only coordinates for the training points, the analysis justifies
	a simple extension to out-of-sample examples (the {N}ystr{\"o}m formula)
	for multidimensional scaling ({MDS}), spectral clustering, {L}aplacian
	eigenmaps, locally linear embedding ({LLE}), and {I}somap. {T}he
	analysis provides, for all such spectral embedding methods, the definition
	of a loss function, whose empirical average is minimized by the traditional
	algorithms. {T}he asymptotic expected value of that loss defines
	a generalization performance and clarifies what these algorithms
	are trying to learn. {E}xperiments with {LLE}, {I}somap, spectral
	clustering, and {MDS} show that this out-of-sample embedding formula
	generalizes well, with a level of error comparable to the effect
	of small perturbations of the training set on the embedding.},
  doi = {10.1162/0899766041732396},
  pdf = {../local/Bengio2004Learning.pdf},
  file = {Bengio2004Learning.pdf:local/Bengio2004Learning.pdf:PDF},
  keywords = {dimred},
  url = {http://dx.doi.org/10.1162/0899766041732396}
}

@article{Bengtsson2009Single,
  author = {Henrik Bengtsson and Pratyaksha Wirapati and Terence P Speed},
  title = {A single-array preprocessing method for estimating full-resolution
	raw copy numbers from all {Affymetrix} genotyping arrays including
	{GenomeWideSNP} 5 \& 6.},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  pages = {2149--2156},
  number = {17},
  month = sep,
  abstract = {MOTIVATION: High-resolution copy-number (CN) analysis has in recent
	years gained much attention, not only for the purpose of identifying
	CN aberrations associated with a certain phenotype, but also for
	identifying CN polymorphisms. In order for such studies to be successful
	and cost effective, the statistical methods have to be optimized.
	We propose a single-array preprocessing method for estimating full-resolution
	total CNs. It is applicable to all Affymetrix genotyping arrays,
	including the recent ones that also contain non-polymorphic probes.
	A reference signal is only needed at the last step when calculating
	relative CNs. RESULTS: As with our method for earlier generations
	of arrays, this one controls for allelic crosstalk, probe affinities
	and PCR fragment-length effects. Additionally, it also corrects for
	probe sequence effects and co-hybridization of fragments digested
	by multiple enzymes that takes place on the latest chips. We compare
	our method with Affymetrix's CN5 method and the dChip method by assessing
	how well they differentiate between various CN states at the full
	resolution and various amounts of smoothing. Although CRMA v2 is
	a single-array method, we observe that it performs as well as or
	better than alternative methods that use data from all arrays for
	their preprocessing. This shows that it is possible to do online
	analysis in large-scale projects where additional arrays are introduced
	over time.},
  doi = {10.1093/bioinformatics/btp371},
  institution = {Department of Statistics, University of California, Berkeley, California,
	USA. hb@stat.berkeley.edu},
  keywords = {Base Pairing, genetics; Chromosomes, Human, Pair 10, genetics; Gene
	Dosage, genetics; Genome, Human, genetics; Genotype; Humans; Oligonucleotide
	Array Sequence Analysis, methods; Polymorphism, Single Nucleotide,
	genetics; ROC Curve},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {btp371},
  pmid = {19535535},
  timestamp = {2010.08.05},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp371}
}

@article{Benito2004Adjustment,
  author = {Monica Benito and Joel Parker and Quan Du and Junyuan Wu and Dong
	Xiang and Charles M Perou and J. S. Marron},
  title = {Adjustment of systematic microarray data biases.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {105--114},
  number = {1},
  month = jan,
  abstract = {M{OTIVATION}: {S}ystematic differences due to experimental features
	of microarray experiments are present in most large microarray data
	sets. {M}any different experimental features can cause biases including
	different sources of {RNA}, different production lots of microarrays
	or different microarray platforms. {T}hese systematic effects present
	a substantial hurdle to the analysis of microarray data. {RESULTS}:
	{W}e present here a new method for the identification and adjustment
	of systematic biases that are present within microarray data sets.
	{O}ur approach is based on modern statistical discrimination methods
	and is shown to be very effective in removing systematic biases present
	in a previously published breast tumor c{DNA} microarray data set.
	{T}he new method of '{D}istance {W}eighted {D}iscrimination ({DWD})'
	is shown to be better than {S}upport {V}ector {M}achines and {S}ingular
	{V}alue {D}ecomposition for the adjustment of systematic microarray
	effects. {I}n addition, it is shown to be of general use as a tool
	for the discrimination of systematic problems present in microarray
	data sets, including the merging of two breast tumor data sets completed
	on different microarray platforms. {AVAILABILITY}: {M}atlab software
	to perform {DWD} can be retrieved from https://genome.unc.edu/pubsup/dwd/}
}

@article{Benjamini1995Controlling,
  author = {Benjamini, Y. and Hochberg, Y.},
  title = {Controlling the False Discovery Rate: a Practical and Powerful Approach
	to Multiple Testing},
  journal = {J. R. Stat. Soc. Ser. B},
  volume = {57},
  pages = {289--300},
  year = {1995},
  owner = {jp},
  timestamp = {2008.12.09},
  pdf = {../local/Benjamini1995Controlling.pdf},
  file = {Benjamini1995Controlling.pdf:Benjamini1995Controlling.pdf:PDF}
}

@article{Bentele2004JCB,
  author = {Bentele, M. and Lavrik, I. and Ulrich, M. and Stosser, S. and Heermann,
	D. W. and Kalthoff, H. and Krammer, P. H. and Eils, R.},
  title = {Mathematical modeling reveals threshold mechanism in {CD95}-induced
	apoptosis},
  journal = {J Cell Biol},
  year = {2004},
  volume = {166},
  pages = {839--851},
  number = {6},
  abstract = {Mathematical modeling is required for understanding the complex behavior
	of large signal transduction networks. Previous attempts to model
	signal transduction pathways were often limited to small systems
	or based on qualitative data only. Here, we developed a mathematical
	modeling framework for understanding the complex signaling behavior
	of CD95(APO-1/Fas)-mediated apoptosis. Defects in the regulation
	of apoptosis result in serious diseases such as cancer, autoimmunity,
	and neurodegeneration. During the last decade many of the molecular
	mechanisms of apoptosis signaling have been examined and elucidated.
	A systemic understanding of apoptosis is, however, still missing.
	To address the complexity of apoptotic signaling we subdivided this
	system into subsystems of different information qualities. A new
	approach for sensitivity analysis within the mathematical model was
	key for the identification of critical system parameters and two
	essential system properties: modularity and robustness. Our model
	describes the regulation of apoptosis on a systems level and resolves
	the important question of a threshold mechanism for the regulation
	of apoptosis.},
  keywords = {csbcbook}
}

@book{Berg1984Harmonic,
  author = {Berg, C. and Christensen, J. P. R. and Ressel, P.},
  title = {Harmonic analysis on semigroups},
  publisher = {Springer-Verlag},
  year = {1984},
  address = {New York}
}

@article{Berg2006Cross-species,
  author = {Berg, J. and L{\"a}ssig, M.},
  title = {Cross-species analysis of biological networks by {Bayesian} alignment},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2006},
  volume = {103},
  pages = {10967--10972},
  number = {29},
  month = jul,
  abstract = {Complex interactions between genes or proteins contribute a substantial
	part to phenotypic evolution. Here we develop an evolutionarily grounded
	method for the cross-species analysis of interaction networks by
	alignment, which maps bona fide functional relationships between
	genes in different organisms. Network alignment is based on a scoring
	function measuring mutual similarities between networks, taking into
	account their interaction patterns as well as sequence similarities
	between their nodes. High-scoring alignments and optimal alignment
	parameters are inferred by a systematic Bayesian analysis. We apply
	this method to analyze the evolution of coexpression networks between
	humans and mice. We find evidence for significant conservation of
	gene expression clusters and give network-based predictions of gene
	function. We discuss examples where cross-species functional relationships
	between genes do not concur with sequence similarity.},
  doi = {10.1073/pnas.0602294103},
  pdf = {../local/Berg2006Cross-species.pdf},
  file = {Berg2006Cross-species.pdf:local/Berg2006Cross-species.pdf:PDF},
  institution = {Institut f{\"u}r Theoretische Physik, Universit{\"a}t zu K{\"o}ln,
	Z{\"u}lpicherstrasse 77, 50937 Cologne, Germany. berg@thp.uni-koeln.de},
  owner = {jp},
  pii = {0602294103},
  pmid = {16835301},
  timestamp = {2008.10.03},
  url = {http://dx.doi.org/10.1073/pnas.0602294103}
}

@book{Berge1959Espaces,
  author = {Berge, C.},
  title = {Espaces topologiques et fonctions multivoques},
  year = {1959},
  publisher = {Dunod},
  address = {Paris},
  timestamp = {2011.04.30},
  owner = {jp}
}

@book{Berger1985Statistical,
  author = {Berger, J. O.},
  title = {Statistical Decision Theory and {Bayesian} Analysis},
  publisher = {Springer-Verlag},
  year = {1985}
}

@article{Berkum2010HiC,
  author = {van Berkum, N. L. and Lieberman-Aiden, E. and Williams, L. and Imakaev,
	M. and Gnirke, A. and Mirny, L. A. and Dekker, J. and Lander, E.
	S.},
  title = {{Hi-C}: a method to study the three-dimensional architecture of genomes},
  journal = {J. Vis. Exp.},
  year = {2010},
  volume = {39},
  pages = {e1869},
  abstract = {The three-dimensional folding of chromosomes compartmentalizes the
	genome and can bring distant functional elements, such as promoters
	and enhancers, into close spatial proximity (2-6). Deciphering the
	relationship between chromosome organization and genome activity
	will aid in understanding genomic processes, like transcription and
	replication. However, little is known about how chromosomes fold.
	Microscopy is unable to distinguish large numbers of loci simultaneously
	or at high resolution. To date, the detection of chromosomal interactions
	using chromosome conformation capture (3C) and its subsequent adaptations
	required the choice of a set of target loci, making genome-wide studies
	impossible (7-10). We developed Hi-C, an extension of 3C that is
	capable of identifying long range interactions in an unbiased, genome-wide
	fashion. In Hi-C, cells are fixed with formaldehyde, causing interacting
	loci to be bound to one another by means of covalent DNA-protein
	cross-links. When the DNA is subsequently fragmented with a restriction
	enzyme, these loci remain linked. A biotinylated residue is incorporated
	as the 5' overhangs are filled in. Next, blunt-end ligation is performed
	under dilute conditions that favor ligation events between cross-linked
	DNA fragments. This results in a genome-wide library of ligation
	products, corresponding to pairs of fragments that were originally
	in close proximity to each other in the nucleus. Each ligation product
	is marked with biotin at the site of the junction. The library is
	sheared, and the junctions are pulled-down with streptavidin beads.
	The purified junctions can subsequently be analyzed using a high-throughput
	sequencer, resulting in a catalog of interacting fragments. Direct
	analysis of the resulting contact matrix reveals numerous features
	of genomic organization, such as the presence of chromosome territories
	and the preferential association of small gene-rich chromosomes.
	Correlation analysis can be applied to the contact matrix, demonstrating
	that the human genome is segregated into two compartments: a less
	densely packed compartment containing open, accessible, and active
	chromatin and a more dense compartment containing closed, inaccessible,
	and inactive chromatin regions. Finally, ensemble analysis of the
	contact matrix, coupled with theoretical derivations and computational
	simulations, revealed that at the megabase scale Hi-C reveals features
	consistent with a fractal globule conformation.},
  doi = {10.3791/1869},
  institution = {Program in Gene Function and Expression, Department of Biochemistry
	and Molecular Pharmacology, University of Massachusetts Medical School.},
  keywords = {ngs, hic},
  language = {eng},
  medline-pst = {epublish},
  owner = {philippe},
  pii = {1869},
  pmid = {20461051},
  timestamp = {2010.07.27},
  url = {http://dx.doi.org/10.3791/1869}
}

@book{Berlinet2003Reproducing,
  author = {Berlinet, A. and Thomas-Agnan, C.},
  title = {Reproducing Kernel {Hilbert} Spaces in Probability and Statistics},
  publisher = {Springer},
  year = {2003},
  owner = {vert},
  timestamp = {2007.08.02}
}

@article{Bern2004Automatic,
  author = {Bern, M. and Goldberg, D. and McDonald, W. H. and Yates, III, J. R.},
  title = {Automatic Quality Assessment of Peptide Tandem Mass Spectra},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {i49--i54},
  number = {Suppl. 1},
  abstract = {Motivation: {A} powerful proteomics methodology couples high-performance
	liquid chromatography ({HPLC}) with tandem mass spectrometry and
	database-search software, such as {SEQUEST}. {S}uch a set-up, however,
	produces a large number of spectra, many of which are of too poor
	quality to be useful. {H}ence a filter that eliminates poor spectra
	before the database search can significantly improve throughput and
	robustness. {M}oreover, spectra judged to be of high quality, but
	that cannot be identified by database search, are prime candidates
	for still more computationally intensive methods, such as de novo
	sequencing or wider database searches including post-translational
	modifications. {R}esults: {W}e report on two different approaches
	to assessing spectral quality prior to identification: binary classification,
	which predicts whether or not {SEQUEST} will be able to make an identification,
	and statistical regression, which predicts a more universal quality
	metric involving the number of b- and y-ion peaks. {T}he best of
	our binary classifiers can eliminate over 75\% of the unidentifiable
	spectra while losing only 10\% of the identifiable spectra. {S}tatistical
	regression can pick out spectra of modified peptides that can be
	identified by a de novo program but not by {SEQUEST}. {I}n a section
	of independent interest, we discuss intensity normalization of mass
	spectra.},
  pdf = {../local/Bern2004Automatic.pdf},
  file = {Bern2004Automatic.pdf:local/Bern2004Automatic.pdf:PDF},
  keywords = {biosvm proteomics},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/suppl_1/i49}
}

@article{Bernardo2005Chemogenomica,
  author = {di Bernardo, D. and Thompson, M. J. and Gardner, T. S. and Chobot,
	S. E. and Eastwood, E. L. and Wojtovich, A. P. and Elliott, S. J. and
	Schaus, S. E. and Collins, J. J.},
  title = {Chemogenomic profiling on a genome-wide scale using reverse-engineered
	gene networks},
  journal = {Nat. Biotechnol.},
  year = {2005},
  volume = {23},
  pages = {377--383},
  number = {3},
  month = mar,
  abstract = {A major challenge in drug discovery is to distinguish the molecular
	targets of a bioactive compound from the hundreds to thousands of
	additional gene products that respond indirectly to changes in the
	activity of the targets. Here, we present an integrated computational-experimental
	approach for computing the likelihood that gene products and associated
	pathways are targets of a compound. This is achieved by filtering
	the mRNA expression profile of compound-exposed cells using a reverse-engineered
	model of the cell's gene regulatory network. We apply the method
	to a set of 515 whole-genome yeast expression profiles resulting
	from a variety of treatments (compounds, knockouts and induced expression),
	and correctly enrich for the known targets and associated pathways
	in the majority of compounds examined. We demonstrate our approach
	with PTSB, a growth inhibitory compound with a previously unknown
	mode of action, by predicting and validating thioredoxin and thioredoxin
	reductase as its target.},
  doi = {10.1038/nbt1075},
  institution = {Telethon Institute for Genetics and Medicine, Naples, Italy.},
  keywords = {Algorithms; Artificial Intelligence; Computer Simulation; Drug Delivery
	Systems; Drug Design; Gene Expression Profiling; Gene Expression
	Regulation; Models, Biological; Models, Statistical; Protein Engineering;
	Protein Interaction Mapping; Saccharomyces cerevisiae; Saccharomyces
	cerevisiae Proteins; Signal Transduction; Thioredoxin-Disulfide Reductase;
	Thioredoxins},
  owner = {fantine},
  pii = {nbt1075},
  pmid = {15765094},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1038/nbt1075}
}

@article{Bernardo2005Chemogenomic,
  author = {di Bernardo, D. and Thompson, M. J. and Gardner, T. S. and Chobot, S.
	E. and Eastwood, E. L. and Wojtovich, A. P. and Elliott, S. J. and
	Schaus, S. E. and Collins, J. J.},
  title = {Chemogenomic profiling on a genome-wide scale using reverse-engineered
	gene networks},
  journal = {Nat. Biotechnol.},
  year = {2005},
  volume = {23},
  pages = {377--383},
  number = {3},
  month = mar,
  abstract = {A major challenge in drug discovery is to distinguish the molecular
	targets of a bioactive compound from the hundreds to thousands of
	additional gene products that respond indirectly to changes in the
	activity of the targets. Here, we present an integrated computational-experimental
	approach for computing the likelihood that gene products and associated
	pathways are targets of a compound. This is achieved by filtering
	the mRNA expression profile of compound-exposed cells using a reverse-engineered
	model of the cell's gene regulatory network. We apply the method
	to a set of 515 whole-genome yeast expression profiles resulting
	from a variety of treatments (compounds, knockouts and induced expression),
	and correctly enrich for the known targets and associated pathways
	in the majority of compounds examined. We demonstrate our approach
	with PTSB, a growth inhibitory compound with a previously unknown
	mode of action, by predicting and validating thioredoxin and thioredoxin
	reductase as its target.},
  doi = {10.1038/nbt1075},
  institution = {Telethon Institute for Genetics and Medicine, Naples, Italy.},
  owner = {fantine},
  pii = {nbt1075},
  pmid = {15765094},
  timestamp = {2008.01.22},
  url = {http://dx.doi.org/10.1038/nbt1075}
}

@article{Bernstein1977Protein,
  author = {Bernstein, F. C. and Koetzle, T. F. and Williams, G. J. and Meyer,
	E. F. and Brice, M. D. and Rodgers, J. R. and Kennard, O. and Shimanouchi,
	T. and Tasumi, M.},
  title = {The {Protein Data Bank}: a computer-based archival file for macromolecular
	structures},
  journal = {J. Mol. Biol.},
  year = {1977},
  volume = {112},
  pages = {535--542},
  number = {3},
  month = may,
  keywords = {Computers; Great Britain; Information Systems; Japan; Protein Conformation;
	Proteins; United States},
  owner = {bricehoffmann},
  pmid = {875032},
  timestamp = {2009.02.13}
}

@inproceedings{Berretti04Graph,
  author = {Berretti, S. and Del Bimbo, A. and Pala, P.},
  title = {A Graph Edit Distance Based on Node Merging},
  booktitle = {Proc. of {ACM} International Conference on Image and Video Retrieval
	({CIVR})},
  year = {2004},
  pages = {464--472},
  address = {Dublin, Ireland},
  month = jul,
  url = {http://www.micc.unifi.it/publications/2004/BDP04a}
}

@article{Bertoni2008Discovering,
  author = {Bertoni, Alberto and Valentini, Giorgio},
  title = {Discovering multi-level structures in bio-molecular data through
	the {Bernstein} inequality},
  journal = {BMC Bioinformatics},
  year = {2008},
  volume = {9},
  number = {Suppl 2},
  pages = {S4},
  abstract = {The unsupervised discovery of structures (i.e. clusterings) underlying
	data is a central issue in several branches of bioinformatics. Methods
	based on the concept of stability have been recently proposed to
	assess the reliability of a clustering procedure and to estimate
	the "optimal" number of clusters in bio-molecular data. A major problem
	with stability-based methods is the detection of multi-level structures
	(e.g. hierarchical functional classes of genes), and the assessment
	of their statistical significance. In this context, a chi-square
	based statistical test of hypothesis has been proposed; however,
	to assure the correctness of this technique some assumptions about
	the distribution of the data are needed. To assess the statistical
	significance and to discover multi-level structures in bio-molecular
	data, a new method based on Bernstein's inequality is proposed. This
	approach makes no assumptions about the distribution of the data,
	thus assuring a reliable application to a large range of bioinformatics
	problems. Results with synthetic and DNA microarray data show the
	effectiveness of the proposed method. The Bernstein test, due to its
	loose assumptions, is more sensitive than the chi-square test to
	the detection of multiple structures simultaneously present in the
	data. Nevertheless it is less selective, that is subject to more
	false positives, but adding independence assumptions, a more selective
	variant of the Bernstein inequality-based test is also presented.
	The proposed methods can be applied to discover multiple structures
	and to assess their significance in different types of bio-molecular
	data.},
  doi = {10.1186/1471-2105-9-S2-S4},
  institution = {DSI, Dipartimento di Scienze dell'Informazione, Universit{\`a} degli
	Studi di Milano, Via Comelico 39, Milano, Italy. bertoni@dsi.unimi.it},
  language = {eng},
  medline-pst = {epublish},
  owner = {philippe},
  pii = {1471-2105-9-S2-S4},
  pmid = {18387206},
  timestamp = {2011.05.14},
  url = {http://dx.doi.org/10.1186/1471-2105-9-S2-S4}
}

@book{Bertsekas1999Nonlinear,
  author = {Bertsekas, D.},
  title = {Nonlinear programming},
  year = {1999},
  publisher = {Athena Scientific}
}

@article{Bertucci2006Gene,
  author = {Bertucci, F. and Finetti, P. and Cervera, N. and Charafe-Jauffret,
	E. and Mamessier, E. and Ad{\'e}la{\"\i}de, J. and Debono, S. and Houvenaeghel,
	G. and Maraninchi, D. and Viens, P. and Charpin, C. and Jacquemier,
	J. and Birnbaum, D.},
  title = {Gene expression profiling shows medullary breast cancer is a subgroup
	of basal breast cancers},
  journal = {Cancer Res.},
  year = {2006},
  volume = {66},
  pages = {4636--4644},
  number = {9},
  month = may,
  abstract = {Medullary breast cancer (MBC) is a rare but enigmatic pathologic type
	of breast cancer. Despite features of aggressiveness, MBC is associated
	with a favorable prognosis. Morphologic diagnosis remains difficult
	in many cases. Very little is known about the molecular alterations
	involved in MBC. Notably, it is not clear whether MBC and ductal
	breast cancer (DBC) represent molecularly distinct entities and what
	genes/proteins might account for their differences. Using whole-genome
	oligonucleotide microarrays, we compared gene expression profiles
	of 22 MBCs and 44 grade III DBCs. We show that MBCs are less heterogeneous
	than DBCs. Whereas different molecular subtypes (luminal A, luminal
	B, basal, ERBB2-overexpressing, and normal-like) exist in DBCs, 95\%
	MBCs display a basal profile, similar to that of basal DBCs. Supervised
	analysis identified gene expression signatures that discriminated
	MBCs from DBCs. Discriminator genes are associated with various cellular
	processes related to MBC features, in particular immune reaction
	and apoptosis. As compared with MBCs, basal DBCs overexpress genes
	involved in smooth muscle cell differentiation, suggesting that MBCs
	are a distinct subgroup of basal breast cancer with limited myoepithelial
	differentiation. Finally, MBCs overexpress a series of genes located
	on the 12p13 and 6p21 chromosomal regions known to contain pluripotency
	genes. Our results contribute to a better understanding of MBC and
	of mammary oncogenesis in general.},
  doi = {10.1158/0008-5472.CAN-06-0031},
  pdf = {../local/Bertucci2006Gene.pdf},
  file = {Bertucci2006Gene.pdf:Bertucci2006Gene.pdf:PDF},
  institution = {Institut de Canc{\'e}rologie de Marseille, D{\'e}partement d'Oncologie
	Mol{\'e}culaire, Institut Paoli-Calmettes et Unit{\'e} Mixte de Recherche
	599 Institut National de la Sant{\'e} et de la Recherche M{\'e}dicale,
	Marseilles, France.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {66/9/4636},
  pmid = {16651414},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1158/0008-5472.CAN-06-0031}
}

@article{Bhalla1999Emergent,
  author = {Bhalla, U. S. and Iyengar, R.},
  title = {Emergent Properties of Networks of Biological Signaling Pathways},
  journal = {Science},
  year = {1999},
  volume = {283},
  pages = {381--387},
  number = {5400},
  doi = {10.1126/science.283.5400.381},
  eprint = {http://www.sciencemag.org/cgi/reprint/283/5400/381.pdf},
  pdf = {../local/Bhalla1999Emergent.pdf},
  file = {Bhalla1999Emergent.pdf:Bhalla1999Emergent.pdf:PDF},
  keywords = {csbcbook},
  url = {http://www.sciencemag.org/cgi/content/abstract/283/5400/381}
}

@article{Bhasin2005GPCRsclass,
  author = {Bhasin, M. and Raghava, G. P. S.},
  title = {{GPCRsclass}: a web tool for the classification of amine type of
	{G}-protein-coupled receptors},
  journal = {Nucleic Acids Res.},
  year = {2005},
  volume = {33},
  pages = {W143--W147},
  number = {Web Server issue},
  month = jul,
  abstract = {The receptors of amine subfamily are specifically major drug targets
	for therapy of nervous disorders and psychiatric diseases. {T}he
	recognition of novel amine type of receptors and their cognate ligands
	is of paramount interest for pharmaceutical companies. {I}n the past,
	{C}hou and co-workers have shown that different types of amine receptors
	are correlated with their amino acid composition and are predictable
	on its basis with considerable accuracy [{E}lrod and {C}hou (2002)
	{P}rotein {E}ng., 15, 713-715]. {T}his motivated us to develop a
	better method for the recognition of novel amine receptors and for
	their further classification. {T}he method was developed on the basis
	of amino acid composition and dipeptide composition of proteins using
	support vector machine. {T}he method was trained and tested on 167
	proteins of amine subfamily of {G}-protein-coupled receptors ({GPCR}s).
	{T}he method discriminated amine subfamily of {GPCR}s from globular
	proteins with {M}atthew's correlation coefficient of 0.98 and 0.99
	using amino acid composition and dipeptide composition, respectively.
	{I}n classifying different types of amine receptors using amino acid
	composition and dipeptide composition, the method achieved an accuracy
	of 89.8 and 96.4\%, respectively. {T}he performance of the method
	was evaluated using 5-fold cross-validation. {T}he dipeptide composition
	based method predicted 67.6\% of protein sequences with an accuracy
	of 100\% with a reliability index > or =5. {A} web server {GPCR}sclass
	has been developed for predicting amine-binding receptors from its
	amino acid sequence [http://www.imtech.res.in/raghava/gpcrsclass/
	and http://bioinformatics.uams.edu/raghava/gpersclass/ (mirror site)].},
  doi = {10.1093/nar/gki351},
  pdf = {../local/Bhasin2005GPCRsclass.pdf},
  file = {Bhasin2005GPCRsclass.pdf:local/Bhasin2005GPCRsclass.pdf:PDF},
  keywords = {biosvm},
  pii = {33/suppl_2/W143},
  url = {http://dx.doi.org/10.1093/nar/gki351}
}

@article{Bhasin2005Pcleavage,
  author = {Bhasin, M. and Raghava, G. P. S.},
  title = {{Pcleavage}: an {SVM} based method for prediction of constitutive proteasome
	and immunoproteasome cleavage sites in antigenic sequences},
  journal = {Nucleic Acids Res.},
  year = {2005},
  volume = {33},
  pages = {W202--W207},
  number = {Web Server issue},
  month = jul,
  abstract = {This manuscript describes a support vector machine based method for
	the prediction of constitutive as well as immunoproteasome cleavage
	sites in antigenic sequences. {T}his method achieved {M}atthew's
	correlation coefficients of 0.54 and 0.43 on in vitro and major histocompatibility
	complex ligand data, respectively. {T}his shows that the performance
	of our method is comparable to that of the {N}et{C}hop method, which
	is currently considered to be the best method for proteasome cleavage
	site prediction. {B}ased on the method, a web server, {P}cleavage,
	has also been developed. {T}his server accepts protein sequences
	in any standard format and present results in a user-friendly format.
	{T}he server is available for free use by all academic users at the
	{URL} http://www.imtech.res.in/raghava/pcleavage/ or http://bioinformatics.uams.edu/mirror/pcleavage/.},
  doi = {10.1093/nar/gki587},
  pdf = {../local/Bhasin2005Pcleavage.pdf},
  file = {Bhasin2005Pcleavage.pdf:local/Bhasin2005Pcleavage.pdf:PDF},
  keywords = {biosvm immunoinformatics},
  url = {http://dx.doi.org/10.1093/nar/gki587}
}

@article{Bhasin2004Analysis,
  author = {Bhasin, M. and Raghava, G. P. S.},
  title = {Analysis and prediction of affinity of {TAP} binding peptides using
	cascade {SVM}},
  journal = {Protein Sci.},
  year = {2004},
  volume = {13},
  pages = {596--607},
  number = {3},
  month = mar,
  abstract = {The generation of cytotoxic {T} lymphocyte ({CTL}) epitopes from an
	antigenic sequence involves number of intracellular processes, including
	production of peptide fragments by proteasome and transport of peptides
	to endoplasmic reticulum through transporter associated with antigen
	processing ({TAP}). {I}n this study, 409 peptides that bind to human
	{TAP} transporter with varying affinity were analyzed to explore
	the selectivity and specificity of {TAP} transporter. {T}he abundance
	of each amino acid from {P}1 to {P}9 positions in high-, intermediate-,
	and low-affinity {TAP} binders were examined. {T}he rules for predicting
	{TAP} binding regions in an antigenic sequence were derived from
	the above analysis. {T}he quantitative matrix was generated on the
	basis of contribution of each position and residue in binding affinity.
	{T}he correlation of r = 0.65 was obtained between experimentally
	determined and predicted binding affinity by using a quantitative
	matrix. {F}urther a support vector machine ({SVM})-based method has
	been developed to model the {TAP} binding affinity of peptides. {T}he
	correlation (r = 0.80) was obtained between the predicted and experimental
	measured values by using sequence-based {SVM}. {T}he reliability
	of prediction was further improved by cascade {SVM} that uses features
	of amino acids along with sequence. {A}n extremely good correlation
	(r = 0.88) was obtained between measured and predicted values, when
	the cascade {SVM}-based method was evaluated through jackknife testing.
	{A} {W}eb service, {TAPP}red (http://www.imtech.res.in/raghava/tappred/
	or http://bioinformatics.uams.edu/mirror/tappred/), has been developed
	based on this approach.},
  doi = {10.1110/ps.03373104},
  pdf = {../local/Bhasin2004Analysis.pdf},
  file = {Bhasin2004Analysis.pdf:local/Bhasin2004Analysis.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1110/ps.03373104}
}

@article{Bhasin2004Classification,
  author = {Bhasin, M. and Raghava, G. P. S.},
  title = {Classification of Nuclear Receptors Based on Amino Acid
	Composition and Dipeptide Composition},
  journal = {J. Biol. Chem.},
  year = {2004},
  volume = {279},
  pages = {23262--23266},
  number = {22},
  abstract = {Nuclear receptors are key transcription factors that regulate crucial
	gene networks responsible for cell growth, differentiation, and homeostasis.
	{N}uclear receptors form a superfamily of phylogenetically related
	proteins and control functions associated with major diseases (e.g.
	diabetes, osteoporosis, and cancer). {I}n this study, a novel method
	has been developed for classifying the subfamilies of nuclear receptors.
	{T}he classification was achieved on the basis of amino acid and
	dipeptide composition from a sequence of receptors using support
	vector machines. {T}he training and testing was done on a non-redundant
	data set of 282 proteins obtained from the {N}uclea{RDB} data base
	(1). {T}he performance of all classifiers was evaluated using a 5-fold
	cross validation test. {I}n the 5-fold cross-validation, the data
	set was randomly partitioned into five equal sets and evaluated five
	times on each distinct set while keeping the remaining four sets
	for training. {I}t was found that different subfamilies of nuclear
	receptors were quite closely correlated in terms of amino acid composition
	as well as dipeptide composition. {T}he overall accuracy of amino
	acid composition-based and dipeptide composition-based classifiers
	were 82.6 and 97.5\%, respectively. {T}herefore, our results prove
	that different subfamilies of nuclear receptors are predictable with
	considerable accuracy using amino acid or dipeptide composition.
	{F}urthermore, based on above approach, an online web service, {NR}pred,
	was developed, which is available at www.imtech.res.in/raghava/nrpred.},
  doi = {10.1074/jbc.M401932200},
  eprint = {http://www.jbc.org/cgi/reprint/279/22/23262.pdf},
  pdf = {../local/Bhasin2004Classification.pdf},
  file = {Bhasin2004Classification.pdf:local/Bhasin2004Classification.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1074/jbc.M401932200}
}

@article{Bhasin2004ESLpred,
  author = {Bhasin, M. and Raghava, G. P. S.},
  title = {{ESLpred}: {SVM}-based method for subcellular localization of eukaryotic
	proteins using dipeptide composition and {PSI-BLAST}},
  journal = {Nucleic Acids Res.},
  year = {2004},
  volume = {32},
  pages = {W414--W419},
  number = {Suppl. 2},
  abstract = {Automated prediction of subcellular localization of proteins is an
	important step in the functional annotation of genomes. {T}he existing
	subcellular localization prediction methods are based on either amino
	acid composition or {N}-terminal characteristics of the proteins.
	{I}n this paper, support vector machine ({SVM}) has been used to
	predict the subcellular location of eukaryotic proteins from their
	different features such as amino acid composition, dipeptide composition
	and physico-chemical properties. {T}he {SVM} module based on dipeptide
	composition performed better than the {SVM} modules based on amino
	acid composition or physico-chemical properties. {I}n addition, {PSI}-{BLAST}
	was also used to search the query sequence against the dataset of
	proteins (experimentally annotated proteins) to predict its subcellular
	location. {I}n order to improve the prediction accuracy, we developed
	a hybrid module using all features of a protein, which consisted
	of an input vector of 458 dimensions (400 dipeptide compositions,
	33 properties, 20 amino acid compositions of the protein and 5 from
	{PSI}-{BLAST} output). {U}sing this hybrid approach, the prediction
	accuracies of nuclear, cytoplasmic, mitochondrial and extracellular
	proteins reached 95.3, 85.2, 68.2 and 88.9\%, respectively. {T}he
	overall prediction accuracy of {SVM} modules based on amino acid
	composition, physico-chemical properties, dipeptide composition and
	the hybrid approach was 78.1, 77.8, 82.9 and 88.0\%, respectively.
	{T}he accuracy of all the modules was evaluated using a 5-fold cross-validation
	technique. {A}ssigning a reliability index (reliability index > or
	=3), 73.5\% of prediction can be made with an accuracy of 96.4\%. {B}ased
	on the above approach, an online web server {ESL}pred was developed,
	which is available at http://www.imtech.res.in/raghava/eslpred/.},
  doi = {10.1093/nar/gkh350},
  eprint = {http://nar.oupjournals.org/cgi/reprint/32/suppl_2/W414.pdf},
  pdf = {../local/Bhasin2004ESLpred.pdf},
  file = {Bhasin2004ESLpred.pdf:local/Bhasin2004ESLpred.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://nar.oupjournals.org/cgi/content/abstract/32/suppl_2/W414}
}

@article{Bhasin2004GPCRpred,
  author = {Bhasin, M. and Raghava, G. P. S.},
  title = {{GPCRpred}: an {SVM}-based method for prediction of families and
	subfamilies of {G}-protein coupled receptors},
  journal = {Nucleic Acids Res.},
  year = {2004},
  volume = {32},
  pages = {W383--W389},
  number = {Suppl. 2},
  abstract = {G-protein coupled receptors ({GPCR}s) belong to one of the largest
	superfamilies of membrane proteins and are important targets for
	drug design. {I}n this study, a support vector machine ({SVM})-based
	method, {GPCR}pred, has been developed for predicting families and
	subfamilies of {GPCR}s from the dipeptide composition of proteins.
	{T}he dataset used in this study for training and testing was obtained
	from http://www.soe.ucsc.edu/research/compbio/gpcr/. {T}he method
	classified {GPCR}s and non-{GPCR}s with an accuracy of 99.5\% when
	evaluated using 5-fold cross-validation. {T}he method is further
	able to predict five major classes or families of {GPCR}s with an
	overall {M}atthew's correlation coefficient ({MCC}) and accuracy
	of 0.81 and 97.5\% respectively. {I}n recognizing the subfamilies
	of the rhodopsin-like family, the method achieved an average {MCC}
	and accuracy of 0.97 and 97.3\% respectively. {T}he method achieved
	overall accuracy of 91.3\% and 96.4\% at family and subfamily level
	respectively when evaluated on an independent/blind dataset of 650
	{GPCR}s. {A} server for recognition and classification of {GPCR}s
	based on multiclass {SVM}s has been set up at http://www.imtech.res.in/raghava/gpcrpred/.
	{W}e have also suggested subfamilies for 42 sequences which were
	previously identified as unclassified {C}lass{A} {GPCR}s. {T}he supplementary
	information is available at http://www.imtech.res.in/raghava/gpcrpred/info.html.},
  doi = {10.1093/nar/gkh416},
  eprint = {http://nar.oupjournals.org/cgi/reprint/32/suppl_2/W383.pdf},
  pdf = {../local/Bhasin2004GPCRpred.pdf},
  file = {Bhasin2004GPCRpred.pdf:local/Bhasin2004GPCRpred.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/nar/gkh416}
}

@article{Bhasin2004Prediction,
  author = {Bhasin, M. and Raghava, G. P. S.},
  title = {Prediction of {CTL} epitopes using {QM}, {SVM} and {ANN} techniques},
  journal = {Vaccine},
  year = {2004},
  volume = {22},
  pages = {3195--3204},
  number = {23--24},
  abstract = {Cytotoxic {T} lymphocyte ({CTL}) epitopes are potential candidates
	for subunit vaccine design for various diseases. {M}ost of the existing
	{T} cell epitope prediction methods are indirect methods that predict
	{MHC} class {I} binders instead of {CTL} epitopes. {I}n this study,
	a systematic attempt has been made to develop a direct method for
	predicting {CTL} epitopes from an antigenic sequence. {T}his method
	is based on quantitative matrix ({QM}) and machine learning techniques
	such as {S}upport {V}ector {M}achine ({SVM}) and {A}rtificial {N}eural
	{N}etwork ({ANN}). {T}his method has been trained and tested on non-redundant
	dataset of {T} cell epitopes and non-epitopes that includes 1137
	experimentally proven {MHC} class {I} restricted {T} cell epitopes.
	{T}he accuracy of {QM}-, {ANN}- and {SVM}-based methods was 70.0,
	72.2 and 75.2\%, respectively. {T}he performance of these methods
	has been evaluated through {L}eave {O}ne {O}ut {C}ross-{V}alidation
	({LOOCV}) at a cutoff score where sensitivity and specificity was
	nearly equal. {F}inally, both machine-learning methods were used
	for consensus and combined prediction of {CTL} epitopes. {T}he performances
	of these methods were evaluated on blind dataset where machine learning-based
	methods perform better than {QM}-based method. {W}e also demonstrated
	through subgroup analysis that our methods can discriminate between
	{T}-cell epitopes and {MHC} binders (non-epitopes). {I}n brief this
	method allows prediction of {CTL} epitopes using {QM}, {SVM}, {ANN}
	approaches. {T}he method also facilitates prediction of {MHC} restriction
	in predicted {T} cell epitopes.},
  doi = {10.1016/j.vaccine.2004.02.005},
  pdf = {../local/Bhasin2004Prediction.pdf},
  file = {Bhasin2004Prediction.pdf:local/Bhasin2004Prediction.pdf:PDF},
  keywords = {biosvm immunoinformatics},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.vaccine.2004.02.005}
}

@article{Bhasin2004SVM,
  author = {Bhasin, M. and Raghava, G. P. S.},
  title = {{SVM} based method for predicting {{HLA}-{DRB}1*0401} binding peptides
	in an antigen sequence},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {421--423},
  number = {3},
  abstract = {Summary: {P}rediction of peptides binding with {MHC} class {II} allele
	{HLA}-{DRB}1*0401 can effectively reduce the number of experiments
	required for identifying helper {T} cell epitopes. {T}his paper describes
	support vector machine ({SVM}) based method developed for identifying
	{HLA}-{DRB}1*0401 binding peptides in an antigenic sequence. {SVM}
	was trained and tested on large and clean data set consisting of
	567 binders and equal number of non-binders. {T}he accuracy of the
	method was 86\% when evaluated through 5-fold cross-validation technique.
	{A}vailable: {A} web server {HLA}-{DR}4{P}red based on above approach
	is available at http://www.imtech.res.in/raghava/hladr4pred/ and
	http://bioinformatics.uams.edu/mirror/hladr4pred/ ({M}irror {S}ite).
	{S}upplementary information: http://www.imtech.res.in/raghava/hladr4pred/info.html},
  pdf = {../local/Bhasin2004SVM.pdf},
  file = {Bhasin2004SVM.pdf:local/Bhasin2004SVM.pdf:PDF},
  keywords = {biosvm immunoinformatics},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/3/421}
}

@article{Bhasin2003MHCBN,
  author = {Bhasin, Manoj and Singh, Harpreet and Raghava, G. P. S.},
  title = {{MHCBN}: a comprehensive database of {MHC} binding and non-binding
	peptides},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {665--666},
  number = {5},
  month = mar,
  abstract = {MHCBN is a comprehensive database of Major Histocompatibility Complex
	(MHC) binding and non-binding peptides compiled from published literature
	and existing databases. The latest version of the database has 19
	777 entries including 17 129 MHC binders and 2648 MHC non-binders
	for more than 400 MHC molecules. The database has sequence and structure
	data of (a) source proteins of peptides and (b) MHC molecules. MHCBN
	has a number of web tools that include: (i) mapping of peptide on
	query sequence; (ii) search on any field; (iii) creation of data
	sets; and (iv) online data submission. The database also provides
	hypertext links to major databases like SWISS-PROT, PDB, IMGT/HLA-DB,
	GenBank and PUBMED.},
  keywords = {Amino Acid Sequence; Binding Sites; Database Management Systems; Databases,
	Protein; Histocompatibility Antigens; Information Storage and Retrieval;
	Macromolecular Substances; Major Histocompatibility Complex; Molecular
	Sequence Data; Peptide Fragments; Peptides; Protein Binding; Protein
	Conformation; Sequence Alignment; Sequence Analysis, Protein; Structure-Activity
	Relationship; User-Computer Interface},
  owner = {laurent},
  pmid = {12651731},
  timestamp = {2007.01.30}
}

@article{Bhattacharjee2001Classification,
  author = {Bhattacharjee, A. and Richards, W. G. and Staunton, J. and Li, C.
	and Monti, S. and Vasa, P. and Ladd, C. and Beheshti, J. and Bueno,
	R. and Gillette, M. and Loda, M. and Weber, G. and Mark, E. J. and
	Lander, E. S. and Wong, W. and Johnson, B. E. and Golub, T. R. and
	Sugarbaker, D. J. and Meyerson, M.},
  title = {Classification of human lung carcinomas by {mRNA} expression profiling
	reveals distinct adenocarcinoma subclasses},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2001},
  volume = {98},
  pages = {13790--13795},
  number = {24},
  month = nov,
  abstract = {We have generated a molecular taxonomy of lung carcinoma, the leading
	cause of cancer death in the United States and worldwide. Using oligonucleotide
	microarrays, we analyzed mRNA expression levels corresponding to
	12,600 transcript sequences in 186 lung tumor samples, including
	139 adenocarcinomas resected from the lung. Hierarchical and probabilistic
	clustering of expression data defined distinct subclasses of lung
	adenocarcinoma. Among these were tumors with high relative expression
	of neuroendocrine genes and of type II pneumocyte genes, respectively.
	Retrospective analysis revealed a less favorable outcome for the
	adenocarcinomas with neuroendocrine gene expression. The diagnostic
	potential of expression profiling is emphasized by its ability to
	discriminate primary lung adenocarcinomas from metastases of extra-pulmonary
	origin. These results suggest that integration of expression profile
	data with clinical parameters could aid in diagnosis of lung cancer
	patients.},
  doi = {10.1073/pnas.191502998},
  pdf = {../local/Bhattacharjee2001Classification.pdf},
  file = {Bhattacharjee2001Classification.pdf:Bhattacharjee2001Classification.pdf:PDF},
  institution = {Department of Adult Oncology, Dana-Farber Cancer Institute, Harvard
	Medical School, 44 Binney Street, Boston, MA 02115, USA.},
  owner = {jp},
  pii = {191502998},
  pmid = {11707567},
  timestamp = {2008.11.15},
  url = {http://dx.doi.org/10.1073/pnas.191502998}
}

@article{Bhavani2006Substructure-based,
  author = {Bhavani, S. and Nagargadde, A. and Thawani, A. and Sridhar, V. and
	Chandra, N.},
  title = {Substructure-based support vector machine classifiers for prediction
	of adverse effects in diverse classes of drugs},
  journal = {J. Chem. Inform. Model.},
  year = {2006},
  volume = {46},
  pages = {2478--2486},
  number = {6},
  abstract = {Unforeseen adverse effects exhibited by drugs contribute heavily to
	late-phase failure and even withdrawal of marketed drugs. Torsade
	de pointes (TdP) is one such important adverse effect, which causes
	cardiac arrhythmia and, in some cases, sudden death, making it crucial
	for potential drugs to be screened for torsadogenicity. The need
	to tap the power of computational approaches for the prediction of
	adverse effects such as TdP is increasingly becoming evident. The
	availability of screening data including those in organized databases
	greatly facilitates exploration of newer computational approaches.
	In this paper, we report the development of a prediction method based
	on a support machine vector algorithm. The method uses a combination
	of descriptors, encoding both the type of toxicophore as well as
	the position of the toxicophore in the drug molecule, thus considering
	both the pharmacophore and the three-dimensional shape information
	of the molecule. For delineating toxicophores, a novel pattern-recognition
	method that utilizes substructures within a molecule has been developed.
	The results obtained using the hybrid approach have been compared
	with those available in the literature for the same data set. An
	improvement in prediction accuracy is clearly seen, with the accuracy
	reaching up to 97\% in predicting compounds that can cause TdP and
	90\% for predicting compounds that do not cause TdP. The generic
	nature of the method has been demonstrated with four data sets available
	for carcinogenicity, where prediction accuracies were significantly
	higher, with a best receiver operating characteristics (ROC) value
	of 0.81 as against a best ROC value of 0.7 reported in the literature
	for the same data set. Thus, the method holds promise for wide applicability
	in toxicity prediction.},
  doi = {10.1021/ci060128l},
  keywords = {Algorithms; Carcinogens; Chemistry, Pharmaceutical; Computational
	Biology; Drug Evaluation, Preclinical; Drug Industry; Humans; Models,
	Chemical; Models, Statistical; Neural Networks (Computer); Pattern
	Recognition, Automated; ROC Curve; Sequence Analysis, Protein; Software;
	Torsades de Pointes},
  owner = {laurent},
  pmid = {17125188},
  timestamp = {2007.09.18},
  url = {http://dx.doi.org/10.1021/ci060128l}
}

@article{Bi2003Dimensionality,
  author    = {Bi, J. and Bennett, K. and Embrechts, M. and Breneman, C. and Song, M.},
  title     = {Dimensionality reduction via sparse support vector machines},
  journal   = {J. Mach. Learn. Res.},
  volume    = {3},
  pages     = {1229--1243},
  year      = {2003},
  owner     = {jp},
  timestamp = {2011.01.11},
}

@incollection{Biasotti20043D,
  author = {S. Biasotti and S. Marini and M. Mortara and G. Patane and M. Spagnuolo
	and B. Falcidieno},
  title = {{3D} Shape Matching through Topological Structures},
  booktitle = {Discrete Geometry for Computer Imagery},
  publisher = {Springer Berlin / Heidelberg},
  year = {2004},
  pages = {194--203}
}

@article{Biau2012Analysis,
  author = {Biau, G.},
  title = {Analysis of a Random Forests model},
  journal = {J. Mach. Learn. Res.},
  year = {2012},
  volume = {13},
  pages = {1063--1095},
  publisher = {JMLR.org}
}

@article{Biau2006Statistical,
  author = {Biau, G. and Bleakley, K.},
  title = {Statistical inference on graphs},
  journal = {Statistics and Decisions},
  year = {2006},
  volume = {24},
  pages = {209--232},
  number = {2},
  timestamp = {2007.02.01}
}

@article{Bickel2009Simultaneous,
  author    = {Bickel, P. J. and Ritov, Y. and Tsybakov, A.},
  title     = {Simultaneous analysis of {L}asso and {D}antzig selector},
  journal   = {Ann. Stat.},
  volume    = {37},
  number    = {4},
  pages     = {1705--1732},
  year      = {2009},
  keywords  = {lasso},
  pdf       = {../local/BickelSimultaneous.pdf},
  file      = {BickelSimultaneous.pdf:BickelSimultaneous.pdf:PDF},
  owner     = {jp},
  timestamp = {2009.05.02},
}

@inproceedings{Bickel2008Multi-task,
  author = {Steffen Bickel and Jasmina Bogojeska and Thomas Lengauer and Tobias
	Scheffer},
  title = {Multi-task learning for {HIV} therapy screening},
  booktitle = {ICML'08: Proceedings of the 25th international conference on Machine
	learning},
  year = {2008},
  pages = {56--63},
  doi = {10.1145/1390156.1390164},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://doi.acm.org/10.1145/1390156.1390164}
}

@inproceedings{Bickel2007Discriminative,
  author    = {Bickel, Steffen and Br{\"u}ckner, Michael and Scheffer, Tobias},
  title     = {Discriminative learning for differing training and test distributions},
  booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning},
  pages     = {81--88},
  publisher = {ACM Press},
  year      = {2007},
}

@book{Biggs1976Graph,
  title = {Graph theory 1736--1936},
  publisher = {Oxford University Press},
  year = {1976},
  author = {N. L. Biggs and E. K. Lloyd and R. J. Wilson}
}

@article{Bild2006Oncogenic,
  author = {Bild, A. H. and Yao, G. and Chang, J. T. and Wang, Q. and Potti,
	A. and Chasse, D. and Joshi, M. B. and Harpole, D. and Lancaster,
	J. M. and Berchuck, A. and Olson, Jr., J. A. and Marks, J. R. and
	Dressman, H. K. and West, M. and Nevins, J. R.},
  title = {Oncogenic pathway signatures in human cancers as a guide to targeted
	therapies},
  journal = {Nature},
  year = {2006},
  volume = {439},
  pages = {353--357},
  number = {7074},
  abstract = {The development of an oncogenic state is a complex process involving
	the accumulation of multiple independent mutations that lead to deregulation
	of cell signalling pathways central to the control of cell growth
	and cell fate. {T}he ability to define cancer subtypes, recurrence
	of disease and response to specific therapies using {DNA} microarray-based
	gene expression signatures has been demonstrated in multiple studies.
	{V}arious studies have also demonstrated the potential for using
	gene expression profiles for the analysis of oncogenic pathways.
	{H}ere we show that gene expression signatures can be identified
	that reflect the activation status of several oncogenic pathways.
	{W}hen evaluated in several large collections of human cancers, these
	gene expression signatures identify patterns of pathway deregulation
	in tumours and clinically relevant associations with disease outcomes.
	{C}ombining signature-based predictions across several pathways identifies
	coordinated patterns of pathway deregulation that distinguish between
	specific cancers and tumour subtypes. {C}lustering tumours based
	on pathway signatures further defines prognosis in respective patient
	subsets, demonstrating that patterns of oncogenic pathway deregulation
	underlie the development of the oncogenic phenotype and reflect the
	biology and outcome of specific cancers. {P}redictions of pathway
	deregulation in cancer cell lines are also shown to predict the sensitivity
	to therapeutic agents that target components of the pathway. {L}inking
	pathway deregulation with sensitivity to therapeutics that target
	components of the pathway provides an opportunity to make use of
	these oncogenic pathway signatures to guide the use of targeted therapeutics.},
  doi = {10.1038/nature04296},
  pdf = {../local/Bild2006Oncogenic.pdf},
  file = {Bild2006Oncogenic.pdf:Bild2006Oncogenic.pdf:PDF},
  keywords = {breastcancer},
  url = {http://dx.doi.org/10.1038/nature04296}
}

@article{Bilello2004Automatic,
  author = {Michel Bilello and Salih Burak Gokturk and Terry Desser and Sandy
	Napel and R. Brooke Jeffrey and Christopher F. Beaulieu},
  title = {Automatic detection and classification of hypodense hepatic lesions
	on contrast-enhanced venous-phase {CT}},
  journal = {Med. Phys.},
  year = {2004},
  volume = {31},
  pages = {2584--2593},
  number = {9},
  month = sep,
  abstract = {The objective of this work was to develop and validate algorithms
	for detection and classification of hypodense hepatic lesions, specifically
	cysts, hemangiomas, and metastases from {CT} scans in the portal
	venous phase of enhancement. {F}ifty-six {CT} sections from 51 patients
	were used as representative of common hypodense liver lesions, including
	22 simple cysts, 11 hemangiomas, 22 metastases, and 1 image containing
	both a cyst and a hemangioma. {T}he detection algorithm uses intensity-based
	histogram methods to find central lesions, followed by liver contour
	refinement to identify peripheral lesions. {T}he classification algorithm
	operates on the focal lesions identified during detection, and includes
	shape-based segmentation, edge pixel weighting, and lesion texture
	filtering. {S}upport vector machines are then used to perform a pair-wise
	lesion classification. {F}or the detection algorithm, 80\% lesion
	sensitivity was achieved at approximately 0.3 false positives ({FP})
	per slice for central lesions, and 0.5 {FP} per slice for peripheral
	lesions, giving a total of 0.8 {FP} per section. {F}or 90\% sensitivity,
	the total number of {FP} rises to about 2.2 per section. {T}he pair-wise
	classification yielded good discrimination between cysts and metastases
	(at 95\% sensitivity for detection of metastases, only about 5\%
	of cysts are incorrectly classified as metastases), perfect discrimination
	between hemangiomas and cysts, and was least accurate in discriminating
	between hemangiomas and metastases (at 90\% sensitivity for detection
	of hemangiomas, about 28\% of metastases were incorrectly classified
	as hemangiomas). {I}nitial implementations of our algorithms are
	promising for automating liver lesion detection and classification.}
}

@article{Billerey1996Etude,
  author   = {Billerey, C. and Boccon-Gibod, L.},
  title    = {Etude des variations inter-pathologistes dans l'{\'e}valuation du grade et du stade des tumeurs v{\'e}sicales},
  journal  = {Progr{\`e}s en Urologie},
  volume   = {6},
  pages    = {49--57},
  year     = {1996},
  keywords = {csbcbook, csbcbook-ch3},
  url      = {http://www.urofrance.org/fileadmin/documents/data/PU/1996/PU-1996-00070049/TEXF-PU-1996-00070049.PDF},
  pdf      = {../local/Billerey1996Etude.pdf},
  file     = {Billerey1996Etude.pdf:Billerey1996Etude.pdf:PDF},
}

@article{Billerey2001Frequent,
  author = {Billerey, C. and Chopin, D. and Aubriot-Lorton, M. H. and Ricol,
	D. and {Gil Diez de Medina}, S. and {van Rhijn}, B. and Bralet, M. P.
	and Lefrere-Belda, M. A. and Lahaye, J. B. and Abbou, C. C. and
	Bonaventure, J. and Zafrani, E. S. and {van der Kwast}, T. and Thiery,
	J. P. and Radvanyi, F.},
  title = {Frequent {FGFR3} mutations in papillary non-invasive bladder ({pTa})
	tumors},
  journal = {Am. J. Pathol.},
  year = {2001},
  volume = {158},
  pages = {1955--1959},
  abstract = {We recently identified activating mutations of fibroblast growth factor
	receptor 3 (FGFR3) in bladder carcinoma. In this study we assessed
	the incidence of FGFR3 mutations in a series of 132 bladder carcinomas:
	20 carcinoma in situ (CIS), 50 pTa, 19 pT1, and 43 pT2-4. All 48
	mutations identified were identical to the germinal activating mutations
	that cause thanatophoric dysplasia, a lethal form of dwarfism. The
	S249C mutation, found in 33 of the 48 mutated tumors, was the most
	common. The frequency of mutations was higher in pTa tumors (37 of
	50, 74\%) than in CIS (0 of 20, 0\%; P < 0.0001), pT1 (4 of 19, 21\%;
	P < 0.0001) and pT2-4 tumors (7 of 43, 16\%; P < 0.0001). FGFR3 mutations
	were detected in 27 of 32 (84\%) G1, 16 of 29 (55\%) G2, and 5 of 71
	(7\%) G3 tumors. This association between FGFR3 mutations and low
	grade was highly significant (P < 0.0001). FGFR3 is the first gene
	found to be mutated at a high frequency in pTa tumors. The absence
	of FGFR3 mutations in CIS and the low frequency of FGFR3 mutations
	in pT1 and pT2-4 tumors are consistent with the model of bladder
	tumor progression in which the most common precursor of pT1 and pT2-4
	tumors is CIS.},
  owner = {lcalzone},
  timestamp = {2010.04.27}
}

@article{Birge2006Minimal,
  author = {Birg{\'e}, L. and Massart, P.},
  title = {Minimal penalties for {Gaussian} model selection},
  journal = {Probab. Theory Relat. Fields},
  year = {2006},
  volume = {138},
  pages = {33--73},
  doi = {10.1007/s00440-006-0011-8},
  pdf = {../local/Birge2006Minimal.pdf},
  file = {Birge2006Minimal.pdf:Birge2006Minimal.pdf:PDF},
  owner = {jp},
  timestamp = {2009.05.02},
  url = {http://dx.doi.org/10.1007/s00440-006-0011-8}
}

@article{Birge2001Gaussian,
  author    = {Birg{\'e}, L. and Massart, P.},
  title     = {Gaussian model selection},
  journal   = {J. Eur. Math. Soc.},
  volume    = {3},
  pages     = {203--268},
  year      = {2001},
  owner     = {jp},
  timestamp = {2010.06.02},
}

@book{Bishop2006Pattern,
  title = {Pattern recognition and machine learning},
  publisher = {Springer},
  year = {2006},
  author = {Bishop, C. M.},
  owner = {vert},
  timestamp = {2007.08.02}
}

@article{Bissantz2003Protein-based,
  author = {Bissantz, C. and Bernard, P. and Hibert, M. and Rognan, D.},
  title = {Protein-based virtual screening of chemical databases. {II}. {Are}
	homology models of {G}-Protein Coupled Receptors suitable targets?},
  journal = {Proteins},
  year = {2003},
  volume = {50},
  pages = {5--25},
  number = {1},
  month = jan,
  abstract = {The aim of the current study is to investigate whether homology models
	of G-Protein-Coupled Receptors (GPCRs) that are based on bovine rhodopsin
	are reliable enough to be used for virtual screening of chemical
	databases. Starting from the recently described 2.8 A-resolution
	X-ray structure of bovine rhodopsin, homology models of an "antagonist-bound"
	form of three human GPCRs (dopamine D3 receptor, muscarinic M1 receptor,
	vasopressin V1a receptor) were constructed. The homology models were
	used to screen three-dimensional databases using three different
	docking programs (Dock, FlexX, Gold) in combination with seven scoring
	functions (ChemScore, Dock, FlexX, Fresno, Gold, Pmf, Score). Rhodopsin-based
	homology models turned out to be suitable, indeed, for virtual screening
	since known antagonists seeded in the test databases could be distinguished
	from randomly chosen molecules. However, such models are not accurate
	enough for retrieving known agonists. To generate receptor models
	better suited for agonist screening, we developed a new knowledge-
	and pharmacophore-based modeling procedure that might partly simulate
	the conformational changes occurring in the active site during receptor
	activation. Receptor coordinates generated by this new procedure
	are now suitable for agonist screening. We thus propose two alternative
	strategies for the virtual screening of GPCR ligands, relying on
	a different set of receptor coordinates (antagonist-bound and agonist-bound
	states).},
  doi = {10.1002/prot.10237},
  keywords = {chemogenomics},
  owner = {laurent},
  pmid = {12471595},
  timestamp = {2008.03.27},
  url = {http://dx.doi.org/10.1002/prot.10237}
}

@article{Bittner2000Molecular,
  author = {Bittner, M. and Meltzer, P. and Chen, Y. and Jiang, Y. and Seftor,
	E. and Hendrix, M. and Radmacher, M. and Simon, R. and Yakhini, Z.
	and Ben-Dor, A. and Sampas, N. and Dougherty, E. and Wang, E. and
	Marincola, F. and Gooden, C. and Lueders, J. and Glatfelter, A. and
	Pollock, P. and Carpten, J. and Gillanders, E. and Leja, D. and Dietrich,
	K. and Beaudry, C. and Berens, M. and Alberts, D. and Sondak, V.
	and Hayward, N. and Trent, J.},
  title = {Molecular classification of cutaneous malignant melanoma by gene
	expression profiling},
  journal = {Nature},
  year = {2000},
  volume = {406},
  pages = {536--540},
  number = {6795},
  month = aug,
  abstract = {The most common human cancers are malignant neoplasms of the skin.
	Incidence of cutaneous melanoma is rising especially steeply, with
	minimal progress in non-surgical treatment of advanced disease. Despite
	significant effort to identify independent predictors of melanoma
	outcome, no accepted histopathological, molecular or immunohistochemical
	marker defines subsets of this neoplasm. Accordingly, though melanoma
	is thought to present with different 'taxonomic' forms, these are
	considered part of a continuous spectrum rather than discrete entities.
	Here we report the discovery of a subset of melanomas identified
	by mathematical analysis of gene expression in a series of samples.
	Remarkably, many genes underlying the classification of this subset
	are differentially regulated in invasive melanomas that form primitive
	tubular networks in vitro, a feature of some highly aggressive metastatic
	melanomas. Global transcript analysis can identify unrecognized subtypes
	of cutaneous melanoma and predict experimentally verifiable phenotypic
	characteristics that may be of importance to disease progression.},
  doi = {10.1038/35020115},
  pdf = {../local/Bittner2000Molecular.pdf},
  file = {Bittner2000Molecular.pdf:Bittner2000Molecular.pdf:PDF},
  institution = {Cancer Genetics Branch, National Human Genome Research Institute,
	NIH, Bethesda, Maryland 20892, USA. mbittner@nhgri.nih.gov},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {10952317},
  timestamp = {2012.02.27},
  url = {http://dx.doi.org/10.1038/35020115}
}

@book{Blake1987Visual,
  author    = {Blake, A. and Zisserman, A.},
  title     = {Visual Reconstruction},
  publisher = {MIT Press},
  year      = {1987},
}

@article{Blake2000Chemoinformatics,
  author = {J. F. Blake},
  title = {Chemoinformatics -- predicting the physicochemical properties of 'drug-like'
	molecules},
  journal = {Curr. Opin. Biotechnol.},
  year = {2000},
  volume = {11},
  pages = {104--107},
  number = {1},
  month = feb,
  abstract = {A few major advances have occurred in the area of physicochemical
	modeling of organic compounds during the past several years, spurred
	on by changes in the pharmaceutical industry. Recent advances include
	the ability to categorize and screen the overall physicochemical
	properties of potential drug candidates based entirely on their molecular
	structures and the ability to model the components that contribute
	to the oral absorption characteristics of potential drug candidates.},
  doi = {10.1016/S0958-1669(99)00062-2},
  pdf = {../local/Blake2000Chemoinformatics.pdf},
  file = {Blake2000Chemoinformatics.pdf:Blake2000Chemoinformatics.pdf:PDF},
  owner = {vert},
  pii = {S0958-1669(99)00062-2},
  pmid = {10679344},
  timestamp = {2007.08.02},
  url = {http://dx.doi.org/10.1016/S0958-1669(99)00062-2}
}

@phdthesis{Blanchard2001Methodes,
  author = {Blanchard, G.},
  title = {M{\'e}thodes de m{\'e}lange et d'agr{\'e}gation d'estimateurs en reconnaissance
	de formes. {Applications} aux arbres de d{\'e}cision},
  school = {University Paris 13},
  year = {2001},
  month = jan,
  pdf = {../local/blan01.pdf},
  file = {blan01.pdf:local/blan01.pdf:PDF},
  subject = {ml},
  url = {http://www.dma.ens.fr/~gblancha/phd.ps.gz}
}

@unpublished{Blanchard2004Statistical,
  author = {Blanchard, G. and Bousquet, O. and Massart, P.},
  title = {Statistical {P}erformance of {S}upport {V}ector {M}achine},
  note = {Submitted to Ann. Stat.},
  year = {2004}
}

@misc{Blaschko2006Conformal,
  author = {Blaschko, M. B. and Hofmann, T.},
  title = {Conformal {M}ulti-{I}nstance {K}ernels},
  howpublished = {In NIPS 2006 {W}orkshop on {L}earning to {C}ompare {E}xamples},
  year = {2006},
  owner = {pmahe},
  timestamp = {2007.07.13}
}

@article{Blaveri2005Bladder,
  author = {Blaveri, E. and Brewer, J. L. and Roydasgupta, R. and Fridlyand,
	J. and DeVries, S. and Koppie, T. and Pejavar, S. and Mehta, K. and
	Carroll, P. and Simko, J. P. and Waldman, F. M.},
  title = {Bladder cancer stage and outcome by array-based comparative genomic
	hybridization},
  journal = {Clin. Cancer Res.},
  year = {2005},
  volume = {11},
  pages = {7012--7022},
  number = {19 Pt 1},
  month = oct,
  abstract = {PURPOSE: Bladder carcinogenesis is believed to follow alternative
	pathways of disease progression driven by an accumulation of genetic
	alterations. The purpose of this study was to evaluate associations
	between measures of genomic instability and bladder cancer clinical
	phenotype. EXPERIMENTAL DESIGN: Genome-wide copy number profiles
	were obtained for 98 bladder tumors of diverse stages (29 pT(a),
	14 pT1, 55 pT(2-4)) and grades (21 low-grade and 8 high-grade superficial
	tumors) by array-based comparative genomic hybridization (CGH). Each
	array contained 2,464 bacterial artificial chromosome and P1 clones,
	providing an average resolution of 1.5 Mb across the genome. A total
	of 54 muscle-invasive cases had follow-up information available.
	Overall outcome analysis was done for patients with muscle-invasive
	tumors having "good" (alive >2 years) versus "bad" (dead in <2 years)
	prognosis. RESULTS: Array CGH analysis showed significant increases
	in copy number alterations and genomic instability with increasing
	stage and with outcome. The fraction of genome altered (FGA) was
	significantly different between tumors of different stages (pT(a)
	versus pT1, P = 0.0003; pT(a) versus pT(2-4), P = 0.02; and pT1 versus
	pT(2-4), P = 0.03). Individual clones that differed significantly
	between different tumor stages were identified after adjustment for
	multiple comparisons (false discovery rate < 0.05). For muscle-invasive
	tumors, the FGA was associated with patient outcome (bad versus good
	prognosis patients, P = 0.002) and was identified as the only independent
	predictor of overall outcome based on a multivariate Cox proportional
	hazards method. Unsupervised hierarchical clustering separated "good"
	and "bad" prognosis muscle-invasive tumors into clusters that showed
	significant association with FGA and survival (Kaplan-Meier, P =
	0.019). Supervised tumor classification (prediction analysis for
	microarrays) had a 71\% classification success rate based on 102
	unique clones. CONCLUSIONS: Array-based CGH identified quantitative
	and qualitative differences in DNA copy number alterations at high
	resolution according to tumor stage and grade. Fraction genome altered
	was associated with worse outcome in muscle-invasive tumors, independent
	of other clinicopathologic parameters. Measures of genomic instability
	add independent power to outcome prediction of bladder tumors.},
  doi = {10.1158/1078-0432.CCR-05-0177},
  institution = {Department of Laboratory Medicine, University of California San Francisco,
	San Francisco, California 94143-0808, USA.},
  keywords = {Chromosome Mapping; Chromosomes, Artificial, Bacterial; Cluster Analysis;
	DNA; Disease Progression; Gene Deletion; Gene Expression Profiling;
	Gene Expression Regulation, Neoplastic; Genome; Humans; Image Processing,
	Computer-Assisted; Linkage (Genetics); Multivariate Analysis; Nucleic
	Acid Hybridization; Oligonucleotide Array Sequence Analysis; Phenotype;
	Prognosis; Proportional Hazards Models; Time Factors; Treatment Outcome;
	Urinary Bladder Neoplasms},
  owner = {jp},
  pii = {11/19/7012},
  pmid = {16203795},
  timestamp = {2009.10.05},
  url = {http://dx.doi.org/10.1158/1078-0432.CCR-05-0177}
}

@article{Bleakley2007Supervised,
  author = {Bleakley, K. and Biau, G. and Vert, J.-P.},
  title = {Supervised reconstruction of biological networks with local models},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {i57--i65},
  number = {13},
  month = jul,
  abstract = {MOTIVATION: Inference and reconstruction of biological networks from
	heterogeneous data is currently an active research subject with several
	important applications in systems biology. The problem has been attacked
	from many different points of view with varying degrees of success.
	In particular, predicting new edges with a reasonable false discovery
	rate is highly demanded for practical applications, but remains extremely
	challenging due to the sparsity of the networks of interest. RESULTS:
	While most previous approaches based on the partial knowledge of
	the network to be inferred build global models to predict new edges
	over the network, we introduce here a novel method which predicts
	whether there is an edge from a newly added vertex to each of the
	vertices of a known network using local models. This involves learning
	individually a certain subnetwork associated with each vertex of
	the known network, then using the discovered classification rule
	associated with only that vertex to predict the edge to the new vertex.
	Excellent experimental results are shown in the case of metabolic
	and protein-protein interaction network reconstruction from a variety
	of genomic data. AVAILABILITY: An implementation of the proposed
	algorithm is available upon request from the authors. CONTACT: Jean-Philippe.Vert@ensmp.fr.},
  doi = {10.1093/bioinformatics/btm204},
  pdf = {../local/Bleakley2007Supervised.pdf},
  file = {Bleakley2007Supervised.pdf:Bleakley2007Supervised.pdf:PDF},
  owner = {laurent},
  pii = {23/13/i57},
  pmid = {17646345},
  timestamp = {2007.07.27},
  url = {http://dx.doi.org/10.1093/bioinformatics/btm204}
}

@techreport{Bleakley2009Joint,
  author = {Bleakley, K. and Vert, J.-P.},
  title = {Joint segmentation of many {aCGH} profiles using fast group {LARS}},
  institution = {HAL},
  year = {2009},
  number = {hal-00422430},
  month = {October},
  abstract = {Array-Based Comparative Genomic Hybridization (aCGH) is a method used
	to search for genomic regions with copy numbers variations. For a
	given aCGH profile, one challenge is to accurately segment it into
	regions of constant copy number. Subjects sharing the same disease
	status, for example a type of cancer, often have aCGH profiles with
	similar copy number variations, due to duplications and deletions
	relevant to that particular disease. We introduce a constrained optimization
	algorithm that jointly segments aCGH profiles of many subjects. It
	simultaneously penalizes the amount of freedom the set of profiles
	have to jump from one level of constant copy number to another, at
	genomic locations known as breakpoints. We show that breakpoints
	shared by many different profiles tend to be found first by the algorithm,
	even in the presence of significant amounts of noise. The algorithm
	can be formulated as a group LARS problem. We propose an extremely
	fast way to find the solution path, i.e., a sequence of shared breakpoints
	in order of importance. For no extra cost the algorithm smoothes
	all of the aCGH profiles into piecewise-constant regions of equal
	copy number, giving low-dimensional versions of the original data.
	These can be shown for all profiles on a single graph, allowing for
	intuitive visual interpretation. Simulations and an implementation
	of the algorithm on bladder cancer aCGH profiles are provided.},
  owner = {jp},
  timestamp = {2010.01.11},
  url = {http://hal.archives-ouvertes.fr/hal-00422430}
}

@article{Bleicher2003Hit,
  author = {K. H. Bleicher and H.-J. B{\"o}hm and K. M{\"u}ller and A. I. Alanine},
  title = {{H}it and lead generation: beyond high-throughput screening.},
  journal = {Nat Rev Drug Discov},
  year = {2003},
  volume = {2},
  pages = {369--378},
  number = {5},
  month = {May},
  abstract = {The identification of small-molecule modulators of protein function,
	and the process of transforming these into high-content lead series,
	are key activities in modern drug discovery. The decisions taken
	during this process have far-reaching consequences for success later
	in lead optimization and even more crucially in clinical development.
	Recently, there has been an increased focus on these activities due
	to escalating downstream costs resulting from high clinical failure
	rates. In addition, the vast emerging opportunities from efforts
	in functional genomics and proteomics demands a departure from the
	linear process of identification, evaluation and refinement activities
	towards a more integrated parallel process. This calls for flexible,
	fast and cost-effective strategies to meet the demands of producing
	high-content lead series with improved prospects for clinical success.},
  doi = {10.1038/nrd1086},
  keywords = {Amino Acid Motifs, Combinatorial Chemistry Techniques, Drug Design,
	Drug Evaluation, Genomics, Preclinical, Proteomics, 12750740},
  owner = {mahe},
  pii = {nrd1086},
  pmid = {12750740},
  timestamp = {2006.08.15},
  url = {http://dx.doi.org/10.1038/nrd1086}
}

@article{Blow2008DNA,
  author = {Blow, N.},
  title = {{DNA} sequencing: generation next-next},
  journal = {Nat. Meth.},
  year = {2008},
  volume = {5},
  pages = {267--274},
  number = {3},
  abstract = {Emboldened by the success of next-generation sequencing, scientists
	are pursuing the holy grail of genomics---the '\$1,000 genome'---with
	single-molecule approaches. Nathan Blow reports.},
  doi = {10.1038/nmeth0308-267},
  pdf = {../local/Blow2008DNA.pdf},
  file = {Blow2008DNA.pdf:Blow2008DNA.pdf:PDF},
  keywords = {csbcbook-ch2, csbcbook},
  owner = {jp},
  url = {http://dx.doi.org/10.1038/nmeth0308-267}
}

@article{Blows2010Subtyping,
  author = {Fiona M Blows and Kristy E Driver and Marjanka K Schmidt and Annegien
	Broeks and Flora E van Leeuwen and Jelle Wesseling and Maggie C Cheang
	and Karen Gelmon and Torsten O Nielsen and Carl Blomqvist and P{\"a}ivi
	Heikkil{\"a} and Tuomas Heikkinen and Heli Nevanlinna and Lars A Akslen
	and Louis R B{\'e}gin and William D Foulkes and Fergus J Couch and Xianshu
	Wang and Vicky Cafourek and Janet E Olson and Laura Baglietto and
	Graham G Giles and Gianluca Severi and Catriona A McLean and Melissa
	C Southey and Emad Rakha and Andrew R Green and Ian O Ellis and Mark
	E Sherman and Jolanta Lissowska and William F Anderson and Angela
	Cox and Simon S Cross and Malcolm W R Reed and Elena Provenzano and
	Sarah-Jane Dawson and Alison M Dunning and Manjeet Humphreys and
	Douglas F Easton and Montserrat Garc{\'\i}a-Closas and Carlos Caldas and
	Paul D Pharoah and David Huntsman},
  title = {Subtyping of breast cancer by immunohistochemistry to investigate
	a relationship between subtype and short and long term survival:
	a collaborative analysis of data for 10,159 cases from 12 studies.},
  journal = {PLoS Med},
  year = {2010},
  volume = {7},
  pages = {e1000279},
  number = {5},
  month = {May},
  abstract = {Immunohistochemical markers are often used to classify breast cancer
	into subtypes that are biologically distinct and behave differently.
	The aim of this study was to estimate mortality for patients with
	the major subtypes of breast cancer as classified using five immunohistochemical
	markers, to investigate patterns of mortality over time, and to test
	for heterogeneity by subtype. We pooled data from more than 10,000
	cases of invasive breast cancer from 12 studies that had collected
	information on hormone receptor status, human epidermal growth factor
	receptor-2 (HER2) status, and at least one basal marker (cytokeratin
	[CK]5/6 or epidermal growth factor receptor [EGFR]) together with
	survival time data. Tumours were classified as luminal and nonluminal
	tumours according to hormone receptor expression. These two groups
	were further subdivided according to expression of HER2, and finally,
	the luminal and nonluminal HER2-negative tumours were categorised
	according to expression of basal markers. Changes in mortality rates
	over time differed by subtype. In women with luminal HER2-negative
	subtypes, mortality rates were constant over time, whereas mortality
	rates associated with the luminal HER2-positive and nonluminal subtypes
	tended to peak within 5 y of diagnosis and then decline over time.
	In the first 5 y after diagnosis the nonluminal tumours were associated
	with a poorer prognosis, but over longer follow-up times the prognosis
	was poorer in the luminal subtypes, with the worst prognosis at 15
	y being in the luminal HER2-positive tumours. Basal marker expression
	distinguished the HER2-negative luminal and nonluminal tumours into
	different subtypes. These patterns were independent of any systemic
	adjuvant therapy. The six subtypes of breast cancer defined by expression
	of five markers show distinct behaviours with important differences
	in short term and long term prognosis. Application of these markers
	in the clinical setting could have the potential to improve the targeting
	of adjuvant chemotherapy to those most likely to benefit. The different
	patterns of mortality over time also suggest important biological
	differences between the subtypes that may result in differences in
	response to specific therapies, and that stratification of breast
	cancers by clinically relevant subtypes in clinical trials is urgently
	required.},
  doi = {10.1371/journal.pmed.1000279},
  pdf = {../local/Blows2010Subtyping.pdf},
  file = {Blows2010Subtyping.pdf:Blows2010Subtyping.pdf:PDF},
  institution = {Department of Oncology, University of Cambridge, United Kingdom.},
  keywords = {Adult; Aged; Aged, 80 and over; Breast Neoplasms, metabolism/mortality/pathology;
	Female; Hormones, analysis; Humans; Immunohistochemistry; Keratins;
	Middle Aged; Prognosis; Proportional Hazards Models; Receptor, Epidermal
	Growth Factor, analysis; Receptors, Cell Surface, metabolism; Tumor
	Markers, Biological, analysis; Young Adult},
  language = {eng},
  medline-pst = {epublish},
  owner = {phupe},
  pmid = {20520800},
  timestamp = {2011.06.01},
  url = {http://dx.doi.org/10.1371/journal.pmed.1000279}
}

@inproceedings{Blum1998Combining,
  author = {Blum, A. and Mitchell, T.},
  title = {Combining labeled and unlabeled data with co-training},
  booktitle = {COLT' 98: Proceedings of the eleventh annual conference on Computational
	learning theory},
  year = {1998},
  pages = {92--100},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/279943.279962},
  isbn = {1-58113-057-0},
  location = {Madison, Wisconsin, United States},
  owner = {mordelet},
  timestamp = {2010.10.27},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.114.9164}
}

@article{Bock2005Virtual,
  author = {Bock, J. R. and Gough, D. A.},
  title = {Virtual screen for ligands of orphan {G} protein-coupled receptors.},
  journal = {J. Chem. Inform. Model.},
  year = {2005},
  volume = {45},
  pages = {1402--1414},
  number = {5},
  abstract = {This paper describes a virtual screening methodology that generates
	a ranked list of high-binding small molecule ligands for orphan G
	protein-coupled receptors (oGPCRs), circumventing the requirement
	for receptor three-dimensional structure determination. Features
	representing the receptor are based only on physicochemical properties
	of primary amino acid sequence, and ligand features use the two-dimensional
	atomic connection topology and atomic properties. An experimental
	screen comprised nearly 2 million hypothetical oGPCR-ligand complexes,
	from which it was observed that the top 1.96\% predicted affinity
	scores corresponded to "highly active" ligands against orphan receptors.
	Results representing predicted high-scoring novel ligands for many
	oGPCRs are presented here. Validation of the method was carried out
	in several ways: (1) A random permutation of the structure-activity
	relationship of the training data was carried out; by comparing test
	statistic values of the randomized and nonshuffled data, we conclude
	that the value obtained with nonshuffled data is unlikely to have
	been encountered by chance. (2) Biological activities linked to the
	compounds with high cross-target binding affinity were analyzed using
	computed log-odds from a structure-based program. This information
	was correlated with literature citations where GPCR-related pathways
	or processes were linked to the bioactivity in question. (3) Anecdotal,
	out-of-sample predictions for nicotinic targets and known ligands
	were performed, with good accuracy in the low-to-high "active" binding
	range. (4) An out-of-sample consistency check using the commercial
	antipsychotic drug olanzapine produced "active" to "highly-active"
	predicted affinities for all oGPCRs in our study, an observation
	that is consistent with documented findings of cross-target affinity
	of this compound for many different GPCRs. It is suggested that this
	virtual screening approach may be used in support of the functional
	characterization of oGPCRs by identifying potential cognate ligands.
	Ultimately, this approach may have implications for pharmaceutical
	therapies to modulate the activity of faulty or disease-related cellular
	signaling pathways. In addition to application to cell surface receptors,
	this approach is a generalized strategy for discovery of small molecules
	that may bind intracellular enzymes and involve protein-protein interactions.},
  doi = {10.1021/ci050006d},
  pdf = {../local/Bock2005Virtual.pdf},
  file = {Bock2005Virtual.pdf:Bock2005Virtual.pdf:PDF},
  keywords = {chemogenomics},
  owner = {laurent},
  pmid = {16180917},
  timestamp = {2007.07.30},
  url = {http://dx.doi.org/10.1021/ci050006d}
}

@article{Bock2003Whole-proteome,
  author = {Bock, J. R. and Gough, D. A.},
  title = {Whole-proteome interaction mining},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {125--134},
  number = {1},
  abstract = {Motivation: {A} major post-genomic scientific and technological pursuit
	is to describe the functions performed by the proteins encoded by
	the genome. {O}ne strategy is to first identify the protein-protein
	interactions in a proteome, then determine pathways and overall structure
	relating these interactions, and finally to statistically infer functional
	roles of individual proteins. {A}lthough huge amounts of genomic
	data are at hand, current experimental protein interaction assays
	must overcome technical problems to scale-up for high-throughput
	analysis. {I}n the meantime, bioinformatics approaches may help bridge
	the information gap required for inference of protein function. {I}n
	this paper, a previously described data mining approach to prediction
	of protein-protein interactions ({B}ock and {G}ough, 2001, {B}ioinformatics,
	17, 455-460) is extended to interaction mining on a proteome-wide
	scale. {A}n algorithm (the phylogenetic bootstrap) is introduced,
	which suggests traversal of a phenogram, interleaving rounds of computation
	and experiment, to develop a knowledge base of protein interactions
	in genetically-similar organisms. {R}esults: {T}he interaction mining
	approach was demonstrated by building a learning system based on
	1,039 experimentally validated protein-protein interactions in the
	human gastric bacterium {H}elicobacter pylori. {A}n estimate of the
	generalization performance of the classifier was derived from 10-fold
	cross-validation, which indicated expected upper bounds on precision
	of 80\% and sensitivity of 69\% when applied to related organisms.
	{O}ne such organism is the enteric pathogen {C}ampylobacter jejuni,
	in which comprehensive machine learning prediction of all possible
	pairwise protein-protein interactions was performed. {T}he resulting
	network of interactions shares an average protein connectivity characteristic
	in common with previous investigations reported in the literature,
	offering strong evidence supporting the biological feasibility of
	the hypothesized map. {F}or inferences about complete proteomes in
	which the number of pairwise non-interactions is expected to be much
	larger than the number of actual interactions, we anticipate that
	the sensitivity will remain the same but precision may decrease.
	{W}e present specific biological examples of two subnetworks of protein-protein
	interactions in {C}. jejuni resulting from the application of this
	approach, including elements of a two-component signal transduction
	systems for thermoregulation, and a ferritin uptake network. {C}ontact:
	dgough@bioeng.ucsd.edu},
  pdf = {../local/Bock2003Whole-proteome.pdf},
  file = {Bock2003Whole-proteome.pdf:local/Bock2003Whole-proteome.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/1/125}
}

@article{Bock2002New,
  author = {Bock, J. R. and Gough, D. A.},
  title = {A {N}ew {M}ethod to {E}stimate {L}igand-{R}eceptor {E}nergetics},
  journal = {Mol {C}ell {P}roteomics},
  year = {2002},
  volume = {1},
  pages = {904--910},
  number = {11},
  abstract = {In the discovery of new drugs, lead identification and optimization
	have assumed critical importance given the number of drug targets
	generated from genetic, genomics, and proteomic technologies. {H}igh-throughput
	experimental screening assays have been complemented recently by
	"virtual screening" approaches to identify and filter potential ligands
	when the characteristics of a target receptor structure of interest
	are known. {V}irtual screening mandates a reliable procedure for
	automatic ranking of structurally distinct ligands in compound library
	databases. {C}omputing a rank score requires the accurate prediction
	of binding affinities between these ligands and the target. {M}any
	current scoring strategies require information about the target three-dimensional
	structure. {I}n this study, a new method to estimate the free binding
	energy between a ligand and receptor is proposed. {W}e extend a central
	idea previously reported ({B}ock, {J}. {R}., and {G}ough, {D}. {A}.
	(2001) {P}redicting protein-protein interactions from primary structure.
	{B}ioinformatics 17, 455-460; {B}ock, {J}. {R}., and {G}ough, {D}.
	{A}. (2002) {W}hole-proteome interaction mining. {B}ioinformatics,
	in press) that uses simple descriptors to represent biomolecules
	as input examples to train a support vector machine ({S}mola, {A}.
	{J}., and {S}cholkopf, {B}. (1998) {A} {T}utorial on {S}upport {V}ector
	{R}egression, {N}euro{COLT} {T}echnical {R}eport {NC}-{TR}-98-030,
	{R}oyal {H}olloway {C}ollege, {U}niversity of {L}ondon, {UK}) and
	the application of the trained system to previously unseen pairs,
	estimating their propensity for interaction. {H}ere we seek to learn
	the function that maps features of a receptor-ligand pair onto their
	equilibrium free binding energy. {T}hese features do not comprise
	any direct information about the three-dimensional structures of
	ligand or target. {I}n cross-validation experiments, it is demonstrated
	that objective measurements of prediction error rate and rank-ordering
	statistics are competitive with those of several other investigations,
	most of which depend on three-dimensional structural data. {T}he
	size of the sample (n = 2,671) indicates that this approach is robust
	and may have widespread applicability beyond restricted families
	of receptor types. {I}t is concluded that newly sequenced proteins,
	or those for which three-dimensional crystal structures are not easily
	obtained, can be rapidly analyzed for their binding potential against
	a library of ligands using this methodology.},
  pdf = {../local/Bock2002New.pdf},
  file = {Bock2002New.pdf:local/Bock2002New.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.mcponline.org/cgi/content/abstract/1/11/904}
}

@article{Bock2001Predicting,
  author = {Bock, J. R. and Gough, D. A.},
  title = {Predicting protein-protein interactions from primary structure},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {455--460},
  number = {5},
  pdf = {../local/bock01.pdf},
  file = {bock01.pdf:local/bock01.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://bioinformatics.oupjournals.org/cgi/reprint/17/5/455.pdf}
}

@article{Bock2007Effective,
  author = {Mary Ellen Bock and Claudio Garutti and Concettina Guerra},
  title = {Effective labeling of molecular surface points for cavity detection
	and location of putative binding sites.},
  journal = {Comput Syst Bioinformatics Conf},
  year = {2007},
  volume = {6},
  pages = {263--274},
  abstract = {We present a method for detecting and comparing cavities on protein
	surfaces that is useful for protein binding site recognition. The
	method is based on a representation of the protein structures by
	a collection of spin-images and their associated spin-image profiles.
	Results of the cavity detection procedure are presented for a large
	set of non-redundant proteins and compared with SURFNET-ConSurf.
	Our comparison method is used to find a surface region in one cavity
	of a protein that is geometrically similar to a surface region in
	the cavity of another protein. Such a finding would be an indication
	that the two regions likely bind to the same ligand. Our overall
	approach for cavity detection and comparison is benchmarked on several
	pairs of known complexes, obtaining a good coverage of the atoms
	of the binding sites.},
  institution = {Department of Statistics, Purdue University 150 N. University Street,
	West Lafayette, IN 47907-2067, USA. mbock@purdue.edu},
  keywords = {Binding Sites; Computer Simulation; Models, Chemical; Models, Molecular;
	Protein Binding; Protein Conformation; Protein Folding; Proteins,
	chemistry/ultrastructure; Sequence Analysis, Protein, methods; Surface
	Properties},
  owner = {bricehoffmann},
  pii = {9781860948732_0028},
  pmid = {17951830},
  timestamp = {2009.02.13}
}

@article{Bockaert1999Molecular,
  author = {Bockaert, J. and Pin, J. P.},
  title = {Molecular tinkering of {G} protein-coupled receptors: an evolutionary
	success},
  journal = {EMBO J.},
  year = {1999},
  volume = {18},
  pages = {1723--1729},
  number = {7},
  month = {Apr},
  abstract = {Among membrane-bound receptors, the G protein-coupled receptors (GPCRs)
	are certainly the most diverse. They have been very successful during
	evolution, being capable of transducing messages as different as
	photons, organic odorants, nucleotides, nucleosides, peptides, lipids
	and proteins. Indirect studies, as well as two-dimensional crystallization
	of rhodopsin, have led to a useful model of a common 'central core',
	composed of seven transmembrane helical domains, and its structural
	modifications during activation. There are at least six families
	of GPCRs showing no sequence similarity. They use an amazing number
	of different domains both to bind their ligands and to activate G
	proteins. The fine-tuning of their coupling to G proteins is regulated
	by splicing, RNA editing and phosphorylation. Some GPCRs have been
	found to form either homo- or heterodimers with a structurally different
	GPCR, but also with membrane-bound proteins having one transmembrane
	domain such as nina-A, odr-4 or RAMP, the latter being involved in
	their targeting, function and pharmacology. Finally, some GPCRs are
	unfaithful to G proteins and interact directly, via their C-terminal
	domain, with proteins containing PDZ and Enabled/VASP homology (EVH)-like
	domains.},
  doi = {10.1093/emboj/18.7.1723},
  keywords = {chemogenomics},
  owner = {laurent},
  pmid = {10202136},
  timestamp = {2008.01.16},
  url = {http://dx.doi.org/10.1093/emboj/18.7.1723}
}

@article{Boese2005Mechanistic,
  author = {Boese, Q. and Leake, D. and Reynolds, A. and Read, S. and Scaringe,
	S. A. and Marshall, W. S. and Khvorova, A.},
  title = {Mechanistic insights aid computational short interfering {RNA} design.},
  journal = {Methods {E}nzymol.},
  year = {2005},
  volume = {392},
  pages = {73--96},
  abstract = {{RNA} interference is widely recognized for its utility as a functional
	genomics tool. {I}n the absence of reliable target site selection
	tools, however, the impact of {RNA} interference ({RNA}i) may be
	diminished. {T}he primary determinants of silencing are influenced
	by highly coordinated {RNA}-protein interactions that occur throughout
	the {RNA}i process, including short interfering {RNA} (si{RNA}) binding
	and unwinding followed by target recognition, cleavage, and subsequent
	product release. {R}ecently developed strategies for identification
	of functional si{RNA}s reveal that thermodynamic and si{RNA} sequence-specific
	properties are crucial to predict functional duplexes ({K}hvorova
	et al., 2003; {R}eynolds et al., 2004; {S}chwarz et al., 2003). {A}dditional
	assessments of si{RNA} specificity reveal that more sophisticated
	sequence comparison tools are also required to minimize potential
	off-target effects ({J}ackson et al., 2003; {S}emizarov et al., 2003).
	{T}his chapter reviews the biological basis for current computational
	design tools and how best to utilize and assess their predictive
	capabilities for selecting functional and specific si{RNA}s.},
  doi = {10.1016/S0076-6879(04)92005-8},
  keywords = {sirna},
  pii = {S0076687904920058},
  url = {http://dx.doi.org/10.1016/S0076-6879(04)92005-8}
}

@article{Boeva2011Control-free,
  author = {Boeva, V. and Zinovyev, A. and Bleakley, K. and Vert, J.-P. and Janoueix-Lerosey,
	I. and Delattre, O. and Barillot, E.},
  title = {Control-free calling of copy number alterations in deep-sequencing
	data using {GC}-content normalization.},
  journal = {Bioinformatics},
  year = {2011},
  volume = {27},
  pages = {268--269},
  number = {2},
  month = {Jan},
  abstract = {We present a tool for control-free copy number alteration (CNA) detection
	using deep-sequencing data, particularly useful for cancer studies.
	The tool deals with two frequent problems in the analysis of cancer
	deep-sequencing data: absence of control sample and possible polyploidy
	of cancer cells. FREEC (control-FREE Copy number caller) automatically
	normalizes and segments copy number profiles (CNPs) and calls CNAs.
	If ploidy is known, FREEC assigns absolute copy number to each predicted
	CNA. To normalize raw CNPs, the user can provide a control dataset
	if available; otherwise GC content is used. We demonstrate that for
	Illumina single-end, mate-pair or paired-end sequencing, GC-content
	normalization provides smooth profiles that can be further segmented
	and analyzed in order to predict CNAs. Source code and sample data
	are available at http://bioinfo-out.curie.fr/projects/freec/. Contact:
	freec@curie.fr. Supplementary data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btq635},
  pdf = {../local/Boeva2011Control-free.pdf},
  file = {Boeva2011Control-free.pdf:Boeva2011Control-free.pdf:PDF},
  institution = {Institut Curie, INSERM, U900, Paris, F-75248, Mines ParisTech, Fontainebleau,
	F-77300 and INSERM, U830, Paris, F-75248 France.},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btq635},
  pmid = {21081509},
  timestamp = {2011.01.25},
  url = {http://dx.doi.org/10.1093/bioinformatics/btq635}
}

@article{Bohnert2009Transcript,
  author = {Bohnert, R. and Behr, J. and R{\"a}tsch, G.},
  title = {Transcript quantification with {RNA-Seq} data},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10 (Suppl 13)},
  pages = {P5},
  doi = {10.1186/1471-2105-10-S13-P5},
  pdf = {../local/Bohnert2009Transcript.pdf},
  file = {Bohnert2009Transcript.pdf:Bohnert2009Transcript.pdf:PDF},
  keywords = {ngs, rnaseq},
  owner = {jp},
  timestamp = {2012.03.06},
  url = {http://dx.doi.org/10.1186/1471-2105-10-S13-P5}
}

@article{Bolstad2003comparison,
  author = {Bolstad, B. M. and Irizarry, R. A. and {\AA}strand, M. and Speed, T. P.},
  title = {A comparison of normalization methods for high density oligonucleotide
	array data based on variance and bias},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {185--193},
  number = {2},
  publisher = {Oxford Univ Press}
}

@article{Bonachera2008Fuzzy,
  author = {Bonach{\'e}ra, F. and Horvath, D.},
  title = {Fuzzy tricentric pharmacophore fingerprints. 2. Application of topological
	fuzzy pharmacophore triplets in quantitative structure-activity relationships.},
  journal = {J. Chem. Inf. Model.},
  year = {2008},
  volume = {48},
  pages = {409--425},
  number = {2},
  month = {Feb},
  abstract = {Topological fuzzy pharmacophore triplets (2D-FPT), using the number
	of interposed bonds to measure separation between the atoms representing
	pharmacophore types, were employed to establish and validate quantitative
	structure-activity relationships (QSAR). Thirteen data sets for which
	state-of-the-art QSAR models were reported in literature were revisited
	in order to benchmark 2D-FPT biological activity-explaining propensities.
	Linear and nonlinear QSAR models were constructed for each compound
	series (following the original author's splitting into training/validation
	subsets) with three different 2D-FPT versions, using the genetic
	algorithm-driven Stochastic QSAR sampler (SQS) to pick relevant triplets
	and fit their coefficients. 2D-FPT QSARs are computationally cheap,
	interpretable, and perform well in benchmarking. In a majority of
	cases (10/13), default 2D-FPT models validated better than or as
	well as the best among those reported, including 3D overlay-dependent
	approaches. Most of the analogues series, either unaffected by protonation
	equilibria or unambiguously adopting expected protonation states,
	were equally well described by rule- or pKa-based pharmacophore flagging.
	Thermolysin inhibitors represent a notable exception: pKa-based flagging
	boosts model quality, although--surprisingly--not due to proteolytic
	equilibrium effects. The optimal degree of 2D-FPT fuzziness is compound
	set dependent. This work further confirmed the higher robustness
	of nonlinear over linear SQS models. In spite of the wealth of studied
	sets, benchmarking is nevertheless flawed by low intraset diversity:
	a whole series of thereby caused artifacts were evidenced, implicitly
	raising questions about the way QSAR studies are conducted nowadays.
	An in-depth investigation of thrombin inhibition models revealed
	that some of the selected triplets make sense (one of these stands
	for a topological pharmacophore covering the P1 and P2 binding pockets).
	Nevertheless, equations were either unable to predict the activity
	of the structurally different ligands or tended to indiscriminately
	predict any compound outside the training family to be active. 2D-FPT
	QSARs do however not depend on any common scaffold required for molecule
	superimposition and may in principle be trained on hand of diverse
	sets, which is a must in order to obtain widely applicable models.
	Adding (assumed) inactives of various families for training enabled
	discovery of models that specifically recognize the structurally
	different actives.},
  doi = {10.1021/ci7003237},
  pdf = {../local/Bonachera2008Fuzzy.pdf},
  file = {Bonachera2008Fuzzy.pdf:Bonachera2008Fuzzy.pdf:PDF},
  institution = {Fonctionnelle, Universit{\'e} des Sciences et Technologies de Lille,
	B{\^a}t. C9-59655 Villeneuve d'Ascq Cedex, France.},
  keywords = {chemoinformatics},
  owner = {jp},
  pmid = {18254617},
  timestamp = {2009.03.12},
  url = {http://dx.doi.org/10.1021/ci7003237}
}

@article{Bonachera2006Fuzzy,
  author = {Bonach{\'e}ra, F. and Parent, B. and Barbosa, F. and Froloff, N.
	and Horvath, D.},
  title = {Fuzzy tricentric pharmacophore fingerprints. 1. Topological fuzzy
	pharmacophore triplets and adapted molecular similarity scoring schemes.},
  journal = {J. Chem. Inf. Model.},
  year = {2006},
  volume = {46},
  pages = {2457--2477},
  number = {6},
  abstract = {This paper introduces a novel molecular description--topological (2D)
	fuzzy pharmacophore triplets, 2D-FPT--using the number of interposed
	bonds as the measure of separation between the atoms representing
	pharmacophore types (hydrophobic, aromatic, hydrogen-bond donor and
	acceptor, cation, and anion). 2D-FPT features three key improvements
	with respect to the state-of-the-art pharmacophore fingerprints:
	(1) The first key novelty is fuzzy mapping of molecular triplets
	onto the basis set of pharmacophore triplets: unlike in the binary
	scheme where an atom triplet is set to highlight the bit of a single,
	best-matching basis triplet, the herein-defined fuzzy approach allows
	for gradual mapping of each atom triplet onto several related basis
	triplets, thus minimizing binary classification artifacts. (2) The
	second innovation is proteolytic equilibrium dependence, by explicitly
	considering all of the conjugated acids and bases (microspecies).
	2D-FPTs are concentration-weighted (as predicted at pH=7.4) averages
	of microspecies fingerprints. Therefore, small structural modifications,
	not affecting the overall pharmacophore pattern (in the sense of
	classical rule-based assignment), but nevertheless triggering a pKa
	shift, will have a major impact on 2D-FPT. Pairs of almost identical
	compounds with significantly differing activities ("activity cliffs"
	in classical descriptor spaces) were in many cases predictable by
	2D-FPT. (3) The third innovation is a new similarity scoring formula,
	acknowledging that the simultaneous absence of a triplet in two molecules
	is a less-constraining indicator of similarity than its simultaneous
	presence. It displays excellent neighborhood behavior, outperforming
	2D or 3D two-point pharmacophore descriptors or chemical fingerprints.
	The 2D-FPT calculator was developed using the chemoinformatics toolkit
	of ChemAxon (www.chemaxon.com).},
  doi = {10.1021/ci6002416},
  pdf = {../local/Bonachera2006Fuzzy.pdf},
  file = {Bonachera2006Fuzzy.pdf:Bonachera2006Fuzzy.pdf:PDF},
  institution = {ex, France.},
  keywords = {chemoinformatics},
  owner = {jp},
  pmid = {17125187},
  timestamp = {2009.03.12},
  url = {http://dx.doi.org/10.1021/ci6002416}
}

@book{Bondy1976Graph,
  author    = {J. A. Bondy and U. S. R. Murty},
  title     = {Graph theory with applications},
  publisher = {Macmillan Press Ltd.},
  year      = {1976}
}

@incollection{Bonilla2008Multi-task,
  author = {Edwin Bonilla and Kian Ming Chai and Chris Williams},
  title = {Multi-task {Gaussian} Process Prediction},
  booktitle = {Advances in Neural Information Processing Systems 20},
  publisher = {MIT Press},
  year = {2008},
  editor = {J. C. Platt and D. Koller and Y. Singer and S. Roweis},
  address = {Cambridge, MA}
}

@inproceedings{Bonilla2007Kernel,
  author = {Edwin V. Bonilla and Felix V. Agakov and Christopher K. I. Williams},
  title = {Kernel Multi-task Learning using Task-specific Features},
  booktitle = {Proceedings of the 11th International Conference on Artificial Intelligence
	and Statistics},
  year = {2007},
  month = mar,
  publisher = {Omnipress},
  location = {San Juan, Puerto Rico}
}

@article{Boobis2002In,
  author = {A. Boobis and U. Gundert-Remy and P. Kremers and P. Macheras and
	O. Pelkonen},
  title = {In silico prediction of {ADME} and pharmacokinetics. {Report} of
	an expert meeting organised by {COST} {B15}.},
  journal = {Eur. J. Pharm. Sci.},
  year = {2002},
  volume = {17},
  pages = {183--193},
  number = {4--5},
  month = {Dec},
  abstract = {The computational approach is one of the newest and fastest developing
	techniques in pharmacokinetics, ADME (absorption, distribution, metabolism,
	excretion) evaluation, drug discovery and toxicity. However, to date,
	the software packages devoted to ADME prediction, especially of metabolism,
	have not yet been adequately validated and still require improvements
	to be effective. Most are 'open' systems, under constant evolution
	and able to incorporate rapidly, and often easily, new information
	from user or developer databases. Quantitative in silico predictions
	are now possible for several pharmacokinetic (PK) parameters, particularly
	absorption and distribution. The emerging consensus is that the predictions
	are no worse than those made using in vitro tests, with the decisive
	advantage that much less investment in technology, resources and
	time is needed. In addition, and of critical importance, it is possible
	to screen virtual compounds. Some packages are able to handle thousands
	of molecules in a few hours. However, common experience shows that,
	in part at least for essentially irrational reasons, there is currently
	a lack of confidence in these approaches. An effort should be made
	by the software producers towards more transparency, in order to
	improve the confidence of their consumers. It seems highly probable
	that in silico approaches will evolve rapidly, as did in vitro methods
	during the last decade. Past experience with the latter should be
	helpful in avoiding repetition of similar errors and in taking the
	necessary steps to ensure effective implementation. A general concern
	is the lack of access to the large amounts of data on compounds no
	longer in development, but still kept secret by the pharmaceutical
	industry. Controlled access to these data could be particularly helpful
	in validating new in silico approaches.},
  keywords = {Adsorption, Biological Availability, Chemical, Computer Simulation,
	Models, Pharmaceutical, Pharmaceutical Preparations, Predictive Value
	of Tests, Software, Technology, 12453607},
  owner = {mahe},
  pii = {S0928098702001859},
  pmid = {12453607},
  timestamp = {2006.08.16}
}

@article{Bookstein1989Principal,
  author = {Bookstein, F. L.},
  title = {Principal warps: thin-plate splines and the decomposition of deformations},
  journal = {IEEE T. Pattern. Anal.},
  year = {1989},
  volume = {11},
  pages = {567--585},
  number = {6},
  abstract = {The decomposition of deformations by principal warps is demonstrated.
	The method is extended to deal with curving edges between landmarks.
	This formulation is related to other applications of splines current
	in computer vision. How they might aid in the extraction of features
	for analysis, comparison, and diagnosis of biological and medical
	images is indicated},
  doi = {10.1109/34.24792},
  pdf = {../local/Bookstein1989Principal.pdf},
  file = {Bookstein1989Principal.pdf:Bookstein1989Principal.pdf:PDF},
  owner = {jp},
  timestamp = {2009.10.30},
  url = {http://dx.doi.org/10.1109/34.24792}
}

@book{Border1985Fixed,
  author    = {Border, K. C.},
  title     = {Fixed point theorems with applications to economics and game theory},
  publisher = {Cambridge University Press},
  address   = {Cambridge, UK},
  year      = {1985},
  owner     = {jp},
  timestamp = {2011.04.30}
}

@article{Bordner2005Statistical,
  author = {Andrew J Bordner and Ruben Abagyan},
  title = {Statistical analysis and prediction of protein-protein interfaces.},
  journal = {Proteins},
  year = {2005},
  volume = {60},
  pages = {353--366},
  number = {3},
  month = {Aug},
  abstract = {Predicting protein-protein interfaces from a three-dimensional structure
	is a key task of computational structural proteomics. {I}n contrast
	to geometrically distinct small molecule binding sites, protein-protein
	interface are notoriously difficult to predict. {W}e generated a
	large nonredundant data set of 1494 true protein-protein interfaces
	using biological symmetry annotation where necessary. {T}he data
	set was carefully analyzed and a {S}upport {V}ector {M}achine was
	trained on a combination of a new robust evolutionary conservation
	signal with the local surface properties to predict protein-protein
	interfaces. {F}ivefold cross validation verifies the high sensitivity
	and selectivity of the model. {A}s much as 97\% of the predicted
	patches had an overlap with the true interface patch while only 22\%
	of the surface residues were included in an average predicted patch.
	{T}he model allowed the identification of potential new interfaces
	and the correction of mislabeled oligomeric states.},
  doi = {10.1002/prot.20433},
  pdf = {../local/Bordner2005Statistical.pdf},
  file = {Bordner2005Statistical.pdf:local/Bordner2005Statistical.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1002/prot.20433}
}

@article{Borgwardt2005Protein,
  author = {Borgwardt, K. M. and Ong, C. S. and Sch{\"o}nauer, S. and Vishwanathan,
	S. V. N. and Smola, A. J. and Kriegel, H.-P.},
  title = {Protein function prediction via graph kernels.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {i47--i56},
  number = {Suppl. 1},
  month = {Jun},
  abstract = {M{OTIVATION}: {C}omputational approaches to protein function prediction
	infer protein function by finding proteins with similar sequence,
	structure, surface clefts, chemical properties, amino acid motifs,
	interaction partners or phylogenetic profiles. {W}e present a new
	approach that combines sequential, structural and chemical information
	into one graph model of proteins. {W}e predict functional class membership
	of enzymes and non-enzymes using graph kernels and support vector
	machine classification on these protein graphs. {RESULTS}: {O}ur
	graph model, derivable from protein sequence and structure only,
	is competitive with vector models that require additional protein
	information, such as the size of surface pockets. {I}f we include
	this extra information into our graph model, our classifier yields
	significantly higher accuracy levels than the vector models. {H}yperkernels
	allow us to select and to optimally combine the most relevant node
	attributes in our protein graphs. {W}e have laid the foundation for
	a protein function prediction system that integrates protein information
	from various sources efficiently and effectively. {AVAILABILITY}:
	{M}ore information available via www.dbs.ifi.lmu.de/{M}itarbeiter/borgwardt.html.
	{CONTACT}: borgwardt@dbs.ifi.lmu.de.},
  doi = {10.1093/bioinformatics/bti1007},
  pdf = {../local/Borgwardt2005Protein.pdf},
  file = {Borgwardt2005Protein.pdf:local/Borgwardt2005Protein.pdf:PDF},
  keywords = {biosvm},
  pii = {21/suppl_1/i47},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1007}
}

@inproceedings{Borgwardt2005Shortest-Path,
  author = {Karsten M. Borgwardt and Hans-Peter Kriegel},
  title = {Shortest-Path Kernels on Graphs},
  booktitle = {{ICDM} '05: {P}roceedings of the {F}ifth {IEEE} {I}nternational {C}onference
	on {D}ata {M}ining},
  year = {2005},
  pages = {74--81},
  address = {Washington, DC, USA},
  publisher = {IEEE Computer Society},
  doi = {10.1109/ICDM.2005.132},
  pdf = {../local/Borgwardt2005Shortest-Path.pdf},
  file = {Borgwardt2005Shortest-Path.pdf:Borgwardt2005Shortest-Path.pdf:PDF},
  isbn = {0-7695-2278-5},
  keywords = {chemoinformatics kernel-theory}
}

@book{Borwein2000Convex,
  title = {Convex Analysis and Nonlinear Optimization},
  publisher = {Springer-Verlag},
  year = {2000},
  author = {J. M. Borwein and A. S. Lewis},
  address = {New York}
}

@inproceedings{Boser1992training,
  author    = {Boser, B. E. and Guyon, I. M. and Vapnik, V. N.},
  title     = {A training algorithm for optimal margin classifiers},
  booktitle = {Proceedings of the 5th annual {ACM} workshop on {C}omputational {L}earning
	{T}heory},
  pages     = {144--152},
  year      = {1992},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  location  = {Pittsburgh, Pennsylvania, United States},
  url       = {http://www.clopinet.com/isabelle/Papers/colt92.ps.Z},
  pdf       = {../local/bose92.pdf},
  file      = {bose92.pdf:local/bose92.pdf:PDF},
  subject   = {kernel}
}

@article{Bosshard2001Molecular,
  author = {H. R. Bosshard},
  title = {Molecular recognition by induced fit: how fit is the concept?},
  journal = {News Physiol Sci},
  year = {2001},
  volume = {16},
  pages = {171--173},
  month = {Aug},
  abstract = {Induced fit explains why biomolecules can bind together even if they
	are not optimized for binding. However, induced fit can lead to a
	kinetic bottleneck and does not describe every interaction in the
	absence of prior complementarity. Preselection of a fitting conformer
	is an alternative to induced fit.},
  keywords = {Antigen-Antibody Complex, physiology; Biological Products, chemistry/metabolism;
	Models, Biological; Molecular Conformation},
  owner = {bricehoffmann},
  pmid = {11479367},
  timestamp = {2009.02.13}
}

@article{Bostroem2001Reproducing,
  author = {Bostr{\"o}m, J.},
  title = {Reproducing the conformations of protein-bound ligands: a critical
	evaluation of several popular conformational searching tools.},
  journal = {J Comput Aided Mol Des},
  year = {2001},
  volume = {15},
  pages = {1137--1152},
  number = {12},
  month = {Dec},
  abstract = {Several programs (Catalyst, Confort, Flo99, MacroModel, and Omega)
	that are commonly used to generate conformational ensembles have
	been tested for their ability to reproduce bioactive conformations.
	The ligands from thirty-two different ligand-protein complexes determined
	by high-resolution (< 2.0 A) X-ray crystallography have been analyzed.
	The Low-Mode Conformational Search method (with AMBER* and the GB/SA
	hydration model), as implemented in MacroModel, was found to perform
	better than the other algorithms. The rule-based method Omega, which
	is orders of magnitude faster than the other methods, also gave reasonable
	results but were found to be dependent on the input structure. The
	methods supporting diverse sampling (Catalyst, Confort) performed
	least well. For the seven ligands in the set having eight or more
	rotatable bonds, none of the bioactive conformations were ever found,
	save for one exception (Flo99). These ligands do not bind in a local
	minimum conformation according to AMBER*/GB/SA. Taking these last
	two observations together, it is clear that geometrically similar
	structures should be collected in order to increase the probability
	of finding the bioactive conformation among the generated ensembles.
	Factors influencing bioactive conformational retrieval have been
	identified and are discussed.},
  keywords = {Algorithms; Crystallography, X-Ray; Ligands; Models, Molecular; Molecular
	Conformation; Protein Binding; Quantum Theory; Software},
  owner = {laurent},
  pmid = {12160095},
  timestamp = {2008.01.16}
}

@article{Bostroem2003Assessing,
  author    = {Bostr{\"o}m, J. and Greenwood, J. R. and Gottfries, J.},
  title     = {Assessing the performance of {OMEGA} with respect to retrieving bioactive
	conformations.},
  journal   = {J. Mol. Graph. Model.},
  year      = {2003},
  month     = {Mar},
  volume    = {21},
  number    = {5},
  pages     = {449--462},
  abstract  = {OMEGA is a rule-based program which rapidly generates conformational
	ensembles of small molecules. We have varied the parameters which
	control the nature of the ensembles generated by OMEGA in a statistical
	fashion (D-optimal) with the aim of increasing the probability of
	generating bioactive conformations. Thirty-six drug-like ligands
	from different ligand-protein complexes determined by high-resolution
	(< or =2.0A) X-ray crystallography have been analyzed. Statistically
	significant models (Q(2)> or =0.75) confirm that one can increase
	the performance of OMEGA by modifying the parameters. Twenty-eight
	of the bioactive conformations were retrieved when using a low-energy
	cut-off (5 kcal/mol), a low RMSD value (0.6A) for duplicate removal,
	and a maximum of 1000 output conformations. All of those that were
	not retrieved had eight or more rotatable bonds. The duplicate removal
	parameter was found to have the largest impact on retrieval of bioactive
	conformations, and the maximum number of conformations also affected
	the results considerably. The input conformation was found to influence
	the results largely because certain bond angles can prevent the bioactive
	conformation from being generated as a low-energy conformation. Pre-optimizing
	the input structures with MMFF94s improved the results significantly.
	We also investigated the performance of OMEGA in connection with
	database searching. The shape-matching program Rapid Overlay of Chemical
	Structures (ROCS) was used as search tool. Two multi-conformational
	databases were built from the MDDR database plus the 36 compounds;
	one large (maximum 1000 conformations/mol) and one small (maximum
	100 conformations/mol). Both databases provided satisfactory results
	in terms of retrieval. ROCS was able to rank 35 out of 36 X-ray structures
	among the top 500 hits from the large database.},
  pii       = {S1093-3263(02)00204-8},
  pmid      = {12543140},
  owner     = {laurent},
  timestamp = {2008.01.16}
}

@inproceedings{Bouchard2008Efficient,
  author = {Alexandre Bouchard-C{\^o}t{\'e} and Michael I. Jordan and Dan Klein},
  title = {Efficient Inference in Phylogenetic {InDel} Trees.},
  booktitle = {Advances in Neural Information Processing Systems 21},
  year = {2008},
  editor = {Daphne Koller and Dale Schuurmans and Yoshua Bengio and L{\'e}on
	Bottou},
  pages = {177--184},
  publisher = {MIT Press},
  date = {2009-04-15},
  ee = {http://books.nips.cc/papers/files/nips21/NIPS2008_0438.pdf},
  url = {http://dblp.uni-trier.de/db/conf/nips/nips2008.html#Bouchard-CoteJK08}
}

@book{Bouchaud2003Theory,
  author    = {Bouchaud, J.-P. and Potters, M.},
  title     = {Theory of financial risk and derivative pricing},
  publisher = {Cambridge University Press},
  year      = {2003},
  owner     = {jp},
  timestamp = {2011.01.29}
}

@article{Boucheron2000sharp,
  author  = {Boucheron, S. and Lugosi, G. and Massart, P.},
  title   = {A sharp concentration inequality with applications},
  journal = {Random {S}tructures and {A}lgorithms},
  volume  = {16},
  pages   = {277--292},
  year    = {2000},
  url     = {http://www.econ.upf.es/~lugosi/concentration.ps},
  pdf     = {../local/bouc00.pdf},
  file    = {bouc00.pdf:local/bouc00.pdf:PDF},
  subject = {stat}
}

@article{Boulesteix2010Over-optimism,
  author = {Boulesteix, A.-L.},
  title = {Over-optimism in bioinformatics research},
  journal = {Bioinformatics},
  year = {2010},
  volume = {26},
  pages = {437--439},
  number = {3},
  publisher = {Oxford Univ Press}
}

@article{Boulesteix2009Stability,
  author = {Boulesteix, A.-L. and Slawski, M.},
  title = {Stability and aggregation of ranked gene lists},
  journal = {Briefings in Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {556--568},
  number = {5},
  publisher = {Oxford Univ Press}
}

@article{Bowd2002Comparing,
  author = {Christopher Bowd and Kwokleung Chan and Linda M Zangwill and Michael
	H Goldbaum and Te-Won Lee and Terrence J Sejnowski and Robert N Weinreb},
  title = {Comparing neural networks and linear discriminant functions for glaucoma
	detection using confocal scanning laser ophthalmoscopy of the optic
	disc.},
  journal = {Invest {O}phthalmol {V}is {S}ci},
  year = {2002},
  volume = {43},
  pages = {3444--3454},
  number = {11},
  month = {Nov},
  abstract = {P{URPOSE}: {T}o determine whether neural network techniques can improve
	differentiation between glaucomatous and nonglaucomatous eyes, using
	the optic disc topography parameters of the {H}eidelberg {R}etina
	{T}omograph ({HRT}; {H}eidelberg {E}ngineering, {H}eidelberg, {G}ermany).
	{METHODS}: {W}ith the {HRT}, one eye was imaged from each of 108
	patients with glaucoma (defined as having repeatable visual field
	defects with standard automated perimetry) and 189 subjects without
	glaucoma (no visual field defects with healthy-appearing optic disc
	and retinal nerve fiber layer on clinical examination) and the optic
	nerve topography was defined by 17 global and 66 regional {HRT} parameters.
	{W}ith all the {HRT} parameters used as input, receiver operating
	characteristic ({ROC}) curves were generated for the classification
	of eyes, by three neural network techniques: linear and {G}aussian
	support vector machines ({SVM} linear and {SVM} {G}aussian, respectively)
	and a multilayer perceptron ({MLP}), as well as four previously proposed
	linear discriminant functions ({LDF}s) and one {LDF} developed on
	the current data with all {HRT} parameters used as input. {RESULTS}:
	{T}he areas under the {ROC} curves for {SVM} linear and {SVM} {G}aussian
	were 0.938 and 0.945, respectively; for {MLP}, 0.941; for the current
	{LDF}, 0.906; and for the best previously proposed {LDF}, 0.890.
	{W}ith the use of forward selection and backward elimination optimization
	techniques, the areas under the {ROC} curves for {SVM} {G}aussian
	and the current {LDF} were increased to approximately 0.96. {CONCLUSIONS}:
	{T}rained neural networks, with global and regional {HRT} parameters
	used as input, improve on previously proposed {HRT} parameter-based
	{LDF}s for discriminating between glaucomatous and nonglaucomatous
	eyes. {T}he performance of both neural networks and {LDF}s can be
	improved with optimization of the features in the input. {N}eural
	network analyses show promise for increasing diagnostic accuracy
	of tests for glaucoma.},
  pdf = {../local/Bowd2002Comparing.pdf},
  file = {Bowd2002Comparing.pdf:local/Bowd2002Comparing.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric
	Acid Cycle, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases,
	Decision Making, Diagnosis, Differential, Discriminant Analysis,
	Drug, Drug Design, Electrostatics, Eukaryotic Cells, Factual, Feasibility
	Studies, Female, Gene Expression, Gene Expression Profiling, Gene
	Expression Regulation, Genes, Genetic, Genetic Heterogeneity, Genetic
	Markers, Glaucoma, Hemolysins, Humans, Internet, Intraocular Pressure,
	Ion Exchange, Lasers, Leukemia, Ligands, Likelihood Functions, Logistic
	Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics,
	Messenger, Models, Molecular, Molecular Probe Techniques, Molecular
	Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural
	Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't,
	Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Open-Angle, Ophthalmoscopy,
	Optic Disk, Ovarian Neoplasms, P.H.S., Pattern Recognition, Probability,
	Probability Learning, Protein Binding, Protein Conformation, Proteins,
	Quality Control, Quantum Theory, RNA, RNA Splicing, ROC Curve, Receptors,
	Reference Values, Regression Analysis, Reproducibility of Results,
	Research Support, Robotics, Saccharomyces cerevisiae Proteins, Sensitivity
	and Specificity, Sequence Analysis, Signal Processing, Software,
	Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship,
	Thermodynamics, Transcription, Tumor Markers, U.S. Gov't, 12407155},
  url = {http://www.iovs.org/cgi/content/abstract/43/11/3444}
}

@article{Bowd2005Relevance,
  author = {Christopher Bowd and Felipe A Medeiros and Zuohua Zhang and Linda
	M Zangwill and Jiucang Hao and Te-Won Lee and Terrence J Sejnowski
	and Robert N Weinreb and Michael H Goldbaum},
  title = {Relevance vector machine and support vector machine classifier analysis
	of scanning laser polarimetry retinal nerve fiber layer measurements.},
  journal = {Invest {O}phthalmol {V}is {S}ci},
  year = {2005},
  volume = {46},
  pages = {1322--1329},
  number = {4},
  month = {Apr},
  abstract = {P{URPOSE}: {T}o classify healthy and glaucomatous eyes using relevance
	vector machine ({RVM}) and support vector machine ({SVM}) learning
	classifiers trained on retinal nerve fiber layer ({RNFL}) thickness
	measurements obtained by scanning laser polarimetry ({SLP}). {METHODS}:
	{S}eventy-two eyes of 72 healthy control subjects (average age =
	64.3 +/- 8.8 years, visual field mean deviation = -0.71 +/- 1.2 d{B})
	and 92 eyes of 92 patients with glaucoma (average age = 66.9 +/-
	8.9 years, visual field mean deviation = -5.32 +/- 4.0 d{B}) were
	imaged with {SLP} with variable corneal compensation ({GD}x {VCC};
	{L}aser {D}iagnostic {T}echnologies, {S}an {D}iego, {CA}). {RVM}
	and {SVM} learning classifiers were trained and tested on {SLP}-determined
	{RNFL} thickness measurements from 14 standard parameters and 64
	sectors (approximately 5.6 degrees each) obtained in the circumpapillary
	area under the instrument-defined measurement ellipse (total 78 parameters).
	{T}en-fold cross-validation was used to train and test {RVM} and
	{SVM} classifiers on unique subsets of the full 164-eye data set
	and areas under the receiver operating characteristic ({AUROC}) curve
	for the classification of eyes in the test set were generated. {AUROC}
	curve results from {RVM} and {SVM} were compared to those for 14
	{SLP} software-generated global and regional {RNFL} thickness parameters.
	{A}lso reported was the {AUROC} curve for the {GD}x {VCC} software-generated
	nerve fiber indicator ({NFI}). {RESULTS}: {T}he {AUROC} curves for
	{RVM} and {SVM} were 0.90 and 0.91, respectively, and increased to
	0.93 and 0.94 when the training sets were optimized with sequential
	forward and backward selection (resulting in reduced dimensional
	data sets). {AUROC} curves for optimized {RVM} and {SVM} were significantly
	larger than those for all individual {SLP} parameters. {T}he {AUROC}
	curve for the {NFI} was 0.87. {CONCLUSIONS}: {R}esults from {RVM}
	and {SVM} trained on {SLP} {RNFL} thickness measurements are similar
	and provide accurate classification of glaucomatous and healthy eyes.
	{RVM} may be preferable to {SVM}, because it provides a {B}ayesian-derived
	probability of glaucoma as an output. {T}hese results suggest that
	these machine learning classifiers show good potential for glaucoma
	diagnosis.},
  doi = {10.1167/iovs.04-1122},
  pdf = {../local/Bowd2005Relevance.pdf},
  file = {Bowd2005Relevance.pdf:local/Bowd2005Relevance.pdf:PDF},
  keywords = {80 and over, Aged, Algorithms, Area Under Curve, Cross-Sectional Studies,
	Diagnostic Imaging, Diagnostic Techniques, Glaucoma, Humans, Lasers,
	Middle Aged, Nerve Fibers, Non-U.S. Gov't, Ophthalmological, Optic
	Nerve Diseases, P.H.S., ROC Curve, Research Support, Retinal Ganglion
	Cells, Sensitivity and Specificity, U.S. Gov't, 15790898},
  pii = {46/4/1322},
  url = {http://dx.doi.org/10.1167/iovs.04-1122}
}

@article{Bowd2004Confocal,
  author = {Christopher Bowd and Linda M Zangwill and Felipe A Medeiros and Jiucang
	Hao and Kwokleung Chan and Te-Won Lee and Terrence J Sejnowski and
	Michael H Goldbaum and Pamela A Sample and Jonathan G Crowston and
	Robert N Weinreb},
  title = {Confocal scanning laser ophthalmoscopy classifiers and stereophotograph
	evaluation for prediction of visual field abnormalities in glaucoma-suspect
	eyes.},
  journal = {Invest {O}phthalmol {V}is {S}ci},
  year = {2004},
  volume = {45},
  pages = {2255--2262},
  number = {7},
  month = {Jul},
  abstract = {P{URPOSE}: {T}o determine whether {H}eidelberg {R}etina {T}omograph
	({HRT}; {H}eidelberg {E}ngineering, {D}ossenheim, {G}ermany) classification
	techniques and investigational support vector machine ({SVM}) analyses
	can detect optic disc abnormalities in glaucoma-suspect eyes before
	the development of visual field abnormalities. {METHODS}: {G}laucoma-suspect
	eyes (n = 226) were classified as converts or nonconverts based on
	the development of repeatable (either two or three consecutive) standard
	automated perimetry ({SAP})-detected abnormalities over the course
	of the study (mean follow-up, approximately 4.5 years). {H}azard
	ratios for development of {SAP} abnormalities were calculated based
	on baseline classification results, follow-up time, and end point
	status (convert, nonconvert). {C}lassification techniques applied
	were {HRT} classification ({HRTC}), {M}oorfields {R}egression {A}nalysis,
	forward-selection optimized {SVM} ({SVM} fwd) and backward elimination-optimized
	{SVM} ({SVM} back) analysis of {HRT} data, and stereophotograph assessment.
	{RESULTS}: {U}nivariate analyses indicated that all classification
	techniques were predictors of the development of two repeatable abnormal
	{SAP} results, with hazards ratios (95\% confidence interval [{CI}])
	ranging from 1.32 (1.00-1.75) for {HRTC} to 2.0 (1.48-2.76) for stereophotograph
	assessment (all {P} < or = 0.05). {O}nly {SVM} ({SVM} fwd and {SVM}
	back) analysis of {HRT} data and stereophotograph assessment were
	univariate predictors of the development of three repeatable abnormal
	{SAP} results, with hazard ratios (95\% {CI}) ranging from 1.73 (1.16-2.82)
	for {SVM} fwd to 1.82 (1.19-3.12) for {SVM} back (both {P} < 0.007).
	{M}ultivariate analyses including each classification technique individually
	in a model with age, baseline {SAP} pattern standard deviation [{PSD}],
	and baseline {IOP} indicated that all classification techniques except
	{HRTC} ({P} = 0.06) were predictors of the development of two repeatable
	abnormal {SAP} results with hazards ratios ranging from 1.30 (0.99,
	1.73) for {HRTC} to 1.90 (1.37, 2.69) for stereophotograph assessment.
	{O}nly {SVM} ({SVM} fwd and {SVM} back) analysis of {HRT} data and
	stereophotograph assessment were significant predictors of the development
	of three repeatable abnormal {SAP} results in multivariate analyses;
	hazard ratios of 1.57 (1.03, 2.59) and 1.70 (1.18, 2.51), respectively.
	{SAP} {PSD} was a significant predictor of two repeatable abnormal
	{SAP} results in multivariate models with all classification techniques,
	with hazard ratios ranging from 3.31 (1.39, 7.89) to 4.70 (2.02,
	10.93) per 1-d{B} increase. {CONCLUSIONS}: {HRT} classifications
	techniques and stereophotograph assessment can detect optic disc
	topography abnormalities in glaucoma-suspect eyes before the development
	of {SAP} abnormalities. {T}hese data support strongly the importance
	of optic disc examination for early glaucoma diagnosis.},
  doi = {10.1167/iovs.03-1087},
  pdf = {../local/Bowd2004Confocal.pdf},
  file = {Bowd2004Confocal.pdf:local/Bowd2004Confocal.pdf:PDF},
  keywords = {80 and over, Adolescent, Adult, Aged, Algorithms, Artificial Intelligence,
	Auditory, Benchmarking, Binding Sites, Brain Stem, Breast Diseases,
	Chemical, Child, Chromosomes, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, Data Interpretation, Databases,
	Diagnosis, Diagnostic Errors, Differential, Drug Resistance, Electroencephalography,
	Epilepsy, Evoked Potentials, Female, Forecasting, Gene Expression,
	Gene Expression Profiling, Genetic, Genotype, Glaucoma, Greece, HIV
	Protease Inhibitors, HIV-1, Human, Humans, Infant, Information Management,
	Information Storage and Retrieval, Intraocular Pressure, Kinetics,
	Language Development Disorders, Lasers, Least-Squares Analysis, Linear
	Models, Male, Microbial Sensitivity Tests, Middle Aged, Models, Molecular,
	Monitoring, Nephroblastoma, Non-U.S. Gov't, Nonlinear Dynamics, Ocular
	Hypertension, Oligonucleotide Array Sequence Analysis, Ophthalmoscopy,
	Optic Disk, Optic Nerve Diseases, P.H.S., Pair 1, Perimetry, Periodicals,
	Phosphorylation, Phosphotransferases, Photography, Physiologic, Point
	Mutation, Preschool, Prognosis, Protein, Proteins, Pyrimidinones,
	Reaction Time, Recurrence, Reproducibility of Results, Research Support,
	Reverse Transcriptase Inhibitors, Sensitivity and Specificity, Sequence
	Alignment, Sequence Analysis, Signal Processing, Software, Sound
	Localization, Statistical, Stochastic Processes, Structure-Activity
	Relationship, Theoretical, Time Factors, U.S. Gov't, Viral, Vision
	Disorders, Visual Fields, 15223803},
  url = {http://dx.doi.org/10.1167/iovs.03-1087}
}

@book{Bower2001Computational,
  author    = {Bower, J. M. and Bolouri, H.},
  title     = {Computational modeling of genetic and biochemical networks},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2001}
}

@article{Boyd2011Distributed,
  author    = {Boyd, S. and Parikh, N. and Chu, E. and Peleato, B. and Eckstein,
	J.},
  title     = {Distributed optimization and statistical learning via the alternating
	direction method of multipliers},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  publisher = {Now Publishers Inc.},
  year      = {2011},
  volume    = {3},
  number    = {1},
  pages     = {1--122},
  doi       = {10.1561/2200000016},
  url       = {http://dx.doi.org/10.1561/2200000016},
  pdf       = {../local/Boyd2011Distributed.pdf},
  file      = {Boyd2011Distributed.pdf:Boyd2011Distributed.pdf:PDF}
}

@book{Boyd2004Convex,
  title = {Convex Optimization},
  publisher = {Cambridge University Press},
  year = {2004},
  author = {S. Boyd and L. Vandenberghe},
  address = {New York, NY, USA},
  pdf = {../local/Boyd2004Convex.pdf},
  file = {Boyd2004Convex.pdf:Boyd2004Convex.pdf:PDF},
  isbn = {0521833787}
}

@article{Boyle2005Cancer,
  author = {Boyle, P. and Ferlay, J.},
  title = {Cancer incidence and mortality in Europe, 2004},
  journal = {Ann. Oncol.},
  year = {2005},
  volume = {16},
  pages = {481--488},
  number = {3},
  month = mar,
  abstract = {BACKGROUND: There are no recent estimates of the incidence and mortality
	from cancer at a European level. Those data that are available generally
	refer to the mid-1990s and are of limited use for cancer control
	planning. We present estimates of the cancer burden in Europe in
	2004, including data for the (25 Member States) European Union. METHODS:
	The most recent sources of incidence and mortality data available
	in the Descriptive Epidemiology Group at IARC were applied to population
	projections to derive the best estimates of the burden of cancer,
	in terms of incidence and mortality, for Europe in 2004. RESULTS:
	In 2004 in Europe, there were an estimated 2,886,800 incident cases
	of cancer diagnosed and 1,711,000 cancer deaths. The most common
	incident form of cancer was lung cancer (13.3\% of all incident cases),
	followed by colorectal cancer (13.2\%) and breast cancer (13\%).
	Lung cancer was also the most common cause of cancer death (341,800
	deaths), followed by colorectal (203,700), stomach (137,900) and
	breast (129,900). CONCLUSIONS: With an estimated 2.9 million new
	cases (54\% occurring in men, 46\% in women) and 1.7 million deaths
	(56\% in men, 44\% in women) each year, cancer remains an important
	public health problem in Europe, and the ageing of the European population
	will cause these numbers to continue to increase even if age-specific
	rates remain constant. To make great progress quickly against cancer
	in Europe, the need is evident to make a concerted attack on the
	big killers: lung, colorectal, breast and stomach cancer. Stomach
	cancer rates are falling everywhere in Europe and public health measures
	are available to reduce the incidence and mortality of lung cancer,
	colorectal cancer and breast cancer.},
  doi = {10.1093/annonc/mdi098},
  pdf = {../local/Boyle2005Cancer.pdf},
  file = {Boyle2005Cancer.pdf:Boyle2005Cancer.pdf:PDF},
  institution = {International Agency for Research on Cancer, 150 cours Albert Thomas,
	69372 Lyon Cedex 08, France. director@iarc.fr},
  keywords = {breastcancer},
  owner = {jp},
  pii = {mdi098},
  pmid = {15718248},
  timestamp = {2008.11.26},
  url = {http://dx.doi.org/10.1093/annonc/mdi098}
}

@article{Boysen2009Consistencies,
  author = {Boysen, L. and Kempe, A. and Liebscher, V. and Munk, A. and Wittich,
	O.},
  title = {Consistencies and rates of convergence of jump-penalized least squares
	estimators},
  journal = {Ann. Stat.},
  year = {2009},
  volume = {37},
  pages = {157--183},
  number = {1},
  abstract = {We study the asymptotics for jump-penalized least squares regression
	aiming at approximating a regression function by piecewise constant
	functions. Besides conventional consistency and convergence rates
	of the estimates in L2([0, 1)) our results cover other metrics like
	Skorokhod metric on the space of c{\`a}dl{\`a}g functions and uniform metrics
	on C([0, 1]). We will show that these estimators are in an adaptive
	sense rate optimal over certain classes of "approximation spaces."
	Special cases are the class of functions of bounded variation (piecewise)
	H{\"o}lder continuous functions of order 0 [...]},
  comment = {NOTE(review): the abstract is truncated after "of order 0". A faulty
	export dropped the text following a less-than sign and fused the URL
	into the abstract. Restore the remainder from the publisher page.},
  doi = {10.1214/07-AOS558},
  url = {http://dx.doi.org/10.1214/07-AOS558}
}

@article{Bozdech2004Antioxidant,
  author = {Bozdech, Z. and Ginsburg, H.},
  title = {Antioxidant defense in {Plasmodium} falciparum -- data mining of the
	transcriptome},
  journal = {Malaria {J}ournal},
  year = {2004},
  volume = {3},
  pages = {23},
  number = {1},
  abstract = {The intraerythrocytic malaria parasite is under constant oxidative
	stress originating both from endogenous and exogenous processes.
	{T}he parasite is endowed with a complete network of enzymes and
	proteins that protect it from those threats, but also uses redox
	activities to regulate enzyme activities. {I}n the present analysis,
	the transcription of the genes coding for the antioxidant defense
	elements are viewed in the time-frame of the intraerythrocytic cycle.
	{T}ime-dependent transcription data were taken from the transcriptome
	of the human malaria parasite {P}lasmodium falciparum. {W}hereas
	for several processes the transcription of the many participating
	genes is coordinated, in the present case there are some outstanding
	deviations where gene products that utilize glutathione or thioredoxin
	are transcribed before the genes coding for elements that control
	the levels of those substrates are transcribed. {S}uch insights may
	hint to novel, non-classical pathways that necessitate further investigations.},
  doi = {10.1186/1475-2875-3-23},
  pdf = {../local/Bozdech2004Antioxidant.pdf},
  file = {Bozdech2004Antioxidant.pdf:local/Bozdech2004Antioxidant.pdf:PDF},
  keywords = {microarray plasmodium},
  owner = {vert},
  url = {http://www.malariajournal.com/content/3/1/23}
}

@article{Bozdech2003Transcriptome,
  author = {Bozdech, Z. and Llinas, M. and Pulliam, B. L. and Wong, E. D. and
	Zhu, J. and DeRisi, J. L.},
  title = {The {T}ranscriptome of the {I}ntraerythrocytic {D}evelopmental {C}ycle
	of {P}lasmodium falciparum},
  journal = {P{L}o{S} {B}iology},
  year = {2003},
  volume = {1},
  pages = {e5},
  number = {1},
  abstract = {Plasmodium falciparum is the causative agent of the most burdensome
	form of human malaria, affecting 200-300 million individuals per
	year worldwide. {T}he recently sequenced genome of {P}. falciparum
	revealed over 5,400 genes, of which 60\% encode proteins of
	unknown function. {I}nsights into the biochemical function and regulation
	of these genes will provide the foundation for future drug and vaccine
	development efforts toward eradication of this disease. {B}y analyzing
	the complete asexual intraerythrocytic developmental cycle ({IDC})
	transcriptome of the {HB}3 strain of {P}. falciparum, we demonstrate
	that at least 60\% of the genome is transcriptionally active
	during this stage. {O}ur data demonstrate that this parasite has
	evolved an extremely specialized mode of transcriptional regulation
	that produces a continuous cascade of gene expression, beginning
	with genes corresponding to general cellular processes, such as protein
	synthesis, and ending with {P}lasmodium-specific functionalities,
	such as genes involved in erythrocyte invasion. {T}he data reveal
	that genes contiguous along the chromosomes are rarely coregulated,
	while transcription from the plastid genome is highly coregulated
	and likely polycistronic. {C}omparative genomic hybridization between
	{HB}3 and the reference genome strain (3{D}7) was used to distinguish
	between genes not expressed during the {IDC} and genes not detected
	because of possible sequence variations. {G}enomic differences between
	these strains were found almost exclusively in the highly antigenic
	subtelomeric regions of chromosomes. {T}he simple cascade of gene
	regulation that directs the asexual development of {P}. falciparum
	is unprecedented in eukaryotic biology. {T}he transcriptome of the
	{IDC} resembles a "just-in-time" manufacturing process whereby induction
	of any given gene occurs once per cycle and only at a time when it
	is required. {T}hese data provide to our knowledge the first comprehensive
	view of the timing of transcription throughout the intraerythrocytic
	development of {P}. falciparum and provide a resource for the identification
	of new chemotherapeutic and vaccine candidates.},
  comment = {(JP Vert) The paper that monitors the 48h cell cycle of P. falciparum},
  doi = {10.1371/journal.pbio.0000005},
  pdf = {../local/Bozdech2003Transcriptome.pdf},
  file = {Bozdech2003Transcriptome.pdf:local/Bozdech2003Transcriptome.pdf:PDF},
  keywords = {microarray plasmodium},
  owner = {vert},
  url = {http://dx.doi.org/10.1371/journal.pbio.0000005}
}

@article{Bozdech2003Expression,
  author = {Bozdech, Z. and Zhu, J. and Joachimiak, M. and Cohen, F. and Pulliam,
	B. and DeRisi, J.},
  title = {Expression profiling of the schizont and trophozoite stages of {P}lasmodium
	falciparum with a long-oligonucleotide microarray},
  journal = {Genome {B}iology},
  year = {2003},
  volume = {4},
  pages = {R9},
  number = {2},
  abstract = {B{ACKGROUND}: {T}he worldwide persistence of drug-resistant {P}lasmodium
	falciparum, the most lethal variety of human malaria, is a global
	health concern. {T}he {P}. falciparum sequencing project has brought
	new opportunities for identifying molecular targets for antimalarial
	drug and vaccine development. {RESULTS}: {W}e developed a software
	package, {A}rray{O}ligo{S}elector, to design an open reading frame
	({ORF})-specific {DNA} microarray using the publicly available {P}.
	falciparum genome sequence. {E}ach gene was represented by one or
	more long 70 mer oligonucleotides selected on the basis of uniqueness
	within the genome, exclusion of low-complexity sequence, balanced
	base composition and proximity to the 3' end. {A} first-generation
	microarray representing approximately 6,000 {ORF}s of the {P}. falciparum
	genome was constructed. {A}rray performance was evaluated through
	the use of control oligonucleotide sets with increasing levels of
	introduced mutations, as well as traditional northern blotting. {U}sing
	this array, we extensively characterized the gene-expression profile
	of the intraerythrocytic trophozoite and schizont stages of {P}.
	falciparum. {T}he results revealed extensive transcriptional regulation
	of genes specialized for processes specific to these two stages.
	{CONCLUSIONS}: {DNA} microarrays based on long oligonucleotides are
	powerful tools for the functional annotation and exploration of the
	{P}. falciparum genome. {E}xpression profiling of trophozoites and
	schizonts revealed genes associated with stage-specific processes
	and may serve as the basis for future drug targets and vaccine development.},
  doi = {10.1186/gb-2003-4-2-r9},
  pdf = {../local/Bozdech2003Expression.pdf},
  file = {Bozdech2003Expression.pdf:local/Bozdech2003Expression.pdf:PDF},
  keywords = {microarray plasmodium},
  owner = {vert},
  url = {http://genomebiology.com/2003/4/2/R9}
}

@article{Bradford2005Improved,
  author = {Bradford, James R. and Westhead, David R.},
  title = {Improved prediction of protein-protein binding sites using a support
	vector machines approach.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1487--1494},
  number = {8},
  month = apr,
  abstract = {M{OTIVATION}: {S}tructural genomics projects are beginning to produce
	protein structures with unknown function, therefore, accurate, automated
	predictors of protein function are required if all these structures
	are to be properly annotated in reasonable time. {I}dentifying the
	interface between two interacting proteins provides important clues
	to the function of a protein and can reduce the search space required
	by docking algorithms to predict the structures of complexes. {RESULTS}:
	{W}e have combined a support vector machine ({SVM}) approach with
	surface patch analysis to predict protein-protein binding sites.
	{U}sing a leave-one-out cross-validation procedure, we were able
	to successfully predict the location of the binding site on 76\%
	of our dataset made up of proteins with both transient and obligate
	interfaces. {W}ith heterogeneous cross-validation, where we trained
	the {SVM} on transient complexes to predict on obligate complexes
	(and vice versa), we still achieved comparable success rates to the
	leave-one-out cross-validation suggesting that sufficient properties
	are shared between transient and obligate interfaces. {AVAILABILITY}:
	{A} web application based on the method can be found at http://www.bioinformatics.leeds.ac.uk/ppi\_pred.
	{T}he dataset of 180 proteins used in this study is also available
	via the same web site. {CONTACT}: westhead@bmb.leeds.ac.uk {SUPPLEMENTARY}
	{INFORMATION}: http://www.bioinformatics.leeds.ac.uk/ppi-pred/supp-material.},
  doi = {10.1093/bioinformatics/bti242},
  pdf = {../local/Bradford2005Improved.pdf},
  file = {Bradford2005Improved.pdf:local/Bradford2005Improved.pdf:PDF},
  keywords = {biosvm},
  pii = {bti242},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti242}
}

@article{Brahnam2005Machine,
  author = {Brahnam, Sheryl and Chuang, Chao-Fa and Shih, Frank Y. and Slack,
	Melinda R.},
  title = {Machine recognition and representation of neonatal facial displays
	of acute pain.},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2005},
  month = jun,
  abstract = {O{BJECTIVE}: {I}t has been reported in medical literature that health
	care professionals have difficulty distinguishing a newborn's facial
	expressions of pain from facial reactions to other stimuli. {A}lthough
	a number of pain instruments have been developed to assist health
	professionals, studies demonstrate that health professionals are
	not entirely impartial in their assessment of pain and fail to capitalize
	on all the information exhibited in a newborn's facial displays.
	{T}his study tackles these problems by applying three different state-of-the-art
	face classification techniques to the task of distinguishing a newborn's
	facial expressions of pain. {METHODS}: {T}he facial expressions
	of 26 neonates between the ages of 18h and 3 days old were photographed
	experiencing the pain of a heel lance and a variety of stressors,
	including transport from one crib to another (a disturbance that
	can provoke crying that is not in response to pain), an air stimulus
	on the nose, and friction on the external lateral surface of the
	heel. {T}hree face classification techniques, principal component
	analysis ({PCA}), linear discriminant analysis ({LDA}), and support
	vector machine ({SVM}), were used to classify the faces. {RESULTS}:
	{I}n our experiments, the best recognition rates of pain versus nonpain
	(88.00\%), pain versus rest (94.62\%), pain versus cry (80.00\%),
	pain versus air puff (83.33\%), and pain versus friction (93.00\%)
	were obtained from an {SVM} with a polynomial kernel of degree 3.
	{T}he {SVM} outperformed two commonly used methods in face classification:
	{PCA} and {LDA}, each using the {L}(1) distance metric. {CONCLUSION}:
	{T}he results of this study indicate that the application of face
	classification techniques in pain assessment and management is a
	promising area of investigation.},
  doi = {10.1016/j.artmed.2004.12.003},
  pdf = {../local/Brahnam2005Machine.pdf},
  file = {Brahnam2005Machine.pdf:local/Brahnam2005Machine.pdf:PDF},
  keywords = {Artificial Intelligence, Conservation of Natural Resources, Decision
	Support Techniques, Ecosystem, Environment, Forestry, Regression
	Analysis, Spain, 15979291},
  pii = {S0933-3657(05)00013-8},
  url = {http://dx.doi.org/10.1016/j.artmed.2004.12.003}
}

@article{Brancotte2011Gene,
  author = {Brancotte, B. and Biton, A. and Bernard-Pierrot, I. and Radvanyi,
	F. and Reyal, F. and Cohen-Boulakia, S.},
  title = {Gene List significance at-a-glance with {GeneValorization}.},
  journal = {Bioinformatics},
  year = {2011},
  volume = {27},
  pages = {1187--1189},
  number = {8},
  month = apr,
  abstract = {High-throughput technologies provide fundamental informations concerning
	thousands of genes. Many of the current research laboratories daily
	use one or more of these technologies and end-up with lists of genes.
	Assessing the originality of the results obtained includes being
	aware of the number of publications available concerning individual
	or multiple genes and accessing information about these publications.
	Faced with the exponential growth of publications available and number
	of genes involved in a study, this task is becoming particularly
	difficult to achieve. We introduce GeneValorization, a web-based tool
	that gives a clear and handful overview of the bibliography available
	corresponding to the user input formed by (i) a gene list (expressed
	by gene names or ids from EntrezGene) and (ii) a context of study
	(expressed by keywords). From this input, GeneValorization provides
	a matrix containing the number of publications with co-occurrences
	of gene names and keywords. Graphics are automatically generated
	to assess the relative importance of genes within various contexts.
	Links to publications and other databases offering information on
	genes and keywords are also available. To illustrate how helpful
	GeneValorization is, we will consider the gene list of the OncotypeDX
	prognostic marker test. http://bioguide-project.net/gv cohen@lri.fr
	Supplementary data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btr073},
  institution = {Laboratoire de Recherche en Informatique, CNRS UMR 8623, Universit{\'e}
	Paris-Sud, F-91405 Orsay Cedex, CNRS, UMR 144, Institut Curie, 26
	rue d'Ulm, F-75248 Paris Cedex 05, Institut Curie, Centre de Recherche,
	Paris, F-75248, INSERM, U900, Paris, F-75248, Mines ParisTech, Fontainebleau,
	F-77300 and Institut Curie, Departement de Chirurgie, 6 rue d'Ulm,
	F-75005 Paris, France.},
  owner = {mordelet},
  pii = {btr073},
  pmid = {21349868},
  timestamp = {2011.05.17},
  url = {http://dx.doi.org/10.1093/bioinformatics/btr073}
}

@article{Brazma2001Minimum,
  author = {Brazma, A. and Hingamp, P. and Quackenbush, J. and Sherlock, G. and
	Spellman, P. and Stoeckert, C. and Aach, J. and Ansorge, W. and Ball,
	C. A. and Causton, H. C. and Gaasterland, T. and Glenisson, P. and
	Holstege, F. C. and Kim, I. F. and Markowitz, V. and Matese, J. C.
	and Parkinson, H. and Robinson, A. and Sarkans, U. and Schulze-Kremer,
	S. and Stewart, J. and Taylor, R. and Vilo, J. and Vingron, M.},
  title = {Minimum information about a microarray experiment ({MIAME})---toward
	standards for microarray data.},
  journal = {Nat. Genet.},
  year = {2001},
  volume = {29},
  pages = {365--371},
  number = {4},
  month = dec,
  abstract = {Microarray analysis has become a widely used tool for the generation
	of gene expression data on a genomic scale. Although many significant
	results have been derived from microarray studies, one limitation
	has been the lack of standards for presenting and exchanging such
	data. Here we present a proposal, the Minimum Information About a
	Microarray Experiment (MIAME), that describes the minimum information
	required to ensure that microarray data can be easily interpreted
	and that results derived from its analysis can be independently verified.
	The ultimate goal of this work is to establish a standard for recording
	and reporting microarray-based gene expression data, which will in
	turn facilitate the establishment of databases and public repositories
	and enable the development of data analysis tools. With respect to
	MIAME, we concentrate on defining the content and structure of the
	necessary information rather than the technical format for capturing
	it.},
  doi = {10.1038/ng1201-365},
  institution = {European Bioinformatics Institute, EMBL outstation, Wellcome Trust
	Genome Campus, Hinxton, Cambridge CB10 1SD, UK. brazma@ebi.ac.uk},
  keywords = {Computational Biology; Gene Expression Profiling, methods; Oligonucleotide
	Array Sequence Analysis, standards},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {ng1201-365},
  pmid = {11726920},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/ng1201-365}
}

@article{Bredel2004Chemogenomics,
  author = {Bredel, M. and Jacoby, E.},
  title = {Chemogenomics: an emerging strategy for rapid target and drug discovery.},
  journal = {Nat. Rev. Genet.},
  year = {2004},
  volume = {5},
  pages = {262--275},
  number = {4},
  month = apr,
  doi = {10.1038/nrg1317},
  pdf = {../local/Bredel2004Chemogenomics.pdf},
  file = {Bredel2004Chemogenomics.pdf:Bredel2004Chemogenomics.pdf:PDF},
  keywords = {chemogenomics},
  owner = {vert},
  pii = {nrg1317},
  pmid = {15131650},
  timestamp = {2007.08.02},
  url = {http://dx.doi.org/10.1038/nrg1317}
}

@inproceedings{Breese1998Empirical,
  author = {Breese, J. S. and Heckerman, D. and Kadie, C.},
  title = {Empirical analysis of predictive algorithms for collaborative filtering},
  booktitle = {14th Conference on Uncertainty in Artificial Intelligence},
  year = {1998},
  pages = {43--52},
  address = {Madison, WI},
  publisher = {Morgan Kaufmann}
}

@article{Breiman2001Random,
  author = {Breiman, L.},
  title = {Random forests},
  journal = {Mach. Learn.},
  year = {2001},
  volume = {45},
  pages = {5--32},
  number = {1},
  abstract = {Random forests are a combination of tree predictors such that each
	tree depends on the values of a random vector sampled independently
	and with the same distribution for all trees in the forest. The generalization
	error for forests converges a.s. to a limit as the number of trees
	in the forest becomes large. The generalization error of a forest
	of tree classifiers depends on the strength of the individual trees
	in the forest and the correlation between them. Using a random selection
	of features to split each node yields error rates that compare favorably
	to Adaboost (Y. Freund \& R. Schapire, Machine Learning: Proceedings
	of the Thirteenth International conference, ***, 148--156), but are
	more robust with respect to noise. Internal estimates monitor error,
	strength, and correlation and these are used to show the response
	to increasing the number of features used in the splitting. Internal
	estimates are also used to measure variable importance. These ideas
	are also applicable to regression.},
  doi = {10.1023/A:1010933404324},
  pdf = {../local/Breiman2001Random.pdf},
  file = {Breiman2001Random.pdf:Breiman2001Random.pdf:PDF},
  keywords = {PUlearning},
  owner = {jp},
  timestamp = {2010.02.01},
  url = {http://dx.doi.org/10.1023/A:1010933404324}
}

@article{Breiman2001Statistical,
  author    = {Breiman, L.},
  title     = {Statistical modeling: The two cultures (with comments and a rejoinder by the author)},
  journal   = {Statistical Science},
  publisher = {Institute of Mathematical Statistics},
  year      = {2001},
  volume    = {16},
  number    = {3},
  pages     = {199--231}
}

@article{Breiman1996Bagging,
  author    = {Breiman, L.},
  title     = {Bagging predictors},
  journal   = {Mach. Learn.},
  year      = {1996},
  volume    = {24},
  number    = {2},
  pages     = {123--140},
  doi       = {10.1023/A:1018054314350},
  url       = {http://dx.doi.org/10.1023/A:1018054314350},
  pdf       = {../local/Breiman1996Bagging.pdf},
  file      = {Breiman1996Bagging.pdf:Breiman1996Bagging.pdf:PDF},
  keywords  = {PUlearning},
  owner     = {jp},
  timestamp = {2010.01.29}
}

@book{Breiman1984Classification,
  title = {Classification and regression trees},
  publisher = {Chapman \& Hall/CRC},
  year = {1984},
  author = {Breiman, L. and Friedman, J. and Olshen, R. A. and Stone, C. J.}
}

@article{Brein2005Inparanoid,
  author = {O'Brien, K. and Remm, M. and Sonnhammer, E.},
  title = {Inparanoid: a comprehensive database of eukaryotic orthologs},
  journal = {Nucleic Acids Res.},
  year = {2005},
  volume = {33},
  pages = {D476--D480},
  owner = {michael},
  timestamp = {2008.10.02}
}

@article{Brennecke2005Principles,
  author = {Brennecke, Julius and Stark, Alexander and Russell, Robert B. and
	Cohen, Stephen M.},
  title = {Principles of {microRNA}-target recognition.},
  journal = {PLoS Biol},
  year = {2005},
  volume = {3},
  pages = {e85},
  number = {3},
  month = mar,
  abstract = {MicroRNAs (miRNAs) are short non-coding RNAs that regulate gene expression
	in plants and animals. Although their biological importance has become
	clear, how they recognize and regulate target genes remains less
	well understood. Here, we systematically evaluate the minimal requirements
	for functional miRNA-target duplexes in vivo and distinguish classes
	of target sites with different functional properties. Target sites
	can be grouped into two broad categories. 5' dominant sites have
	sufficient complementarity to the miRNA 5' end to function with little
	or no support from pairing to the miRNA 3' end. Indeed, sites with
	3' pairing below the random noise level are functional given a strong
	5' end. In contrast, 3' compensatory sites have insufficient 5' pairing
	and require strong 3' pairing for function. We present examples and
	genome-wide statistical support to show that both classes of sites
	are used in biologically relevant genes. We provide evidence that
	an average miRNA has approximately 100 target sites, indicating that
	miRNAs regulate a large fraction of protein-coding genes and that
	miRNA 3' ends are key determinants of target specificity within miRNA
	families.},
  doi = {10.1371/journal.pbio.0030085},
  institution = {European Molecular Biology Laboratory, Heidelberg, Germany.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {15723116},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1371/journal.pbio.0030085}
}

@article{Breslin2005Signal,
  author = {Breslin, T. and Krogh, M. and Peterson, C. and Troein, C.},
  title = {Signal transduction pathway profiling of individual tumor samples.},
  journal = {BMC Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {163},
  abstract = {Signal transduction pathways convey information from the outside of
	the cell to transcription factors, which in turn regulate gene expression.
	Our objective is to analyze tumor gene expression data from microarrays
	in the context of such pathways. We use pathways compiled from the
	TRANSPATH/TRANSFAC databases and the literature, and three publicly
	available cancer microarray data sets. Variation in pathway activity,
	across the samples, is gauged by the degree of correlation between
	downstream targets of a pathway. Two correlation scores are applied;
	one considers all pairs of downstream targets, and the other considers
	only pairs without common transcription factors. Several pathways
	are found to be differentially active in the data sets using these
	scores. Moreover, we devise a score for pathway activity in individual
	samples, based on the average expression value of the downstream
	targets. Statistical significance is assigned to the scores using
	permutation of genes as null model. Hence, for individual samples,
	the status of a pathway is given as a sign, + or -, and a p-value.
	This approach defines a projection of high-dimensional gene expression
	data onto low-dimensional pathway activity scores. For each dataset
	and many pathways we find a much larger number of significant samples
	than expected by chance. Finally, we find that several sample-wise
	pathway activities are significantly associated with clinical classifications
	of the samples. This study shows that it is feasible to infer signal
	transduction pathway activity, in individual samples, from gene expression
	data. Furthermore, these pathway activities are biologically relevant
	in the three cancer data sets.},
  doi = {10.1186/1471-2105-6-163},
  pdf = {../local/Breslin2005Signal.pdf},
  file = {Breslin2005Signal.pdf:Breslin2005Signal.pdf:PDF},
  institution = {Complex Systems Division, Department of Theoretical Physics, University
	of Lund, S{\"o}lvegatan 14A, SE-223 62 Lund, Sweden. thomas@thep.lu.se},
  keywords = {csbcbook-ch4},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-6-163},
  pmid = {15987529},
  timestamp = {2011.08.06},
  url = {http://dx.doi.org/10.1186/1471-2105-6-163}
}

@article{Breslin2004Autofluorescence,
  author = {Breslin, Tara M. and Xu, Fushen and Palmer, Gregory M. and Zhu, Changfang
	and Gilchrist, Kennedy W. and Ramanujam, Nirmala},
  title = {Autofluorescence and diffuse reflectance properties of malignant
	and benign breast tissues.},
  journal = {Ann {S}urg {O}ncol},
  year = {2004},
  volume = {11},
  pages = {65--70},
  number = {1},
  month = jan,
  abstract = {B{ACKGROUND}: {F}luorescence spectroscopy is an evolving technology
	that can rapidly differentiate between benign and malignant tissues.
	{T}hese differences are thought to be due to endogenous fluorophores,
	including nicotinamide adenine dinucleotide, flavin adenine dinucleotide,
	and tryptophan, and absorbers such as beta-carotene and hemoglobin.
	{W}e hypothesized that a statistically significant difference would
	be demonstrated between benign and malignant breast tissues on the
	basis of their unique fluorescence and reflectance properties. {METHODS}:
	{O}ptical measurements were performed on 56 samples of tumor or benign
	breast tissue. {A}utofluorescence spectra were measured at excitation
	wavelengths ranging from 300 to 460 nm, and diffuse reflectance was
	measured between 300 and 600 nm. {P}rincipal component analysis to
	dimensionally reduce the spectral data and a {W}ilcoxon ranked sum
	test were used to determine which wavelengths showed statistically
	significant differences. {A} support vector machine algorithm compared
	classification results with the histological diagnosis (gold standard).
	{RESULTS}: {S}everal excitation wavelengths and diffuse reflectance
	spectra showed significant differences between tumor and benign tissues.
	{B}y using the support vector machine algorithm to incorporate relevant
	spectral differences, a sensitivity of 70.0\% and specificity of
	91.7\% were achieved. {CONCLUSIONS}: {A} statistically significant
	difference was demonstrated in the diffuse reflectance and fluorescence
	emission spectra of benign and malignant breast tissue. {T}hese differences
	could be exploited in the development of adjuncts to diagnostic and
	surgical procedures.},
  doi = {10.1245/ASO.2004.03.031},
  pdf = {../local/Breslin2004Autofluorescence.pdf},
  file = {Breslin2004Autofluorescence.pdf:Breslin2004Autofluorescence.pdf:PDF},
  keywords = {breastcancer},
  url = {http://dx.doi.org/10.1245/ASO.2004.03.031}
}

@article{Briem2005Classifying,
  author = {Briem, Hans and G{\"u}nther, Judith},
  title = {Classifying ``kinase inhibitor-likeness'' by using machine-learning
	methods.},
  journal = {ChemBioChem},
  year = {2005},
  volume = {6},
  pages = {558--566},
  number = {3},
  month = mar,
  abstract = {By using an in-house data set of small-molecule structures, encoded
	by {G}hose-{C}rippen parameters, several machine learning techniques
	were applied to distinguish between kinase inhibitors and other molecules
	with no reported activity on any protein kinase. {A}ll four approaches
	pursued--support-vector machines ({SVM}), artificial neural networks
	({ANN}), k nearest neighbor classification with {GA}-optimized feature
	selection ({GA}/k{NN}), and recursive partitioning ({RP})--proved
	capable of providing a reasonable discrimination. {N}evertheless,
	substantial differences in performance among the methods were observed.
	{F}or all techniques tested, the use of a consensus vote of the 13
	different models derived improved the quality of the predictions
	in terms of accuracy, precision, recall, and {F}1 value. {S}upport-vector
	machines, followed by the {GA}/k{NN} combination, outperformed the
	other techniques when comparing the average of individual models.
	{B}y using the respective majority votes, the prediction of neural
	networks yielded the highest {F}1 value, followed by {SVM}s.},
  doi = {10.1002/cbic.200400109},
  pdf = {../local/Briem2005Classifying.pdf},
  file = {Briem2005Classifying.pdf:local/Briem2005Classifying.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1002/cbic.200400109}
}

@article{Briggs2001Histone,
  author = {Briggs, S. D. and Bryk, M. and Strahl, B. D. and Cheung, W. L. and
	Davie, J. K. and Dent, S. Y. and Winston, F. and Allis, C. D.},
  title = {Histone H3 lysine 4 methylation is mediated by Set1 and required
	for cell growth and rDNA silencing in Saccharomyces cerevisiae.},
  journal = {Genes Dev},
  year = {2001},
  volume = {15},
  pages = {3286--3295},
  number = {24},
  month = {Dec},
  abstract = {Histone methylation is known to be associated with both transcriptionally
	active and repressive chromatin states. Recent studies have identified
	SET domain-containing proteins such as SUV39H1 and Clr4 as mediators
	of H3 lysine 9 (Lys9) methylation and heterochromatin formation.
	Interestingly, H3 Lys9 methylation is not observed from bulk histones
	isolated from asynchronous populations of Saccharomyces cerevisiae
	or Tetrahymena thermophila. In contrast, H3 lysine 4 (Lys4) methylation
	is a predominant modification in these smaller eukaryotes. To identify
	the responsible methyltransferase(s) and to gain insight into the
	function of H3 Lys4 methylation, we have developed a histone H3 Lys4
	methyl-specific antiserum. With this antiserum, we show that deletion
	of SET1, but not of other putative SET domain-containing genes, in
	S. cerevisiae, results in the complete abolishment of H3 Lys4 methylation
	in vivo. Furthermore, loss of H3 Lys4 methylation in a set1 Delta
	strain can be rescued by SET1. Analysis of histone H3 mutations at
	Lys4 revealed a slow-growth defect similar to a set1 Delta strain.
	Chromatin immunoprecipitation assays show that H3 Lys4 methylation
	is present at the rDNA locus and that Set1-mediated H3 Lys4 methylation
	is required for repression of RNA polymerase II transcription within
	rDNA. Taken together, these data suggest that Set1-mediated H3 Lys4
	methylation is required for normal cell growth and transcriptional
	silencing.},
  doi = {10.1101/gad.940201},
  institution = {Department of Biochemistry and Molecular Genetics, University of
	Virginia Health System, Charlottesville, Virginia 22908, USA.},
  keywords = {Animals; Antibody Formation; Blotting, Western; Cell Division; DNA
	Primers, chemistry; DNA, Bacterial, genetics; DNA, Ribosomal, genetics;
	DNA-Binding Proteins, metabolism; Fungal Proteins, metabolism; Gene
	Silencing; Genetic Vectors; Heterochromatin, chemistry/metabolism;
	Histone-Lysine N-Methyltransferase; Histones, metabolism; Lysine,
	metabolism; Methylation; Methyltransferases, genetics/metabolism;
	Mutation; Nucleosomes, chemistry/metabolism; Polymerase Chain Reaction;
	Precipitin Tests; Protein Methyltransferases; RNA Polymerase III,
	metabolism; Rabbits; Saccharomyces cerevisiae Proteins; Saccharomyces
	cerevisiae, genetics; Transcription Factors, metabolism},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {11751634},
  timestamp = {2010.11.23},
  url = {http://dx.doi.org/10.1101/gad.940201}
}

@article{Briggs2002Gene,
  author = {Briggs, Scott D and Xiao, Tiaojiang and Sun, Zu-Wen and Caldwell,
	Jennifer A and Shabanowitz, Jeffrey and Hunt, Donald F and Allis,
	C. David and Strahl, Brian D},
  title = {Gene silencing: trans-histone regulatory pathway in chromatin.},
  journal = {Nature},
  year = {2002},
  volume = {418},
  pages = {498},
  number = {6897},
  month = {Aug},
  abstract = {The fundamental unit of eukaryotic chromatin, the nucleosome, consists
	of genomic DNA wrapped around the conserved histone proteins H3,
	H2B, H2A and H4, all of which are variously modified at their amino-
	and carboxy-terminal tails to influence the dynamics of chromatin
	structure and function -- for example, conjugation of histone H2B
	with ubiquitin controls the outcome of methylation at a specific
	lysine residue (Lys 4) on histone H3, which regulates gene silencing
	in the yeast Saccharomyces cerevisiae. Here we show that ubiquitination
	of H2B is also necessary for the methylation of Lys 79 in H3, the
	only modification known to occur away from the histone tails, but
	that not all methylated lysines in H3 are regulated by this 'trans-histone'
	pathway because the methylation of Lys 36 in H3 is unaffected. Given
	that gene silencing is regulated by the methylation of Lys 4 and
	Lys 79 in histone H3, we suggest that H2B ubiquitination acts as
	a master switch that controls the site-selective histone methylation
	patterns responsible for this silencing.},
  doi = {10.1038/nature00970},
  institution = {Department of Biochemistry and Molecular Genetics, University of
	Virginia Health System, Charlottesville, Virginia 22908, USA.},
  keywords = {Chromatin, chemistry/metabolism; Gene Expression Regulation, Fungal;
	Gene Silencing; Histone-Lysine N-Methyltransferase; Histones, chemistry/metabolism;
	Ligases, metabolism; Methylation; Models, Biological; Nuclear Proteins,
	metabolism; Saccharomyces cerevisiae Proteins; Saccharomyces cerevisiae,
	genetics/metabolism; Ubiquitin, metabolism; Ubiquitin-Conjugating
	Enzymes},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature00970},
  pmid = {12152067},
  timestamp = {2010.11.23},
  url = {http://dx.doi.org/10.1038/nature00970}
}

@article{Brodersen2009Revisiting,
  author = {Peter Brodersen and Olivier Voinnet},
  title = {Revisiting the principles of microRNA target recognition and mode
	of action.},
  journal = {Nat Rev Mol Cell Biol},
  year = {2009},
  volume = {10},
  pages = {141--148},
  number = {2},
  month = {Feb},
  abstract = {MicroRNAs (miRNAs) are fundamental regulatory elements of animal and
	plant gene expression. Although rapid progress in our understanding
	of miRNA biogenesis has been achieved by experimentation, computational
	approaches have also been influential in determining the general
	principles that are thought to govern miRNA target recognition and
	mode of action. We discuss how these principles are being progressively
	challenged by genetic and biochemical studies. In addition, we discuss
	the role of target-site-specific endonucleolytic cleavage, which
	is the hallmark of experimental RNA interference and a mechanism
	that is used by plant miRNAs and a few animal miRNAs. Generally thought
	to be merely a degradation mechanism, we propose that this might
	also be a biogenesis mechanism for biologically functional, non-coding
	RNA fragments.},
  doi = {10.1038/nrm2619},
  pdf = {../local/Brodersen2009Revisiting.pdf},
  file = {Brodersen2009Revisiting.pdf:Brodersen2009Revisiting.pdf:PDF},
  institution = {Institut de Biologie Mol{\'e}culaire des Plantes, Centre National de
	la Recherche Scientifique, 12 rue du G{\'e}n{\'e}ral Zimmer, 67084 Strasbourg
	Cedex, France. peter.brodersen@ibmp-ulp.u-strasbg.fr},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nrm2619},
  pmid = {19145236},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1038/nrm2619}
}

@book{Brodsky1993Nonparametric,
  author = {Brodsky, B. and Darkhovsky, B.},
  title = {Nonparametric Methods in Change-Point Problems},
  publisher = {Kluwer Academic Publishers},
  year = {1993},
  owner = {jp},
  timestamp = {2010.06.02}
}

@article{Brown1998Chemoinformatics,
  author = {F. K. Brown},
  title = {Chemoinformatics: {W}hat is it and {H}ow does it {I}mpact {D}rug
	{D}iscovery},
  journal = {Annual Reports in Medicinal Chemistry},
  year = {1998},
  volume = {33},
  pages = {375--384},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.16}
}

@inproceedings{Brown1993Using,
  author = {Brown, M. P. and Hughey, R. and Krogh, A. and Mian, I. S. and Sj{\"o}lander,
	K. and Haussler, D.},
  title = {Using {D}irichlet mixture priors to derive hidden {M}arkov models
	for protein families},
  booktitle = {Proc. {F}irst {I}nternational {C}onference on {I}ntelligent {S}ystems
	for {M}olecular {B}iology ({ISMB} 1993)},
  year = {1993},
  owner = {vert}
}

@article{Brown2000Knowledge-based,
  author = {Brown, M. P. and Grundy, W. N. and Lin, D. and Cristianini, N. and
	Sugnet, C. W. and Furey, T. S. and Ares, M. and Haussler, D.},
  title = {Knowledge-based analysis of microarray gene expression data by using
	support vector machines.},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2000},
  volume = {97},
  pages = {262--267},
  number = {1},
  month = {Jan},
  abstract = {We introduce a method of functionally classifying genes by using gene
	expression data from {DNA} microarray hybridization experiments.
	{T}he method is based on the theory of support vector machines ({SVM}s).
	{SVM}s are considered a supervised computer learning method because
	they exploit prior knowledge of gene function to identify unknown
	genes of similar function from expression data. {SVM}s avoid several
	problems associated with unsupervised clustering methods, such as
	hierarchical clustering and self-organizing maps. {SVM}s have many
	mathematical features that make them attractive for gene expression
	analysis, including their flexibility in choosing a similarity function,
	sparseness of solution when dealing with large data sets, the ability
	to handle large feature spaces, and the ability to identify outliers.
	{W}e test several {SVM}s that use different similarity metrics, as
	well as some other supervised learning methods, and find that the
	{SVM}s best identify sets of genes with a common function using expression
	data. {F}inally, we use {SVM}s to predict functional roles for uncharacterized
	yeast {ORF}s based on their expression data.},
  pdf = {../local/Brown2000Knowledge-based.pdf},
  file = {Brown2000Knowledge-based.pdf:local/Brown2000Knowledge-based.pdf:PDF},
  keywords = {biosvm microarray},
  url = {http://www.pnas.org/cgi/content/abstract/97/1/262}
}

@article{Brown2000Exploring,
  author = {P. O. Brown and D. Botstein},
  title = {Exploring the new world of the genome with {DNA} microarrays},
  journal = {Nat. {G}enet.},
  year = {1999},
  volume = {21},
  pages = {33--37},
  pdf = {../local/brow00b.pdf},
  file = {brow00b.pdf:local/brow00b.pdf:PDF},
  subject = {microarray},
  url = {http://www.nature.com/ng/journal/v21/n1s/abs/ng0199supp_33.html}
}

@article{Brown1980Adaptive,
  author = {Brown, P. J. and Zidek, J. V.},
  title = {Adaptive Multivariate Ridge Regression},
  journal = {Ann. Statist.},
  year = {1980},
  volume = {8},
  number = {1},
  pages = {64--74}
}

@article{Brown1997information,
  author = {Brown, R. D. and Martin, Y. C.},
  title = {The information content of 2{D} and 3{D} structural descriptors relevant
	to ligand-receptor binding},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {1997},
  volume = {37},
  pages = {1--9},
  keywords = {chemoinformatics}
}

@article{Brown1996Use,
  author = {Robert D. Brown and Yvonne C. Martin},
  title = {Use of {S}tructure-{A}ctivity {D}ata {T}o {C}ompare {S}tructure-{B}ased
	{C}lustering {M}ethods and {D}escriptors for {U}se in {C}ompound
	{S}election},
  journal = {J Chem Inf Comput Sci},
  year = {1996},
  volume = {36},
  pages = {572--584},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.22}
}

@article{Brunet2004Metagenes,
  author = {Brunet, J. P. and Tamayo, P. and Golub, T. R. and Mesirov, J. P.},
  title = {Metagenes and molecular pattern discovery using matrix factorization},
  journal = {Proc {N}atl {A}cad {S}ci {U} {S} {A}},
  year = {2004},
  volume = {101},
  pages = {4164--4169},
  number = {12},
  abstract = {We describe here the use of nonnegative matrix factorization ({NMF}),
	an algorithm based on decomposition by parts that can reduce the
	dimension of expression data from thousands of genes to a handful
	of metagenes. {C}oupled with a model selection mechanism, adapted
	to work for any stochastic clustering algorithm, {NMF} is an efficient
	method for identification of distinct molecular patterns and provides
	a powerful method for class discovery. {W}e demonstrate the ability
	of {NMF} to recover meaningful biological information from cancer-related
	microarray data. {NMF} appears to have advantages over other methods
	such as hierarchical clustering or self-organizing maps. {W}e found
	it less sensitive to a priori selection of genes or initial conditions
	and able to detect alternative or context-dependent patterns of gene
	expression in complex biological systems. {T}his ability, similar
	to semantic polysemy in text, provides a general method for robust
	molecular pattern discovery.},
  doi = {10.1073/pnas.0308531101},
  pdf = {../local/Brunet2004Metagenes.pdf},
  file = {Brunet2004Metagenes.pdf:Brunet2004Metagenes.pdf:PDF},
  url = {http://dx.doi.org/10.1073/pnas.0308531101}
}

@article{Brusic2002Prediction,
  author = {Brusic, V. and Petrovsky, N. and Zhang, G. and Bajic, V. B.},
  title = {{P}rediction of promiscuous peptides that bind {HLA} class {I} molecules.},
  journal = {Immunol. Cell Biol.},
  year = {2002},
  volume = {80},
  pages = {280--285},
  number = {3},
  month = {Jun},
  abstract = {Promiscuous T-cell epitopes make ideal targets for vaccine development.
	We report here a computational system, MULTIPRED, for the prediction
	of peptide binding to the HLA-A2 supertype. It combines a novel representation
	of peptide/MHC interactions with a hidden Markov model as the prediction
	algorithm. MULTIPRED is both sensitive and specific, and demonstrates
	high accuracy of peptide-binding predictions for HLA-A*0201, *0204,
	and *0205 alleles, good accuracy for *0206 allele, and marginal accuracy
	for *0203 allele. MULTIPRED replaces earlier requirements for individual
	prediction models for each HLA allelic variant and simplifies computational
	aspects of peptide-binding prediction. Preliminary testing indicates
	that MULTIPRED can predict peptide binding to HLA-A2 supertype molecules
	with high accuracy, including those allelic variants for which no
	experimental binding data are currently available.},
  keywords = {Algorithms, Amino Acid Motifs, Amino Acid Sequence, Antigen-Antibody
	Complex, Automated, Binding Sites, Computational Biology, Drug Delivery
	Systems, Drug Design, Epitopes, Forecasting, Genes, HLA Antigens,
	HLA-A Antigens, HLA-A2 Antigen, HLA-DR Antigens, Humans, Internet,
	MHC Class I, Markov Chains, Molecular Sequence Data, Neural Networks
	(Computer), Pattern Recognition, Peptide Fragments, Peptides, Protein,
	Protein Binding, Protein Interaction Mapping, Sensitivity and Specificity,
	Sequence Analysis, Software, T-Lymphocyte, User-Computer Interface,
	Viral Vaccines, 12067415},
  pii = {1088},
  pmid = {12067415},
  timestamp = {2007.01.25}
}

@article{Bryk2002Evidence,
  author = {Bryk, Mary and Briggs, Scott D and Strahl, Brian D and Curcio,
	M. Joan and Allis, C. David and Winston, Fred},
  title = {Evidence that Set1, a factor required for methylation of histone
	H3, regulates rDNA silencing in S. cerevisiae by a Sir2-independent
	mechanism.},
  journal = {Curr Biol},
  year = {2002},
  volume = {12},
  pages = {165--170},
  number = {2},
  month = {Jan},
  abstract = {Several types of histone modifications have been shown to control
	transcription. Recent evidence suggests that specific combinations
	of these modifications determine particular transcription patterns.
	The histone modifications most recently shown to play critical roles
	in transcription are arginine-specific and lysine-specific methylation.
	Lysine-specific histone methyltransferases all contain a SET domain,
	a conserved 130 amino acid motif originally identified in polycomb-
	and trithorax-group proteins from Drosophila. Members of the SU(VAR)3-9
	family of SET-domain proteins methylate K9 of histone H3. Methylation
	of H3 has also been shown to occur at K4. Several studies have suggested
	a correlation between K4-methylated H3 and active transcription.
	In this paper, we provide evidence that K4-methylated H3 is required
	in a negative role, rDNA silencing in Saccharomyces cerevisiae. In
	a screen for rDNA silencing mutants, we identified a mutation in
	SET1, previously shown to regulate silencing at telomeres and HML.
	Recent work has shown that Set1 is a member of a complex and is required
	for methylation of K4 of H3 at several genomic locations. In addition,
	we demonstrate that a K4R change in H3, which prevents K4 methylation,
	impairs rDNA silencing, indicating that Set1 regulates rDNA silencing,
	directly or indirectly, via H3 methylation. Furthermore, we present
	several lines of evidence that the role of Set1 in rDNA silencing
	is distinct from that of the histone deacetylase Sir2. Together,
	these results suggest that Set1-dependent H3 methylation is required
	for rDNA silencing in a Sir2-independent fashion.},
  institution = {Department of Genetics, Harvard Medical School, 200 Longwood Avenue,
	Boston, MA 02115, USA.},
  keywords = {Acetylation; DNA Methylation; DNA, Ribosomal, genetics; DNA-Binding
	Proteins, metabolism; Drosophila Proteins; Fungal Proteins, metabolism;
	Gene Silencing; Histone Deacetylases, metabolism; Histone-Lysine
	N-Methyltransferase; Histones, metabolism; Mutation; Saccharomyces
	cerevisiae Proteins; Saccharomyces cerevisiae, metabolism; Silent
	Information Regulator Proteins, Saccharomyces cerevisiae; Sirtuin
	2; Sirtuins; Trans-Activators, metabolism; Transcription Factors,
	metabolism},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0960982201006522},
  pmid = {11818070},
  timestamp = {2010.11.23}
}

@article{Buchala2005role,
  author = {Samarasena Buchala and Neil Davey and Ray J Frank and Martin Loomes
	and Tim M Gale},
  title = {The role of global and feature based information in gender classification
	of faces: a comparison of human performance and computational models.},
  journal = {Int {J} {N}eural {S}yst},
  year = {2005},
  volume = {15},
  pages = {121--128},
  number = {1-2},
  abstract = {Most computational models for gender classification use global information
	(the full face image) giving equal weight to the whole face area
	irrespective of the importance of the internal features. {H}ere,
	we use a global and feature based representation of face images that
	includes both global and featural information. {W}e use dimensionality
	reduction techniques and a support vector machine classifier and
	show that this method performs better than either global or feature
	based representations alone. {W}e also present results of human subjects
	performance on gender classification task and evaluate how the different
	dimensionality reduction techniques compare with human subjects performance.
	{T}he results support the psychological plausibility of the global
	and feature based representation.},
  pii = {S0129065705000074}
}

@article{Buck2004ChIP,
  author = {Buck, Michael J and Lieb, Jason D},
  title = {ChIP-chip: considerations for the design, analysis, and application
	of genome-wide chromatin immunoprecipitation experiments.},
  journal = {Genomics},
  year = {2004},
  volume = {83},
  pages = {349--360},
  number = {3},
  month = {Mar},
  abstract = {Chromatin immunoprecipitation (ChIP) is a well-established procedure
	to investigate interactions between proteins and DNA. Coupled with
	whole-genome DNA microarrays, ChIPS allow one to determine the entire
	spectrum of in vivo DNA binding sites for any given protein. The
	design and analysis of ChIP-microarray (also called ChIP-chip) experiments
	differ significantly from the conventions used for locus ChIP approaches
	and ChIP-chip experiments, and these differences require new methods
	of analysis. In this light, we review the design of DNA microarrays,
	the selection of controls, the level of repetition required, and
	other critical parameters for success in the design and analysis
	of ChIP-chip experiments, especially those conducted in the context
	of mammalian or other relatively large genomes.},
  institution = {Department of Biology and Carolina Center for Genome Sciences, University
	of North Carolina at Chapel Hill, Chapel Hill, NC 27599-3280, USA.},
  keywords = {Animals; Chromatin, metabolism; Genome; Humans; Immunoprecipitation,
	methods; Models, Theoretical; Oligonucleotide Array Sequence Analysis,
	methods; Research Design},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {14986705},
  timestamp = {2010.08.05}
}

@article{Bui2006Structural,
  author = {H.-H. Bui and A. J. Schiewe and H. von Grafenstein and I. S.
	Haworth},
  title = {{S}tructural prediction of peptides binding to {MHC} class {I} molecules.},
  journal = {Proteins},
  year = {2006},
  volume = {63},
  pages = {43--52},
  number = {1},
  month = {Apr},
  abstract = {Peptide binding to class I major histocompatibility complex (MHCI)
	molecules is a key step in the immune response and the structural
	details of this interaction are of importance in the design of peptide
	vaccines. Algorithms based on primary sequence have had success in
	predicting potential antigenic peptides for MHCI, but such algorithms
	have limited accuracy and provide no structural information. Here,
	we present an algorithm, PePSSI (peptide-MHC prediction of structure
	through solvated interfaces), for the prediction of peptide structure
	when bound to the MHCI molecule, HLA-A2. The algorithm combines sampling
	of peptide backbone conformations and flexible movement of MHC side
	chains and is unique among other prediction algorithms in its incorporation
	of explicit water molecules at the peptide-MHC interface. In an initial
	test of the algorithm, PePSSI was used to predict the conformation
	of eight peptides bound to HLA-A2, for which X-ray data are available.
	Comparison of the predicted and X-ray conformations of these peptides
	gave RMSD values between 1.301 and 2.475 A. Binding conformations
	of 266 peptides with known binding affinities for HLA-A2 were then
	predicted using PePSSI. Structural analyses of these peptide-HLA-A2
	conformations showed that peptide binding affinity is positively
	correlated with the number of peptide-MHC contacts and negatively
	correlated with the number of interfacial water molecules. These
	results are consistent with the relatively hydrophobic binding nature
	of the HLA-A2 peptide binding interface. In summary, PePSSI is capable
	of rapid and accurate prediction of peptide-MHC binding conformations,
	which may in turn allow estimation of MHCI-peptide binding affinity.},
  doi = {10.1002/prot.20870},
  keywords = {Algorithms, Amino Acid Sequence, Antigens, Artificial Intelligence,
	Automated, Binding Sites, Chemical, Computational Biology, Computer
	Simulation, Crystallog, Crystallography, Electrostatics, Genes, Genetic,
	HLA Antigens, Histocompatibility Antigens Class I, Humans, Hydrogen
	Bonding, Ligands, MHC Class I, Major Histocompatibility Complex,
	Models, Molecular, Molecular Conformation, Molecular Sequence Data,
	Pattern Recognition, Peptides, Protein, Protein Binding, Protein
	Conformation, Proteomics, Quantitative Structure-Activity Relationship,
	Sequence Alignment, Sequence Analysis, Software, Structural Homology,
	Structure-Activity Relationship, Thermodynamics, Water, X-Ray, X-Rays,
	raphy, 16447245},
  pmid = {16447245},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1002/prot.20870}
}

@article{Bui2005Automated,
  author = {Huynh-Hoa Bui and John Sidney and Bjoern Peters and Muthuraman Sathiamurthy
	and Asabe Sinichi and Kelly-Anne Purton and Bianca R Moth{\'e} and
	Francis V Chisari and David I Watkins and Alessandro Sette},
  title = {Automated generation and evaluation of specific MHC binding predictive
	tools: ARB matrix applications.},
  journal = {Immunogenetics},
  year = {2005},
  volume = {57},
  pages = {304--314},
  number = {5},
  month = {Jun},
  abstract = {Prediction of which peptides can bind major histocompatibility complex
	(MHC) molecules is commonly used to assist in the identification
	of T cell epitopes. However, because of the large numbers of different
	MHC molecules of interest, each associated with different predictive
	tools, tool generation and evaluation can be a very resource intensive
	task. A methodology commonly used to predict MHC binding affinity
	is the matrix or linear coefficients method. Herein, we described
	Average Relative Binding (ARB) matrix methods that directly predict
	IC(50) values allowing combination of searches involving different
	peptide sizes and alleles into a single global prediction. A computer
	program was developed to automate the generation and evaluation of
	ARB predictive tools. Using an in-house MHC binding database, we
	generated a total of 85 and 13 MHC class I and class II matrices,
	respectively. Results from the automated evaluation of tool efficiency
	are presented. We anticipate that this automation framework will
	be generally applicable to the generation and evaluation of large
	numbers of MHC predictive methods and tools, and will be of value
	to centralize and rationalize the process of evaluation of MHC predictions.
	MHC binding predictions based on ARB matrices were made available
	at http://epitope.liai.org:8080/matrix web server.},
  doi = {10.1007/s00251-005-0798-y},
  keywords = {Animals; Binding Sites; Computer Simulation; Databases, Protein; Epitopes;
	Histocompatibility Antigens; Humans; Major Histocompatibility Complex;
	Models, Biological; Protein Binding},
  owner = {laurent},
  pmid = {15868141},
  timestamp = {2007.07.12},
  url = {http://dx.doi.org/10.1007/s00251-005-0798-y}
}

@incollection{Buijsman2005Structural,
  author = {Rogier Buijsman},
  title = {Structural Aspects of Kinases and Their Inhibitors},
  booktitle = {Chemogenomics in Drug Discovery},
  publisher = {Wiley-VCH},
  year = {2005},
  chapter = {7},
  pages = {191--219}
}

@article{Bullard2010Polygenic,
  author = {J. H. Bullard and Y. Mostovoy and S. Dudoit and R. B. Brem},
  title = {Polygenic and directional regulatory evolution across pathways in
	{{\it Saccharomyces}}},
  journal = {PNAS},
  year = {2010},
  volume = {107},
  pages = {5058--5063},
  number = {11},
  url = {http://www.pnas.org/content/107/11/5058.abstract}
}

@article{Bullard2010Evaluation,
  author = {Bullard, J. H. and Purdom, E. and Hansen, K. D. and Dudoit, S.},
  title = {Evaluation of statistical methods for normalization and differential
	expression in mRNA-Seq experiments.},
  journal = {BMC Bioinformatics},
  year = {2010},
  volume = {11},
  pages = {94},
  abstract = {High-throughput sequencing technologies, such as the Illumina Genome
	Analyzer, are powerful new tools for investigating a wide range of
	biological and medical questions. Statistical and computational methods
	are key for drawing meaningful and accurate conclusions from the
	massive and complex datasets generated by the sequencers. We provide
	a detailed evaluation of statistical methods for normalization and
	differential expression (DE) analysis of Illumina transcriptome sequencing
	(mRNA-Seq) data. We compare statistical methods for detecting genes
	that are significantly DE between two types of biological samples
	and find that there are substantial differences in how the test statistics
	handle low-count genes. We evaluate how DE results are affected by
	features of the sequencing platform, such as, varying gene lengths,
	base-calling calibration method (with and without phi X control lane),
	and flow-cell/library preparation effects. We investigate the impact
	of the read count normalization method on DE results and show that
	the standard approach of scaling by total lane counts (e.g., RPKM)
	can bias estimates of DE. We propose more general quantile-based
	normalization procedures and demonstrate an improvement in DE detection. Our
	results have significant practical and methodological implications
	for the design and analysis of mRNA-Seq experiments. They highlight
	the importance of appropriate statistical methods for normalization
	and DE inference, to account for features of the sequencing platform
	that could impact the accuracy of results. They also reveal the need
	for further research in the development of statistical and computational
	methods for mRNA-Seq.},
  doi = {10.1186/1471-2105-11-94},
  institution = {Division of Biostatistics, University of California, Berkeley, Berkeley,
	CA, USA. bullard@berkeley.edu},
  keywords = {Computational Biology; Databases, Genetic; RNA, Messenger; Sequence
	Analysis, RNA},
  owner = {laurent},
  pii = {1471-2105-11-94},
  pmid = {20167110},
  timestamp = {2012.04.11},
  url = {http://dx.doi.org/10.1186/1471-2105-11-94}
}

@article{Bultinck2003Quantum,
  author = {Bultinck, P. and Kuppens, T. and Giron{\`e}s, X. and Carb{\'o}-Dorca, R.},
  title = {{Q}uantum similarity superposition algorithm ({QSSA}): a consistent
	scheme for molecular alignment and molecular similarity based on
	quantum chemistry.},
  journal = {J Chem Inf Comput Sci},
  year = {2003},
  volume = {43},
  pages = {1143--1150},
  number = {4},
  abstract = {The use of the molecular quantum similarity overlap measure for molecular
	alignment is investigated. A new algorithm is presented, the quantum
	similarity superposition algorithm (QSSA), expressing the relative
	positions of two molecules in terms of mutual translation in three
	Cartesian directions and three Euler angles. The quantum similarity
	overlap is then used to optimize the mutual positions of the molecules.
	A comparison is made with TGSA, a topogeometrical approach, and the
	influence of differences on molecular clustering is discussed.},
  doi = {10.1021/ci0340153},
  keywords = {Aldosterone, Algorithms, Chemical, Comparative Study, Estrone, Isomerism,
	Models, Molecular, Molecular Structure, Non-U.S. Gov't, Quantitative
	Structure-Activity Relationship, Quantum Theory, Research Support,
	12870905},
  owner = {mahe},
  pmid = {12870905},
  timestamp = {2006.08.22},
  url = {http://dx.doi.org/10.1021/ci0340153}
}
@article{Bulyk2006DNA,
  author = {Bulyk, Martha L},
  title = {{DNA} microarray technologies for measuring protein-{DNA} interactions.},
  journal = {Curr Opin Biotechnol},
  year = {2006},
  volume = {17},
  pages = {422--430},
  number = {4},
  month = {Aug},
  abstract = {DNA-binding proteins have key roles in many cellular processes, including
	transcriptional regulation and replication. Microarray-based technologies
	permit the high-throughput identification of binding sites and enable
	the functional roles of these binding proteins to be elucidated.
	In particular, microarray readout either of chromatin immunoprecipitated
	DNA-bound proteins (ChIP-chip) or of DNA adenine methyltransferase
	fusion proteins (DamID) enables the identification of in vivo genomic
	target sites of proteins. A complementary approach to analyse the
	in vitro binding of proteins directly to double-stranded DNA microarrays
	(protein binding microarrays; PBMs), permits rapid characterization
	of their DNA binding site sequence specificities. Recent advances
	in DNA microarray synthesis technologies have facilitated the definition
	of DNA-binding sites at much higher resolution and coverage, and
	advances in these and emerging technologies will further increase
	the efficiencies of these exciting new approaches.},
  doi = {10.1016/j.copbio.2006.06.015},
  institution = {Division of Genetics, Department of Medicine, Harvard/MIT Division
	of Health Sciences and Technology (HST), Brigham and Women's Hospital
	and Harvard Medical School, Boston, MA 02115, USA. mlbulyk@receptor.med.harvard.edu},
  keywords = {Animals; Chromatin Immunoprecipitation, methods; Cross-Linking Reagents,
	chemistry; DNA, analysis/chemistry/metabolism; DNA-Binding Proteins,
	analysis/genetics/metabolism; Humans; Oligonucleotide Array Sequence
	Analysis, methods; Protein Binding},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {S0958-1669(06)00099-1},
  pmid = {16839757},
  timestamp = {2010.08.05},
  url = {http://dx.doi.org/10.1016/j.copbio.2006.06.015}
}

@inproceedings{Bunea2006Aggregation,
  author = {Bunea, F. and Tsybakov, A. and Wegkamp, M.},
  title = {Aggregation and sparsity via {$l_1$} penalized least squares},
  booktitle = {Proceedings of the 19th Annual Conference on Learning Theory, COLT
	2006},
  year = {2006},
  editor = {Lugosi, G. and Simon, H. U.},
  volume = {4005},
  series = {LNAI},
  pages = {379--391},
  address = {Berlin Heidelberg},
  publisher = {Springer-Verlag},
  pdf = {../local/Bunea2006Aggregation.pdf},
  file = {Bunea2006Aggregation.pdf:Bunea2006Aggregation.pdf:PDF},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2009.05.07}
}

@article{Bunea2007Sparsity,
  author = {Bunea, F. and Tsybakov, A. and Wegkamp, M.},
  title = {Sparsity oracle inequalities for the {Lasso}},
  journal = {Electron. J. Statist.},
  year = {2007},
  volume = {1},
  pages = {169--194},
  doi = {10.1214/07-EJS008},
  pdf = {../local/Bunea2007Sparsity.pdf},
  file = {Bunea2007Sparsity.pdf:Bunea2007Sparsity.pdf:PDF},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2009.05.02},
  url = {http://dx.doi.org/10.1214/07-EJS008}
}

@article{Bunescu2005Comparative,
  author = {Bunescu, R. and Ge, R. and Kate, R. J. and Marcotte, E. M. and Mooney,
	R. J. and Ramani, A. K. and Wong, Y. W.},
  title = {Comparative experiments on learning information extractors for proteins
	and their interactions},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2005},
  volume = {33},
  pages = {139--155},
  number = {2},
  month = {Feb},
  abstract = {O{BJECTIVE}: {A}utomatically extracting information from biomedical
	text holds the promise of easily consolidating large amounts of biological
	knowledge in computer-accessible form. {T}his strategy is particularly
	attractive for extracting data relevant to genes of the human genome
	from the 11 million abstracts in {M}edline. {H}owever, extraction
	efforts have been frustrated by the lack of conventions for describing
	human genes and proteins. {W}e have developed and evaluated a variety
	of learned information extraction systems for identifying human protein
	names in {M}edline abstracts and subsequently extracting information
	on interactions between the proteins. {METHODS} {AND} {MATERIAL}:
	{W}e used a variety of machine learning methods to automatically
	develop information extraction systems for extracting information
	on gene/protein name, function and interactions from {M}edline abstracts.
	{W}e present cross-validated results on identifying human proteins
	and their interactions by training and testing on a set of approximately
	1000 manually-annotated {M}edline abstracts that discuss human genes/proteins.
	{RESULTS}: {W}e demonstrate that machine learning approaches using
	support vector machines and maximum entropy are able to identify
	human proteins with higher accuracy than several previous approaches.
	{W}e also demonstrate that various rule induction methods are able
	to identify protein interactions with higher precision than manually-developed
	rules. {CONCLUSION}: {O}ur results show that it is promising to use
	machine learning to automatically build systems for extracting information
	from biomedical text. {T}he results also give a broad picture of
	the relative strengths of a wide variety of methods when tested on
	a reasonably large human-annotated corpus.},
  doi = {10.1016/j.artmed.2004.07.016},
  pdf = {../local/Bunescu2005Comparative.pdf},
  file = {Bunescu2005Comparative.pdf:local/Bunescu2005Comparative.pdf:PDF},
  keywords = {biosvm},
  pii = {S0933-3657(04)00131-9},
  url = {http://dx.doi.org/10.1016/j.artmed.2004.07.016}
}

@article{Bungaro2009Integration,
  author = {Bungaro, Silvia and Dell'Orto, Marta Campo and Zangrando, Andrea
	and Basso, Dario and Gorletta, Tatiana and {Lo Nigro}, Luca and Leszl,
	Anna and Young, Bryan D. and Basso, Giuseppe and Bicciato, Silvio
	and Biondi, Andrea and {te Kronnie}, Gertruy and Cazzaniga, Giovanni},
  title = {Integration of genomic and gene expression data of childhood {ALL}
	without known aberrations identifies subgroups with specific genetic
	hallmarks},
  journal = {Genes Chromosomes Cancer},
  year = {2009},
  volume = {48},
  pages = {22--38},
  number = {1},
  month = {Jan},
  abstract = {Pediatric acute lymphoblastic leukemia (ALL) comprises genetically
	distinct subtypes. However, 25\% of cases still lack defined genetic
	hallmarks. To identify genomic aberrancies in childhood ALL patients
	nonclassifiable by conventional methods, we performed a single nucleotide
	polymorphisms (SNP) array-based genomic analysis of leukemic cells
	from 29 cases. The vast majority of cases analyzed (19/24, 79\%)
	showed genomic abnormalities; at least one of them affected either
	genes involved in cell cycle regulation or in B-cell development.
	The most relevant abnormalities were CDKN2A/9p21 deletions (7/24,
	29\%), ETV6 (TEL)/12p13 deletions (3/24, 12\%), and intrachromosomal
	amplifications of chromosome 21 (iAMP21) (3/24, 12\%). To identify
	variation in expression of genes directly or indirectly affected
	by recurrent genomic alterations, we integrated genomic and gene
	expression data generated by microarray analyses of the same samples.
	SMAD1 emerged as a down-regulated gene in CDKN2A homozygous deleted
	cases compared with nondeleted. The JAG1 gene, encoding the Jagged
	1 ligand of the Notch receptor, was among a list of differentially
	expressed (up-regulated) genes in ETV6-deleted cases. Our findings
	demonstrate that integration of genomic analysis and gene expression
	profiling can identify genetic lesions undetected by routine methods
	and potential novel pathways involved in B-progenitor ALL pathogenesis.},
  doi = {10.1002/gcc.20616},
  institution = {Centro Ricerca Tettamanti, Clinica Pediatrica Universit{\`a} Milano-Bicocca,
	Ospedale San Gerardo, Monza, Italy.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {18803328},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1002/gcc.20616}
}

@article{Bunke1983Inexact,
  author = {Bunke, H.},
  title = {Inexact graph matching for structural pattern recognition},
  journal = {Pattern Recogn. Lett.},
  year = {1983},
  volume = {1},
  pages = {245--253},
  number = {4},
  month = {May},
  abstract = {This paper is concerned with the inexact matching of attributed, relational
	graphs for structural pattern recognition. The matching procedure
	is based on a state space search utilizing heuristic information.
	Some experimental results are reported.},
  doi = {10.1016/0167-8655(83)90033-8},
  issn = {0167-8655},
  keywords = {graph, matching},
  owner = {misha},
  posted-at = {2009-07-13 05:03:03},
  priority = {0},
  url = {http://dx.doi.org/10.1016/0167-8655(83)90033-8}
}

@article{Bunke1998Graph,
  author = {Bunke, H. and Shearer, K.},
  title = {A {G}raph {D}istance {M}etric based on the {M}aximal {C}ommon {S}ubgraph},
  journal = {Pattern Recogn. Lett.},
  year = {1998},
  volume = {19},
  pages = {255--259},
  doi = {10.1016/S0167-8655(97)00179-7},
  owner = {mahe},
  timestamp = {2006.08.22}
}

@article{Burbidge2001Drug,
  author   = {Burbidge, R. and Trotter, M. and Buxton, B. and Holden, S.},
  title    = {Drug design by machine learning: support vector machines for
	pharmaceutical data analysis},
  journal  = {Comput. {C}hem.},
  year     = {2001},
  month    = {December},
  volume   = {26},
  number   = {1},
  pages    = {4--15},
  url      = {http://stats.ma.ic.ac.uk/~rdb/pubs/candc-aisb00-rbmt-final.pdf},
  pdf      = {../local/burb01.pdf},
  file     = {burb01.pdf:local/burb01.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  subject  = {qsar}
}

@article{Burckin2005Exploring,
  author = {Burckin, T. and Nagel, R. and Mandel-Gutfreund, Y. and Shiue, L.
	and Clark, T. A. and Chong, J.-L. and Chang, T.-H. and Squazzo, S.
	and Hartzog, G. and Ares, M.},
  title = {Exploring functional relationships between components of the gene
	expression machinery},
  journal = {Nat. {S}truct. {M}ol. {B}iol.},
  year = {2005},
  volume = {12},
  pages = {175--182},
  number = {2},
  month = {Feb},
  abstract = {Eukaryotic gene expression requires the coordinated activity of many
	macromolecular machines including transcription factors and {RNA}
	polymerase, the spliceosome, m{RNA} export factors, the nuclear pore,
	the ribosome and decay machineries. {Y}east carrying mutations in
	genes encoding components of these machineries were examined using
	microarrays to measure changes in both pre-m{RNA} and m{RNA} levels.
	{W}e used these measurements as a quantitative phenotype to ask how
	steps in the gene expression pathway are functionally connected.
	{A} multiclass support vector machine was trained to recognize the
	gene expression phenotypes caused by these mutations. {I}n several
	cases, unexpected phenotype assignments by the computer revealed
	functional roles for specific factors at multiple steps in the gene
	expression pathway. {T}he ability to resolve gene expression pathway
	phenotypes provides insight into how the major machineries of gene
	expression communicate with each other.},
  doi = {10.1038/nsmb891},
  pdf = {../local/Burckin2005Exploring.pdf},
  file = {Burckin2005Exploring.pdf:local/Burckin2005Exploring.pdf:PDF},
  keywords = {biosvm microarray},
  pii = {nsmb891},
  url = {http://dx.doi.org/10.1038/nsmb891}
}

@article{Burges1998Tutorial,
  author = {Burges, C. J. C.},
  title = {A {T}utorial on {S}upport {V}ector {M}achines for {P}attern {R}ecognition},
  journal = {Data {M}in. {K}nowl. {D}iscov.},
  year = {1998},
  volume = {2},
  pages = {121--167},
  number = {2},
  pdf = {../local/burg98.pdf},
  file = {burg98.pdf:local/burg98.pdf:PDF},
  subject = {kernel},
  url = {http://www.kernel-machines.org/papers/Burges98.ps.gz}
}

@article{Buriol04New,
  author = {Buriol, L. and Fran{\c{c}}a, P. M. and Moscato, P.},
  title = {A New Memetic Algorithm for the Asymmetric Traveling Salesman Problem},
  journal = {Journal of Heuristics},
  year = {2004},
  volume = {10},
  pages = {483--506},
  number = {5},
  address = {Hingham, MA, USA},
  doi = {10.1023/B:HEUR.0000045321.59202.52},
  issn = {1381-1231},
  publisher = {Kluwer Academic Publishers}
}

@article{Bussemaker2001Regulatory,
  author  = {Bussemaker, H. J. and Li, H. and Siggia, E. D.},
  title   = {Regulatory element detection using correlation with expression},
  journal = {Nat. {G}enet.},
  year    = {2001},
  volume  = {27},
  pages   = {167--174},
  url     = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/ng/journal/v27/n2/full/ng0201_167.html&filetype=pdf},
  pdf     = {../local/buss01.pdf},
  file    = {buss01.pdf:local/buss01.pdf:PDF},
  subject = {microarray}
}

@article{Busuttil2004Support,
  author = {Busuttil, S. and Abela, J. and Pace, G. J.},
  title = {Support vector machines with profile-based kernels for remote protein
	homology detection},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2004},
  volume = {15},
  pages = {191--200},
  number = {2},
  abstract = {Two new techniques for remote protein homology detection particulary
	suited for sparse data are introduced. {T}hese methods are based
	on position specific scoring matrices or profiles and use a support
	vector machine ({SVM}) for discrimination. {T}he performance on standard
	benchmarks outperforms previous non-discriminative techniques and
	is comparable to that of other {SVM}-based methods while giving distinct
	advantages.},
  pdf = {../local/Busuttil2004Support.pdf},
  file = {Busuttil2004Support.pdf:local/Busuttil2004Support.pdf:PDF},
  keywords = {biosvm},
  url = {http://www.jsbi.org/journal/GIW04/GIW04F020.html}
}

@article{Butina2002Predicting,
  author = {Butina, D. and Segall, M. D. and Frankcombe, K.},
  title = {Predicting {ADME} properties in silico: methods and models},
  journal = {Drug {D}iscov {T}oday},
  year = {2002},
  volume = {7},
  pages = {S83--S88},
  number = {11 Suppl},
  month = {Jun},
  abstract = {Unfavourable absorption, distribution, metabolism and elimination
	({ADME}) properties have been identified as a major cause of failure
	for candidate molecules in drug development. {C}onsequently, there
	is increasing interest in the early prediction of {ADME} properties,
	with the objective of increasing the success rate of compounds reaching
	development. {T}his review explores in silico approaches and selected
	published models for predicting {ADME} properties from chemical structure
	alone. {I}n particular, we provide a comparison of methods based
	on pattern recognition to identify correlations between molecular
	descriptors and {ADME} properties, structural models based on classical
	molecular mechanics and quantum mechanical techniques for modelling
	chemical reactions.},
  keywords = {chemoinformatics},
  owner = {mahe},
  pii = {S1359644602022882},
  pmid = {12047885},
  timestamp = {2006.02.03}
}

@article{Butte2000Mutual,
  author = {Butte, A. J. and Kohane, I. S.},
  title = {Mutual information relevance networks: functional genomic clustering
	using pairwise entropy measurements},
  journal = {Pac Symp Biocomput},
  year = {2000},
  pages = {418--429},
  abstract = {Increasing numbers of methodologies are available to find functional
	genomic clusters in RNA expression data. We describe a technique
	that computes comprehensive pair-wise mutual information for all
	genes in such a data set. An association with a high mutual information
	means that one gene is non-randomly associated with another; we hypothesize
	this means the two are related biologically. By picking a threshold
	mutual information and using only associations at or above the threshold,
	we show how this technique was used on a public data set of 79 RNA
	expression measurements of 2,467 genes to construct 22 clusters,
	or Relevance Networks. The biological significance of each Relevance
	Network is explained.},
  institution = {Children's Hospital Informatics Program, Boston, MA 02115, USA.},
  keywords = {Computer Simulation; Gene Expression; Genome; Genome, Fungal; Genome,
	Human; Humans; Models, Genetic; Multigene Family; RNA; Saccharomyces
	cerevisiae},
  owner = {fantine},
  pmid = {10902190},
  timestamp = {2010.10.21}
}

@article{Butte2000Discovering,
  author = {Butte, A. J. and Tamayo, P. and Slonim, D. and Golub, T. R. and Kohane,
	I. S.},
  title = {Discovering functional relationships between {RNA} expression and
	chemotherapeutic susceptibility using relevance networks},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2000},
  volume = {97},
  pages = {12182--12186},
  number = {22},
  month = {Oct},
  abstract = {In an effort to find gene regulatory networks and clusters of genes
	that affect cancer susceptibility to anticancer agents, we joined
	a database with baseline expression levels of 7,245 genes measured
	by using microarrays in 60 cancer cell lines, to a database with
	the amounts of 5,084 anticancer agents needed to inhibit growth of
	those same cell lines. Comprehensive pair-wise correlations were
	calculated between gene expression and measures of agent susceptibility.
	Associations weaker than a threshold strength were removed, leaving
	networks of highly correlated genes and agents called relevance networks.
	Hypotheses for potential single-gene determinants of anticancer agent
	susceptibility were constructed. The effect of random chance in the
	large number of calculations performed was empirically determined
	by repeated random permutation testing; only associations stronger
	than those seen in multiply permuted data were used in clustering.
	We discuss the advantages of this methodology over alternative approaches,
	such as phylogenetic-type tree clustering and self-organizing maps.},
  doi = {10.1073/pnas.220392197},
  pdf = {../local/Butte2000Discovering.pdf},
  file = {Butte2000Discovering.pdf:Butte2000Discovering.pdf:PDF},
  pii = {220392197},
  pmid = {11027309},
  timestamp = {2008.02.04},
  url = {http://dx.doi.org/10.1073/pnas.220392197}
}

@article{Buus2003Sensitive,
  author = {Buus, S. and Lauem{\o}ller, S. L. and Worning, P. and Kesmir, C. and
	Frimurer, T. and Corbet, S. and Fomsgaard, A. and Hilden, J. and Holm, A.
	and Brunak, S.},
  title = {Sensitive quantitative predictions of peptide-{MHC} binding by a
	{`Query by Committee'} artificial neural network approach},
  journal = {Tissue Antigens},
  year = {2003},
  volume = {62},
  pages = {378--384},
  number = {5},
  month = {Nov},
  abstract = {We have generated Artificial Neural Networks (ANN) capable of performing
	sensitive, quantitative predictions of peptide binding to the MHC
	class I molecule, HLA-A*0204. We have shown that such quantitative
	ANN are superior to conventional classification ANN, that have been
	trained to predict binding vs non-binding peptides. Furthermore,
	quantitative ANN allowed a straightforward application of a 'Query
	by Committee' (QBC) principle whereby particularly information-rich
	peptides could be identified and subsequently tested experimentally.
	Iterative training based on QBC-selected peptides considerably increased
	the sensitivity without compromising the efficiency of the prediction.
	This suggests a general, rational and unbiased approach to the development
	of high quality predictions of epitopes restricted to this and other
	HLA molecules. Due to their quantitative nature, such predictions
	will cover a wide range of MHC-binding affinities of immunological
	interest, and they can be readily integrated with predictions of
	other events involved in generating immunogenic epitopes. These predictions
	have the capacity to perform rapid proteome-wide searches for epitopes.
	Finally, it is an example of an iterative feedback loop whereby advanced,
	computational bioinformatics optimize experimental strategy, and
	vice versa.},
  keywords = {HLA-A Antigens; Humans; Neural Networks (Computer); Peptides; Protein
	Binding; Proteome; Research Support, Non-U.S. Gov't; Research Support,
	U.S. Gov't, P.H.S.},
  owner = {jacob},
  pii = {112},
  pmid = {14617044},
  timestamp = {2006.08.30}
}

@article{Buyse2006Validation,
  author = {Buyse, M. and Loi, S. and van't Veer, S. and Viale, G. and Delorenzi,
	M. and Glas, A. M. and Saghatchian d'Assignies, M. and Bergh, J.
	and Lidereau, R. and Ellis, P. and Harris, A. and Bogaerts, J. and
	Therasse, P. and Floore, A. and Amakrane, M. and Piette, F. and Rutgers,
	E. and Sotiriou, C. and Cardoso, F. and Piccart, M. J. and
	{TRANSBIG Consortium}},
  title = {Validation and clinical utility of a 70-gene prognostic signature
	for women with node-negative breast cancer},
  journal = {J. Natl. Canc. Inst.},
  year = {2006},
  volume = {98},
  pages = {1183--1192},
  number = {17},
  month = {Sep},
  abstract = {BACKGROUND: A 70-gene signature was previously shown to have prognostic
	value in patients with node-negative breast cancer. Our goal was
	to validate the signature in an independent group of patients. METHODS:
	Patients (n = 307, with 137 events after a median follow-up of 13.6
	years) from five European centers were divided into high- and low-risk
	groups based on the gene signature classification and on clinical
	risk classifications. Patients were assigned to the gene signature
	low-risk group if their 5-year distant metastasis-free survival probability
	as estimated by the gene signature was greater than 90\%. Patients
	were assigned to the clinicopathologic low-risk group if their 10-year
	survival probability, as estimated by Adjuvant! software, was greater
	than 88\% (for estrogen receptor [ER]-positive patients) or 92\%
	(for ER-negative patients). Hazard ratios (HRs) were estimated to
	compare time to distant metastases, disease-free survival, and overall
	survival in high- versus low-risk groups. RESULTS: The 70-gene signature
	outperformed the clinicopathologic risk assessment in predicting
	all endpoints. For time to distant metastases, the gene signature
	yielded HR = 2.32 (95\% confidence interval [CI] = 1.35 to 4.00)
	without adjustment for clinical risk and hazard ratios ranging from
	2.13 to 2.15 after adjustment for various estimates of clinical risk;
	clinicopathologic risk using Adjuvant! software yielded an unadjusted
	HR = 1.68 (95\% CI = 0.92 to 3.07). For overall survival, the gene
	signature yielded an unadjusted HR = 2.79 (95\% CI = 1.60 to 4.87)
	and adjusted hazard ratios ranging from 2.63 to 2.89; clinicopathologic
	risk yielded an unadjusted HR = 1.67 (95\% CI = 0.93 to 2.98). For
	patients in the gene signature high-risk group, 10-year overall survival
	was 0.69 for patients in both the low- and high-clinical risk groups;
	for patients in the gene signature low-risk group, the 10-year survival
	rates were 0.88 and 0.89, respectively. CONCLUSIONS: The 70-gene
	signature adds independent prognostic information to clinicopathologic
	risk assessment for patients with early breast cancer.},
  doi = {10.1093/jnci/djj329},
  pdf = {../local/Buyse2006Validation.pdf},
  file = {Buyse2006Validation.pdf:Buyse2006Validation.pdf:PDF},
  institution = {International Drug Development Institute, Brussels, Belgium.},
  keywords = {csbcbook, csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {16954471},
  timestamp = {2009.10.17},
  url = {http://dx.doi.org/10.1093/jnci/djj329}
}

@article{Byerly2009Effects,
  author      = {Byerly, S. and Sundin, K. and Raja, R. and Stanchfield, J. and
	Bejjani, B. A. and Shaffer, L. G.},
  title       = {Effects of ozone exposure during microarray posthybridization
	washes and scanning},
  journal     = {J. Mol. Diagn.},
  year        = {2009},
  month       = {Nov},
  volume      = {11},
  number      = {6},
  pages       = {590--597},
  doi         = {10.2353/jmoldx.2009.090009},
  url         = {http://dx.doi.org/10.2353/jmoldx.2009.090009},
  pmid        = {19767590},
  pii         = {S1525-1578(10)60283-8},
  institution = {Signature Genomic Laboratories, 2820 N. Astor St., Spokane,
	WA 99207, USA.},
  language    = {eng},
  keywords    = {Humans; Oligonucleotide Array Sequence Analysis, methods; Ozone},
  abstract    = {The increasing prevalence of array-based comparative genomic
	hybridization in the clinical laboratory necessitates the implementation
	of quality control measures to attain accurate results with a high
	level of confidence.
	Environmental ozone is present in all industrialized cities and has
	been found to be detrimental to array data even at levels considered
	acceptable by US Environmental Protection Agency standards.
	In this study, we characterized the effect of ozone on microarray data
	on three different labeling platforms that use different fluorescent
	dyes (Cy3 and Cy5, Alexa Fluor 555 and Alexa Fluor 647, and Alexa
	Fluor 3 and Alexa Fluor 5) that are commonly used in array-based
	comparative genomic hybridization.
	We investigated the effects of ozone on microarray data by washing
	the array in variable ozone environments.
	In addition, we observed the effects of prolonged exposure to ozone
	on the microarray after washing in an ozone-free environment.
	Our results demonstrate the necessity of minimizing ozone exposure
	when washing and drying the microarray.
	We also found that washed microarrays produce the best results when
	immediately scanned; however, if a low-ozone environment is maintained,
	there will be little compromise in the data collected.},
  medline-pst = {ppublish},
  owner       = {phupe},
  timestamp   = {2011.04.08}
}

@article{Byvatov2003Comparison,
  author = {Byvatov, E. and Fechner, U. and Sadowski, J. and Schneider, G.},
  title = {Comparison of support vector machine and artificial neural network
	systems for drug/nondrug classification},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2003},
  volume = {43},
  pages = {1882--1889},
  number = {6},
  abstract = {Support vector machine ({SVM}) and artificial neural network ({ANN})
	systems were applied to a drug/nondrug classification problem as
	an example of binary decision problems in early-phase virtual compound
	filtering and screening. {T}he results indicate that solutions obtained
	by {SVM} training seem to be more robust with a smaller standard
	error compared to {ANN} training. {G}enerally, the {SVM} classifier
	yielded slightly higher prediction accuracy than {ANN}, irrespective
	of the type of descriptors used for molecule encoding, the size of
	the training data sets, and the algorithm employed for neural network
	training. {T}he performance was compared using various different
	descriptor sets and descriptor combinations based on the 120 standard
	{G}hose-{C}rippen fragment descriptors, a wide range of 180 different
	properties and physicochemical descriptors from the {M}olecular {O}perating
	{E}nvironment ({MOE}) package, and 225 topological pharmacophore
	({CATS}) descriptors. {F}or the complete set of 525 descriptors cross-validated
	classification by {SVM} yielded 82\% correct predictions ({M}atthews
	cc = 0.63), whereas {ANN} reached 80\% correct predictions ({M}atthews
	cc = 0.58). {A}lthough {SVM} outperformed the {ANN} classifiers with
	regard to overall prediction accuracy, both methods were shown to
	complement each other, as the sets of true positives, false positives
	(overprediction), true negatives, and false negatives (underprediction)
	produced by the two classifiers were not identical. {T}he theory
	of {SVM} and {ANN} training is briefly reviewed.},
  doi = {10.1021/ci0341161},
  pdf = {../local/Byvatov2003Comparison.pdf},
  file = {Byvatov2003Comparison.pdf:local/Byvatov2003Comparison.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci0341161}
}

@article{Byvatov2004SVM-based,
  author = {Byvatov, Evgeny and Schneider, Gisbert},
  title = {{SVM}-based feature selection for characterization of focused compound
	collections},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {993--999},
  number = {3},
  abstract = {Artificial neural networks, the support vector machine ({SVM}), and
	other machine learning methods for the classification of molecules
	are often considered as a "black box", since the molecular features
	that are most relevant for a given classifier are usually not presented
	in a human-interpretable form. {W}e report on an {SVM}-based algorithm
	for the selection of relevant molecular features from a trained classifier
	that might be important for an understanding of ligand-receptor interactions.
	{T}he original {SVM} approach was extended to allow for feature selection.
	{T}he method was applied to characterize focused libraries of enzyme
	inhibitors. {A} comparison with classical {K}olmogorov-{S}mirnov
	({KS})-based feature selection was performed. {I}n most of the applications
	the {SVM} method showed sustained classification accuracy, thereby
	relying on a smaller number of molecular features than {KS}-based
	classifiers. {I}n one case both methods produced comparable results.
	{L}imiting the calculation of descriptors to only the most relevant
	ones for a certain biological activity can also be used to speed
	up high-throughput virtual screening.},
  doi = {10.1021/ci0342876},
  pdf = {../local/Byvatov2004SVM-based.pdf},
  file = {Byvatov2004SVM-based.pdf:local/Byvatov2004SVM-based.pdf:PDF},
  keywords = {biosvm chemoinformatics featureselection},
  url = {http://dx.doi.org/10.1021/ci0342876}
}

@article{Byvatov2003Support,
  author = {Byvatov, E. and Schneider, G.},
  title = {Support vector machine applications in bioinformatics},
  journal = {Appl {B}ioinformatics},
  year = {2003},
  volume = {2},
  pages = {67--77},
  number = {2},
  abstract = {The support vector machine ({SVM}) approach represents a data-driven
	method for solving classification tasks. {I}t has been shown to produce
	lower prediction error compared to classifiers based on other methods
	like artificial neural networks, especially when large numbers of
	features are considered for sample description. {I}n this review,
	the theory and main principles of the {SVM} approach are outlined,
	and successful applications in traditional areas of bioinformatics
	research are described. {C}urrent developments in techniques related
	to the {SVM} approach are reviewed which might become relevant for
	future functional genomics and chemogenomics projects. {I}n a comparative
	study, we developed neural network and {SVM} models to identify small
	organic molecules that potentially modulate the function of {G}-protein
	coupled receptors. {T}he {SVM} system was able to correctly classify
	approximately 90\% of the compounds in a cross-validation study yielding
	a {M}atthews correlation coefficient of 0.78. {T}his classifier can
	be used for fast filtering of compound libraries in virtual screening
	applications.},
  keywords = {biosvm}
}

@book{International1992Enzyme,
  title = {Enzyme Nomenclature 1992},
  publisher = {Academic Press},
  year = {1992},
  author = {{International Union of Biochemistry and Molecular Biology}},
  address = {San Diego, California, United States},
  month = {August},
  abstract = {Recommendations of the Nomenclature Committee of the International
	Union of Biochemistry and Molecular Biology on the Nomenclature and
	Classification of Enzymes.},
  citeulike-article-id = {1096963},
  howpublished = {Hardcover},
  isbn = {0122271645},
  keywords = {gene-ontology, ontology},
  posted-at = {2007-10-29 12:38:57},
  priority = {2},
  url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/0122271645}
}

@article{Buerckstuemmer2006efficient,
  author = {B{\"u}rckst{\"u}mmer, Tilmann and Bennett, Keiryn L and Preradovic,
	Adrijana and Sch{\"u}tze, Gregor and Hantschel, Oliver and Superti-Furga,
	Giulio and Bauch, Angela},
  title = {An efficient tandem affinity purification procedure for interaction
	proteomics in mammalian cells},
  journal = {Nat Methods},
  year = {2006},
  volume = {3},
  pages = {1013--1019},
  number = {12},
  month = {Dec},
  abstract = {Tandem affinity purification (TAP) is a generic two-step affinity
	purification protocol that enables the isolation of protein complexes
	under close-to-physiological conditions for subsequent analysis by
	mass spectrometry. Although TAP was instrumental in elucidating the
	yeast cellular machinery, in mammalian cells the method suffers from
	a low overall yield. We designed several dual-affinity tags optimized
	for use in mammalian cells and compared the efficiency of each tag
	to the conventional TAP tag. A tag based on protein G and the streptavidin-binding
	peptide (GS-TAP) resulted in a tenfold increase in protein-complex
	yield and improved the specificity of the procedure. This allows
	purification of protein complexes that were hitherto not amenable
	to TAP and use of less starting material, leading to higher success
	rates and enabling systematic interaction proteomics projects. Using
	the well-characterized Ku70-Ku80 protein complex as an example, we
	identified both core elements as well as new candidate effectors.},
  doi = {10.1038/nmeth968},
  pdf = {../local/Buerckstuemmer2006efficient.pdf},
  file = {Buerckstuemmer2006efficient.pdf:Buerckstuemmer2006efficient.pdf:PDF},
  institution = {Research Center for Molecular Medicine (CeMM), Lazarettgasse 19/3,
	1090 Vienna, Austria.},
  owner = {phupe},
  pii = {nmeth968},
  pmid = {17060908},
  timestamp = {2010.09.01},
  url = {http://dx.doi.org/10.1038/nmeth968}
}

@article{Cabusora2005Differential,
  author = {Cabusora, L. and Sutton, E. and Fulmer, A. and Forst, C. V.},
  title = {Differential network expression during drug and stress response.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2898--2905},
  number = {12},
  month = {Jun},
  abstract = {The application of microarray chip technology has led to an explosion
	of data concerning the expression levels of the genes in an organism
	under a plethora of conditions. One of the major challenges of systems
	biology today is to devise generally applicable methods of interpreting
	this data in a way that will shed light on the complex relationships
	between multiple genes and their products. The importance of such
	information is clear, not only as an aid to areas of research like
	drug design, but also as a contribution to our understanding of the
	mechanisms behind an organism's ability to react to its environment. We
	detail one computational approach for using gene expression data
	to identify response networks in an organism. The method is based
	on the construction of biological networks given different sets of
	interaction information and the reduction of the said networks to
	important response sub-networks via the integration of the gene expression
	data. As an application, the expression data of known stress responders
	and DNA repair genes in Mycobacterium tuberculosis is used to construct
	a generic stress response sub-network. This is compared to similar
	networks constructed from data obtained from subjecting M.tuberculosis
	to various drugs; we are thus able to distinguish between generic
	stress response and specific drug response. We anticipate that this
	approach will be able to accelerate target identification and drug
	development for tuberculosis in the future. chris@lanl.gov Supplementary
	Figures 1 through 6 on drug response networks and differential network
	analyses on cerulenin, chlorpromazine, ethionamide, ofloxacin, thiolactomycin
	and triclosan. Supplementary Tables 1 to 3 on predicted protein interactions.
	http://www.santafe.edu/~chris/DifferentialNW.},
  doi = {10.1093/bioinformatics/bti440},
  pdf = {../local/Cabusora2005Differential.pdf},
  file = {Cabusora2005Differential.pdf:Cabusora2005Differential.pdf:PDF},
  institution = {Los Alamos National Laboratory, PO Box 1663, Mailstop M888, Los Alamos,
	NM 87545, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {bti440},
  pmid = {15840709},
  timestamp = {2011.10.08},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti440}
}

@article{Caelli2004eigenspace,
  author = {Caelli, T. and Kosinov, S.},
  title = {An eigenspace projection clustering method for inexact graph matching},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {2004},
  volume = {26},
  pages = {515--519},
  number = {4},
  month = {Apr},
  doi = {10.1109/TPAMI.2004.1265866},
  owner = {jp},
  timestamp = {2008.10.05},
  url = {http://dx.doi.org/10.1109/TPAMI.2004.1265866}
}

@article{Cai2004Enzyme,
  author = {Cai, C. Z. and Han, L. Y. and Ji, Z. L. and Chen, Y. Z.},
  title = {Enzyme family classification by support vector machines.},
  journal = {Proteins},
  year = {2004},
  volume = {55},
  pages = {66--76},
  number = {1},
  abstract = {One approach for facilitating protein function prediction is to classify
	proteins into functional families. {R}ecent studies on the classification
	of {G}-protein coupled receptors and other proteins suggest that
	a statistical learning method, {S}upport vector machines ({SVM}),
	may be potentially useful for protein classification into functional
	families. {I}n this work, {SVM} is applied and tested on the classification
	of enzymes into functional families defined by the {E}nzyme {N}omenclature
	{C}ommittee of {IUBMB}. {SVM} classification system for each family
	is trained from representative enzymes of that family and seed proteins
	of {P}fam curated protein families. {T}he classification accuracy
	for enzymes from 46 families and for non-enzymes is in the range
	of 50.0\% to 95.7\% and 79.0\% to 100\% respectively. {T}he corresponding
	{M}atthews correlation coefficient is in the range of 54.1\% to 96.1\%.
	{M}oreover, 80.3\% of the 8,291 correctly classified enzymes are uniquely
	classified into a specific enzyme family by using a scoring function,
	indicating that {SVM} may have certain level of unique prediction
	capability. {T}esting results also suggest that {SVM} in some cases
	is capable of classification of distantly related enzymes and homologous
	enzymes of different functions. {E}ffort is being made to use a more
	comprehensive set of enzymes as training sets and to incorporate
	multi-class {SVM} classification systems to further enhance the unique
	prediction accuracy. {O}ur results suggest the potential of {SVM}
	for enzyme family classification and for facilitating protein function
	prediction. {O}ur software is accessible at http://jing.cz3.nus.edu.sg/cgi-bin/svmprot.cgi.},
  doi = {10.1002/prot.20045},
  pdf = {../local/Cai2004Enzyme.pdf},
  file = {Cai2004Enzyme.pdf:local/Cai2004Enzyme.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/prot.20045}
}

@article{Cai2003Protein,
  author = {Cai, C. Z. and Wang, W. L. and Sun, L. Z. and Chen, Y. Z.},
  title = {Protein function classification via support vector machine approach.},
  journal = {Math. {B}iosci.},
  year = {2003},
  volume = {185},
  pages = {111--122},
  number = {2},
  abstract = {Support vector machine ({SVM}) is introduced as a method for the classification
	of proteins into functionally distinguished classes. {S}tudies are
	conducted on a number of protein classes including {RNA}-binding
	proteins; protein homodimers, proteins responsible for drug absorption,
	proteins involved in drug distribution and excretion, and drug metabolizing
	enzymes. {T}esting accuracy for the classification of these protein
	classes is found to be in the range of 84-96\%. {T}his suggests the
	usefulness of {SVM} in the classification of protein functional classes
	and its potential application in protein function prediction.},
  doi = {10.1016/S0025-5564(03)00096-8},
  pdf = {../local/Cai2003Protein.pdf},
  file = {Cai2003Protein.pdf:local/Cai2003Protein.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Cai2003SVM-Prot,
  author = {C. Z. Cai and L. Y. Han and Z. L. Ji and X. Chen and Y. Z. Chen},
  title = {{SVM-Prot}: {W}eb-based support vector machine software for functional
	classification of a protein from its primary sequence.},
  journal = {Nucleic {A}cids {R}es},
  year = {2003},
  volume = {31},
  pages = {3692--3697},
  number = {13},
  month = {Jul},
  abstract = {Prediction of protein function is of significance in studying biological
	processes. {O}ne approach for function prediction is to classify
	a protein into functional family. {S}upport vector machine ({SVM})
	is a useful method for such classification, which may involve proteins
	with diverse sequence distribution. {W}e have developed a web-based
	software, {SVMP}rot, for {SVM} classification of a protein into functional
	family from its primary sequence. {SVMP}rot classification system
	is trained from representative proteins of a number of functional
	families and seed proteins of {P}fam curated protein families. {I}t
	currently covers 54 functional families and additional families will
	be added in the near future. {T}he computed accuracy for protein
	family classification is found to be in the range of 69.1-99.6\%.
	{SVMP}rot shows a certain degree of capability for the classification
	of distantly related proteins and homologous proteins of different
	function and thus may be used as a protein function prediction tool
	that complements sequence alignment methods. {SVMP}rot can be accessed
	at http://jing.cz3.nus.edu.sg/cgi-bin/svmprot.cgi.},
  pdf = {../local/Cai2003SVM-Prot.pdf},
  file = {Cai2003SVM-Prot.pdf:local/Cai2003SVM-Prot.pdf:PDF},
  keywords = {biosvm},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/31/13/3692}
}

@article{Cai2003Supportc,
  author = {Cai, Y. D. and Feng, K. Y. and Li, Y. X. and Chou, K. C.},
  title = {Support vector machine for predicting alpha-turn types.},
  journal = {Peptides},
  year = {2003},
  volume = {24},
  pages = {629--630},
  number = {4},
  abstract = {Tight turns play an important role in globular proteins from both
	the structural and functional points of view. {O}f tight turns, beta-turns
	and gamma-turns have been extensively studied, but alpha-turns were
	little investigated. {R}ecently, a systematic search for alpha-turns
	classified alpha-turns into nine different types according to their
	backbone trajectory features. {I}n this paper, {S}upport {V}ector
	{M}achines ({SVM}s), a new machine learning method, is proposed for
	predicting the alpha-turn types in proteins. {T}he high rates of
	correct prediction imply that that the formation of different alpha-turn
	types is evidently correlated with the sequence of a pentapeptide,
	and hence can be approximately predicted based on the sequence information
	of the pentapeptide alone, although the incorporation of its interaction
	with the other part of a protein, the so-called "long distance interaction",
	will further improve the prediction quality.},
  doi = {10.1016/S0196-9781(03)00100-1},
  pdf = {../local/Cai2003Supportc.pdf},
  file = {Cai2003Supportc.pdf:local/Cai2003Supportc.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0196-9781(03)00100-1}
}

@article{Cai2003Supportd,
  author = {Cai, Y. D. and Lin, S. L.},
  title = {Support vector machines for predicting {rRNA}-, {RNA}-, and {DNA}-binding
	proteins from amino acid sequence.},
  journal = {Biochim. {B}iophys. {A}cta},
  year = {2003},
  volume = {1648},
  pages = {127--133},
  number = {1-2},
  abstract = {Classification of gene function remains one of the most important
	and demanding tasks in the post-genome era. {M}ost of the current
	predictive computer methods rely on comparing features that are essentially
	linear to the protein sequence. {H}owever, features of a protein
	nonlinear to the sequence may also be predictive to its function.
	{M}achine learning methods, for instance the {S}upport {V}ector {M}achines
	({SVM}s), are particularly suitable for exploiting such features.
	{I}n this work we introduce {SVM} and the pseudo-amino acid composition,
	a collection of nonlinear features extractable from protein sequence,
	to the field of protein function prediction. {W}e have developed
	prototype {SVM}s for binary classification of r{RNA}-, {RNA}-, and
	{DNA}-binding proteins. {U}sing a protein's amino acid composition
	and limited range correlation of hydrophobicity and solvent accessible
	surface area as input, each of the {SVM}s predicts whether the protein
	belongs to one of the three classes. {I}n self-consistency and cross-validation
	tests, which measures the success of learning and prediction, respectively,
	the r{RNA}-binding {SVM} has consistently achieved >95\% accuracy.
	{T}he {RNA}- and {DNA}-binding {SVM}s demonstrate more diverse accuracy,
	ranging from approximately 76\% to approximately 97\%. {A}nalysis of
	the test results suggests the directions of improving the {SVM}s.},
  doi = {10.1016/S1570-9639(03)00112-2},
  pdf = {../local/Cai2003Supportd.pdf},
  file = {Cai2003Supportd.pdf:local/Cai2003Supportd.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S1570-9639(03)00112-2}
}

@article{Cai2003Supporta,
  author = {Cai, Y. D. and Lin, S. L. and Chou, K. C.},
  title = {Support vector machines for prediction of protein signal sequences
	and their cleavage sites},
  journal = {Peptides},
  year = {2003},
  volume = {24},
  pages = {159--161},
  number = {1},
  abstract = {Given a nascent protein sequence, how can one predict its signal peptide
	or "{Z}ipcode" sequence? {T}his is an important problem for scientists
	to use signal peptides as a vehicle to find new drugs or to reprogram
	cells for gene therapy (see, e.g. [7] {K}.{C}. {C}hou, {C}urrent
	{P}rotein and {P}eptide {S}cience 2002;3:615--22). {I}n this paper,
	support vector machines ({SVM}s), a new machine learning method,
	is applied to approach this problem. {T}he overall rate of correct
	prediction for 1939 secretary proteins and 1440 nonsecretary proteins
	was over 91\%. {I}t has not escaped our attention that the new method
	may also serve as a useful tool for further investigating many unclear
	details regarding the molecular mechanism of the {ZIP} code protein-sorting
	system in cells.},
  doi = {10.1016/S0196-9781(02)00289-9},
  pdf = {../local/Cai2003Supporta.pdf},
  file = {Cai2003Supporta.pdf:local/Cai2003Supporta.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Cai2003Prediction,
  author = {Cai, Y. D. and Liu, X. J. and Li, Y. X. and Xu, X. B. and Chou, K. C.},
  title = {Prediction of beta-turns with learning machines.},
  journal = {Peptides},
  year = {2003},
  volume = {24},
  pages = {665--669},
  number = {5},
  abstract = {The support vector machine approach was introduced to predict the
	beta-turns in proteins. {T}he overall self-consistency rate by the
	re-substitution test for the training or learning dataset reached
	100\%. {B}oth the training dataset and independent testing dataset
	were taken from {C}hou [{J}. {P}ept. {R}es. 49 (1997) 120]. {T}he
	success prediction rates by the jackknife test for the beta-turn
	subset of 455 tetrapeptides and non-beta-turn subset of 3807 tetrapeptides
	in the training dataset were 58.1 and 98.4\%, respectively. {T}he
	success rates with the independent dataset test for the beta-turn
	subset of 110 tetrapeptides and non-beta-turn subset of 30,231 tetrapeptides
	were 69.1 and 97.3\%, respectively. {T}he results obtained from this
	study support the conclusion that the residue-coupled effect along
	a tetrapeptide is important for the formation of a beta-turn.},
  doi = {10.1016/S0196-9781(03)00133-5},
  pdf = {../local/Cai2003Prediction.pdf},
  file = {Cai2003Prediction.pdf:local/Cai2003Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0196-9781(03)00133-5}
}

@article{Cai2003Supportb,
  author = {Cai, Y. D. and Liu, X. J. and Xu, X. B. and Chou, K. C.},
  title = {Support vector machines for prediction of protein domain structural
	class.},
  journal = {J. {T}heor. {B}iol.},
  year = {2003},
  volume = {221},
  pages = {115--120},
  number = {1},
  abstract = {The support vector machines ({SVM}s) method was introduced for predicting
	the structural class of protein domains. {T}he results obtained through
	the self-consistency test, jack-knife test, and independent dataset
	test have indicated that the current method and the elegant component-coupled
	algorithm developed by {C}hou and co-workers, if effectively complemented
	with each other, may become a powerful tool for predicting the structural
	class of protein domains.},
  doi = {10.1006/jtbi.2003.3179},
  pdf = {../local/Cai2003Supportb.pdf},
  file = {Cai2003Supportb.pdf:local/Cai2003Supportb.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1006/jtbi.2003.3179}
}

@article{Cai2002Supporta,
  author = {Cai, Y. D. and Liu, X. J. and Xu, X. B. and Chou, K. C.},
  title = {Support {Vector} {Machines} for predicting {HIV} protease cleavage
	sites in protein.},
  journal = {J. {C}omput. {C}hem.},
  year = {2002},
  volume = {23},
  pages = {267--274},
  number = {2},
  abstract = {Knowledge of the polyprotein cleavage sites by {HIV} protease will
	refine our understanding of its specificity, and the information
	thus acquired is useful for designing specific and efficient {HIV}
	protease inhibitors. {T}he pace in searching for the proper inhibitors
	of {HIV} protease will be greatly expedited if one can find an accurate,
	robust, and rapid method for predicting the cleavage sites in proteins
	by {HIV} protease. {I}n this article, a {S}upport {V}ector {M}achine
	is applied to predict the cleavability of oligopeptides by proteases
	with multiple and extended specificity subsites. {W}e selected {HIV}-1
	protease as the subject of the study. {T}wo hundred ninety-nine oligopeptides
	were chosen for the training set, while the other 63 oligopeptides
	were taken as a test set. {B}ecause of its high rate of self-consistency
	(299/299 = 100\%), a good result in the jackknife test (286/299 95\%)
	and correct prediction rate (55/63 = 87\%), it is expected that the
	{S}upport {V}ector {M}achine method can be referred to as a useful
	assistant technique for finding effective inhibitors of {HIV} protease,
	which is one of the targets in designing potential drugs against
	{AIDS}. {T}he principle of the {S}upport {V}ector {M}achine method
	can also be applied to analyzing the specificity of other multisubsite
	enzymes.},
  doi = {10.1002/jcc.10017},
  pdf = {../local/local},
  file = {local:local/:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/jcc.10017}
}

@article{Cai2002Supportb,
  author = {Cai, Y. D. and Liu, X. J. and Xu, X. B. and Chou, K. C.},
  title = {Support vector machines for predicting the specificity of {GalNAc}-transferase},
  journal = {Peptides},
  year = {2002},
  volume = {23},
  pages = {205--208},
  abstract = {Support {V}ector {M}achines ({SVM}s) which is one kind of learning
	machines, was applied to predict the specificity of {G}al{NA}c-transferase.
	{T}he examination for the self-consistency and the jackknife test
	of the {SVM}s method were tested for the training dataset (305 oligopeptides),
	the correct rate of self-consistency and jackknife test reaches 100\%
	and 84.9\%, respectively. {F}urthermore, the prediction of the independent
	testing dataset (30 oligopeptides) was tested, the rate reaches 76.67\%.},
  doi = {10.1016/S0196-9781(01)00597-6},
  pdf = {../local/Cai2002Supportb.pdf},
  file = {Cai2002Supportb.pdf:local/Cai2002Supportb.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0196-9781(01)00597-6}
}

@article{Cai2002Supportc,
  author = {Cai, Y. D. and Liu, X. J. and Xu, X. B. and Chou, K. C.},
  title = {Support vector machines for the classification and prediction of
	beta-turn types},
  journal = {J. {P}ept. {S}ci.},
  year = {2002},
  volume = {8},
  pages = {297--301},
  number = {7},
  abstract = {The support vector machines ({SVM}s) method is proposed because it
	can reflect the sequence-coupling effect for a tetrapeptide in not
	only a beta-turn or non-beta-turn, but also in different types of
	beta-turn. {T}he results of the model for 6022 tetrapeptides indicate
	that the rates of self-consistency for beta-turn types {I}, {I}',
	{II}, {II}', {VI} and {VIII} and non-beta-turns are 99.92\%, 96.8\%,
	98.02\%, 97.75\%, 100\%, 97.19\% and 100\%, respectively. {U}sing these
	training data, the rate of correct prediction by the {SVM}s for a
	given protein: rubredoxin (54 residues. 51 tetrapeptides) which includes
	12 beta-turn type {I} tetrapeptides, 1 beta-turn type {II} tetrapeptide
	and 38 non-beta-turns reached 82.4\%. {T}he high quality of prediction
	of the {SVM}s implies that the formation of different beta-turn types
	or non-beta-turns is considerably correlated with the sequence of
	a tetrapeptide. {T}he {SVM}s can save {CPU} time and avoid the overfitting
	problem compared with the neural network method.},
  doi = {10.1002/psc.401},
  pdf = {../local/local},
  file = {local:local/:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/psc.401}
}

@article{Cai2000Support,
  author = {Cai, Y. D. and Liu, X. J. and Xu, X. B. and Chou, K. C.},
  title = {Support vector machines for prediction of protein subcellular location},
  journal = {Mol. {C}ell {B}iol. {R}es. {C}ommun.},
  year = {2000},
  volume = {4},
  pages = {230--234},
  number = {4},
  abstract = {Support {V}ector {M}achine ({SVM}), which is one kind of learning
	machines, was applied to predict the subcellular location of proteins
	from their amino acid composition. {I}n this research, the proteins
	are classified into the following 12 groups: (1) chloroplast, (2)
	cytoplasm, (3) cytoskeleton, (4) endoplasmic reticulum, (5) extracall,
	(6) {G}olgi apparatus, (7) lysosome, (8) mitochondria, (9) nucleus,
	(10) peroxisome, (11) plasma membrane, and (12) vacuole, which have
	covered almost all the organelles and subcellular compartments in
	an animal or plant cell. {T}he examination for the self-consistency
	and the jackknife test of the {SVM}s method was tested for the three
	sets: 2022 proteins, 2161 proteins, and 2319 proteins. {A}s a result,
	the correct rate of self-consistency and jackknife test reaches 91
	and 82\% for 2022 proteins, 89 and 75\% for 2161 proteins, and 85 and
	73\% for 2319 proteins, respectively. {F}urthermore, the predicting
	rate was tested by the three independent testing datasets containing
	2240 proteins, 2513 proteins, and 2591 proteins. {T}he correct prediction
	rates reach 82, 75, and 73\% for 2240 proteins, 2513 proteins, and
	2591 proteins, respectively.},
  doi = {10.1006/mcbr.2001.0285},
  pdf = {../local/local},
  file = {local:local/:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1006/mcbr.2001.0285}
}

@article{Cai2004Application,
  author = {Cai, Y. D. and Ricardo, P. W. and Jen, C. H. and Chou, K. C.},
  title = {Application of {SVM} to predict membrane protein types.},
  journal = {J. {T}heor. {B}iol.},
  year = {2004},
  volume = {226},
  pages = {373--376},
  number = {4},
  abstract = {As a continuous effort to develop automated methods for predicting
	membrane protein types that was initiated by {C}hou and {E}lrod ({PROTEINS}:
	{S}tructure, {F}unction, and {G}enetics, 1999, 34, 137-153), the
	support vector machine ({SVM}) is introduced. {R}esults obtained
	through re-substitution, jackknife, and independent data set tests,
	respectively, have indicated that the {SVM} approach is quite a promising
	one, suggesting that the covariant discriminant algorithm ({C}hou
	and {E}lrod, {P}rotein {E}ng. 12 (1999) 107) and {SVM}, if effectively
	complemented with each other, will become a powerful tool for predicting
	membrane protein types and the other protein attributes as well.},
  doi = {10.1016/j.jtbi.2003.08.015},
  pdf = {../local/Cai2004Application.pdf},
  file = {Cai2004Application.pdf:local/Cai2004Application.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.jtbi.2003.08.015}
}

@article{Cai2004Identify,
  author = {Cai, Y. D. and Zhou, G. P. and Jen, C. H. and Lin, S. L. and Chou, K. C.},
  title = {Identify catalytic triads of serine hydrolases by support vector
	machines.},
  journal = {J. {T}heor. {B}iol.},
  year = {2004},
  volume = {228},
  pages = {551--557},
  number = {4},
  abstract = {The core of an enzyme molecule is its active site from the viewpoints
	of both academic research and industrial application. {T}o reveal
	the structural and functional mechanism of an enzyme, one needs to
	know its active site; to conduct structure-based drug design by regulating
	the function of an enzyme, one needs to know the active site and
	its microenvironment as well. {G}iven the atomic coordinates of an
	enzyme molecule, how can we predict its active site? {T}o tackle
	such a problem, a distance group approach was proposed and the support
	vector machine algorithm applied to predict the catalytic triad of
	serine hydrolase family. {T}he success rate by jackknife test for
	the 139 serine hydrolases was 85\%, implying that the method is quite
	promising and may become a useful tool in structural bioinformatics.},
  doi = {10.1016/j.jtbi.2004.02.019},
  pdf = {../local/Cai2004Identify.pdf},
  file = {Cai2004Identify.pdf:local/Cai2004Identify.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.jtbi.2004.02.019}
}

@article{Cai2004Prediction,
  author = {Yu-Dong Cai and Andrew J Doig},
  title = {Prediction of {Saccharomyces} cerevisiae protein functional class
	from functional domain composition.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1292--1300},
  number = {8},
  month = {May},
  abstract = {M{OTIVATION}: {A} key goal of genomics is to assign function to genes,
	especially for orphan sequences. {RESULTS}: {W}e compared the clustered
	functional domains in the {SBASE} database to each protein sequence
	using {BLASTP}. {T}his representation for a protein is a vector,
	where each of the non-zero entries in the vector indicates a significant
	match between the sequence of interest and the {SBASE} domain. {T}he
	machine learning methods nearest neighbour algorithm ({NNA}) and
	support vector machines are used for predicting protein functional
	classes from this information. {W}e find that the best results are
	found using the {SBASE}-{A} database and the {NNA}, namely 72\% accuracy
	for 79\% coverage. {W}e tested an assigning function based on searching
	for {I}nter{P}ro sequence motifs and by taking the most significant
	{BLAST} match within the dataset. {W}e applied the functional domain
	composition method to predict the functional class of 2018 currently
	unclassified yeast open reading frames. {AVAILABILITY}: {A} program
	for the prediction method, that uses {NNA} called {F}unctional {C}lass
	{P}rediction based on {F}unctional {D}omains ({FCPFD}) is available
	and can be obtained by contacting {Y}.{D}.{C}ai at y.cai@umist.ac.uk},
  doi = {10.1093/bioinformatics/bth085},
  pdf = {../local/Cai2004Prediction.pdf},
  file = {Cai2004Prediction.pdf:local/Cai2004Prediction.pdf:PDF},
  keywords = {biosvm},
  pii = {bth085},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth085}
}

@article{Cai2005Using,
  author = {Yu-Dong Cai and Kai-Yan Feng and Wen-Cong Lu and Kuo-Chen Chou},
  title = {Using {LogitBoost} classifier to predict protein structural classes.},
  journal = {J. {T}heor. {B}iol.},
  year = {2005},
  month = {Jul},
  abstract = {Prediction of protein classification is an important topic in molecular
	biology. {T}his is because it is able to not only provide useful
	information from the viewpoint of structure itself, but also greatly
	stimulate the characterization of many other features of proteins
	that may be closely correlated with their biological functions. {I}n
	this paper, the {L}ogit{B}oost, one of the boosting algorithms developed
	recently, is introduced for predicting protein structural classes.
	{I}t performs classification using a regression scheme as the base
	learner, which can handle multi-class problems and is particularly
	superior in coping with noisy data. {I}t was demonstrated that the
	{L}ogit{B}oost outperformed the support vector machines in predicting
	the structural classes for a given dataset, indicating that the new
	classifier is very promising. {I}t is anticipated that the power
	in predicting protein structural classes as well as many other bio-macromolecular
	attributes will be further strengthened if the {L}ogit{B}oost and
	some other existing algorithms can be effectively complemented with
	each other.},
  doi = {10.1016/j.jtbi.2005.05.034},
  pdf = {../local/Cai2005Using.pdf},
  file = {Cai2005Using.pdf:local/Cai2005Using.pdf:PDF},
  pii = {S0022-5193(05)00252-3},
  url = {http://dx.doi.org/10.1016/j.jtbi.2005.05.034}
}

@article{Cai2002Support,
  author = {Cai, Y.-D. and Liu, X.-J. and Xu, X.-B. and Chou, K.-C.},
  title = {Support vector machines for prediction of protein subcellular location
	by incorporating quasi-sequence-order effect},
  journal = {J. {C}ell. {B}iochem.},
  year = {2002},
  volume = {84},
  pages = {343--348},
  number = {2},
  abstract = {Support {V}ector {M}achine ({SVM}), which is one class of learning
	machines, was applied to predict the subcellular location of proteins
	by incorporating the quasi-sequence-order effect ({C}hou [2000] {B}iochem.
	{B}iophys. {R}es. {C}ommun. 278:477-483). {I}n this study, the proteins
	are classified into the following 12 groups: (1) chloroplast, (2)
	cytoplasm, (3) cytoskeleton, (4) endoplasmic reticulum, (5) extracellular,
	(6) {G}olgi apparatus, (7) lysosome, (8) mitochondria, (9) nucleus,
	(10) peroxisome, (11) plasma membrane, and (12) vacuole, which account
	for most organelles and subcellular compartments in an animal or
	plant cell. {E}xaminations for self-consistency and jackknife testing
	of the {SVM}s method were conducted for three sets consisting of
	1,911, 2,044, and 2,191 proteins. {T}he correct rates for self-consistency
	and the jackknife test values achieved with these protein sets were
	94 and 83\% for 1,911 proteins, 92 and 78\% for 2,044 proteins, and
	89 and 75\% for 2,191 proteins, respectively. {F}urthermore, tests
	for correct prediction rates were undertaken with three independent
	testing datasets containing 2,148 proteins, 2,417 proteins, and 2,494
	proteins producing values of 84, 77, and 74\%, respectively.},
  doi = {10.1002/jcb.10030},
  pdf = {../local/Cai2002Support.pdf},
  file = {Cai2002Support.pdf:local/Cai2002Support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/jcb.10030}
}

@article{Cai2002Prediction,
  author = {Cai, Y.-D. and Liu, X.-J. and Xu, X.-B. and Zhou, G.-P.},
  title = {Prediction of protein structural classes by support vector machines.},
  journal = {Comput. {C}hem.},
  year = {2002},
  volume = {26},
  pages = {293--296},
  number = {3},
  abstract = {In this paper, we apply a new machine learning method which is called
	support vector machine to approach the prediction of protein structural
	class. {T}he support vector machine method is performed based on
	the database derived from {SCOP} which is based upon domains of known
	structure and the evolutionary relationships and the principles that
	govern their 3{D} structure. {A}s a result, high rates of both self-consistency
	and jackknife test are obtained. {T}his indicates that the structural
	class of a protein inconsiderably correlated with its amino and composition,
	and the support vector machine can be referred as a powerful computational
	tool for predicting the structural classes of proteins.},
  doi = {10.1016/S0097-8485(01)00113-9},
  pdf = {../local/Cai2002Prediction.pdf},
  file = {Cai2002Prediction.pdf:local/Cai2002Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0097-8485(01)00113-9}
}

@article{Cai2001Support,
  author = {Cai, Y.-D. and Liu, X.-J. and Xu, X.-B. and Zhou, G.-P.},
  title = {Support {Vector} {Machines} for predicting protein structural class},
  journal = {{BMC} {B}ioinformatics},
  year = {2001},
  volume = {2},
  pages = {3},
  number = {3},
  abstract = {Background {W}e apply a new machine learning method, the so-called
	{S}upport {V}ector {M}achine method, to predict the protein structural
	class. {S}upport {V}ector {M}achine method is performed based on
	the database derived from {SCOP}, in which protein domains are classified
	based on known structures and the evolutionary relationships and
	the principles that govern their 3-{D} structure. {R}esults {H}igh
	rates of both self-consistency and jackknife tests are obtained.
	{T}he good results indicate that the structural class of a protein
	is considerably correlated with its amino acid composition. {C}onclusions
	{I}t is expected that the {S}upport {V}ector {M}achine method and
	the elegant component-coupled method, also named as the covariant
	discrimination algorithm, if complemented with each other, can provide
	a powerful computational tool for predicting the structural classes
	of proteins.},
  doi = {10.1186/1471-2105-2-3},
  pdf = {../local/Cai2001Support.pdf},
  file = {Cai2001Support.pdf:local/Cai2001Support.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://www.biomedcentral.com/1471-2105/2/3/abstract}
}

@article{Cai2003Support,
  author = {Cai, Y.-D. and Zhou, G.-P. and Chou, K.-C.},
  title = {Support {V}ector {M}achines for {P}redicting {M}embrane {P}rotein
	{T}ypes by {U}sing {F}unctional {D}omain {C}omposition},
  journal = {Biophys. {J}.},
  year = {2003},
  volume = {84},
  pages = {3257--3263},
  number = {5},
  abstract = {Membrane proteins are generally classified into the following five
	types: 1), type {I} membrane protein; 2), type {II} membrane protein;
	3), multipass transmembrane proteins; 4), lipid chain-anchored membrane
	proteins; and 5), {GPI}-anchored membrane proteins. {I}n this article,
	based on the concept of using the functional domain composition to
	define a protein, the {S}upport {V}ector {M}achine algorithm is developed
	for predicting the membrane protein type. {H}igh success rates are
	obtained by both the self-consistency and jackknife tests. {T}he
	current approach, complemented with the powerful covariant discriminant
	algorithm based on the pseudo-amino acid composition that has incorporated
	quasi-sequence-order effect as recently proposed by {K}. {C}. {C}hou
	(2001), may become a very useful high-throughput tool in the area
	of bioinformatics and proteomics.},
  pdf = {../local/Cai2003Support.pdf},
  file = {Cai2003Support.pdf:local/Cai2003Support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.biophysj.org/cgi/content/abstract/84/5/3257}
}

@article{Caie2010High,
  author      = {Peter D Caie and Rebecca E Walls and Alexandra Ingleston-Orme and
    Sandeep Daya and Tom Houslay and Rob Eagle and Mark E Roberts and
    Neil O Carragher},
  title       = {High-content phenotypic profiling of drug response signatures across
    distinct cancer cells.},
  journal     = {Mol Cancer Ther},
  year        = {2010},
  month       = {Jun},
  volume      = {9},
  number      = {6},
  pages       = {1913--1926},
  doi         = {10.1158/1535-7163.MCT-09-1148},
  url         = {http://dx.doi.org/10.1158/1535-7163.MCT-09-1148},
  pmid        = {20530715},
  pii         = {1535-7163.MCT-09-1148},
  abstract    = {The application of high-content imaging in conjunction with multivariate
    clustering techniques has recently shown value in the confirmation
    of cellular activity and further characterization of drug mode of
    action following pharmacologic perturbation. However, such practical
    examples of phenotypic profiling of drug response published to date
    have largely been restricted to cell lines and phenotypic response
    markers that are amenable to basic cellular imaging. As such, these
    approaches preclude the analysis of both complex heterogeneous phenotypic
    responses and subtle changes in cell morphology across physiologically
    relevant cell panels. Here, we describe the application of a cell-based
    assay and custom designed image analysis algorithms designed to monitor
    morphologic phenotypic response in detail across distinct cancer
    cell types. We further describe the integration of these methods
    with automated data analysis workflows incorporating principal component
    analysis, Kohonen neural networking, and kNN classification to enable
    rapid and robust interrogation of such data sets. We show the utility
    of these approaches by providing novel insight into pharmacologic
    response across four cancer cell types, Ovcar3, MiaPaCa2, and MCF7
    cells wild-type and mutant for p53. These methods have the potential
    to drive the development of a new generation of novel therapeutic
    classes encompassing pharmacologic compositions or polypharmacology
    in appropriate disease context.},
  institution = {D Charnwood, Loughborough, United Kingdom.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  timestamp   = {2010.07.26}
}

@article{Caldwell1995introduction,
  author    = {Caldwell, J. and Gardner, I. and Swales, N.},
  title     = {An introduction to drug disposition: the basic principles of absorption,
    distribution, metabolism, and excretion.},
  journal   = {Toxicol. Pathol.},
  year      = {1995},
  volume    = {23},
  number    = {2},
  pages     = {102--114},
  pmid      = {7569663},
  abstract  = {A knowledge of the fate of a drug, its disposition (absorption, distribution,
    metabolism, and excretion, known by the acronym ADME) and pharmacokinetics
    (the mathematical description of the rates of these processes and
    of concentration-time relationships), plays a central role throughout
    pharmaceutical research and development. These studies aid in the
    discovery and selection of new chemical entities, support safety
    assessment, and are critical in defining conditions for safe and
    effective use in patients. ADME studies provide the only basis for
    critical judgments from situations where the behavior of the drug
    is understood to those where it is unknown: this is most important
    in bridging from animal studies to the human situation. This presentation
    is intended to provide an introductory overview of the life cycle
    of a drug in the animal body and indicates the significance of such
    information for a full understanding of mechanisms of action and
    toxicity.},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.07.16}
}

@article{Calin2006MicroRNA,
  author      = {Calin, G. A. and Croce, C. M.},
  title       = {Micro{RNA} signatures in human cancers},
  journal     = {Nat. Rev. Cancer},
  year        = {2006},
  month       = {Nov},
  volume      = {6},
  number      = {11},
  pages       = {857--866},
  doi         = {10.1038/nrc1997},
  url         = {http://dx.doi.org/10.1038/nrc1997},
  pmid        = {17060945},
  pii         = {nrc1997},
  abstract    = {MicroRNA (miRNA) alterations are involved in the initiation and progression
    of human cancer. The causes of the widespread differential expression
    of miRNA genes in malignant compared with normal cells can be explained
    by the location of these genes in cancer-associated genomic regions,
    by epigenetic mechanisms and by alterations in the miRNA processing
    machinery. MiRNA-expression profiling of human tumours has identified
    signatures associated with diagnosis, staging, progression, prognosis
    and response to treatment. In addition, profiling has been exploited
    to identify miRNA genes that might represent downstream targets of
    activated oncogenic pathways, or that target protein-coding genes
    involved in cancer.},
  institution = {Department of Molecular Virology, Immunology and Medical Genetics
    and Comprehensive Cancer Center, Ohio State University, Columbus,
    Ohio 43210, USA.},
  keywords    = {csbcbook, csbcbook-ch3},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Calin2006MicroRNA.pdf},
  file        = {Calin2006MicroRNA.pdf:Calin2006MicroRNA.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.17}
}

@article{Calin2006MicroRNA-cancer,
  author = {Calin, G. A. and Croce, C. M.},
  title = {{MicroRNA}-cancer connection: the beginning of a new tale},
  journal = {Cancer Res.},
  year = {2006},
  volume = {66},
  pages = {7390--7394},
  number = {15},
  month = {Aug},
  abstract = {Cancer initiation and progression can involve microRNAs (miRNA), which
	are small noncoding RNAs that can regulate gene expression. Their
	expression profiles can be used for the classification, diagnosis,
	and prognosis of human malignancies. Loss or amplification of miRNA
	genes has been reported in a variety of cancers, and altered patterns
	of miRNA expression may affect cell cycle and survival programs.
	Germ-line and somatic mutations in miRNAs or polymorphisms in the
	mRNAs targeted by miRNAs may also contribute to cancer predisposition
	and progression. We propose that alterations in miRNA genes play
	a critical role in the pathophysiology of many, perhaps all, human
	cancers.},
  doi = {10.1158/0008-5472.CAN-06-0800},
  pdf = {../local/Calin2006MicroRNA-cancer.pdf},
  file = {Calin2006MicroRNA-cancer.pdf:Calin2006MicroRNA-cancer.pdf:PDF},
  institution = {Department of Molecular Virology, Immunology, and Medical Genetics,
	Ohio State University, 400 12th Avenue, Columbus, OH 43210, USA.},
  keywords = {csbcbook},
  owner = {jp},
  pii = {66/15/7390},
  pmid = {16885332},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1158/0008-5472.CAN-06-0800}
}

% First author is Tadeusz Calinski (initial T.); the initials "R. B." that
% appear in some secondary citations of this paper are erroneous.
@article{Calinski1974dendrite,
  author = {Cali{\'n}ski, T. and Harabasz, J.},
  title = {A dendrite method for cluster analysis},
  journal = {Communs Statist.},
  year = {1974},
  volume = {3},
  pages = {1--27},
  number = {1},
  doi = {10.1080/03610927408827101},
  owner = {jp},
  timestamp = {2011.12.29}
}

@article{Calvo2007partially,
  author = {Calvo, B. and L{\'o}pez-Bigas, N. and Furney, S. J. and Larra{\~n}aga,
	P. and Lozano, J. A.},
  title = {A partially supervised classification approach to dominant and recessive
	human disease gene prediction.},
  journal = {Comput. Methods Programs Biomed.},
  year = {2007},
  volume = {85},
  pages = {229--237},
  number = {3},
  month = {Mar},
  abstract = {The discovery of the genes involved in genetic diseases is a very
	important step towards the understanding of the nature of these diseases.
	In-lab identification is a difficult, time-consuming task, where
	computational methods can be very useful. In silico identification
	algorithms can be used as a guide in future studies. Previous works
	in this topic have not taken into account that no reliable sets of
	negative examples are available, as it is not possible to ensure
	that a given gene is not related to any genetic disease. In this
	paper, this feature of the nature of the problem is considered, and
	identification is approached as a partially supervised classification
	problem. In addition, we have performed a more specific method to
	identify disease genes by classifying, for the first time, genes
	causing dominant and recessive diseases independently. We base this
	separation on previous results that show that these two types of
	genes present differences in their sequence properties. In this paper,
	we have applied a new model averaging algorithm to the identification
	of human genes associated with both dominant and recessive Mendelian
	diseases.},
  doi = {10.1016/j.cmpb.2006.12.003},
  institution = {Intelligent Systems Group, Department of Computer Science and Artificial
	Intelligence, University of the Basque Country UPV-EHU, Paseo Manuel
	de Lardizabal 1, E-20018 San Sebasti{\'a}n, Spain. borxa@si.ehu.es},
  keywords = {Algorithms; Genes, Dominant; Genes, Recessive; Genetic Predisposition
	to Disease; Humans; Sequence Analysis, DNA; Spain},
  owner = {mordelet},
  pii = {S0169-2607(06)00293-8},
  pmid = {17258838},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1016/j.cmpb.2006.12.003}
}

@article{Calzone2006BIOCHAM,
  author = {Calzone, L. and Fages, F. and Soliman, S.},
  title = {{BIOCHAM}: an environment for modeling biological systems and formalizing
	experimental knowledge},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {1805--1807},
  number = {14},
  abstract = {Summary: BIOCHAM (the BIOCHemical Abstract Machine) is a software
	environment for modeling biochemical systems. It is based on two
	aspects: (1) the analysis and simulation of boolean, kinetic and
	stochastic models and (2) the formalization of biological properties
	in temporal logic. BIOCHAM provides tools and languages for describing
	protein networks with a simple and straightforward syntax, and for
	integrating biological properties into the model. It then becomes
	possible to analyze, query, verify and maintain the model with respect
	to those properties. For kinetic models, BIOCHAM can search for appropriate
	parameter values in order to reproduce a specific behavior observed
	in experiments and formalized in temporal logic. Coupled with other
	methods such as bifurcation diagrams, this search assists the modeler/biologist
	in the modeling process. Availability: BIOCHAM (v. 2.5) is a free
	software available for download, with example models, at http://contraintes.inria.fr/BIOCHAM/
	Contact: Sylvain.Soliman@inria.fr},
  doi = {10.1093/bioinformatics/btl172},
  eprint = {http://bioinformatics.oxfordjournals.org/cgi/reprint/22/14/1805.pdf},
  pdf = {../local/Calzone2006BIOCHAM.pdf},
  file = {Calzone2006BIOCHAM.pdf:Calzone2006BIOCHAM.pdf:PDF},
  keywords = {csbcbook},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/22/14/1805}
}

@article{Calzone2008comprehensive,
  author = {Calzone, L. and Gelay, A. and Zinovyev, A. and Radvanyi, F. and Barillot,
	E.},
  title = {A comprehensive modular map of molecular interactions in {RB/E2F}
	pathway.},
  journal = {Mol. Syst. Biol.},
  year = {2008},
  volume = {4},
  pages = {173},
  abstract = {We present, here, a detailed and curated map of molecular interactions
	taking place in the regulation of the cell cycle by the retinoblastoma
	protein (RB/RB1). Deregulations and/or mutations in this pathway
	are observed in most human cancers. The map was created using Systems
	Biology Graphical Notation language with the help of CellDesigner
	3.5 software and converted into BioPAX 2.0 pathway description format.
	In the current state the map contains 78 proteins, 176 genes, 99
	protein complexes, 208 distinct chemical species and 165 chemical
	reactions. Overall, the map recapitulates biological facts from approximately
	350 publications annotated in the diagram. The network contains more
	details about RB/E2F interaction network than existing large-scale
	pathway databases. Structural analysis of the interaction network
	revealed a modular organization of the network, which was used to
	elaborate a more summarized, higher-level representation of RB/E2F
	network. The simplification of complex networks opens the road for
	creating realistic computational models of this regulatory pathway.},
  doi = {10.1038/msb.2008.7},
  pdf = {../local/Calzone2008comprehensive.pdf},
  file = {Calzone2008comprehensive.pdf:Calzone2008comprehensive.pdf:PDF},
  institution = {Institut Curie, Service Bioinformatique, Paris, France.},
  keywords = {csbcbook},
  owner = {jp},
  pii = {msb20087},
  pmid = {18319725},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1038/msb.2008.7}
}

@article{Camastra2005novel,
  author = {Francesco Camastra and Alessandro Verri},
  title = {A novel kernel method for clustering.},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {2005},
  volume = {27},
  pages = {801--805},
  number = {5},
  month = {May},
  abstract = {Kernel {M}ethods are algorithms that, by replacing the inner product
	with an appropriate positive definite function, implicitly perform
	a nonlinear mapping of the input data into a high-dimensional feature
	space. {I}n this paper, we present a kernel method for clustering
	inspired by the classical {K}-{M}eans algorithm in which each cluster
	is iteratively refined using a one-class {S}upport {V}ector {M}achine.
	{O}ur method, which can be easily implemented, compares favorably
	with respect to popular clustering algorithms, like {K}-{M}eans,
	{N}eural {G}as, and {S}elf-{O}rganizing {M}aps, on a synthetic data
	set and three {UCI} real data benchmarks ({IRIS} data, {W}isconsin
	breast cancer database, {S}pam database).},
  doi = {10.1109/TPAMI.2005.88},
  pdf = {../local/Camastra2005novel.pdf},
  file = {Camastra2005novel.pdf:local/Camastra2005novel.pdf:PDF},
  url = {http://dx.doi.org/10.1109/TPAMI.2005.88}
}

@article{Campanini2004novel,
  author = {Renato Campanini and Danilo Dongiovanni and Emiro Iampieri and Nico
	Lanconelli and Matteo Masotti and Giuseppe Palermo and Alessandro
	Riccardi and Matteo Roffilli},
  title = {A novel featureless approach to mass detection in digital mammograms
	based on support vector machines.},
  journal = {Phys {M}ed {B}iol},
  year = {2004},
  volume = {49},
  pages = {961--975},
  number = {6},
  month = {Mar},
  abstract = {In this work, we present a novel approach to mass detection in digital
	mammograms. {T}he great variability of the appearance of masses is
	the main obstacle to building a mass detection method. {I}t is indeed
	demanding to characterize all the varieties of masses with a reduced
	set of features. {H}ence, in our approach we have chosen not to extract
	any feature, for the detection of the region of interest; in contrast,
	we exploit all the information available on the image. {A} multiresolution
	overcomplete wavelet representation is performed, in order to codify
	the image with redundancy of information. {T}he vectors of the very-large
	space obtained are then provided to a first support vector machine
	({SVM}) classifier. {T}he detection task is considered here as a
	two-class pattern recognition problem: crops are classified as suspect
	or not, by using this {SVM} classifier. {F}alse candidates are eliminated
	with a second cascaded {SVM}. {T}o further reduce the number of false
	positives, an ensemble of experts is applied: the final suspect regions
	are achieved by using a voting strategy. {T}he sensitivity of the
	presented system is nearly 80\% with a false-positive rate of 1.1
	marks per image, estimated on images coming from the {USF} {DDSM}
	database.},
  doi = {10.1088/0031-9155/49/6/007},
  pdf = {../local/Campanini2004novel.pdf},
  file = {Campanini2004novel.pdf:local/Campanini2004novel.pdf:PDF},
  url = {http://dx.doi.org/10.1088/0031-9155/49/6/007}
}

@article{Campbell2008Identification,
  author      = {Peter J Campbell and Philip J Stephens and Erin D Pleasance and Sarah
    O'Meara and Heng Li and Thomas Santarius and Lucy A Stebbings and
    Catherine Leroy and Sarah Edkins and Claire Hardy and Jon W Teague
    and Andrew Menzies and Ian Goodhead and Daniel J Turner and Christopher
    M Clee and Michael A Quail and Antony Cox and Clive Brown and Richard
    Durbin and Matthew E Hurles and Paul A W Edwards and Graham R Bignell
    and Michael R Stratton and P. Andrew Futreal},
  title       = {Identification of somatically acquired rearrangements in cancer using
    genome-wide massively parallel paired-end sequencing.},
  journal     = {Nat. Genet.},
  year        = {2008},
  month       = {Jun},
  volume      = {40},
  number      = {6},
  pages       = {722--729},
  doi         = {10.1038/ng.128},
  url         = {http://dx.doi.org/10.1038/ng.128},
  pmid        = {18438408},
  pii         = {ng.128},
  abstract    = {Human cancers often carry many somatically acquired genomic rearrangements,
    some of which may be implicated in cancer development. However, conventional
    strategies for characterizing rearrangements are laborious and low-throughput
    and have low sensitivity or poor resolution. We used massively parallel
    sequencing to generate sequence reads from both ends of short DNA
    fragments derived from the genomes of two individuals with lung cancer.
    By investigating read pairs that did not align correctly with respect
    to each other on the reference human genome, we characterized 306
    germline structural variants and 103 somatic rearrangements to the
    base-pair level of resolution. The patterns of germline and somatic
    rearrangement were markedly different. Many somatic rearrangements
    were from amplicons, although rearrangements outside these regions,
    notably including tandem duplications, were also observed. Some somatic
    rearrangements led to abnormal transcripts, including two from internal
    tandem duplications and two fusion transcripts created by interchromosomal
    rearrangements. Germline variants were predominantly mediated by
    retrotransposition, often involving AluY and LINE elements. The results
    demonstrate the feasibility of systematic, genome-wide characterization
    of rearrangements in complex human cancer genomes, raising the prospect
    of a new harvest of genes associated with cancer using this strategy.},
  institution = {Wellcome Trust Sanger Institute, Hinxton CB10 1SA, UK.},
  keywords    = {ngs},
  pdf         = {../local/Campbell2008Identification.pdf},
  file        = {Campbell2008Identification.pdf:Campbell2008Identification.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.09}
}

% BMC Bioinformatics identifies articles by number rather than page range:
% this is volume 5, article 135, so "pages" holds the article number and no
% issue "number" is given.
@article{Camps-Valls2004Profiled,
  author = {Camps-Valls, G. and Chalk, A. M. and Serrano-Lopez, A. J. and Martin-Guerrero,
	J. D. and Sonnhammer, E. L.},
  title = {Profiled support vector machines for antisense oligonucleotide efficacy
	prediction.},
  journal = {{BMC} Bioinformatics},
  year = {2004},
  volume = {5},
  pages = {135},
  abstract = {Background {T}his paper presents the use of {S}upport {V}ector {M}achines
	({SVM}s) for prediction and analysis of antisense oligonucleotide
	({AO}) efficacy. {T}he collected database comprises 315 {AO} molecules
	including 68 features each, inducing a problem well-suited to {SVM}s.
	{T}he task of feature selection is crucial given the presence of
	noisy or redundant features, and the well-known problem of the curse
	of dimensionality. {W}e propose a two-stage strategy to develop an
	optimal model: (1) feature selection using correlation analysis,
	mutual information, and {SVM}-based recursive feature elimination
	({SVM}-{RFE}), and (2) {AO} prediction using standard and profiled
	{SVM} formulations. {A} profiled {SVM} gives different weights to
	different parts of the training data to focus the training on the
	most important regions. {R}esults {I}n the first stage, the {SVM}-{RFE}
	technique was most efficient and robust in the presence of low number
	of samples and high input space dimension. {T}his method yielded
	an optimal subset of 14 representative features, which were all related
	to energy and sequence motifs. {T}he second stage evaluated the performance
	of the predictors (overall correlation coefficient between observed
	and predicted efficacy, r; mean error, {ME}; and root-mean-square-error,
	{RMSE}) using 8-fold and minus-one-{RNA} cross-validation methods.
	{T}he profiled {SVM} produced the best results (r = 0.44, {ME} =
	0.022, and {RMSE}= 0.278) and predicted high (>75\% inhibition of
	gene expression) and low efficacy (<25\%) {AO}s with a success rate
	of 83.3\% and 82.9\%, respectively, which is better than by previous
	approaches. {A} web server for {AO} prediction is available online
	at http://aosvm.cgb.ki.se/. {C}onclusions {T}he {SVM} approach is
	well suited to the {AO} prediction problem, and yields a prediction
	accuracy superior to previous methods. {T}he profiled {SVM} was found
	to perform better than the standard {SVM}, suggesting that it could
	lead to improvements in other prediction problems as well.},
  doi = {10.1186/1471-2105-5-135},
  pdf = {../local/Camps-Valls2004Profiled.pdf},
  file = {Camps-Valls2004Profiled.pdf:local/Camps-Valls2004Profiled.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.biomedcentral.com/1471-2105/5/135}
}

% Published in Comptes Rendus Mathematique (C. R. Acad. Sci. Paris, Serie I),
% volume 346, issues 9-10; "I" is the series, not the volume.
@article{Candes2008The,
  author = {Cand{\`e}s, E.},
  title = {The restricted isometry property and its implications for compressed
	sensing},
  journal = {Comptes Rendus de l'Acad{\'e}mie des Sciences, Paris, S{\'e}rie I},
  year = {2008},
  volume = {346},
  pages = {589--592},
  number = {9--10},
  doi = {10.1016/j.crma.2008.03.014}
}

@article{Candes2006Stable,
  author = {Cand{\`e}s, E. and Romberg, J. K. and Tao, T.},
  title = {Stable signal recovery from incomplete and inaccurate measurements},
  journal = {Comm. {P}ure {A}ppl. {M}ath.},
  year = {2006},
  volume = {59},
  pages = {1207--1223},
  number = {8},
  doi = {10.1002/cpa.20124}
}

@article{Candes2007Dantzig,
  author    = {Cand{\`e}s, E. and Tao, T.},
  title     = {The {D}antzig selector: Statistical estimation when $p$ is much larger
    than $n$},
  journal   = {Ann. Stat.},
  year      = {2007},
  volume    = {35},
  number    = {6},
  pages     = {2313--2351},
  doi       = {10.1214/009053606000001523},
  url       = {http://dx.doi.org/10.1214/009053606000001523},
  keywords  = {lasso},
  pdf       = {../local/Candes2007Dantzig.pdf},
  file      = {Candes2007Dantzig.pdf:Candes2007Dantzig.pdf:PDF},
  owner     = {jp},
  timestamp = {2009.05.08}
}

@article{Candes2005Decoding,
  author  = {Cand{\`e}s, E. and Tao, T.},
  title   = {Decoding by linear programming},
  journal = {{IEEE} {T}ransactions on {I}nformation {T}heory},
  year    = {2005},
  volume  = {51},
  number  = {12},
  pages   = {4203--4215}
}

@article{Candes2011Robust,
  author    = {Cand{\`e}s, E. J. and Li, X. and Ma, Y. and Wright, J.},
  title     = {Robust principal component analysis?},
  journal   = {J. ACM},
  year      = {2011},
  month     = {jun},
  volume    = {58},
  number    = {3},
  pages     = {11:1--11:37},
  doi       = {10.1145/1970392.1970395},
  url       = {http://doi.acm.org/10.1145/1970392.1970395},
  pdf       = {../local/Candes2011Robust.pdf},
  file      = {Candes2011Robust.pdf:Candes2011Robust.pdf:PDF},
  owner     = {jp},
  timestamp = {2013.02.19}
}

@article{Candes2009Exact,
  author    = {Cand{\`e}s, E. J. and Recht, B.},
  title     = {Exact Matrix Completion via Convex Optimization},
  journal   = {Found. Comput. Math.},
  year      = {2009},
  volume    = {9},
  number    = {6},
  pages     = {717--772},
  doi       = {10.1007/s10208-009-9045-5},
  url       = {http://dx.doi.org/10.1007/s10208-009-9045-5},
  pdf       = {../local/Candes2009Exact.pdf},
  file      = {Candes2009Exact.pdf:Candes2009Exact.pdf:PDF},
  owner     = {jp},
  timestamp = {2012.12.22}
}

@article{Caplen2001Specific,
  author    = {Caplen, N. J. and Parrish, S. and Imani, F. and Fire, A. and Morgan,
    R. A.},
  title     = {{S}pecific inhibition of gene expression by small double-stranded
    {RNA}s in invertebrate and vertebrate systems.},
  journal   = {Proc. Natl. Acad. Sci. USA},
  year      = {2001},
  month     = {Aug},
  volume    = {98},
  number    = {17},
  pages     = {9742--9747},
  doi       = {10.1073/pnas.171251798},
  url       = {http://dx.doi.org/10.1073/pnas.171251798},
  pmid      = {11481446},
  pii       = {171251798},
  abstract  = {Short interfering RNAs (siRNAs) are double-stranded RNAs of approximately
    21-25 nucleotides that have been shown to function as key intermediaries
    in triggering sequence-specific RNA degradation during posttranscriptional
    gene silencing in plants and RNA interference in invertebrates. siRNAs
    have a characteristic structure, with 5'-phosphate/3'-hydroxyl ends
    and a 2-base 3' overhang on each strand of the duplex. In this study,
    we present data that synthetic siRNAs can induce gene-specific inhibition
    of expression in Caenorhabditis elegans and in cell lines from humans
    and mice. In each case, the interference by siRNAs was superior to
    the inhibition of gene expression mediated by single-stranded antisense
    oligonucleotides. The siRNAs seem to avoid the well documented nonspecific
    effects triggered by longer double-stranded RNAs in mammalian cells.
    These observations may open a path toward the use of siRNAs as a
    reverse genetic and therapeutic tool in mammalian cells.},
  keywords  = {sirna},
  pdf       = {../local/Caplen2001Specific.pdf},
  file      = {Caplen2001Specific.pdf:Caplen2001Specific.pdf:PDF},
  owner     = {vert},
  timestamp = {2006.03.28}
}

% NOTE(review): the pdf/file fields of this entry hold placeholder paths
% ("local") rather than a PDF file name -- confirm whether a local copy exists
% and repair or remove them.
@article{Capriotti2005I-Mutant,
  author = {Capriotti, E. and Fariselli, P. and Casadio, R.},
  title = {I-{M}utant2.0: predicting stability changes upon mutation from the
	protein sequence or structure.},
  journal = {Nucleic {A}cids {R}es.},
  year = {2005},
  volume = {33},
  pages = {W306--W310},
  number = {Web Server issue},
  month = {Jul},
  abstract = {I-{M}utant2.0 is a support vector machine ({SVM})-based tool for the
	automatic prediction of protein stability changes upon single point
	mutations. {I}-{M}utant2.0 predictions are performed starting either
	from the protein structure or, more importantly, from the protein
	sequence. {T}his latter task, to the best of our knowledge, is exploited
	for the first time. {T}he method was trained and tested on a data
	set derived from {P}ro{T}herm, which is presently the most comprehensive
	available database of thermodynamic experimental data of free energy
	changes of protein stability upon mutation under different conditions.
	{I}-{M}utant2.0 can be used both as a classifier for predicting the
	sign of the protein stability change upon mutation and as a regression
	estimator for predicting the related {D}elta{D}elta{G} values. {A}cting
	as a classifier, {I}-{M}utant2.0 correctly predicts (with a cross-validation
	procedure) 80\% or 77\% of the data set, depending on the usage of
	structural or sequence information, respectively. {W}hen predicting
	{D}elta{D}elta{G} values associated with mutations, the correlation
	of predicted with expected/experimental values is 0.71 (with a standard
	error of 1.30 kcal/mol) and 0.62 (with a standard error of 1.45 kcal/mol)
	when structural or sequence information are respectively adopted.
	{O}ur web interface allows the selection of a predictive mode that
	depends on the availability of the protein structure and/or sequence.
	{I}n this latter case, the web server requires only pasting of a
	protein sequence in a raw format. {W}e therefore introduce {I}-{M}utant2.0
	as a unique and valuable helper for protein design, even when the
	protein structure is not yet known with atomic resolution. {A}vailability:
	http://gpcr.biocomp.unibo.it/cgi/predictors/{I}-{M}utant2.0/{I}-{M}utant2.0.cgi.},
  doi = {10.1093/nar/gki375},
  pdf = {../local/local},
  file = {local:local/:PDF},
  keywords = {biosvm},
  pii = {33/suppl_2/W306},
  url = {http://dx.doi.org/10.1093/nar/gki375}
}

@article{Carbo1980How,
  author = {R. Carb{\'o} and L. Leyda and M. Arnau},
  title = {How similar is a molecule to another - an electron-density measure
	of similarity between 2 molecular structures},
  journal = {Int. J. Quantum Chem.},
  year = {1980},
  volume = {17},
  pages = {1185--1189},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.09.01}
}

@article{Carcassoni2002Spectral,
  author               = {Carcassoni, M. and Hancock, E.},
  title                = {Spectral correspondence for point pattern matching},
  journal              = {Pattern Recogn.},
  year                 = {2003},
  month                = {January},
  volume               = {36},
  number               = {1},
  pages                = {193--204},
  doi                  = {10.1016/S0031-3203(02)00054-7},
  url                  = {http://dx.doi.org/10.1016/S0031-3203(02)00054-7},
  issn                 = {00313203},
  abstract             = {This paper investigates the correspondence matching of point-sets
    using spectral graph analysis. In particular, we are interested in
    the problem of how the modal analysis of point-sets can be rendered
    robust to contamination and drop-out. We make three contributions.
    First, we show how the modal structure of point-sets can be embedded
    within the framework of the EM algorithm. Second, we present several
    methods for computing the probabilities of point correspondences
    from the modes of the point proximity matrix. Third, we consider
    alternatives to the Gaussian proximity matrix. We evaluate the new
    method on both synthetic and real-world data. Here we show that the
    method can be used to compute useful correspondences even when the
    level of point contamination is as large as 50\%. We also provide
    some examples on deformed point-set tracking.},
  keywords             = {correspondences, matching},
  citeulike-article-id = {4071215},
  citeulike-linkout-0  = {http://dx.doi.org/10.1016/S0031-3203(02)00054-7},
  citeulike-linkout-1  = {http://linkinghub.elsevier.com/retrieve/pii/S0031320302000547},
  posted-at            = {2009-02-19 14:46:34},
  priority             = {2}
}

@article{Carhart1985Atom,
  author = {R.E. Carhart and D.H. Smith and R. Venkataraghavan},
  title = {Atom {P}airs as {M}olecular {F}eatures in {S}tructure-{A}ctivity
	{S}tudies: {D}efinitions and {A}pplications},
  journal = {J Chem Inf Comput Sci},
  year = {1985},
  volume = {25},
  pages = {64--73},
  owner = {mahe},
  timestamp = {2006.08.22}
}

% The citation key is kept unchanged for existing \cite commands. It derives
% from a spurious leading author "Monte Carlo" picked up from the title; the
% paper is by Li, Pearl and Doss and appeared in JASA volume 95 (2000).
@article{Carlo1999Phylogenetic,
  author = {Li, Shuying and Pearl, Dennis K. and Doss, Hani},
  title = {Phylogenetic Tree Construction Using {M}arkov Chain {M}onte {C}arlo},
  journal = {Journal of the {A}merican {S}tatistical {A}ssociation},
  year = {2000},
  volume = {95},
  pages = {493--508},
  number = {450}
}

@article{Carter2001computational,
  author = {Carter, R. J. and Dubchak, I. and Holbrook, S. R.},
  title = {A computational approach to identify genes for functional {RNA}s
	in genomic sequences},
  journal = {Nucl. {A}cids {R}es.},
  year = {2001},
  volume = {29},
  pages = {3928--3938},
  number = {19},
  abstract = {Currently there is no successful computational approach for identification
	of genes encoding novel functional {RNA}s (f{RNA}s) in genomic sequences.
	{W}e have developed a machine learning approach using neural networks
	and support vector machines to extract common features among known
	{RNA}s for prediction of new {RNA} genes in the unannotated regions
	of prokaryotic and archaeal genomes. {T}he {E}scherichia coli genome
	was used for development, but we have applied this method to several
	other bacterial and archaeal genomes. {N}etworks based on nucleotide
	composition were 80-90\% accurate in jackknife testing experiments
	for bacteria and 90-99\% for hyperthermophilic archaea. {W}e also
	achieved a significant improvement in accuracy by combining these
	predictions with those obtained using a second set of parameters
	consisting of known {RNA} sequence motifs and the calculated free
	energy of folding. {S}everal known f{RNA}s not included in the training
	datasets were identified as well as several hundred predicted novel
	{RNA}s. {T}hese studies indicate that there are many unidentified
	{RNA}s in simple genomes that can be predicted computationally as
	a precursor to experimental study. {P}ublic access to our {RNA} gene
	predictions and an interface for user predictions is available via
	the web.},
  pdf = {../local/Carter2001computational.pdf},
  file = {Carter2001computational.pdf:local/Carter2001computational.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://nar.oupjournals.org/cgi/content/abstract/29/19/3928}
}

@article{Caruana1997Multitask,
  author = {Rich Caruana},
  title = {Multitask Learning},
  journal = {Machine Learning},
  year = {1997},
  volume = {28},
  pages = {41--75},
  number = {1}
}

@inproceedings{Caruana1993Multitask,
  author = {Caruana, Richard},
  title = {Multitask Learning: A Knowledge-Based Source of Inductive Bias},
  booktitle = {Proceedings of the Tenth International Conference on Machine Learning},
  publisher = {Morgan Kaufmann},
  year = {1993},
  pages = {41--48}
}

@article{Catapano2007G,
  author = {Catapano, L. A. and Manji, H. K.},
  title = {{G} protein-coupled receptors in major psychiatric disorders.},
  journal = {Biochim. Biophys. Acta},
  year = {2007},
  month = {Apr},
  volume = {1768},
  number = {4},
  pages = {976--993},
  doi = {10.1016/j.bbamem.2006.09.025},
  url = {http://dx.doi.org/10.1016/j.bbamem.2006.09.025},
  pmid = {17078926},
  pii = {S0005-2736(06)00384-1},
  keywords = {chemogenomics},
  owner = {laurent},
  timestamp = {2008.01.16}
}

@inproceedings{Catoni2002Data,
  author = {Catoni, O.},
  title = {Data {C}ompression and {A}daptive {H}istograms},
  booktitle = {Foundations of {C}omputational {M}athematics, {P}roceedings of {S}malefest
	2000},
  editor = {Cucker, Felipe and Rojas, J. Maurice},
  publisher = {World Scientific},
  year = {2002},
  url = {http://www.proba.jussieu.fr/users/catoni/gibbsHist_doc/},
  subject = {stat},
  pdf = {../local/cato02.pdf},
  file = {cato02.pdf:local/cato02.pdf:PDF}
}

@unpublished{CatoniGibbs,
  author = {Catoni, O.},
  title = {Gibbs estimators},
  note = {Revised version},
  url = {http://www.proba.jussieu.fr/users/catoni/homepage/gibbs5.dvi},
  subject = {stat},
  pdf = {../local/cato02.ps},
  file = {cato02.ps:local/cato02.ps:PostScript}
}

@article{Causier2004Studying,
  author = {Causier, Barry},
  title = {Studying the interactome with the yeast two-hybrid system and mass
	spectrometry.},
  journal = {Mass Spectrom Rev},
  year = {2004},
  volume = {23},
  number = {5},
  pages = {350--367},
  doi = {10.1002/mas.10080},
  url = {http://dx.doi.org/10.1002/mas.10080},
  pmid = {15264234},
  institution = {School of Biology, University of Leeds, Leeds LS2 9JT, United Kingdom.
	b.e.causier@leeds.ac.uk},
  keywords = {Genes, Fungal; Genome; Mass Spectrometry; Proteins; Proteomics; Yeasts},
  abstract = {Protein interactions are crucial to the life of a cell. The analysis
	of such interactions is allowing biologists to determine the function
	of uncharacterized proteins and the genes that encode them. The yeast
	two-hybrid system has become one of the most popular and powerful
	tools to study protein-protein interactions. With the advent of proteomics,
	the two-hybrid system has found a niche in interactome mapping. However,
	it is clear that only by combining two-hybrid data with that from
	complementary approaches such as mass spectrometry (MS) can the interactome
	be analyzed in full. This review introduces the yeast two-hybrid
	system to those unfamiliar with the technique, and discusses how
	it can be used in combination with MS to unravel the network of protein
	interactions that occur in a cell.},
  owner = {phupe},
  timestamp = {2010.08.31}
}

@article{Cavalieri2005,
  author = {Cavalieri, D. and De Filippo, C.},
  title = {Bioinformatic methods for integrating whole-genome expression results
	into cellular networks},
  journal = {Drug {D}iscov {T}oday},
  year = {2005},
  volume = {10},
  pages = {727--734},
  number = {10},
  abstract = {Extracting a comprehensive overview from the huge amount of information
	arising from whole-genome analyses is a significant challenge. {T}his
	review critically surveys the state of the art methods that are used
	to connect information from functional genomic studies to biological
	function. {C}luster analysis methods for inferring the correlation
	between genes are discussed, as are the methods for integrating gene
	expression information with existing information on biological pathways
	and the methods that combine cluster analysis with biological information
	to reconstruct novel biological networks.},
  keywords = {Cluster Analysis *Computational Biology/methods/organization & administration/trends
	*Genomics/methods/organization & administration/trends Humans Oligonucleotide
	Array Sequence Analysis/methods}
}

@article{Cavalli2002Toward,
  author = {Cavalli, A. and Poluzzi, E. and De Ponti, F. and Recanatini, M.},
  title = {{T}oward a pharmacophore for drugs inducing the long {QT} syndrome:
	insights from a {C}o{MFA} study of {HERG} {K}(+) channel blockers.},
  journal = {J. Med. Chem.},
  year = {2002},
  month = {Aug},
  volume = {45},
  number = {18},
  pages = {3844--3853},
  pmid = {12190308},
  pii = {jm0208875},
  keywords = {chemoinformatics herg},
  abstract = {In this paper, we present a pharmacophore for QT-prolonging drugs,
	along with a 3D QSAR (CoMFA) study for a series of very structurally
	variegate HERG K(+) channel blockers. The blockade of HERG K(+) channels
	is one of the most important molecular mechanisms through which QT-prolonging
	drugs increase cardiac action potential duration. Since QT prolongation
	is one of the most undesirable side effects of drugs, we first tried
	to identify the minimum set of molecular features responsible for
	this action and then we attempted to develop a quantitative model
	correlating the 3D stereoelectronic characteristics of the molecules
	with their HERG blocking potency. Having considered an initial set
	of 31 QT-prolonging drugs for which the HERG K(+) channel blocking
	activity was measured on mammalian transfected cells, we started
	the construction of a theoretical screening tool able to predict
	whether a new molecule can interact with the HERG channel and eventually
	induce the long QT syndrome. This in silico tool might be useful
	in the design of new drug candidates devoid of the physicochemical
	features likely to cause the above-mentioned side effect.},
  timestamp = {2006.10.06}
}

@article{Cavasotto2003Structure-based,
  author = {Cavasotto, C. N. and Orry, A. J. W. and Abagyan, R. A.},
  title = {Structure-based identification of binding sites, native ligands and
	potential inhibitors for {G}-protein coupled receptors.},
  journal = {Proteins},
  year = {2003},
  month = {May},
  volume = {51},
  number = {3},
  pages = {423--433},
  doi = {10.1002/prot.10362},
  url = {http://dx.doi.org/10.1002/prot.10362},
  pmid = {12696053},
  keywords = {chemogenomics},
  abstract = {G-protein coupled receptors (GPCRs) are the largest family of cell-surface
	receptors involved in signal transmission. Drugs associated with
	GPCRs represent more than one fourth of the 100 top-selling drugs
	and are the targets of more than half of the current therapeutic
	agents on the market. Our methodology based on the internal coordinate
	mechanics (ICM) program can accurately identify the ligand-binding
	pocket in the currently available crystal structures of seven transmembrane
	(7TM) proteins [bacteriorhodopsin (BR) and bovine rhodopsin (bRho)].
	The binding geometry of the ligand can be accurately predicted by
	ICM flexible docking with and without the loop regions, a useful
	finding for GPCR docking because the transmembrane regions are easier
	to model. We also demonstrate that the native ligand can be identified
	by flexible docking and scoring in 1.5\% and 0.2\% (for bRho and
	BR, respectively) of the best scoring compounds from two different
	types of compound database. The same procedure can be applied to
	the database of available chemicals to identify specific GPCR binders.
	Finally, we demonstrate that even if the sidechain positions in the
	bRho binding pocket are entirely wrong, their correct conformation
	can be fully restored with high accuracy (0.28 A) through the ICM
	global optimization with and without the ligand present. These binding
	site adjustments are critical for flexible docking of new ligands
	to known structures or for docking to GPCR homology models. The ICM
	docking method has the potential to be used to "de-orphanize" orphan
	GPCRs (oGPCRs) and to identify antagonists-agonists for GPCRs if
	an accurate model (experimentally and computationally validated)
	of the structure has been constructed or when future crystal structures
	are determined.},
  owner = {laurent},
  timestamp = {2008.01.16}
}

@article{Cavasotto2008Discovery,
  author = {Cavasotto, C. N. and Orry, A. J. W. and Murgolo, N. J. and Czarniecki,
	M. F. and Kocsi, S. A. and Hawes, B. E. and O'Neill, K. A. and Hine,
	H. and Burton, M. S. and Voigt, J. H. and Abagyan, R. A. and Bayne,
	M. L. and Monsma, F. J.},
  title = {Discovery of novel chemotypes to a {G}-protein-coupled receptor through
	ligand-steered homology modeling and structure-based virtual screening},
  journal = {J. Med. Chem.},
  year = {2008},
  month = {Feb},
  volume = {51},
  number = {3},
  pages = {581--588},
  doi = {10.1021/jm070759m},
  url = {http://dx.doi.org/10.1021/jm070759m},
  pmid = {18198821},
  keywords = {chemogenomics},
  abstract = {Melanin-concentrating hormone receptor 1 (MCH-R1) is a G-protein-coupled
	receptor (GPCR) and a target for the development of therapeutics
	for obesity. The structure-based development of MCH-R1 and other
	GPCR antagonists is hampered by the lack of an available experimentally
	determined atomic structure. A ligand-steered homology modeling approach
	has been developed (where information about existing ligands is used
	explicitly to shape and optimize the binding site) followed by docking-based
	virtual screening. Top scoring compounds identified virtually were
	tested experimentally in an MCH-R1 competitive binding assay, and
	six novel chemotypes as low micromolar affinity antagonist "hits"
	were identified. This success rate is more than a 10-fold improvement
	over random high-throughput screening, which supports our ligand-steered
	method. Clearly, the ligand-steered homology modeling method reduces
	the uncertainty of structure modeling for difficult targets like
	GPCRs.},
  owner = {laurent},
  timestamp = {2008.07.16}
}

@article{Cawley2004Fast,
  author = {Gavin C Cawley and Nicola L C Talbot},
  title = {Fast exact leave-one-out cross-validation of sparse least-squares
	support vector machines.},
  journal = {Neural {N}etw},
  year = {2004},
  volume = {17},
  pages = {1467--1475},
  number = {10},
  month = {Dec},
  abstract = {Leave-one-out cross-validation has been shown to give an almost unbiased
	estimator of the generalisation properties of statistical models,
	and therefore provides a sensible criterion for model selection and
	comparison. {I}n this paper we show that exact leave-one-out cross-validation
	of sparse {L}east-{S}quares {S}upport {V}ector {M}achines ({LS}-{SVM}s)
	can be implemented with a computational complexity of only $O(ln^2)$
	floating point operations, rather than the $O(l^2n^2)$ operations of
	a na{\"i}ve implementation, where l is the number of training patterns
	and n is the number of basis vectors. {A}s a result, leave-one-out
	cross-validation becomes a practical proposition for model selection
	in large scale applications. {F}or clarity the exposition concentrates
	on sparse least-squares support vector machines in the context of
	non-linear regression, but is equally applicable in a pattern recognition
	setting.},
  doi = {10.1016/j.neunet.2004.07.002},
  pdf = {../local/Cawley2004Fast.pdf},
  file = {Cawley2004Fast.pdf:local/Cawley2004Fast.pdf:PDF},
  pii = {S0893-6080(04)00143-1},
  url = {http://dx.doi.org/10.1016/j.neunet.2004.07.002}
}

@article{Cayley1877Number,
  author = {A. Cayley},
  title = {On the number of univalent radicals {$C_nH_{2n+1}$}},
  journal = {Philos. Mag.},
  year = {1877},
  volume = {18},
  pages = {34--35},
  number = {3}
}

@article{Cayley1875Theorychemical,
  author = {A. Cayley},
  title = {On the theory of the analytical forms called trees, with application
	to the theory of chemical combinations},
  journal = {Rep. Brit. Assoc. Sci.},
  year = {1875},
  volume = {4},
  pages = {257--305},
  number = {45}
}

@article{Cayley1874Mathematical,
  author = {A. Cayley},
  title = {On the mathematical theory of isomers},
  journal = {Philos. Mag.},
  year = {1874},
  volume = {10},
  pages = {444--446},
  number = {47}
}

@article{Cayley1859Theory,
  author = {A. Cayley},
  title = {On the theory of the analytical forms called trees},
  journal = {Philos. Mag.},
  year = {1859},
  volume = {37},
  pages = {374--378},
  number = {18}
}

@misc{Cela2007Quadratic,
  author = {{\c{C}}ela, E.},
  title = {Quadratic assignment problem library},
  year = {2007},
  url = {http://www.opt.math.tu-graz.ac.at/qaplib/}
}

@article{Chalk2004Improved,
  author = {Chalk, A. M. and Wahlestedt, C. and Sonnhammer, E. L. L.},
  title = {Improved and automated prediction of effective si{RNA}.},
  journal = {Biochem. {B}iophys. {R}es. {C}ommun.},
  year = {2004},
  volume = {319},
  pages = {264--274},
  number = {1},
  month = {Jun},
  abstract = {Short interfering {RNA}s are used in functional genomics studies to
	knockdown a single gene in a reversible manner. {T}he results of
	si{RNA} experiments are highly dependent on the choice of si{RNA}
	sequence. {I}n order to evaluate si{RNA} design rules, we collected
	a database of 398 si{RNA}s of known efficacy from 92 genes. {W}e
	used this database to evaluate previously proposed rules from smaller
	datasets, and to find a new set of rules that are optimal for the
	entire database. {W}e also trained a regression tree with full cross-validation.
	{I}t was however difficult to obtain the same precision as methods
	previously tested on small datasets from one or two genes. {W}e show
	that those methods are overfitting as they work poorly on independent
	validation datasets from multiple genes. {O}ur new design rules can
	predict si{RNA}s with efficacy >/= 50\% in 91\% of cases, and with
	efficacy >/=90\% in 52\% of cases, which is more than a twofold improvement
	over random selection. {S}oftware for designing si{RNA}s is available
	online via a web server at or as a standalone version for high-throughput
	applications.},
  doi = {10.1016/j.bbrc.2004.04.181},
  pdf = {../local/Chalk2004Improved.pdf},
  file = {Chalk2004Improved.pdf:local/Chalk2004Improved.pdf:PDF},
  keywords = {sirna},
  pii = {S0006291X04009374},
  url = {http://dx.doi.org/10.1016/j.bbrc.2004.04.181}
}

@article{Chalk2005siRNAdb,
  author = {Chalk, A. M. and Warfinge, R. E. and Georgii-Hemming, P. and Sonnhammer,
	E. L. L.},
  title = {si{RNA}db: a database of si{RNA} sequences.},
  journal = {Nucleic Acids Res.},
  year = {2005},
  month = {Jan},
  volume = {33},
  number = {Database issue},
  pages = {D131--D134},
  doi = {10.1093/nar/gki136},
  url = {http://dx.doi.org/10.1093/nar/gki136},
  pmid = {15608162},
  pii = {33/suppl_1/D131},
  keywords = {sirna},
  abstract = {Short interfering RNAs (siRNAs) are a popular method for gene-knockdown,
	acting by degrading the target mRNA. Before performing experiments
	it is invaluable to locate and evaluate previous knockdown experiments
	for the gene of interest. The siRNA database provides a gene-centric
	view of siRNA experimental data, including siRNAs of known efficacy
	and siRNAs predicted to be of high efficacy by a combination of methods.
	Linked to these sequences is information such as siRNA thermodynamic
	properties and the potential for sequence-specific off-target effects.
	The database enables the user to evaluate an siRNA's potential for
	inhibition and non-specific effects. The database is available at
	http://siRNA.cgb.ki.se.},
  pdf = {../local/Chalk2005siRNAdb.pdf},
  file = {Chalk2005siRNAdb.pdf:Chalk2005siRNAdb.pdf:PDF},
  owner = {vert},
  timestamp = {2006.03.28}
}

@article{Chan2003Detection,
  author = {Ian Chan and William Wells and Robert V Mulkern and Steven Haker
	and Jianqing Zhang and Kelly H Zou and Stephan E Maier and Clare
	M C Tempany},
  title = {Detection of prostate cancer by integration of line-scan diffusion,
	{T}2-mapping and {T}2-weighted magnetic resonance imaging; a multichannel
	statistical classifier.},
  journal = {Med {P}hys},
  year = {2003},
  volume = {30},
  pages = {2390--2398},
  number = {9},
  month = {Sep},
  abstract = {A multichannel statistical classifier for detecting prostate cancer
	was developed and validated by combining information from three different
	magnetic resonance ({MR}) methodologies: {T}2-weighted, {T}2-mapping,
	and line scan diffusion imaging ({LSDI}). {F}rom these {MR} sequences,
	four different sets of image intensities were obtained: {T}2-weighted
	({T}2{W}) from {T}2-weighted imaging, {A}pparent {D}iffusion {C}oefficient
	({ADC}) from {LSDI}, and proton density ({PD}) and {T}2 ({T}2 {M}ap)
	from {T}2-mapping imaging. {M}anually segmented tumor labels from
	a radiologist, which were validated by biopsy results, served as
	tumor "ground truth." {T}extural features were extracted from the
	images using co-occurrence matrix ({CM}) and discrete cosine transform
	({DCT}). {A}natomical location of voxels was described by a cylindrical
	coordinate system. {A} statistical jack-knife approach was used to
	evaluate our classifiers. {S}ingle-channel maximum likelihood ({ML})
	classifiers were based on 1 of the 4 basic image intensities. {O}ur
	multichannel classifiers: support vector machine ({SVM}) and {F}isher
	linear discriminant ({FLD}), utilized five different sets of derived
	features. {E}ach classifier generated a summary statistical map that
	indicated tumor likelihood in the peripheral zone ({PZ}) of the prostate
	gland. {T}o assess classifier accuracy, the average areas under the
	receiver operator characteristic ({ROC}) curves over all subjects
	were compared. {O}ur best {FLD} classifier achieved an average {ROC}
	area of 0.839(+/-0.064), and our best {SVM} classifier achieved an
	average {ROC} area of 0.761(+/-0.043). {T}he {T}2{W} {ML} classifier,
	our best single-channel classifier, only achieved an average {ROC}
	area of 0.599(+/-0.146). {C}ompared to the best single-channel {ML}
	classifier, our best multichannel {FLD} and {SVM} classifiers have
	statistically superior {ROC} performance ({P}=0.0003 and 0.0017,
	respectively) from pairwise two-sided t-test. {B}y integrating the
	information from multiple images and capturing the textural and anatomical
	features in tumor areas, summary statistical maps can potentially
	aid in image-guided prostate biopsy and assist in guiding and controlling
	delivery of localized therapy under image guidance.},
  pdf = {../local/Chan2003Detection.pdf},
  file = {Chan2003Detection.pdf:local/Chan2003Detection.pdf:PDF},
  keywords = {Algorithms, Anion Exchange Resins, Antigen-Antibody Complex, Artificial
	Intelligence, Automated, Automatic Data Processing, Biological, Blood
	Cells, Chemical, Chromatography, Cluster Analysis, Comparative Study,
	Computational Biology, Computer Simulation, Computer-Assisted, Data
	Interpretation, Databases, Decision Making, Decision Trees, Diffusion
	Magnetic Resonance Imaging, English Abstract, Epitopes, Expert Systems,
	Factual, Fuzzy Logic, Gene Expression Profiling, Gene Expression
	Regulation, Gene Targeting, Genome, Histocompatibility Antigens Class
	I, Humans, Image Interpretation, Image Processing, In Vitro, Indicators
	and Reagents, Information Storage and Retrieval, Ion Exchange, Least-Squares
	Analysis, Liver Cirrhosis, Magnetic Resonance Imaging, Male, Models,
	Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonl, Nucleic
	Acid Conformation, P.H.S., Pattern Recognition, Pro, Prostatic Neoplasms,
	Protein, Protein Binding, Protein Interaction Mapping, Proteins,
	Quantitative Structure-Activity Relationship, RNA, ROC Curve, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Sequence
	Analysis, Severity of Illness Index, Statistical, Structure-Activity
	Relationship, Subtraction Technique, T-Lymphocyte, Transcription
	Factors, Transfer, Treatment Outcome, U.S. Gov't, User-Computer Interface,
	inear Dynamics, teome, 14528961}
}

@article{Chan2002Comparison,
  author = {Kwokleung Chan and Te-Won Lee and Pamela A Sample and Michael H Goldbaum
	and Robert N Weinreb and Terrence J Sejnowski},
  title = {Comparison of machine learning and traditional classifiers in glaucoma
	diagnosis.},
  journal = {I{EEE} {T}rans {B}iomed {E}ng},
  year = {2002},
  volume = {49},
  pages = {963--974},
  number = {9},
  month = {Sep},
  abstract = {Glaucoma is a progressive optic neuropathy with characteristic structural
	changes in the optic nerve head reflected in the visual field. {T}he
	visual-field sensitivity test is commonly used in a clinical setting
	to evaluate glaucoma. {S}tandard automated perimetry ({SAP}) is a
	common computerized visual-field test whose output is amenable to
	machine learning. {W}e compared the performance of a number of machine
	learning algorithms with {STATPAC} indexes mean deviation, pattern
	standard deviation, and corrected pattern standard deviation. {T}he
	machine learning algorithms studied included multilayer perceptron
	({MLP}), support vector machine ({SVM}), and linear ({LDA}) and quadratic
	discriminant analysis ({QDA}), {P}arzen window, mixture of {G}aussian
	({MOG}), and mixture of generalized {G}aussian ({MGG}). {MLP} and
	{SVM} are classifiers that work directly on the decision boundary
	and fall under the discriminative paradigm. {G}enerative classifiers,
	which first model the data probability density and then perform classification
	via {B}ayes' rule, usually give deeper insight into the structure
	of the data space. {W}e have applied {MOG}, {MGG}, {LDA}, {QDA},
	and {P}arzen window to the classification of glaucoma from {SAP}.
	{P}erformance of the various classifiers was compared by the areas
	under their receiver operating characteristic curves and by sensitivities
	(true-positive rates) at chosen specificities (true-negative rates).
	{T}he machine-learning-type classifiers showed improved performance
	over the best indexes from {STATPAC}. {F}orward-selection and backward-elimination
	methodology further improved the classification rate and also has
	the potential to reduce testing time by diminishing the number of
	visual-field location measurements.},
  doi = {10.1109/TBME.2002.802012},
  pdf = {../local/Chan2002Comparison.pdf},
  file = {Chan2002Comparison.pdf:local/Chan2002Comparison.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric
	Acid Cycle, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases,
	Decision Making, Diagnosis, Differential, Discriminant Analysis,
	Drug, Drug Design, Electrostatics, Epitopes, Eukaryotic Cells, Factual,
	False Negative Reactions, False Positive Reactions, Feasibility Studies,
	Female, Gene Expression, Gene Expression Profiling, Gene Expression
	Regulation, Genes, Genetic, Genetic Heterogeneity, Genetic Markers,
	Glaucoma, HLA Antigens, Hemolysins, Histocompatibility Antigens Class
	I, Humans, Internet, Intraocular Pressure, Ion Exchange, Lasers,
	Leukemia, Ligands, Likelihood Functions, Logistic Models, Lung Neoplasms,
	Lymphocytic, Lymphoma, Markov Chains, Mathematics, Messenger, Models,
	Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology,
	Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological,
	Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't, Nucleic Acid Conformation,
	Nucleic Acid Hybridization, Observer Variation, Oligonucleotide Array
	Sequence Analysis, Open-Angle, Ophthalmoscopy, Optic Disk, Optic
	Nerve Diseases, Ovarian Neoplasms, P.H.S., Pattern Recognition, Peptides,
	Perimetry, Predictive Value of Tests, Probability, Probability Learning,
	Protein, Protein Binding, Protein Conformation, Proteins, Quality
	Control, Quantum Theory, RNA, RNA Splicing, ROC Curve, Receptors,
	Reference Values, Regression Analysis, Reproducibility of Results,
	Research Support, Robotics, Saccharomyces cerevisiae Proteins, Sensitivity
	and Specificity, Sequence Analysis, Signal Processing, Software,
	Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship,
	T-Lymphocyte, Thermodynamics, Transcription, Tumor Markers, U.S.
	Gov't, 12214886},
  url = {http://dx.doi.org/10.1109/TBME.2002.802012}
}

@article{Chanda2003Fulfilling,
  author = {Chanda, S. K. and Caldwell, J. S.},
  title = {{F}ulfilling the promise: drug discovery in the post-genomic era.},
  journal = {Drug Discov Today},
  year = {2003},
  month = {Feb},
  volume = {8},
  number = {4},
  pages = {168--174},
  pmid = {12581711},
  pii = {S1359644602025953},
  abstract = {The genomic era has brought with it a basic change in experimentation,
	enabling researchers to look more comprehensively at biological systems.
	The sequencing of the human genome coupled with advances in automation
	and parallelization technologies have afforded a fundamental transformation
	in the drug target discovery paradigm, towards systematic whole genome
	and proteome analyses. In conjunction with novel proteomic techniques,
	genome-wide annotation of function in cellular models is possible.
	Overlaying data derived from whole genome sequence, expression and
	functional analysis will facilitate the identification of causal
	genes in disease and significantly streamline the target validation
	process. Moreover, several parallel technological advances in small
	molecule screening have resulted in the development of expeditious
	and powerful platforms for elucidating inhibitors of protein or pathway
	function. Conversely, high-throughput and automated systems are currently
	being used to identify targets of orphan small molecules. The consolidation
	of these emerging functional genomics and drug discovery technologies
	promises to reap the fruits of the genomic revolution.},
  owner = {mahe},
  timestamp = {2006.08.11}
}

@article{Chang2008Fast,
  author = {Chang, C. Q. and Ding, Z. and Hung, Y. S. and Fung, P. C.},
  title = {Fast network component analysis ({FastNCA}) for gene regulatory network
	reconstruction from microarray data},
  journal = {Bioinformatics},
  year = {2008},
  volume = {24},
  pages = {1349--1358},
  number = {11},
  owner = {jp},
  timestamp = {2011.07.23}
}

@manual{Chang2001LIBSVM,
  author = {Chang, C.-C. and Lin, C.-J.},
  title = {{LIBSVM}: a library for support vector machines},
  year = {2001},
  note = {Software available at \url{http://www.csie.ntu.edu.tw/~cjlin/libsvm}},
  timestamp = {2010.11.02},
  owner = {jp}
}

@article{Chang2004Analysis,
  author = {Ming-Wei Chang and Chih-Jen Lin and Ruby Chiu-Hsing Weng},
  title = {Analysis of switching dynamics with competing support vector machines.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2004},
  volume = {15},
  pages = {720--727},
  number = {3},
  month = {May},
  abstract = {We present a framework for the unsupervised segmentation of switching
	dynamics using support vector machines. {F}ollowing the architecture
	by {P}awelzik et al., where annealed competing neural networks were
	used to segment a nonstationary time series, in this paper, we exploit
	the use of support vector machines, a well-known learning technique.
	{F}irst, a new formulation of support vector regression is proposed.
	{S}econd, an expectation-maximization step is suggested to adaptively
	adjust the annealing parameter. {R}esults indicate that the proposed
	approach is promising.},
  doi = {10.1109/TNN.2004.824270},
  pdf = {../local/Chang2004Analysis.pdf},
  file = {Chang2004Analysis.pdf:local/Chang2004Analysis.pdf:PDF},
  keywords = {Algorithms, Artificial Intelligence, Bayes Theorem, Computing Methodologies,
	Models, Neural Networks (Computer), Non-U.S. Gov't, Normal Distribution,
	Regression Analysis, Research Design, Research Support, Theoretical,
	15384558},
  url = {http://dx.doi.org/10.1109/TNN.2004.824270}
}

@article{Chang2005Automatic,
  author = {Ruey-Feng Chang and Wen-Jie Wu and Woo Kyung Moon and Dar-Ren Chen},
  title = {Automatic ultrasound segmentation and morphology based diagnosis
	of solid breast tumors.},
  journal = {Breast {C}ancer {R}es {T}reat},
  year = {2005},
  volume = {89},
  pages = {179--185},
  number = {2},
  month = {Jan},
  abstract = {Ultrasound ({US}) is a useful diagnostic tool to distinguish benign
	from malignant masses of the breast. {I}t is a very convenient and
	safe diagnostic method. {H}owever, there is a considerable overlap
	benignancy and malignancy in ultrasonic images and interpretation
	is subjective. {A} high performance breast tumors computer-aided
	diagnosis ({CAD}) system can provide an accurate and reliable diagnostic
	second opinion for physicians to distinguish benign breast lesions
	from malignant ones. {T}he potential of sonographic texture analysis
	to improve breast tumor classifications has been demonstrated. {H}owever,
	the texture analysis is system-dependent. {T}he disadvantages of
	these systems which use texture analysis to classify tumors are they
	usually perform well only in one specific ultrasound system. {W}hile
	{M}orphological based {US} diagnosis of breast tumor will take the
	advantage of nearly independent to either the setting of {US} system
	and different {US} machines. {I}n this study, the tumors are segmented
	using the newly developed level set method at first and then six
	morphologic features are used to distinguish the benign and malignant
	cases. {T}he support vector machine ({SVM}) is used to classify the
	tumors. {T}here are 210 ultrasonic images of pathologically proven
	benign breast tumors from 120 patients and carcinomas from 90 patients
	in the ultrasonic image database. {T}he database contains only one
	image from each patient. {T}he ultrasonic images are captured at
	the largest diameter of the tumor. {T}he images are collected consecutively
	from {A}ugust 1, 1999 to {M}ay 31, 2000; the patients' ages ranged
	from 18 to 64 years. {S}onography is performed using an {ATL} {HDI}
	3000 system with a {L}10-5 small part transducer. {I}n the experiment,
	the accuracy of {SVM} with shape information for classifying malignancies
	is 90.95\% (191/210), the sensitivity is 88.89\% (80/90), the specificity
	is 92.5\% (111/120), the positive predictive value is 89.89\% (80/89),
	and the negative predictive value is 91.74\% (111/121).},
  doi = {10.1007/s10549-004-2043-z},
  pdf = {../local/Chang2005Automatic.pdf},
  file = {Chang2005Automatic.pdf:local/Chang2005Automatic.pdf:PDF},
  keywords = {breastcancer},
  url = {http://dx.doi.org/10.1007/s10549-004-2043-z}
}

@article{Chang2003Improvement,
  author = {Ruey-Feng Chang and Wen-Jie Wu and Woo Kyung Moon and Dar-Ren Chen},
  title = {Improvement in breast tumor discrimination by support vector machines
	and speckle-emphasis texture analysis},
  journal = {Ultrasound Med Biol},
  year = {2003},
  volume = {29},
  pages = {679--686},
  number = {5},
  month = {May},
  abstract = {Recent statistics show that breast cancer is a major cause of death
	among women in developed countries. {H}ence, finding an accurate
	and effective diagnostic method is very important. {I}n this paper,
	we propose a high precision computer-aided diagnosis ({CAD}) system
	for sonography. {W}e utilize a support vector machine ({SVM}) to
	classify breast tumors according to their texture information surrounding
	speckle pixels. {W}e test our system with 250 pathologically-proven
	breast tumors including 140 benign and 110 malignant ones. {A}lso
	we compare the diagnostic performances of three texture features,
	i.e., speckle-emphasis texture feature, nonspeckle-emphasis texture
	feature and conventional all pixels texture feature, applied to breast
	sonography using {SVM}. {I}n our experiment, the accuracy of {SVM}
	with speckle information for classifying malignancies is 93.2\% (233/250),
	the sensitivity is 95.45\% (105/110), the specificity is 91.43\%
	(128/140), the positive predictive value is 89.74\% (105/117) and
	the negative predictive value is 96.24\% (128/133). {B}ased on the
	experimental results, speckle phenomenon is a useful tool to be used
	in computer-aided diagnosis; its performance is better than those
	of the other two features. {S}peckle phenomenon, which is considered
	as noise in sonography, can intrude into judgments of a physician
	using naked eyes but it is another story for application in a computer-aided
	diagnosis algorithm.},
  doi = {10.1016/S0301-5629(02)00788-3},
  pdf = {../local/Chang2003Improvement.pdf},
  file = {Chang2003Improvement.pdf:local/Chang2003Improvement.pdf:PDF},
  keywords = {breastcancer},
  pii = {S0301562902007883},
  url = {http://dx.doi.org/10.1016/S0301-5629(02)00788-3}
}

@article{Chang2003Support,
  author = {Ruey-Feng Chang and Wen-Jie Wu and Woo Kyung Moon and Yi-Hong Chou
	and Dar-Ren Chen},
  title = {Support vector machines for diagnosis of breast tumors on {US} images},
  journal = {Acad Radiol},
  year = {2003},
  volume = {10},
  pages = {189--197},
  number = {2},
  month = {Feb},
  abstract = {R{ATIONALE} {AND} {OBJECTIVES}: {B}reast cancer has become the leading
	cause of cancer deaths among women in developed countries. {T}o decrease
	the related mortality, disease must be treated as early as possible,
	but it is hard to detect and diagnose tumors at an early stage. {A}
	well-designed computer-aided diagnostic system can help physicians
	avoid misdiagnosis and avoid unnecessary biopsy without missing cancers.
	{I}n this study, the authors tested one such system to determine
	its effectiveness. {MATERIALS} {AND} {METHODS}: {M}any computer-aided
	diagnostic systems for ultrasonography are based on the neural network
	model and classify breast tumors according to texture features. {T}he
	authors tested a refinement of this model, an advanced support vector
	machine ({SVM}), in 250 cases of pathologically proved breast tumors
	(140 benign and 110 malignant), and compared its performance with
	that of a multilayer propagation neural network. {RESULTS}: {T}he
	accuracy of the {SVM} for classifying malignancies was 85.6\% (214
	of 250); the sensitivity, 95.45\% (105 of 110); the specificity,
	77.86\% (109 of 140); the positive predictive value, 77.21\% (105
	of 136); and the negative predictive value, 95.61\% (109 of 114).
	{CONCLUSION}: {T}he {SVM} proved helpful in the imaging diagnosis
	of breast cancer. {T}he classification ability of the {SVM} is nearly
	equal to that of the neural network model, and the {SVM} has a much
	shorter training time (1 vs 189 seconds). {G}iven the increasing
	size and complexity of data sets, the {SVM} is therefore preferable
	for computer-aided diagnosis.},
  doi = {10.1016/S1076-6332(03)80044-2},
  url = {http://dx.doi.org/10.1016/S1076-6332(03)80044-2}
}

@book{Chapelle2006Semi-Supervised,
  title = {Semi-Supervised Learning},
  publisher = {MIT Press},
  year = {2006},
  editor = {Chapelle, O. and Sch{\"o}lkopf, B. and Zien, A.},
  address = {Cambridge, MA},
  owner = {fantine},
  timestamp = {2009.07.09},
  url = {http://www.kyb.tuebingen.mpg.de/ssl-book}
}

@incollection{Chapelle2005A,
  author = {Chapelle, Olivier and Harchaoui, Zaid},
  title = {A Machine Learning Approach to Conjoint Analysis},
  booktitle = {Advances in Neural Information Processing Systems 17},
  publisher = {MIT Press},
  year = {2005},
  editor = {Saul, Lawrence K. and Weiss, Yair and Bottou, L{\'e}on},
  pages = {257--264},
  address = {Cambridge, MA},
  original = {0257_852.PDF}
}

@book{Chavel1984Eigenvalues,
  title = {Eigenvalues in {Riemannian} geometry},
  publisher = {Academic Press},
  year = {1984},
  author = {Chavel, I.},
  address = {Orlando, FL},
  subject = {net}
}

@article{Chen2007BiophysJ,
  author = {Chen, C. and Cui, J. and Lu, H. and Wang, R. and Zhang, S. and Shen,
	P.},
  title = {Modeling of the role of a {Bax}-activation switch in the mitochondrial
	apoptosis decision},
  journal = {Biophys J},
  year = {2007},
  volume = {92},
  pages = {4304--4315},
  number = {12},
  abstract = {We performed in silico modeling of the regulatory network of mitochondrial
	apoptosis through which we examined the role of a Bax-activation
	switch in governing the mitochondrial apoptosis decision. Two distinct
	modeling methods were used in this article. One is continuous and
	deterministic, comprised of a set of ordinary differential equations.
	The other, carried out in a discrete manner, is based on a cellular
	automaton, which takes stochastic fluctuations into consideration.
	We focused on dynamic properties of the mitochondrial apoptosis regulatory
	network. The roles of Bcl-2 family proteins in cellular responses
	to apoptotic stimuli were examined. In our simulations, a self-amplification
	process of Bax-activation is indicated. Further analysis suggests
	that the core module of Bax-activation is bistable in both deterministic
	and stochastic models, and this feature is robust to noise and wide
	ranges of parameter variation. When coupling with Bax-polymerization,
	it forms a one-way-switch, which governs irreversible behaviors of
	Bax-activation even with attenuation of apoptotic stimulus. Together
	with the growing biochemical evidence, we propose a novel molecular
	switch mechanism embedded in the mitochondrial apoptosis regulatory
	network and give a plausible explanation for the all-or-none, irreversible
	character of mitochondrial apoptosis.},
  keywords = {csbcbook}
}

@article{Chen2007FEBS,
  author = {Chen, C. and Cui, J. and Zhang, W. and Shen, P.},
  title = {Robustness analysis identifies the plausible model of the {Bcl-2} apoptotic
	switch},
  journal = {FEBS Lett},
  year = {2007},
  volume = {581},
  pages = {5143--5150},
  number = {26},
  abstract = {In this paper two competing models of the B-cell lymphoma 2 (Bcl-2)
	apoptotic switch were contrasted by mathematical modeling and robustness
	analysis. Since switch-like behaviors are required for models that
	attempt to explain the all-or-none decisions of apoptosis, ultrasensitivity
	was employed as a criterion for comparison. Our results successfully
	exhibit that the direct activation model operates more reliably to
	achieve a robust switch in cellular conditions. Moreover, by investigating
	the robustness of other important features of the Bcl-2 apoptotic
	switch (including low Bax basal activation, inhibitory role of anti-apoptotic
	proteins and insensitivity to small perturbations) the direct activation
	model was further supported. In all, we identified the direct activation
	model as a more plausible explanation for the Bcl-2 apoptotic switch.},
  keywords = {csbcbook}
}

@article{Chen2011Removing,
  author = {Chao Chen and Kay Grennan and Judith Badner and Dandan Zhang and
	Elliot Gershon and Li Jin and Chunyu Liu},
  title = {Removing batch effects in analysis of expression microarray data:
	an evaluation of six batch adjustment methods},
  journal = {PLoS One},
  year = {2011},
  volume = {6},
  pages = {e17238},
  number = {2},
  abstract = {The expression microarray is a frequently used approach to study gene
	expression on a genome-wide scale. However, the data produced by
	the thousands of microarray studies published annually are confounded
	by "batch effects," the systematic error introduced when samples
	are processed in multiple batches. Although batch effects can be
	reduced by careful experimental design, they cannot be eliminated
	unless the whole study is done in a single batch. A number of programs
	are now available to adjust microarray data for batch effects prior
	to analysis. We systematically evaluated six of these programs using
	multiple measures of precision, accuracy and overall performance.
	ComBat, an Empirical Bayes method, outperformed the other five programs
	by most metrics. We also showed that it is essential to standardize
	expression data at the probe level when testing for correlation of
	expression profiles, due to a sizeable probe effect in microarray
	data that can inflate the correlation among replicates and unrelated
	samples.},
  doi = {10.1371/journal.pone.0017238},
  institution = {National Ministry of Education Key Laboratory of Contemporary Anthropology,
	Fudan University, Shanghai, People's Republic of China.},
  keywords = {Bayes Theorem; Case-Control Studies; Data Interpretation, Statistical;
	Gene Expression Profiling, standards/statistics /&/ numerical data;
	Humans; Microarray Analysis, standards/statistics /&/ numerical data;
	ROC Curve; Reference Standards; Research Design; Sample Size; Selection
	Bias; Validation Studies as Topic},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pmid = {21386892},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1371/journal.pone.0017238}
}

@article{Chen2005ChemDB,
  author = {Chen, J. and Swamidass, S. J. and Dou, Y. and Bruand, J. and Baldi,
	P.},
  title = {{ChemDB}: a public database of small molecules and related chemoinformatics
	resources},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {4133--4139},
  number = {22},
  month = {Sep},
  abstract = {M{OTIVATION}: {T}he development of chemoinformatics has been hampered
	by the lack of large, publicly available, comprehensive repositories
	of molecules, in particular of small molecules. {S}mall molecules
	play a fundamental role in organic chemistry and biology. {T}hey
	can be used as combinatorial building blocks for chemical synthesis,
	as molecular probes in chemical genomics and systems biology, and
	for the screening and discovery of new drugs and other useful compounds.
	{RESULTS}: {W}e describe {C}hem{DB}, a public database of small molecules
	available over the {W}eb. {C}hem{DB} is built using the digital catalogs
	of over a hundred vendors and other public sources and is annotated
	with information derived from these sources as well as from computational
	methods, such as predicted solubility and 3{D} structure. {I}t supports
	multiple molecular formats and is periodically updated, automatically
	whenever possible. {T}he current version of the database contains
	approximately 4.1 {M} commercially available compounds, 8.2 {M} counting
	isomers. {T}he database includes a user-friendly graphical interface,
	chemical reactions capabilities, as well as unique search capabilities.
	{AVAILABILITY}: {D}atabase, datasets, and supplementary materials
	available through: http://cdb.ics.uci.edu.},
  doi = {10.1093/bioinformatics/bti683},
  pii = {bti683},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti683}
}

@inproceedings{Chen2009A,
  author = {Chen, Jianhui and Tang, Lei and Liu, Jun and Ye, Jieping},
  title = {A convex formulation for learning shared structures from multiple
	tasks},
  booktitle = {ICML '09: Proceedings of the 26th Annual International Conference
	on Machine Learning},
  year = {2009},
  pages = {137--144},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1553374.1553392},
  isbn = {978-1-60558-516-1},
  location = {Montreal, Quebec, Canada}
}

@article{Chen2004Reducing,
  author = {Jiun-Hung Chen and Chu-Song Chen},
  title = {Reducing {SVM} classification time using multiple mirror classifiers},
  journal = {IEEE Trans Syst Man Cybern B Cybern},
  year = {2004},
  volume = {34},
  pages = {1173--1183},
  number = {2},
  month = {Apr},
  abstract = {We propose an approach that uses mirror point pairs and a multiple
	classifier system to reduce the classification time of a support
	vector machine ({SVM}). {D}ecisions made with multiple simple classifiers
	formed from mirror pairs are integrated to approximate the classification
	rule of a single {SVM}. {A} coarse-to-fine approach is developed
	for selecting a given number of member classifiers. {A} clustering
	method, derived from the similarities between classifiers, is used
	for a coarse selection. {A} greedy strategy is then used for fine
	selection of member classifiers. {S}elected member classifiers are
	further refined by finding a weighted combination with a perceptron.
	{E}xperiment results show that our approach can successfully speed
	up {SVM} decisions while maintaining comparable classification accuracy.},
  doi = {10.1109/TSMCB.2003.821867},
  pdf = {../local/Chen2004Reducing.pdf},
  file = {Chen2004Reducing.pdf:local/Chen2004Reducing.pdf:PDF},
  url = {http://dx.doi.org/10.1109/TSMCB.2003.821867}
}

@article{Chen2007GPCR,
  author = {Chen, J.-Z. and Wang, J. and Xie, X.-Q.},
  title = {{GPCR} structure-based virtual screening approach for {CB2} antagonist
	search},
  journal = {J. Chem. Inf. Model.},
  year = {2007},
  volume = {47},
  pages = {1626--1637},
  number = {4},
  abstract = {The potential for therapeutic specificity in regulating diseases has
	made cannabinoid (CB) receptors one of the most important G-protein-coupled
	receptor (GPCR) targets in search for new drugs. Considering the
	lack of related 3D experimental structures, we have established a
	structure-based virtual screening protocol to search for CB2 bioactive
	antagonists based on the 3D CB2 homology structure model. However,
	the existing homology-predicted 3D models often deviate from the
	native structure and therefore may incorrectly bias the in silico
	design. To overcome this problem, we have developed a 3D testing
	database query algorithm to examine the constructed 3D CB2 receptor
	structure model as well as the predicted binding pocket. In the present
	study, an antagonist-bound CB2 receptor complex model was initially
	generated using flexible docking simulation and then further optimized
	by molecular dynamic and mechanical (MD/MM) calculations. The refined
	3D structural model of the CB2-ligand complex was then inspected
	by exploring the interactions between the receptor and ligands in
	order to predict the potential CB2 binding pocket for its antagonist.
	The ligand-receptor complex model and the predicted antagonist binding
	pockets were further processed and validated by FlexX-Pharm docking
	against a testing compound database that contains known antagonists.
	Furthermore, a consensus scoring (CScore) function algorithm was
	established to rank the binding interaction modes of a ligand on
	the CB2 receptor. Our results indicated that the known antagonists
	seeded in the testing database can be distinguished from a significant
	amount of randomly chosen molecules. Our studies demonstrated that
	the established GPCR structure-based virtual screening approach provided
	a new strategy with a high potential for in silico identifying novel
	CB2 antagonist leads based on the homology-generated 3D CB2 structure
	model.},
  doi = {10.1021/ci7000814},
  pdf = {../local/Chen2007GPCR.pdf},
  file = {Chen2007GPCR.pdf:Chen2007GPCR.pdf:PDF},
  keywords = {chemogenomics},
  owner = {laurent},
  pmid = {17580929},
  timestamp = {2008.07.21},
  url = {http://dx.doi.org/10.1021/ci7000814}
}

@article{Chen2005stochastic,
  author = {Chen, K.-C. and Wang, T.-Y. and Tseng, H.-H. and Huang, C.-Y. F.
	and Kao, C.-Y.},
  title = {A stochastic differential equation model for quantifying transcriptional
	regulatory network in {Saccharomyces} cerevisiae},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2883--2890},
  number = {12},
  month = {Jun},
  abstract = {MOTIVATION: The explosion of microarray studies has promised to shed
	light on the temporal expression patterns of thousands of genes simultaneously.
	However, available methods are far from adequate in efficiently extracting
	useful information to aid in a greater understanding of transcriptional
	regulatory network. Biological systems have been modeled as dynamic
	systems for a long history, such as genetic networks and cell regulatory
	network. This study evaluated if the stochastic differential equation
	(SDE), which is prominent for modeling dynamic diffusion process
	originating from the irregular Brownian motion, can be applied in
	modeling the transcriptional regulatory network in Saccharomyces
	cerevisiae. RESULTS: To model the time-continuous gene-expression
	datasets, a model of SDE is applied to depict irregular patterns.
	Our goal is to fit a generalized linear model by combining putative
	regulators to estimate the transcriptional pattern of a target gene.
	Goodness-of-fit is evaluated by log-likelihood and Akaike Information
	Criterion. Moreover, estimations of the contribution of regulators
	and inference of transcriptional pattern are implemented by statistical
	approaches. Our SDE model is basic but the test results agree well
	with the observed dynamic expression patterns. It implies that advanced
	SDE model might be perfectly suited to portray transcriptional regulatory
	networks. AVAILABILITY: The R code is available on request. CONTACT:
	cykao@csie.ntu.edu.tw SUPPLEMENTARY INFORMATION: http://www.csie.ntu.edu.tw/~b89x035/yeast/},
  doi = {10.1093/bioinformatics/bti415},
  pii = {bti415},
  pmid = {15802287},
  timestamp = {2008.02.04},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti415}
}

@article{Chen2004Level,
  author = {Q. Chen and Z. M. Zhou and Y. G. Qu and P. A. Heng and D. S. Xia},
  title = {Level set based auto segmentation of the tagged left ventricle {MR}
	images},
  journal = {Stud Health Technol Inform},
  year = {2004},
  volume = {98},
  pages = {63--65},
  abstract = {To facilitate automatic segmentation, we adopt {SVM} ({S}upport {V}ector
	{M}achine) to localize the left ventricle, and the segmentation is
	then carried out with narrow band level set. {T}he method of generating
	the narrow band is improved such that the time used is reduced. {B}ased
	on the imaging characteristics of the tagged left ventricle {MR}
	images, {BPV} (block-pixel variation) and intensity comparability
	are introduced to improve the speed term of level set and to increase
	the precision of segmentation. {O}ur method can perform the segmentation
	of the tagged left ventricle {MR} images accurately and automatically.}
}

@article{Chen2004Sparse,
  author = {Sheng Chen and Xia Hong and Chris J Harris},
  title = {Sparse kernel density construction using orthogonal forward regression
	with leave-one-out test score and local regularization},
  journal = {IEEE Trans Syst Man Cybern B Cybern},
  year = {2004},
  volume = {34},
  pages = {1708--1717},
  number = {4},
  month = {Aug},
  abstract = {This paper presents an efficient construction algorithm for obtaining
	sparse kernel density estimates based on a regression approach that
	directly optimizes model generalization capability. {C}omputational
	efficiency of the density construction is ensured using an orthogonal
	forward regression, and the algorithm incrementally minimizes the
	leave-one-out test score. {A} local regularization method is incorporated
	naturally into the density construction process to further enforce
	sparsity. {A}n additional advantage of the proposed algorithm is
	that it is fully automatic and the user is not required to specify
	any criterion to terminate the density construction procedure. {T}his
	is in contrast to an existing state-of-art kernel density estimation
	method using the support vector machine ({SVM}), where the user is
	required to specify some critical algorithm parameter. {S}everal
	examples are included to demonstrate the ability of the proposed
	algorithm to effectively construct a very sparse kernel density estimate
	with comparable accuracy to that of the full sample optimized {P}arzen
	window density estimate. {O}ur experimental results also demonstrate
	that the proposed algorithm compares favorably with the {SVM} method,
	in terms of both test accuracy and sparsity, for constructing kernel
	density estimates.},
  doi = {10.1109/TSMCB.2003.817107},
  pdf = {../local/Chen2004Sparse.pdf},
  file = {Chen2004Sparse.pdf:Chen2004Sparse.pdf:PDF},
  url = {http://dx.doi.org/10.1109/TSMCB.2003.817107}
}

@article{Chen1998Atomic,
  author    = {Chen, S. S. and Donoho, D. L. and Saunders, M.},
  title     = {Atomic decomposition by basis pursuit},
  journal   = {SIAM J. Sci. Comput.},
  year      = 1998,
  volume    = 20,
  number    = 1,
  pages     = {33--61},
  doi       = {10.1137/S1064827596304010},
  url       = {http://dx.doi.org/10.1137/S1064827596304010},
  timestamp = {2007.12.31}
}

@article{Chen1999Modeling,
  author    = {Chen, T. and He, H. L. and Church, G. M.},
  title     = {Modeling gene expression with differential equations},
  journal   = {Pac. Symp. Biocomput.},
  year      = 1999,
  volume    = 4,
  pages     = {29--40},
  pmid      = {10380183},
  timestamp = {2008.02.04},
  abstract  = {We propose a differential equation model for gene expression and provide
	two methods to construct the model from a set of temporal data. We
	model both transcription and translation by kinetic equations with
	feedback loops from translation products to transcription. Degradation
	of proteins and mRNAs is also incorporated. We study two methods
	to construct the model from experimental data: Minimum Weight Solutions
	to Linear Equations (MWSLE), which determines the regulation by solving
	under-determined linear equations, and Fourier Transform for Stable
	Systems (FTSS), which refines the model with cell cycle constraints.
	The results suggest that a minor set of temporal data may be sufficient
	to construct the model at the genome level. We also give a comprehensive
	discussion of other extended models: the RNA Model, the Protein Model,
	and the Time Delay Model.}
}

@article{Chen2008Mapping,
  author = {Wei Chen and Vera Kalscheuer and Andreas Tzschach and Corinna Menzel
	and Reinhard Ullmann and Marcel Holger Schulz and Fikret Erdogan
	and Na Li and Zofia Kijas and Ger Arkesteijn and Lopez Pajares, Isidora
	and Margret Goetz-Sothmann and Uwe Heinrich and Imma Rost and Andreas
	Dufke and Ute Grasshoff and Birgitta Glaeser and Martin Vingron and
	H. Hilger Ropers},
  title = {Mapping translocation breakpoints by next-generation sequencing},
  journal = {Genome Res.},
  year = {2008},
  volume = {18},
  pages = {1143--1149},
  number = {7},
  month = {Jul},
  abstract = {Balanced chromosome rearrangements (BCRs) can cause genetic diseases
	by disrupting or inactivating specific genes, and the characterization
	of breakpoints in disease-associated BCRs has been instrumental in
	the molecular elucidation of a wide variety of genetic disorders.
	However, mapping chromosome breakpoints using traditional methods,
	such as in situ hybridization with fluorescent dye-labeled bacterial
	artificial chromosome clones (BAC-FISH), is rather laborious and
	time-consuming. In addition, the resolution of BAC-FISH is often
	insufficient to unequivocally identify the disrupted gene. To overcome
	these limitations, we have performed shotgun sequencing of flow-sorted
	derivative chromosomes using "next-generation" (Illumina/Solexa)
	multiplex sequencing-by-synthesis technology. As shown here for three
	different disease-associated BCRs, the coverage attained by this
	platform is sufficient to bridge the breakpoints by PCR amplification,
	and this procedure allows the determination of their exact nucleotide
	positions within a few weeks. Its implementation will greatly facilitate
	large-scale breakpoint mapping and gene finding in patients with
	disease-associated balanced translocations.},
  doi = {10.1101/gr.076166.108},
  pdf = {../local/Chen2008Mapping.pdf},
  file = {Chen2008Mapping.pdf:Chen2008Mapping.pdf:PDF},
  institution = {Max Planck Institute for Molecular Genetics, 14195 Berlin, Germany.
	wei@molgen.mpg.de},
  keywords = {ngs, csbcbook, csbcbook-ch2},
  owner = {jp},
  pii = {gr.076166.108},
  pmid = {18326688},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1101/gr.076166.108}
}

@article{Chen2002Gene,
  author = {Chen, X. and Cheung, S. T. and So, S. and Fan, S. T. and Barry, C.
	and Higgins, J. and Lai, K.-M. and Ji, J. and Dudoit, S. and Ng,
	I. O. L. and van de Rijn, M. and Botstein, D. and Brown, P. O.},
  title = {Gene expression patterns in human liver cancers},
  journal = {Mol. Biol. Cell},
  year = {2002},
  volume = {13},
  pages = {1929--1939},
  number = {6},
  month = {Jun},
  abstract = {Hepatocellular carcinoma (HCC) is a leading cause of death worldwide.
	Using cDNA microarrays to characterize patterns of gene expression
	in HCC, we found consistent differences between the expression patterns
	in HCC compared with those seen in nontumor liver tissues. The expression
	patterns in HCC were also readily distinguished from those associated
	with tumors metastatic to liver. The global gene expression patterns
	intrinsic to each tumor were sufficiently distinctive that multiple
	tumor nodules from the same patient could usually be recognized and
	distinguished from all the others in the large sample set on the
	basis of their gene expression patterns alone. The distinctive gene
	expression patterns are characteristic of the tumors and not the
	patient; the expression programs seen in clonally independent tumor
	nodules in the same patient were no more similar than those in tumors
	from different patients. Moreover, clonally related tumor masses
	that showed distinct expression profiles were also distinguished
	by genotypic differences. Some features of the gene expression patterns
	were associated with specific phenotypic and genotypic characteristics
	of the tumors, including growth rate, vascular invasion, and p53
	overexpression.},
  doi = {10.1091/mbc.02-02-0023},
  pdf = {../local/Chen2002Gene.pdf},
  file = {Chen2002Gene.pdf:Chen2002Gene.pdf:PDF},
  institution = {Department of Biochemistry, Stanford University School of Medicine,
	Stanford, California 94305, USA.},
  keywords = {csbcbook-ch3, csbcbook},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {12058060},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1091/mbc.02-02-0023}
}

@article{Chen1998Recursive,
  author = {Chen, X. and Rusinko, III, A. and Young, S. S.},
  title = {Recursive Partitioning Analysis of a Large Structure-Activity
	Data Set Using Three-Dimensional Descriptors},
  journal = {J Chem Inf Comput Sci},
  year = {1998},
  volume = {38},
  pages = {1054--1062},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.22}
}

@article{Chen2004Prediction,
  author = {Chen, Y. C. and Lin, Y. S. and Lin, C. J. and Hwang, J. K.},
  title = {Prediction of the bonding states of cysteines using the support vector
	machines based on multiple feature vectors and cysteine state sequences},
  journal = {Proteins},
  year = {2004},
  volume = {55},
  pages = {1036--1042},
  number = {4},
  abstract = {The support vector machine ({SVM}) method is used to predict the bonding
	states of cysteines. {B}esides using local descriptors such as the
	local sequences, we include global information, such as amino acid
	compositions and the patterns of the states of cysteines (bonded
	or nonbonded), or cysteine state sequences, of the proteins. {W}e
	found that {SVM} based on local sequences or global amino acid compositions
	yielded similar prediction accuracies for the data set comprising
	4136 cysteine-containing segments extracted from 969 nonhomologous
	proteins. {H}owever, the {SVM} method based on multiple feature vectors
	(combining local sequences and global amino acid compositions) significantly
	improves the prediction accuracy, from 80\% to 86\%. {I}f coupled with
	cysteine state sequences, {SVM} based on multiple feature vectors
	yields 90\% in overall prediction accuracy and a 0.77 {M}atthews correlation
	coefficient, around 10\% and 22\% higher than the corresponding values
	obtained by {SVM} based on local sequence information.},
  doi = {10.1002/prot.20079},
  pdf = {../local/Chen2004Prediction.pdf},
  file = {Chen2004Prediction.pdf:local/Chen2004Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/prot.20079}
}

@article{Chen2005Understanding,
  author = {Chen, Y. and Xu, D.},
  title = {Understanding protein dispensability through machine-learning analysis
	of high-throughput data},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {575--581},
  month = {Mar},
  abstract = {Motivation: {P}rotein dispensability is fundamental to understanding
	of gene function and evolution. {R}ecent advances in generating high-throughput
	data such as genomic sequence data, protein-protein interaction data,
	gene-expression data, and growth-rate data of mutants allow us to
	investigate protein dispensability systematically at the genome scale. {R}esults:
	{I}n our studies, protein dispensability is represented as a fitness
	score that is measured by the growth rate of gene-deletion mutants.
	{T}hrough analyses of high-throughput data in yeast {S}accharomyces
	cerevisiae, we found that a protein's dispensability had significant
	correlations with its evolutionary rate and duplication rate, as
	well as its connectivity in protein-protein interaction network and
	gene-expression correlation network. {N}eural network and support
	vector machine were applied to predict protein dispensability through
	high-throughput data. {O}ur studies shed some lights on global characteristics
	of protein dispensability and evolution. {A}vailability: {T}he original
	datasets for protein dispensability analysis and prediction, together
	with related scripts, are available at http://digbio.missouri.edu/~ychen/{P}ro{D}ispen/.},
  doi = {10.1093/bioinformatics/bti058},
  pdf = {../local/Chen2005Understanding.pdf},
  file = {Chen2005Understanding.pdf:local/Chen2005Understanding.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti058}
}

@article{Cheng2005Protein,
  author = {Cheng, Betty Yee Man and Carbonell, Jaime G. and Klein-Seetharaman, Judith},
  title = {Protein classification based on text document classification techniques.},
  journal = {Proteins},
  year = {2005},
  volume = {58},
  pages = {955--970},
  number = {4},
  month = mar,
  abstract = {The need for accurate, automated protein classification methods continues
	to increase as advances in biotechnology uncover new proteins. {G}-protein
	coupled receptors ({GPCR}s) are a particularly difficult superfamily
	of proteins to classify due to extreme diversity among its members.
	{P}revious comparisons of {BLAST}, k-nearest neighbor (k-{NN}), hidden
	markov model ({HMM}) and support vector machine ({SVM}) using alignment-based
	features have suggested that classifiers at the complexity of {SVM}
	are needed to attain high accuracy. {H}ere, analogous to document
	classification, we applied {D}ecision {T}ree and {N}aive {B}ayes
	classifiers with chi-square feature selection on counts of n-grams
	(i.e. short peptide sequences of length n) to this classification
	task. {U}sing the {GPCR} dataset and evaluation protocol from the
	previous study, the {N}aive {B}ayes classifier attained an accuracy
	of 93.0 and 92.4\% in level {I} and level {II} subfamily classification
	respectively, while {SVM} has a reported accuracy of 88.4 and 86.3\%.
	{T}his is a 39.7 and 44.5\% reduction in residual error for level
	{I} and level {II} subfamily classification, respectively. {T}he
	{D}ecision {T}ree, while inferior to {SVM}, outperforms {HMM} in
	both level {I} and level {II} subfamily classification. {F}or those
	{GPCR} families whose profiles are stored in the {P}rotein {FAM}ilies
	database of alignments and {HMM}s ({PFAM}), our method performs comparably
	to a search against those profiles. {F}inally, our method can be
	generalized to other protein families by applying it to the superfamily
	of nuclear receptors with 94.5, 97.8 and 93.6\% accuracy in family,
	level {I} and level {II} subfamily classification respectively.},
  doi = {10.1002/prot.20373},
  pdf = {../local/Cheng2005Protein.pdf},
  file = {Cheng2005Protein.pdf:local/Cheng2005Protein.pdf:PDF},
  url = {http://dx.doi.org/10.1002/prot.20373}
}

@article{Cheng2000Biclustering,
  author = {Cheng, Y. and Church, G. M.},
  title = {Biclustering of expression data.},
  journal = {Proc Int Conf Intell Syst Mol Biol},
  year = {2000},
  volume = {8},
  pages = {93--103},
  abstract = {An efficient node-deletion algorithm is introduced to find submatrices
	in expression data that have low mean squared residue scores and
	it is shown to perform well in finding co-regulation patterns in
	yeast and human. This introduces ``biclustering'', or simultaneous
	clustering of both genes and conditions, to knowledge discovery from
	expression data. This approach overcomes some problems associated
	with traditional clustering methods, by allowing automatic discovery
	of similarity based on a subset of attributes, simultaneous clustering
	of genes and conditions, and overlapped grouping that provides a
	better representation for genes with multiple functions or regulated
	by many factors.},
  institution = {Department of Genetics, Harvard Medical School, Boston, MA 02115,
	USA. yizong.cheng@uc.edu},
  keywords = {Algorithms; Animals; Gene Expression Profiling, methods; Humans; Multigene
	Family; Oligonucleotide Array Sequence Analysis, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {10977070},
  timestamp = {2012.02.27}
}

@article{Chenna2003Multiple,
  author = {Chenna, R. and Sugawara, H. and Koike, T. and Lopez, R. and Gibson,
	T. J. and Higgins, D. G. and Thompson, J. D.},
  title = {Multiple sequence alignment with the {Clustal} series of programs.},
  journal = {Nucleic Acids Res.},
  year = {2003},
  volume = {31},
  pages = {3497--3500},
  number = {13},
  month = jul,
  abstract = {The Clustal series of programs are widely used in molecular biology
	for the multiple alignment of both nucleic acid and protein sequences
	and for preparing phylogenetic trees. The popularity of the programs
	depends on a number of factors, including not only the accuracy of
	the results, but also the robustness, portability and user-friendliness
	of the programs. New features include NEXUS and FASTA format output,
	printing range numbers and faster tree calculation. Although, Clustal
	was originally developed to run on a local computer, numerous Web
	servers have been set up, notably at the EBI (European Bioinformatics
	Institute) (http://www.ebi.ac.uk/clustalw/).},
  keywords = {Algorithms; Amino Acid Sequence; Internet; Nucleic Acids; Phylogeny;
	Sequence Alignment; Sequence Analysis; Sequence Analysis, Protein;
	Software},
  owner = {laurent},
  pmid = {12824352},
  timestamp = {2008.01.15}
}

@article{Cherezov2007High-resolution,
  author = {Cherezov, Vadim and Rosenbaum, Daniel M. and Hanson, Michael A. and
	Rasmussen, S{\o}ren G. F. and Thian, Foon Sun and Kobilka, Tong Sun and
	Choi, Hee-Jung and Kuhn, Peter and Weis, William I. and Kobilka, Brian K.
	and Stevens, Raymond C.},
  title = {High-resolution crystal structure of an engineered human beta2-adrenergic
	G protein-coupled receptor.},
  journal = {Science},
  year = {2007},
  volume = {318},
  pages = {1258--1265},
  number = {5854},
  month = nov,
  abstract = {Heterotrimeric guanine nucleotide-binding protein (G protein)-coupled
	receptors constitute the largest family of eukaryotic signal transduction
	proteins that communicate across the membrane. We report the crystal
	structure of a human beta2-adrenergic receptor-T4 lysozyme fusion
	protein bound to the partial inverse agonist carazolol at 2.4 angstrom
	resolution. The structure provides a high-resolution view of a human
	G protein-coupled receptor bound to a diffusible ligand. Ligand-binding
	site accessibility is enabled by the second extracellular loop, which
	is held out of the binding cavity by a pair of closely spaced disulfide
	bridges and a short helical segment within the loop. Cholesterol,
	a necessary component for crystallization, mediates an intriguing
	parallel association of receptor molecules in the crystal lattice.
	Although the location of carazolol in the beta2-adrenergic receptor
	is very similar to that of retinal in rhodopsin, structural differences
	in the ligand-binding site and other regions highlight the challenges
	in using rhodopsin as a template model for this large receptor family.},
  doi = {10.1126/science.1150577},
  pdf = {../local/Cherezov2007High-resolution.pdf},
  file = {Cherezov2007High-resolution.pdf:Cherezov2007High-resolution.pdf:PDF},
  owner = {laurent},
  pii = {1150577},
  pmid = {17962520},
  timestamp = {2008.04.01},
  url = {http://dx.doi.org/10.1126/science.1150577}
}

@article{Cherkassky2004Practical,
  author = {Cherkassky, Vladimir and Ma, Yunqian},
  title = {Practical selection of {SVM} parameters and noise estimation for
	{SVM} regression.},
  journal = {Neural {N}etw},
  year = {2004},
  volume = {17},
  pages = {113--126},
  number = {1},
  month = jan,
  abstract = {We investigate practical selection of hyper-parameters for support
	vector machines ({SVM}) regression (that is, epsilon-insensitive
	zone and regularization parameter {C}). {T}he proposed methodology
	advocates analytic parameter selection directly from the training
	data, rather than re-sampling approaches commonly used in {SVM} applications.
	{I}n particular, we describe a new analytical prescription for setting
	the value of insensitive zone epsilon, as a function of training
	sample size. {G}ood generalization performance of the proposed parameter
	selection is demonstrated empirically using several low- and high-dimensional
	regression problems. {F}urther, we point out the importance of {V}apnik's
	epsilon-insensitive loss for regression problems with finite samples.
	{T}o this end, we compare generalization performance of {SVM} regression
	(using proposed selection of epsilon-values) with regression using
	'least-modulus' loss (epsilon=0) and standard squared loss. {T}hese
	comparisons indicate superior generalization performance of {SVM}
	regression under sparse sample settings, for various types of additive
	noise.},
  doi = {10.1016/S0893-6080(03)00169-2},
  pdf = {../local/Cherkassky2004Practical.pdf},
  file = {Cherkassky2004Practical.pdf:Cherkassky2004Practical.pdf:PDF},
  pii = {S0893608003001692},
  url = {http://dx.doi.org/10.1016/S0893-6080(03)00169-2}
}

@article{Chi2008year,
  author = {Chi, K. R.},
  title = {The year of sequencing},
  journal = {Nat. Methods},
  year = {2008},
  volume = {5},
  pages = {11--14},
  number = {1},
  month = jan,
  abstract = {In 2007, the next-generation sequencing technologies have come into
	their own with an impressive array of successful applications. Kelly
	Rae Chi reports.},
  doi = {10.1038/nmeth1154},
  pdf = {../local/Chi2008year.pdf},
  file = {Chi2008year.pdf:Chi2008year.pdf:PDF},
  keywords = {csbcbook, csbcbook-ch2},
  owner = {jp},
  pmid = {18175410},
  timestamp = {2009.10.13},
  url = {http://dx.doi.org/10.1038/nmeth1154}
}

@article{Chiang2001Visualizing,
  author = {Chiang, D. Y. and Brown, P. O. and Eisen, M. B.},
  title = {Visualizing associations between genome sequences and gene expression
	data using genome-mean expression profiles},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {S49--S55},
  number = {Suppl. 1},
  pdf = {../local/chia01.pdf},
  file = {chia01.pdf:local/chia01.pdf:PDF},
  subject = {microarray},
  url = {http://bioinformatics.oupjournals.org/cgi/reprint/17/suppl_1/S49.pdf}
}

@article{Chiang2009High-resolution,
  author = {Chiang, Derek Y. and Getz, Gad and Jaffe, David B. and O'Kelly, Michael J. T.
	and Zhao, Xiaojun and Carter, Scott L. and Russ, Carsten and Nusbaum, Chad
	and Meyerson, Matthew and Lander, Eric S.},
  title = {High-resolution mapping of copy-number alterations with massively
	parallel sequencing.},
  journal = {Nat. Methods},
  year = {2009},
  volume = {6},
  pages = {99--103},
  number = {1},
  month = jan,
  abstract = {Cancer results from somatic alterations in key genes, including point
	mutations, copy-number alterations and structural rearrangements.
	A powerful way to discover cancer-causing genes is to identify genomic
	regions that show recurrent copy-number alterations (gains and losses)
	in tumor genomes. Recent advances in sequencing technologies suggest
	that massively parallel sequencing may provide a feasible alternative
	to DNA microarrays for detecting copy-number alterations. Here we
	present: (i) a statistical analysis of the power to detect copy-number
	alterations of a given size; (ii) SegSeq, an algorithm to segment
	equal copy numbers from massively parallel sequence data; and (iii)
	analysis of experimental data from three matched pairs of tumor and
	normal cell lines. We show that a collection of approximately 14
	million aligned sequence reads from human cell lines has comparable
	power to detect events as the current generation of DNA microarrays
	and has over twofold better precision for localizing breakpoints
	(typically, to within approximately 1 kilobase).},
  doi = {10.1038/nmeth.1276},
  pdf = {../local/Chiang2009High-resolution.pdf},
  file = {Chiang2009High-resolution.pdf:Chiang2009High-resolution.pdf:PDF},
  institution = {Broad Institute, Massachusetts Institute of Technology, 7 Cambridge
	Center, Cambridge, MA 02142, USA.},
  keywords = {ngs},
  owner = {jp},
  pii = {nmeth.1276},
  pmid = {19043412},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1038/nmeth.1276}
}

@article{Chin2008Translating,
  author = {Chin, L. and Gray, J. W.},
  title = {Translating insights from the cancer genome into clinical practice.},
  journal = {Nature},
  year = {2008},
  volume = {452},
  pages = {553--563},
  number = {7187},
  month = apr,
  abstract = {Cancer cells have diverse biological capabilities that are conferred
	by numerous genetic aberrations and epigenetic modifications. Today's
	powerful technologies are enabling these changes to the genome to
	be catalogued in detail. Tomorrow is likely to bring a complete atlas
	of the reversible and irreversible alterations that occur in individual
	cancers. The challenge now is to work out which molecular abnormalities
	contribute to cancer and which are simply 'noise' at the genomic
	and epigenomic levels. Distinguishing between these will aid in understanding
	how the aberrations in a cancer cell collaborate to drive pathophysiology.
	Past successes in converting information from genomic discoveries
	into clinical tools provide valuable lessons to guide the translation
	of emerging insights from the genome into clinical end points that
	can affect the practice of cancer medicine.},
  doi = {10.1038/nature06914},
  pdf = {../local/Chin2008Translating.pdf},
  file = {Chin2008Translating.pdf:Chin2008Translating.pdf:PDF},
  institution = {Dana-Farber Cancer Institute and Harvard Medical School, 44 Binney
	Street, Boston, Massachusetts 02115, USA. lynda_chin@dfci.harvard.edu},
  keywords = {csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature06914},
  pmid = {18385729},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1038/nature06914}
}

@article{Chin2007High-resolution,
  author      = {Chin, S. F. and Teschendorff, A. E. and Marioni, J. C. and Wang,
	Y. and Barbosa-Morais, N. L. and Thorne, N. P. and Costa, J. L. and
	Pinder, S. E. and van de Wiel, M. A. and Green, A. R. and Ellis,
	I. O. and Porter, P. L. and Tavar{\'e}, S. and Brenton, J. D. and
	Ylstra, B. and Caldas, C.},
  title       = {High-resolution {aCGH} and expression profiling identifies a novel
	genomic subtype of {ER} negative breast cancer.},
  journal     = {Genome Biol.},
  year        = {2007},
  volume      = {8},
  number      = {10},
  pages       = {R215},
  doi         = {10.1186/gb-2007-8-10-r215},
  url         = {http://dx.doi.org/10.1186/gb-2007-8-10-r215},
  pii         = {gb-2007-8-10-r215},
  pmid        = {17925008},
  institution = {Breast Cancer Functional Genomics, Cancer Research UK Cambridge Research
	Institute and Department of Oncology University of Cambridge, Li
	Ka-Shing Centre, Robinson Way, Cambridge CB2 0RE, UK. sc10021@cam.ac.uk},
  keywords    = {breastcancer, cgh},
  abstract    = {BACKGROUND: The characterization of copy number alteration patterns
	in breast cancer requires high-resolution genome-wide profiling of
	a large panel of tumor specimens. To date, most genome-wide array
	comparative genomic hybridization studies have used tumor panels
	of relatively large tumor size and high Nottingham Prognostic Index
	(NPI) that are not as representative of breast cancer demographics.
	RESULTS: We performed an oligo-array-based high-resolution analysis
	of copy number alterations in 171 primary breast tumors of relatively
	small size and low NPI, which was therefore more representative of
	breast cancer demographics. Hierarchical clustering over the common
	regions of alteration identified a novel subtype of high-grade estrogen
	receptor (ER)-negative breast cancer, characterized by a low genomic
	instability index. We were able to validate the existence of this
	genomic subtype in one external breast cancer cohort. Using matched
	array expression data we also identified the genomic regions showing
	the strongest coordinate expression changes ('hotspots'). We show
	that several of these hotspots are located in the phosphatome, kinome
	and chromatinome, and harbor members of the 122-breast cancer CAN-list.
	Furthermore, we identify frequently amplified hotspots on 8q22.3
	(EDD1, WDSOF1), 8q24.11-13 (THRAP6, DCC1, SQLE, SPG8) and 11q14.1
	(NDUFC2, ALG8, USP35) associated with significantly worse prognosis.
	Amplification of any of these regions identified 37 samples with
	significantly worse overall survival (hazard ratio (HR) = 2.3 (1.3-1.4)
	p = 0.003) and time to distant metastasis (HR = 2.6 (1.4-5.1) p =
	0.004) independently of NPI. CONCLUSION: We present strong evidence
	for the existence of a novel subtype of high-grade ER-negative tumors
	that is characterized by a low genomic instability index. We also
	provide a genome-wide list of common copy number alteration regions
	in breast cancer that show strong coordinate aberrant expression,
	and further identify novel frequently amplified regions that correlate
	with poor prognosis. Many of the genes associated with these regions
	represent likely novel oncogenes or tumor suppressors.},
  pdf         = {../local/Chin2007High-resolution.pdf},
  file        = {Chin2007High-resolution.pdf:Chin2007High-resolution.pdf:PDF},
  owner       = {jp},
  timestamp   = {2008.12.09}
}

@article{Chin2006Using,
  author = {Chin, S.-F. and Wang, Y. and Thorne, N. P. and Teschendorff, A. E.
	and Pinder, S. E. and Vias, M. and Naderi, A. and Roberts, I. and
	Barbosa-Morais, N. L. and Garcia, M. J. and Iyer, N. G. and Kranjac,
	T. and Robertson, J. F. R. and Aparicio, S. and Tavar{\'e}, S. and Ellis,
	I. and Brenton, J. D. and Caldas, C.},
  title = {Using array-comparative genomic hybridization to define molecular
	portraits of primary breast cancers},
  journal = {Oncogene},
  year = {2006},
  volume = {26},
  pages = {1959--1970},
  number = {13},
  month = sep,
  doi = {10.1038/sj.onc.1209985},
  pdf = {../local/Chin2006Using.pdf},
  file = {Chin2006Using.pdf:Chin2006Using.pdf:PDF},
  issn = {0950-9232},
  keywords = {breastcancer},
  owner = {franck},
  timestamp = {2007.11.23},
  url = {http://dx.doi.org/10.1038/sj.onc.1209985}
}

@incollection{Chipman2003Clustering,
  author = {Chipman, H. and Hastie, T. and Tibshirani, R.},
  title = {Clustering Microarray Data},
  booktitle = {Statistical Analysis of Gene Expression Microarray Data},
  year = {2003},
  editor = {Speed, T.},
  pages = {159--200},
  publisher = {Chapman and Hall/CRC},
  owner = {jp},
  timestamp = {2011.12.29}
}

@incollection{Chipman2003Statistical,
  author = {Chipman, H. and Hastie, T. and Tibshirani, R.},
  title = {Clustering Microarray Data},
  booktitle = {Statistical Analysis of Gene Expression Microarray Data},
  year = {2003},
  editor = {Speed, T.},
  pages = {159--200},
  publisher = {Chapman and Hall/CRC},
  owner = {jp},
  timestamp = {2011.12.29}
}

@article{Choi2007Coupled,
  author = {Choi, H.-S. and Han, S. and Yokota, H. and Cho, K.-H.},
  title = {Coupled positive feedbacks provoke slow induction plus fast switching
	in apoptosis},
  journal = {FEBS Letters},
  year = {2007},
  volume = {581},
  pages = {2684--2690},
  number = {14},
  abstract = {Apoptosis is a form of a programmed cell death for multicellular organisms
	to remove unwanted or damaged cells. This critical choice of cellular
	fate is an all-or-none process, but its dynamics remains unraveled.
	The switch-like apoptotic decision has to be reliable, and once a
	pro-apoptotic fate is determined it requires fast and irreversible
	execution. One of the key regulators in apoptosis is caspase-3. Interestingly,
	activated caspase-3 quickly executes apoptosis, but it takes considerable
	time to activate it. Here, we have analyzed this slow induction plus
	fast switching mechanism of caspase-3 through mathematical modeling
	and computational simulation. First, we have shown that two positive
	feedbacks, composed of caspase-8 and XIAP, are essential for the
	slow induction plus fast switching behavior of caspase-3. Second,
	we have found that XIAP in the feedback loops primarily regulates
	induction time of caspase-3. In many cancer cells activation of caspase-3
	is suppressed. Our results suggest that reinforcement of the positive
	feedback by XIAP, which relieves XIAP-mediated caspase-3 inhibition,
	might favor a pro-apoptotic cellular fate.},
  doi = {10.1016/j.febslet.2007.05.016},
  pdf = {../local/Choi2007Coupled.pdf},
  file = {Choi2007Coupled.pdf:Choi2007Coupled.pdf:PDF},
  issn = {0014-5793},
  keywords = {csbcbook},
  url = {http://www.sciencedirect.com/science/article/B6T36-4NSR1M2-F/2/1d13173e1580459d4c4430a96116dc3d}
}

@article{Chou2001Prediction,
  author = {Chou, K.-C.},
  title = {Prediction of protein signal sequences and their cleavage sites},
  journal = {Proteins {S}truct. {F}unct. {G}enet.},
  year = {2001},
  volume = {42},
  pages = {136--139},
  pdf = {../local/chou01.pdf},
  file = {chou01.pdf:local/chou01.pdf:PDF},
  subject = {bioprot},
  url = {http://www3.interscience.wiley.com/cgi-bin/abstract/75504759/START}
}

@article{Chou2001Using,
  author  = {Chou, K.-C.},
  title   = {Using subsite coupling to predict signal peptides},
  journal = {Protein {E}ng.},
  year    = {2001},
  volume  = {14},
  number  = {2},
  pages   = {75--79},
  url     = {http://protein.oupjournals.org/cgi/content/abstract/14/2/75},
  subject = {bioprot},
  pdf     = {../local/chou01b.pdf},
  file    = {chou01b.pdf:local/chou01b.pdf:PDF}
}

@article{Chou2002Using,
  author = {Chou, K.-C. and Cai, Y.-D.},
  title = {Using {F}unctional {D}omain {C}omposition and {S}upport {V}ector
	{M}achines for {P}rediction of {P}rotein {S}ubcellular {L}ocation},
  journal = {J. {B}iol. {C}hem.},
  year = {2002},
  volume = {277},
  pages = {45765--45769},
  number = {48},
  abstract = {Proteins are generally classified into the following 12 subcellular
	locations: 1) chloroplast, 2) cytoplasm, 3) cytoskeleton, 4) endoplasmic
	reticulum, 5) extracellular, 6) {G}olgi apparatus, 7) lysosome, 8)
	mitochondria, 9) nucleus, 10) peroxisome, 11) plasma membrane, and
	12) vacuole. {B}ecause the function of a protein is closely correlated
	with its subcellular location, with the rapid increase in new protein
	sequences entering into databanks, it is vitally important for both
	basic research and pharmaceutical industry to establish a high throughput
	tool for predicting protein subcellular location. {I}n this paper,
	a new concept, the so-called ``functional domain composition'' is introduced.
	{B}ased on the novel concept, the representation for a protein can
	be defined as a vector in a high-dimensional space, where each of
	the clustered functional domains derived from the protein universe
	serves as a vector base. {W}ith such a novel representation for a
	protein, the support vector machine ({SVM}) algorithm is introduced
	for predicting protein subcellular location. {H}igh success rates
	are obtained by the self-consistency test, jackknife test, and independent
	dataset test, respectively. {T}he current approach not only can play
	an important complementary role to the powerful covariant discriminant
	algorithm based on the pseudo amino acid composition representation
	({C}hou, {K}. {C}. (2001) {P}roteins {S}truct. {F}unct. {G}enet.
	43, 246-255; {C}orrection (2001) {P}roteins {S}truct. {F}unct. {G}enet.
	44, 60), but also may greatly stimulate the development of this area.},
  pdf = {../local/Chou2002Using.pdf},
  file = {Chou2002Using.pdf:local/Chou2002Using.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.jbc.org/cgi/content/abstract/277/48/45765}
}

@article{Choudhary2010Decoding,
  author = {Choudhary, Chunaram and Mann, Matthias},
  title = {Decoding signalling networks by mass spectrometry-based proteomics.},
  journal = {Nat Rev Mol Cell Biol},
  year = {2010},
  volume = {11},
  pages = {427--439},
  number = {6},
  month = jun,
  abstract = {Signalling networks regulate essentially all of the biology of cells
	and organisms in normal and disease states. Signalling is often studied
	using antibody-based techniques such as western blots. Large-scale
	'precision proteomics' based on mass spectrometry now enables the
	system-wide characterization of signalling events at the levels of
	post-translational modifications, protein-protein interactions and
	changes in protein expression. This technology delivers accurate
	and unbiased information about the quantitative changes of thousands
	of proteins and their modifications in response to any perturbation.
	Current studies focus on phosphorylation, but acetylation, methylation,
	glycosylation and ubiquitylation are also becoming amenable to investigation.
	Large-scale proteomics-based signalling research will fundamentally
	change our understanding of signalling networks.},
  doi = {10.1038/nrm2900},
  institution = {The Novo Nordisk Foundation Center for Protein Research, Faculty
	of Health Sciences, University of Copenhagen, 2200 Copenhagen, Denmark.
	chuna.choudhary@cpr.ku.dk},
  keywords = {Animals; Humans; Mass Spectrometry; Protein Processing, Post-Translational;
	Proteome; Proteomics; Signal Transduction},
  owner = {phupe},
  pii = {nrm2900},
  pmid = {20461098},
  timestamp = {2010.08.13},
  url = {http://dx.doi.org/10.1038/nrm2900}
}

@article{Chow2001Identifying,
  author = {Chow, M. L. and Moler, E. J. and Mian, I. S.},
  title = {Identifying marker genes in transcription profiling data using a
	mixture of feature relevance experts.},
  journal = {Physiol. {G}enomics},
  year = {2001},
  volume = {5},
  pages = {99--111},
  number = {2},
  month = mar,
  abstract = {Transcription profiling experiments permit the expression levels of
	many genes to be measured simultaneously. {G}iven profiling data
	from two types of samples, genes that most distinguish the samples
	(marker genes) are good candidates for subsequent in-depth experimental
	studies and developing decision support systems for diagnosis, prognosis,
	and monitoring. {T}his work proposes a mixture of feature relevance
	experts as a method for identifying marker genes and illustrates
	the idea using published data from samples labeled as acute lymphoblastic
	and myeloid leukemia ({ALL}, {AML}). {A} feature relevance expert
	implements an algorithm that calculates how well a gene distinguishes
	samples, reorders genes according to this relevance measure, and
	uses a supervised learning method [here, support vector machines
	({SVM}s)] to determine the generalization performances of different
	nested gene subsets. {T}he mixture of three feature relevance experts
	examined implement two existing and one novel feature relevance measures.
	{F}or each expert, a gene subset consisting of the top 50 genes distinguished
	{ALL} from {AML} samples as completely as all 7,070 genes. {T}he
	125 genes at the union of the top 50s are plausible markers for a
	prototype decision support system. {C}hromosomal aberration and other
	data support the prediction that the three genes at the intersection
	of the top 50s, cystatin {C}, azurocidin, and adipsin, are good targets
	for investigating the basic biology of {ALL}/{AML}. {T}he same data
	were employed to identify markers that distinguish samples based
	on their labels of {T} cell/{B} cell, peripheral blood/bone marrow,
	and male/female. {S}elenoprotein {W} may discriminate {T} cells from
	{B} cells. {R}esults from analysis of transcription profiling data
	from tumor/nontumor colon adenocarcinoma samples support the general
	utility of the aforementioned approach. {T}heoretical issues such
	as choosing {SVM} kernels and their parameters, training and evaluating
	feature relevance experts, and the impact of potentially mislabeled
	samples on marker identification (feature selection) are discussed.},
  pdf = {../local/Chow2001Identifying.pdf},
  file = {Chow2001Identifying.pdf:local/Chow2001Identifying.pdf:PDF},
  keywords = {biosvm},
  pii = {5/2/99},
  url = {http://physiolgenomics.physiology.org/cgi/content/abstract/5/2/99}
}

@article{Chowdhury2010Identification,
  author = {Chowdhury, S. A. and Koyut{\"u}rk, M.},
  title = {Identification of coordinately dysregulated subnetworks in complex
	phenotypes.},
  journal = {Pac. Symp. Biocomput.},
  year = {2010},
  pages = {133--144},
  abstract = {In the study of complex phenotypes, single gene markers can only provide
	limited insights into the manifestation of phenotype. To this end,
	protein-protein interaction (PPI) networks prove useful in the identification
	of multiple interacting markers. Recent studies show that, when considered
	together, many proteins that are connected via physical and functional
	interactions exhibit significant differential expression with respect
	to various complex phenotypes, including cancers. As compared to
	single gene markers, these ``coordinately dysregulated subnetworks''
	improve diagnosis and prognosis of cancer significantly and offer
	novel insights into the network dynamics of phenotype. However, the
	problem of identifying coordinately dysregulated subnetworks presents
	significant algorithmic challenges. Existing approaches utilize heuristics
	that aim to greedily maximize information-theoretic class separability
	measures, however, by definition of ``coordinate'' dysregulation, such
	greedy algorithms do not suit well to this problem. In this paper,
	we formulate coordinate dysregulation in the context of the well-known
	set-cover problem, with a view to capturing the coordination between
	multiple genes at a sample-specific resolution. Based on this formulation,
	we adapt state-of-the-art approximation algorithms for set-cover
	to the identification of coordinately dysregulated subnetworks. Comprehensive
	experimental results on human colorectal cancer (CRC) show that,
	when compared to existing algorithms, the proposed algorithm, NETCOVER,
	improves diagnosis of cancer and prediction of metastasis significantly.
	Our results also demonstrate that subnetworks in the neighborhood
	of known CRC driver genes exhibit significant coordinate dysregulation,
	indicating that the notion of coordinate dysregulation may indeed
	be useful in understanding the network dynamics of complex phenotypes.},
  pdf = {../local/Chowdhury2010Identification.pdf},
  file = {Chowdhury2010Identification.pdf:Chowdhury2010Identification.pdf:PDF},
  institution = {Department of Electrical Engineering and Computer Science, Case Western
	Reserve University, Cleveland, OH, USA. sxc426@eecs.case.edu.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {9789814295291_0016},
  pmid = {19908366},
  timestamp = {2011.10.08}
}

@article{Chowdhury2011Subnetwork,
  author = {Chowdhury, S. A. and Nibbe, R. K. and Chance, M. R. and Koyut{\"u}rk,
	M.},
  title = {Subnetwork state functions define dysregulated subnetworks in cancer.},
  journal = {J. Comput. Biol.},
  year = {2011},
  volume = {18},
  pages = {263--281},
  number = {3},
  month = mar,
  abstract = {Emerging research demonstrates the potential of protein-protein interaction
	(PPI) networks in uncovering the mechanistic bases of cancers, through
	identification of interacting proteins that are coordinately dysregulated
	in tumorigenic and metastatic samples. When used as features for
	classification, such coordinately dysregulated subnetworks improve
	diagnosis and prognosis of cancer considerably over single-gene markers.
	However, existing methods formulate coordination between multiple
	genes through additive representation of their expression profiles
	and utilize fast heuristics to identify dysregulated subnetworks,
	which may not be well suited to the potentially combinatorial nature
	of coordinate dysregulation. Here, we propose a combinatorial formulation
	of coordinate dysregulation and decompose the resulting objective
	function to cast the problem as one of identifying subnetwork state
	functions that are indicative of phenotype. Based on this formulation,
	we show that coordinate dysregulation of larger subnetworks can be
	bounded using simple statistics on smaller subnetworks. We then use
	these bounds to devise an efficient algorithm, Crane, that can search
	the subnetwork space more effectively than existing algorithms. Comprehensive
	cross-classification experiments show that subnetworks identified
	by Crane outperform those identified by additive algorithms in predicting
	metastasis of colorectal cancer (CRC).},
  doi = {10.1089/cmb.2010.0269},
  pdf = {../local/Chowdhury2011Subnetwork.pdf},
  file = {Chowdhury2011Subnetwork.pdf:Chowdhury2011Subnetwork.pdf:PDF},
  institution = {Department of Electrical Engineering and Computer Science, Case Western
	Reserve University, Cleveland, Ohio, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {21385033},
  timestamp = {2011.10.08},
  url = {http://dx.doi.org/10.1089/cmb.2010.0269}
}

@article{Chu1998Transcriptional,
  author = {Chu, S. and DeRisi, J. and Eisen, M. and Mulholland, J. and Botstein,
	D. and Brown, P. O. and Herskowitz, I.},
  title = {The {T}ranscriptional {P}rogram of {S}porulation in {B}udding {Y}east},
  journal = {Science},
  year = {1998},
  volume = {282},
  pages = {699--705},
  number = {5389},
  doi = {10.1126/science.282.5389.699},
  pdf = {../local/chu98.pdf},
  file = {chu98.pdf:local/chu98.pdf:PDF},
  owner = {phupe},
  subject = {microarray},
  timestamp = {2009.10.15},
  url = {http://www.sciencemag.org/cgi/reprint/282/5389/699.pdf}
}

@article{Chu2005improved,
  author = {Wei Chu and Chong Jin Ong and S. Sathiya Keerthi},
  title = {An improved conjugate gradient scheme to the solution of least squares
	{SVM}.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2005},
  volume = {16},
  pages = {498--501},
  number = {2},
  month = {Mar},
  abstract = {The least square support vector machines ({LS}-{SVM}) formulation
	corresponds to the solution of a linear system of equations. {S}everal
	approaches to its numerical solutions have been proposed in the literature.
	{I}n this letter, we propose an improved method to the numerical
	solution of {LS}-{SVM} and show that the problem can be solved using
	one reduced system of linear equations. {C}ompared with the existing
	algorithm for {LS}-{SVM}, the approach used in this letter is about
	twice as efficient. {N}umerical results using the proposed method
	are provided for comparisons with other existing algorithms.},
  doi = {10.1109/TNN.2004.841785},
  pdf = {../local/Chu2005improved.pdf},
  file = {Chu2005improved.pdf:local/Chu2005improved.pdf:PDF},
  keywords = {80 and over, Aged, Algorithms, Area Under Curve, Cross-Sectional Studies,
	Diagnostic Imaging, Diagnostic Techniques, Glaucoma, Humans, Lasers,
	Least-Squares Analysis, Middle Aged, Nerve Fibers, Non-U.S. Gov't,
	Ophthalmological, Optic Nerve Diseases, P.H.S., ROC Curve, Research
	Support, Retinal Ganglion Cells, Sensitivity and Specificity, U.S.
	Gov't, 15787157},
  pmid = {15787157},
  url = {http://dx.doi.org/10.1109/TNN.2004.841785}
}

@article{Chuang2007Network-based,
  author      = {Chuang, H.-Y. and Lee, E. and Liu, Y.-T. and Lee, D. and Ideker, T.},
  title       = {Network-based classification of breast cancer metastasis.},
  journal     = {Mol. Syst. Biol.},
  year        = {2007},
  volume      = {3},
  pages       = {140},
  doi         = {10.1038/msb4100180},
  url         = {http://dx.doi.org/10.1038/msb4100180},
  pmid        = {17940530},
  pii         = {msb4100180},
  abstract    = {Mapping the pathways that give rise to metastasis is one of the key
    challenges of breast cancer research. Recently, several large-scale
    studies have shed light on this problem through analysis of gene
    expression profiles to identify markers correlated with metastasis.
    Here, we apply a protein-network-based approach that identifies markers
    not as individual genes but as subnetworks extracted from protein
    interaction databases. The resulting subnetworks provide novel hypotheses
    for pathways involved in tumor progression. Although genes with known
    breast cancer mutations are typically not detected through analysis
    of differential expression, they play a central role in the protein
    network by interconnecting many differentially expressed genes. We
    find that the subnetwork markers are more reproducible than individual
    marker genes selected without network information, and that they
    achieve higher accuracy in the classification of metastatic versus
    non-metastatic tumors.},
  keywords    = {breastcancer},
  institution = {Bioinformatics Program, University of California San Diego, La Jolla,
    CA 92093, USA.},
  pdf         = {../local/Chuang2007Network-based.pdf},
  file        = {Chuang2007Network-based.pdf:Chuang2007Network-based.pdf:PDF},
  owner       = {jp},
  timestamp   = {2008.12.09}
}

@article{Chung2003Spectra,
  author    = {Fan Chung and Linyuan Lu and Van Vu},
  title     = {{S}pectra of random graphs with given expected degrees.},
  journal   = {Proc. Natl. Acad. Sci. USA},
  year      = {2003},
  month     = {May},
  volume    = {100},
  number    = {11},
  pages     = {6313--6318},
  doi       = {10.1073/pnas.0937490100},
  url       = {http://dx.doi.org/10.1073/pnas.0937490100},
  pmid      = {12743375},
  pii       = {0937490100},
  abstract  = {In the study of the spectra of power-law graphs, there are basically
    two competing approaches. One is to prove analogues of Wigner's semicircle
    law, whereas the other predicts that the eigenvalues follow a power-law
    distribution. Although the semicircle law and the power law have
    nothing in common, we will show that both approaches are essentially
    correct if one considers the appropriate matrices. We will prove
    that (under certain mild conditions) the eigenvalues of the (normalized)
    Laplacian of a random power-law graph follow the semicircle law,
    whereas the spectrum of the adjacency matrix of a power-law graph
    obeys the power law. Our results are based on the analysis of random
    graphs with given expected degrees and their relations to several
    key invariants. Of interest are a number of (new) values for the
    exponent beta, where phase transitions for eigenvalue distributions
    occur. The spectrum distributions have direct implications to numerous
    graph algorithms such as, for example, randomized algorithms that
    involve rapidly mixing Markov chains.},
  keywords  = {12743375},
  timestamp = {2006.02.28}
}

@book{Chung1997Spectral,
  author    = {Chung, F. R. K.},
  title     = {Spectral graph theory},
  series    = {CBMS Regional Conference Series},
  volume    = {92},
  publisher = {American Mathematical Society},
  address   = {Providence},
  year      = {1997},
  subject   = {net}
}

@article{Chung2003Radius,
  author = {Kai-Min Chung and Wei-Chun Kao and Chia-Liang Sun and Li-Lun Wang
	and Chih-Jen Lin},
  title = {Radius margin bounds for support vector machines with the {RBF} kernel.},
  journal = {Neural {C}omput},
  year = {2003},
  volume = {15},
  pages = {2643--2681},
  number = {11},
  month = {Nov},
  abstract = {An important approach for efficient support vector machine ({SVM})
	model selection is to use differentiable bounds of the leave-one-out
	(loo) error. {P}ast efforts focused on finding tight bounds of loo
	(e.g., radius margin bounds, span bounds). {H}owever, their practical
	viability is still not very satisfactory. {D}uan, {K}eerthi, and
	{P}oo (2003) showed that radius margin bound gives good prediction
	for {L}2-{SVM}, one of the cases we look at. {I}n this letter, through
	analyses about why this bound performs well for {L}2-{SVM}, we show
	that finding a bound whose minima are in a region with small loo
	values may be more important than its tightness. {B}ased on this
	principle, we propose modified radius margin bounds for {L}1-{SVM}
	(the other case) where the original bound is applicable only to the
	hard-margin case. {O}ur modification for {L}1-{SVM} achieves comparable
	performance to {L}2-{SVM}. {T}o study whether {L}1- or {L}2-{SVM}
	should be used, we analyze other properties, such as their differentiability,
	number of support vectors, and number of free support vectors. {I}n
	this aspect, {L}1-{SVM} possesses the advantage of having fewer support
	vectors. {T}heir implementations are also different, so we discuss
	related issues in detail.},
  doi = {10.1162/089976603322385108},
  url = {http://dx.doi.org/10.1162/089976603322385108}
}

@article{Churchill2002Fundamentals,
  author = {Churchill, G. A.},
  title = {Fundamentals of experimental design for {cDNA} microarrays},
  journal = {Nat. Genet.},
  year = {2002},
  volume = {32 Suppl},
  pages = {490--495},
  month = {Dec},
  abstract = {Microarray technology is now widely available and is being applied
	to address increasingly complex scientific questions. Consequently,
	there is a greater demand for statistical assessment of the conclusions
	drawn from microarray experiments. This review discusses fundamental
	issues of how to design an experiment to ensure that the resulting
	data are amenable to statistical analysis. The discussion focuses
	on two-color spotted cDNA microarrays, but many of the same issues
	apply to single-color gene-expression assays as well.},
  doi = {10.1038/ng1031},
  institution = {The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA.
	garyc@jax.org},
  keywords = {Animals; DNA, Complementary, analysis; Gene Expression; Gene Expression
	Profiling, methods; Mice; Models, Biological; Oligonucleotide Array
	Sequence Analysis, methods; Reference Standards; Reproducibility
	of Results; Research Design; Statistics as Topic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {ng1031},
  pmid = {12454643},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/ng1031}
}

@article{Cianchetta2005Predictive,
  author = {Cianchetta, G. and Li, Y. and Kang, J. and Rampe, D. and Fravolini,
	A. and Cruciani, G. and Vaz, R. J.},
  title = {{P}redictive models for h{ERG} potassium channel blockers.},
  journal = {Bioorg. Med. Chem. Lett.},
  year = {2005},
  volume = {15},
  pages = {3637--3642},
  number = {15},
  month = {Aug},
  abstract = {We report here a general method for the prediction of hERG potassium
	channel blockers using computational models generated from correlation
	analyses of a large dataset and pharmacophore-based GRIND descriptors.
	These 3D-QSAR models are compared favorably with other traditional
	and chemometric based HQSAR methods.},
  comment = {The doi and url fields were truncated to 03.062; the publisher prefix
	was restored during review. Confirm against the publisher record.},
  doi = {10.1016/j.bmcl.2005.03.062},
  keywords = {chemoinformatics herg},
  pii = {S0960-894X(05)00368-9},
  pmid = {15978804},
  timestamp = {2006.10.06},
  url = {http://dx.doi.org/10.1016/j.bmcl.2005.03.062}
}

@article{Cianfrocca2004Prognostic,
  author      = {Cianfrocca, M. and Goldstein, L. J.},
  title       = {Prognostic and predictive factors in early-stage breast cancer},
  journal     = {Oncologist},
  year        = {2004},
  volume      = {9},
  number      = {6},
  pages       = {606--616},
  doi         = {10.1634/theoncologist.9-6-606},
  url         = {http://dx.doi.org/10.1634/theoncologist.9-6-606},
  pmid        = {15561805},
  pii         = {9/6/606},
  abstract    = {Breast cancer is the most common malignancy among American women.
    Due to increased screening, the majority of patients present with
    early-stage breast cancer. The Oxford Overview Analysis demonstrates
    that adjuvant hormonal therapy and polychemotherapy reduce the risk
    of recurrence and death from breast cancer. Adjuvant systemic therapy,
    however, has associated risks and it would be useful to be able to
    optimally select patients most likely to benefit. The purpose of
    adjuvant systemic therapy is to eradicate distant micrometastatic
    deposits. It is essential therefore to be able to estimate an individual
    patient's risk of harboring clinically silent micrometastatic disease
    using established prognostic factors. It is also beneficial to be
    able to select the optimal adjuvant therapy for an individual patient
    based on established predictive factors. It is standard practice
    to administer systemic therapy to all patients with lymph node-positive
    disease. However, there are clearly differences among node-positive
    women that may warrant a more aggressive therapeutic approach. Furthermore,
    there are many node-negative women who would also benefit from adjuvant
    systemic therapy. Prognostic factors therefore must be differentiated
    from predictive factors. A prognostic factor is any measurement available
    at the time of surgery that correlates with disease-free or overall
    survival in the absence of systemic adjuvant therapy and, as a result,
    is able to correlate with the natural history of the disease. In
    contrast, a predictive factor is any measurement associated with
    response to a given therapy. Some factors, such as hormone receptors
    and HER2/neu overexpression, are both prognostic and predictive.},
  keywords    = {csbcbook, csbcbook-ch3},
  institution = {D.O., Fox Chase Cancer Center, 333 Cottman Ave, Philadelphia, Pennsylvania
    19111, USA. M_Cianfrocca@fccc.edu},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Cianfrocca2004Prognostic.pdf},
  file        = {Cianfrocca2004Prognostic.pdf:Cianfrocca2004Prognostic.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.18}
}

@article{Cianfrocca2009New,
  author      = {Cianfrocca, M. and Gradishar, W.},
  title       = {New molecular classifications of breast cancer},
  journal     = {CA Cancer J. Clin.},
  year        = {2009},
  volume      = {59},
  number      = {5},
  pages       = {303--313},
  doi         = {10.3322/caac.20029},
  url         = {http://dx.doi.org/10.3322/caac.20029},
  pmid        = {19729680},
  pii         = {59/5/303},
  abstract    = {Traditionally, pathologic determinations of tumor size, lymph node
    status, endocrine receptor status, and human epidermal growth factor
    receptor 2 (HER2) status have driven prognostic predictions and adjuvant
    therapy recommendations for patients with early stage breast cancer.
    However, these prognostic and predictive factors are relatively crude
    measures, resulting in many patients being overtreated or undertreated.
    As a result of gene expression assays, there is growing recognition
    that breast cancer is a molecularly heterogeneous disease. Evidence
    from gene expression microarrays suggests the presence of multiple
    molecular subtypes of breast cancer. The recent commercial availability
    of gene expression profiling techniques that predict risk of disease
    recurrence as well as potential chemotherapy benefit have shown promise
    in refining clinical decision making. These techniques will be reviewed
    in this article.},
  keywords    = {csbcbook, breastcancer},
  institution = {Division of Hematology/Oncology, Northwestern University, Feinberg
    School of Medicine, Robert H. Lurie Comprehensive Cancer Center,
    Chicago, IL 60611, USA. m-cianfrocca@northwestern.edu},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Cianfrocca2009New.pdf},
  file        = {Cianfrocca2009New.pdf:Cianfrocca2009New.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.18}
}

@article{Ciliberto2005CellCycle,
  author = {Ciliberto, A. and Novak, B. and Tyson, J. J.},
  title = {Steady states and oscillations in the p53/{Mdm2} network},
  journal = {Cell Cycle},
  year = {2005},
  volume = {4},
  pages = {488--493},
  number = {3},
  abstract = {p53 is activated in response to events compromising the genetic integrity
	of a cell. Recent data show that p53 activity does not increase steadily
	with genetic damage but rather fluctuates in an oscillatory fashion.
	Theoretical studies suggest that oscillations can arise from a combination
	of positive and negative feedbacks or from a long negative feedback
	loop alone. Both negative and positive feedbacks are present in the
	p53/Mdm2 network, but it is not known what roles they play in the
	oscillatory response to DNA damage. We developed a mathematical model
	of p53 oscillations based on positive and negative feedbacks in the
	p53/Mdm2 network. According to the model, the system reacts to DNA
	damage by moving from a stable steady state into a region of stable
	limit cycles. Oscillations in the model are born with large amplitude,
	which guarantees an all-or-none response to damage. As p53 oscillates,
	damage is repaired and the system moves back to a stable steady state
	with low p53 activity. The model reproduces experimental data in
	quantitative detail. We suggest new experiments for dissecting the
	contributions of negative and positive feedbacks to the generation
	of oscillations.},
  keywords = {csbcbook}
}

@article{Citri2006MolCelBiol,
  author = {Ami Citri and Yosef Yarden},
  title = {{EGF-ERBB} signalling: towards the systems level.},
  journal = {Nat Rev Mol Cell Biol},
  year = {2006},
  volume = {7},
  pages = {505--516},
  number = {7},
  month = {Jul},
  abstract = {Signalling through the ERBB/HER receptors is intricately involved
	in human cancer and already serves as a target for several cancer
	drugs. Because of its inherent complexity, it is useful to envision
	ERBB signalling as a bow-tie-configured, evolvable network, which
	shares modularity, redundancy and control circuits with robust biological
	and engineered systems. Because network fragility is an inevitable
	trade-off of robustness, systems-level understanding is expected
	to generate therapeutic opportunities to intercept aberrant network
	activation.},
  doi = {10.1038/nrm1962},
  institution = {Department of Biological Regulation, the Weizmann Institute of Science,
	1 Hertzl Street, Rehovot 76100, Israel.},
  keywords = {Animals; Endocytosis, physiology; Epidermal Growth Factor, metabolism;
	Feedback, Physiological; Humans; Ligands; Models, Molecular; Oncogene
	Proteins v-erbB, genetics/metabolism; Phosphatidylinositol 3-Kinases,
	metabolism; Protein Conformation; Receptor, Epidermal Growth Factor,
	chemistry/genetics/metabolism; Signal Transduction, physiology},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pii = {nrm1962},
  pmid = {16829981},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/nrm1962}
}

@article{Clark1994Do,
  author    = {Clark, G. M.},
  title     = {Do we really need prognostic factors for breast cancer?},
  journal   = {Breast Cancer Res Treat},
  year      = {1994},
  volume    = {30},
  pages     = {117--126},
  doi       = {10.1007/BF00666054},
  url       = {http://dx.doi.org/10.1007/BF00666054},
  owner     = {jp},
  timestamp = {2012.03.01}
}

@article{Clarke1990Information-theoretic,
  author = {Clarke, B. S. and Barron, A. R.},
  title = {Information-theoretic asymptotics of {B}ayes methods},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1990},
  volume = {36},
  pages = {453--471},
  number = {3},
  month = {May},
  abstract = {In the absence of knowledge of the true density function, {B}ayesian
	models take the joint density function for a sequence of n random
	variables to be an average of densities with respect to a prior.
	{T}he authors examine the relative entropy distance {D}n between
	the true density and the {B}ayesian density and show that the asymptotic
	distance is (d/2)(log n)+c, where d is the dimension of the parameter
	vector. {T}herefore, the relative entropy rate {D}n/n converges to
	zero at rate (log n)/n. {T}he constant c, which the authors explicitly
	identify, depends only on the prior density function and the {F}isher
	information matrix evaluated at the true parameter value. {C}onsequences
	are given for density estimation, universal data compression, composite
	hypothesis testing, and stock-market portfolio selection.},
  pdf = {../local/Clarke1990Information-theoretic.pdf},
  file = {Clarke1990Information-theoretic.pdf:local/Clarke1990Information-theoretic.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Clarke1994Jeffreys,
  author = {Clarke, B. S. and Barron, A. R.},
  title = {Jeffreys' prior is asymptotically least favorable under entropy risk},
  journal = {J. {S}tat. {P}lann. {I}nfer.},
  year = {1994},
  volume = {41},
  pages = {37--60},
  number = {1},
  abstract = {We provide a rigorous proof that {J}effreys' prior asymptotically
	maximizes {S}hannon's mutual information between a sample of size
	n and the parameter. {T}his was conjectured by {B}ernardo (1979)
	and, despite the absence of a proof, forms the basis of the reference
	prior method in {B}ayesian statistical analysis. {O}ur proof rests
	on an examination of large sample decision theoretic properties associated
	with the relative entropy or the {K}ullback--{L}eibler distance between
	probability density functions for independent and identically distributed
	random variables. {F}or smooth finite-dimensional parametric families
	we derive an asymptotic expression for the minimax risk and for the
	related maximin risk. {A}s a result, we show that, among continuous
	positive priors, {J}effreys' prior uniquely achieves the asymptotic
	maximin value. {I}n the discrete parameter case we show that, asymptotically,
	the {B}ayes risk reduces to the entropy of the prior so that the
	reference prior is seen to be the maximum entropy prior. {W}e identify
	the physical significance of the risks by giving two information-theoretic
	interpretations in terms of probabilistic coding.},
  comment = {Volume changed from 31 to 41 during review; confirm against the
	DOI landing page.},
  doi = {10.1016/0378-3758(94)90153-8},
  pdf = {../local/Clarke1994Jeffreys.pdf},
  file = {Clarke1994Jeffreys.pdf:local/Clarke1994Jeffreys.pdf:PDF},
  keywords = {information-theory},
  owner = {vert},
  url = {http://dx.doi.org/10.1016/0378-3758(94)90153-8}
}

@incollection{Cleveland1992Statistical,
  author = {Cleveland, W. S. and Grosse, E. and Shyu, W. M.},
  title = {Local regression models},
  booktitle = {Statistical Models in {S}},
  publisher = {Wadsworth \& Brooks/Cole},
  year = {1992},
  editor = {Chambers, J. M. and Hastie, T. J.},
  chapter = {8},
  comment = {Converted from inbook (chapter title was stored in the chapter field);
	chapter number 8 added during review -- confirm.},
  timestamp = {2007.09.19}
}

@article{Clifford1878Binary,
  author  = {W. K. Clifford},
  title   = {Binary forms of alternate variables},
  journal = {Proc. London Math. Soc.},
  year    = {1878},
  volume  = {10},
  number  = {9},
  pages   = {277--286}
}

@article{Clifford1878Note,
  author  = {W. K. Clifford},
  title   = {Note on quantics of alternate numbers, used as a means for determining the
    invariants and covariants of quantics in general},
  journal = {Proc. London Math. Soc.},
  year    = {1878},
  volume  = {10},
  number  = {9},
  pages   = {258--265}
}

@article{Coe2006Differential,
  author    = {Coe, B. P. and Lockwood, W. W. and Girard, L. and Chari, R. and
    MacAulay, C. and Lam, S. and Gazdar, A. F. and Minna, J. D. and Lam, W. L.},
  title     = {Differential disruption of cell cycle pathways in small cell and non-small
    cell lung cancer},
  journal   = {Br. J. Cancer},
  year      = {2006},
  volume    = {94},
  pages     = {1927--1935},
  owner     = {kb},
  timestamp = {2011.05.17}
}

@article{Coe2007Resolving,
  author      = {Coe, B. P. and Ylstra, B. and Carvalho, B. and Meijer, G. A. and Macaulay, C.
    and Lam, W. L.},
  title       = {Resolving the resolution of array {CGH}},
  journal     = {Genomics},
  year        = {2007},
  month       = {May},
  volume      = {89},
  number      = {5},
  pages       = {647--653},
  doi         = {10.1016/j.ygeno.2006.12.012},
  url         = {http://dx.doi.org/10.1016/j.ygeno.2006.12.012},
  pmid        = {17276656},
  pii         = {S0888-7543(07)00004-3},
  abstract    = {Many recent technologies have been designed to supplant conventional
    metaphase CGH technology with the goal of refining the description
    of segmental copy number status throughout the genome. However, the
    emergence of new technologies has led to confusion as to how to describe
    adequately the capabilities of each array platform. The design of
    a CGH array can incorporate a uniform or a highly variable element
    distribution. This can lead to bias in the reporting of average or
    median resolutions, making it difficult to provide a fair comparison
    of platforms. In this report, we propose a new definition of resolution
    for array CGH technology, termed "functional resolution," that incorporates
    the uniformity of element spacing on the array, as well as the sensitivity
    of each platform to single-copy alterations. Calculation of these
    metrics is automated through the development of a Java-based application,
    "ResCalc," which is applicable to any array CGH platform.},
  keywords    = {csbcbook, csbcbook-ch2},
  institution = {British Columbia Cancer Research Centre, 675 West 10th Avenue, Vancouver,
    BC, Canada V5Z 1L3. bcoe@bccrc.ca},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Coe2007Resolving.pdf},
  file        = {Coe2007Resolving.pdf:Coe2007Resolving.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.18}
}

@article{Coelho2000Genome-wide,
  author = {Coelho, P. S. and Kumar, A. and Snyder, M.},
  title = {Genome-wide mutant collections: toolboxes for functional genomics},
  journal = {Curr. {O}pin. {M}icrobiol.},
  year = {2000},
  volume = {3},
  pages = {309--315},
  pdf = {../local/Coelho2000Genome-wide.pdf},
  file = {Coelho2000Genome-wide.pdf:local/Coelho2000Genome-wide.pdf:PDF},
  owner = {vert}
}

@article{Cogoni1996Transgene,
  author    = {Cogoni, C. and Irelan, J. T. and Schumacher, M. and Schmidhauser, T. J.
    and Selker, E. U. and Macino, G.},
  title     = {{T}ransgene silencing of the al-1 gene in vegetative cells of {N}eurospora is
    mediated by a cytoplasmic effector and does not depend on {DNA}-{DNA} interactions
    or {DNA} methylation.},
  journal   = {EMBO J.},
  year      = {1996},
  month     = {Jun},
  volume    = {15},
  number    = {12},
  pages     = {3153--3163},
  url       = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC450258},
  pmid      = {8670816},
  abstract  = {The molecular mechanisms involved in transgene-induced gene silencing
    ('quelling') in Neurospora crassa were investigated using the carotenoid
    biosynthetic gene albino-1 (al-1) as a visual marker. Deletion derivatives
    of the al-1 gene showed that a transgene must contain at least approximately
    132 bp of sequences homologous to the transcribed region of the native
    gene in order to induce quelling. Transgenes containing only al-1
    promoter sequences do not cause quelling. Specific sequences are
    not required for gene silencing, as different regions of the al-1
    gene produced quelling. A mutant defective in cytosine methylation
    (dim-2) exhibited normal frequencies and degrees of silencing, indicating
    that cytosine methylation is not responsible for quelling, despite
    the fact that methylation of transgene sequences frequently is correlated
    with silencing. Silencing was shown to be a dominant trait, operative
    in heterokaryotic strains containing a mixture of transgenic and
    non-transgenic nuclei. This result indicates that a diffusable, trans-acting
    molecule is involved in quelling. A transgene-derived, sense RNA
    was detected in quelled strains and was found to be absent in their
    revertants. These data are consistent with a model in which an RNA-DNA
    or RNA-RNA interaction is involved in transgene-induced gene silencing
    in Neurospora.},
  keywords  = {sirna},
  pdf       = {../local/Cogoni1996Transgene.pdf},
  file      = {Cogoni1996Transgene.pdf:Cogoni1996Transgene.pdf:PDF},
  owner     = {vert},
  timestamp = {2006.03.28}
}

@article{Cohen2004application,
  author = {Gilles Cohen and M{\'e}lanie Hilario and Hugo Sax and St{\'e}phane Hugonnet
	and Christian Pellegrini and Antoine Geissbuhler},
  title = {An application of one-class support vector machine to nosocomial
	infection detection.},
  journal = {Medinfo},
  year = {2004},
  volume = {11},
  pages = {716--720},
  number = {Pt 1},
  abstract = {Nosocomial infections ({NI}s)---those acquired in health care settings---are
	among the major causes of increased mortality among hospitalized
	patients. {T}hey are a significant burden for patients and health
	authorities alike; it is thus important to monitor and detect them
	through an effective surveillance system. {T}his paper describes
	a retrospective analysis of a prevalence survey of {NI}s done in
	the {G}eneva {U}niversity {H}ospital. {O}ur goal is to identify patients
	with one or more {NI}s on the basis of clinical and other data collected
	during the survey. {I}n this two-class classification task, the main
	difficulty lies in the significant imbalance between positive or
	infected (11\%) and negative (89\%) cases. {T}o cope with class imbalance,
	we investigate one-class {SVM}s which can be trained to distinguish
	two classes on the basis of examples from a single class (in this
	case, only "normal" or non infected patients). {T}he infected ones
	are then identified as "abnormal" cases or outliers that deviate
	significantly from the normal profile. {E}xperimental results are
	encouraging: whereas standard 2-class {SVM}s scored a baseline sensitivity
	of 50.6\% on this problem, the one-class approach increased sensitivity
	to as much as 92.6\%. {T}hese results are comparable to those obtained
	by the authors in a previous study on asymmetrical soft margin {SVM}s;
	they suggest that one-class {SVM}s can provide an effective and efficient
	way of overcoming data imbalance in classification problems.},
  keywords = {Aged, Air, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts,
	Artificial Intelligence, Atrial, Automated, Canada, Carotid Stenosis,
	Cerebrovascular Accident, Cerebrovascular Circulation, Comparative
	Study, Computer-Assisted, Cross Infection, Cysteine, Data Collection,
	Decision Trees, Dementia, Diagnosis, Disulfides, Doppler, Embolism,
	Expert Systems, Extramural, Factor Analysis, Female, Gene Expression,
	Gene Expression Profiling, Health Status, Heart Septal Defects, Hospitals,
	Humans, Infection Control, Intracranial Embolism, Male, Models, Molecular,
	Myocardial Infarction, N.I.H., Neoplasms, Neural Networks (Computer),
	Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, Oxidation-Reduction,
	P.H.S., Pattern Recognition, Population Surveillance, Prevalence,
	Prognosis, Protein Binding, Protein Folding, Proteins, ROC Curve,
	Research Support, Retrospective Studies, Sensitivity and Specificity,
	Software, Statistical, Switzerland, Transcranial, Treatment Outcome,
	U.S. Gov't, Ultrasonography, University, 15360906},
  pii = {D040004219},
  pmid = {15360906}
}

@article{Coi2006Prediction,
  author    = {Coi, A. and Massarelli, I. and Murgia, L. and Saraceno, M. and
    Calderone, V. and Bianucci, A. M.},
  title     = {{P}rediction of h{ERG} potassium channel affinity by the {CODESSA} approach.},
  journal   = {Bioorg. Med. Chem.},
  year      = {2006},
  month     = {May},
  volume    = {14},
  number    = {9},
  pages     = {3153--3159},
  doi       = {10.1016/j.bmc.2005.12.030},
  url       = {http://dx.doi.org/10.1016/j.bmc.2005.12.030},
  pmid      = {16426850},
  pii       = {S0968-0896(05)01212-5},
  abstract  = {The problem of predicting torsadogenic cardiotoxicity of drugs is
    afforded in this work. QSAR studies on a series of molecules, acting
    as hERG K+ channel blockers, were carried out for this purpose by
    using the CODESSA program. Molecules belonging to the analyzed dataset
    are characterized by different therapeutic targets and by high molecular
    diversity. The predictive power of the obtained models was estimated
    by means of rigorous validation criteria implying the use of highly
    diagnostic statistical parameters on the test set, other than the
    training set. Validation results obtained for a blind set, disjoined
    from the whole dataset initially considered, confirmed the predictive
    potency of the models proposed here, so suggesting that they are
    worth to be considered as a valuable tool for practical applications
    in predicting the blockade of hERG K+ channels.},
  keywords  = {chemoinformatics herg},
  timestamp = {2006.10.06}
}

@article{Cole2001Polychemotherapy,
  author      = {Cole, B. F. and Gelber, R. D. and Gelber, S. and Coates, A. S. and Goldhirsch, A.},
  title       = {Polychemotherapy for early breast cancer: an overview of the randomised clinical
    trials with quality-adjusted survival analysis.},
  journal     = {Lancet},
  year        = {2001},
  month       = {Jul},
  volume      = {358},
  number      = {9278},
  pages       = {277--286},
  doi         = {10.1016/S0140-6736(01)05483-6},
  url         = {http://dx.doi.org/10.1016/S0140-6736(01)05483-6},
  pmid        = {11498214},
  pii         = {S0140-6736(01)05483-6},
  keywords    = {Aged; Antineoplastic Agents, therapeutic use; Breast Neoplasms, drug
    therapy/mortality; Chemotherapy, Adjuvant; Drug Therapy, Combination;
    Female; Follow-Up Studies; Humans; Middle Aged; Prognosis; Quality-Adjusted
    Life Years; Randomized Controlled Trials as Topic; Survival Analysis;
    Tamoxifen, therapeutic use},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2012.03.01}
}

@article{Cole2005Comparing,
  author = {Jason C Cole and Christopher W Murray and J. Willem M Nissink and
	Richard D Taylor and Robin Taylor},
  title = {Comparing protein-ligand docking programs is difficult.},
  journal = {Proteins},
  year = {2005},
  volume = {60},
  pages = {325--332},
  number = {3},
  month = aug,
  abstract = {There is currently great interest in comparing protein-ligand docking
	programs. A review of recent comparisons shows that it is difficult
	to draw conclusions of general applicability. Statistical hypothesis
	testing is required to ensure that differences in pose-prediction
	success rates and enrichment rates are significant. Numerical measures
	such as root-mean-square deviation need careful interpretation and
	may profitably be supplemented by interaction-based measures and
	visual inspection of dockings. Test sets must be of appropriate diversity
	and of good experimental reliability. The effects of crystal-packing
	interactions may be important. The method used for generating starting
	ligand geometries and positions may have an appreciable effect on
	docking results. For fair comparison, programs must be given search
	problems of equal complexity (e.g. binding-site regions of the same
	size) and approximately equal time in which to solve them. Comparisons
	based on rescoring require local optimization of the ligand in the
	space of the new objective function. Re-implementations of published
	scoring functions may give significantly different results from the
	originals. Ostensibly minor details in methodology may have a profound
	influence on headline success rates.},
  doi = {10.1002/prot.20497},
  institution = {Cambridge Crystallographic Data Centre, Cambridge, United Kingdom.},
  keywords = {Algorithms; Artificial Intelligence; Binding Sites; Computational
	Biology, methods; Computer Simulation; Crystallization; Crystallography,
	X-Ray; Databases, Protein; Ligands; Models, Molecular; Molecular
	Structure; Programming Languages; Protein Binding; Proteins, chemistry;
	Proteomics, methods; Reproducibility of Results; Software},
  owner = {bricehoffmann},
  pmid = {15937897},
  timestamp = {2009.02.13},
  url = {http://dx.doi.org/10.1002/prot.20497}
}

@article{Coleman2004Noninvasive,
  author = {D. Jackson Coleman and Ronald H Silverman and Mark J Rondeau and
	H. Culver Boldt and Harriet O Lloyd and Frederic L Lizzi and Thomas
	A Weingeist and Xue Chen and Sumalee Vangveeravong and Robert Folberg},
  title = {Noninvasive in vivo detection of prognostic indicators for high-risk
	uveal melanoma: ultrasound parameter imaging.},
  journal = {Ophthalmology},
  year = {2004},
  volume = {111},
  pages = {558--564},
  number = {3},
  month = mar,
  abstract = {P{URPOSE}: {P}rimary malignant melanoma of the choroid and ciliary
	body has traditionally been treated without histologic staging, using
	purely clinical indicators. {T}he presence of extravascular matrix
	patterns ({EMP}) in histologic sections of uveal melanoma has been
	shown to be an independent indicator of metastatic risk. {T}hese
	patterns are of a dimension and physical composition that are likely
	to be detected with ultrasound backscatter analysis. {O}ur aim was
	to determine whether ultrasound parameter imaging could detect the
	presence of {EMP} at a diagnostically significant level for treatment
	staging and for planning investigational studies of therapeutic modalities.
	{DESIGN}: {P}rospective, masked ultrasound-pathologic correlative
	study. {PARTICIPANTS}: {O}ne hundred seventeen patients diagnosed
	with previously untreated choroidal melanoma were scanned within
	2 weeks before enucleation. {METHODS}: {T}umors were evaluated histologically
	and divided into high-risk and low-risk groups on the basis of the
	presence of 2\% or more histologic cross-sectional area composed
	of {EMP} patterns. {D}igital ultrasound data were processed to generate
	parameter images representing the size and concentration of ultrasound
	scatterers. {H}istologic and ultrasound images and data were correlated,
	and linear and nonlinear statistical methods were used to create
	multivariate models for noninvasive differentiation of high-risk
	and low-risk tumors. {MAIN} {OUTCOME} {MEASURES}: {P}resence or absence
	of high-risk {EMP} and associated ultrasound parameter classification
	models. {RESULTS}: {O}f the 117 tumors, 69 were classified as low
	risk, and 48 were classified as high-risk with histologic analysis.
	{A} classification that used ultrasound parameter image features
	with linear discriminant analysis could correctly identify 79.5\%
	of cases retrospectively and 75.2\% of cases by use of cross-validation,
	an estimate of prospective classification ability. {B}y use of a
	more powerful classification technique (support vector machine),
	93.1\% of cases were correctly classified retrospectively. {W}ith
	a cross-validation procedure, 80.10\% of cases were correctly classified.
	{CONCLUSIONS}: {U}ltrasound can be used noninvasively to classify
	tumors into high-risk and low-risk groups by detecting the presence
	of {EMP} patterns. {B}y the use of previous studies that compared
	the histologic presence of {EMP} patterns with patient survival,
	estimates of hazard rates associated with ultrasound risk groups
	can be made. {T}he noninvasive ultrasound classification is potentially
	useful as a prognostic variable and as a tool for stratification
	of patient populations for tumor treatment evaluation.},
  doi = {10.1016/j.ophtha.2003.06.021},
  pdf = {../local/Coleman2004Noninvasive.pdf},
  file = {Coleman2004Noninvasive.pdf:local/Coleman2004Noninvasive.pdf:PDF},
  keywords = {Algorithms, Ambergris, Combinatorial Chemistry Techniques, Eye Enucleation,
	Humans, Melanoma, Models, Molecular, Molecular Conformation, Non-U.S.
	Gov't, Odors, P.H.S., Perfume, Predictive Value of Tests, Prognosis,
	Prospective Studies, Quantitative Structure-Activity Relationship,
	Research Support, U.S. Gov't, Uveal Neoplasms, 15019336},
  pii = {S0161642003014982},
  url = {http://dx.doi.org/10.1016/j.ophtha.2003.06.021}
}

@article{Collier2004Comparison,
  author = {Nigel Collier and Koichi Takeuchi},
  title = {Comparison of character-level and part of speech features for name
	recognition in biomedical texts.},
  journal = {J {Biomed} {Inform}},
  year = {2004},
  volume = {37},
  pages = {423--435},
  number = {6},
  month = dec,
  abstract = {The immense volume of data which is now available from experiments
	in molecular biology has led to an explosion in reported results
	most of which are available only in unstructured text format. {F}or
	this reason there has been great interest in the task of text mining
	to aid in fact extraction, document screening, citation analysis,
	and linkage with large gene and gene-product databases. {I}n particular
	there has been an intensive investigation into the named entity ({NE})
	task as a core technology in all of these tasks which has been driven
	by the availability of high volume training sets such as the {GENIA}
	v3.02 corpus. {D}espite such large training sets accuracy for biology
	{NE} has proven to be consistently far below the high levels of performance
	in the news domain where {F} scores above 90 are commonly reported
	which can be considered near to human performance. {W}e argue that
	it is crucial that more rigorous analysis of the factors that contribute
	to the model's performance be applied to discover where the underlying
	limitations are and what our future research direction should be.
	{O}ur investigation in this paper reports on variations of two widely
	used feature types, part of speech ({POS}) tags and character-level
	orthographic features, and makes a comparison of how these variations
	influence performance. {W}e base our experiments on a proven state-of-the-art
	model, support vector machines using a high quality subset of 100
	annotated {MEDLINE} abstracts. {E}xperiments reveal that the best
	performing features are orthographic features with {F} score of 72.6.
	{A}lthough the {B}rill tagger trained in-domain on the {GENIA} v3.02p
	{POS} corpus gives the best overall performance of any {POS} tagger,
	at an {F} score of 68.6, this is still significantly below the orthographic
	features. {I}n combination these two features types appear to interfere
	with each other and degrade performance slightly to an {F} score
	of 72.3.},
  doi = {10.1016/j.jbi.2004.08.008},
  pdf = {../local/Collier2004Comparison.pdf},
  file = {Collier2004Comparison.pdf:local/Collier2004Comparison.pdf:PDF},
  keywords = {biosvm nlp},
  pii = {S1532-0464(04)00088-7},
  url = {http://dx.doi.org/10.1016/j.jbi.2004.08.008}
}

@article{Collins2003Vision,
  author = {Francis S Collins and Eric D Green and Alan E Guttmacher and Mark
	S Guyer and {U. S. National Human Genome Research Institute}},
  title = {A vision for the future of genomics research.},
  journal = {Nature},
  year = {2003},
  volume = {422},
  pages = {835--847},
  number = {6934},
  month = apr,
  institution = {National Human Genome Research Institute, National Institutes of
	Health, Bethesda, Maryland 20892, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {12695777},
  timestamp = {2010.07.28}
}

@inproceedings{Collins2001Convolution,
  author = {Collins, M. and Duffy, N.},
  title = {Convolution {Kernels} for {Natural} {Language}},
  booktitle = {Adv. {Neural}. {Inform}. {Process} {Syst}.},
  year = {2001},
  editor = {Dietterich, T. G. and Becker, S. and Ghahramani, Z.},
  volume = {14},
  pages = {625--632},
  publisher = {MIT Press}
}

@article{Collobert2002parallel,
  author = {Ronan Collobert and Samy Bengio and Yoshua Bengio},
  title = {A parallel mixture of {SVM}s for very large scale problems.},
  journal = {Neural {Comput}},
  year = {2002},
  volume = {14},
  pages = {1105--1114},
  number = {5},
  month = may,
  abstract = {Support vector machines ({SVM}s) are the state-of-the-art models for
	many classification problems, but they suffer from the complexity
	of their training algorithm, which is at least quadratic with respect
	to the number of examples. {H}ence, it is hopeless to try to solve
	real-life problems having more than a few hundred thousand examples
	with {SVM}s. {T}his article proposes a new mixture of {SVM}s that
	can be easily implemented in parallel and where each {SVM} is trained
	on a small subset of the whole data set. {E}xperiments on a large
	benchmark data set ({F}orest) yielded significant time improvement
	(time complexity appears empirically to locally grow linearly with
	the number of examples). {I}n addition, and surprisingly, a significant
	improvement in generalization was observed.},
  doi = {10.1162/089976602753633402},
  url = {http://dx.doi.org/10.1162/089976602753633402}
}

@article{Comon1994Independent,
  author = {Comon, Pierre},
  title = {Independent Component Analysis: a new concept?},
  journal = {Signal {Processing}},
  year = {1994},
  volume = {36},
  pages = {287--314},
  number = {3}
}

@article{Consortium2006MicroArray,
  author = {{MAQC Consortium} and Leming Shi and Laura H Reid and Wendell
	D Jones and Richard Shippy and Janet A Warrington and Shawn C Baker
	and Patrick J Collins and Francoise de Longueville and Ernest S Kawasaki
	and Kathleen Y Lee and Yuling Luo and Yongming Andrew Sun and James
	C Willey and Robert A Setterquist and Gavin M Fischer and Weida Tong
	and Yvonne P Dragan and David J Dix and Felix W Frueh and Frederico
	M Goodsaid and Damir Herman and Roderick V Jensen and Charles D Johnson
	and Edward K Lobenhofer and Raj K Puri and Uwe Scherf and Jean Thierry-Mieg
	and Charles Wang and Mike Wilson and Paul K Wolber and Lu Zhang and
	Shashi Amur and Wenjun Bao and Catalin C Barbacioru and Anne Bergstrom
	Lucas and Vincent Bertholet and Cecilie Boysen and Bud Bromley and
	Donna Brown and Alan Brunner and Roger Canales and Xiaoxi Megan Cao
	and Thomas A Cebula and James J Chen and Jing Cheng and Tzu-Ming
	Chu and Eugene Chudin and John Corson and J. Christopher Corton and
	Lisa J Croner and Christopher Davies and Timothy S Davison and Glenda
	Delenstarr and Xutao Deng and David Dorris and Aron C Eklund and
	Xiao-hui Fan and Hong Fang and Stephanie Fulmer-Smentek and James
	C Fuscoe and Kathryn Gallagher and Weigong Ge and Lei Guo and Xu
	Guo and Janet Hager and Paul K Haje and Jing Han and Tao Han and
	Heather C Harbottle and Stephen C Harris and Eli Hatchwell and Craig
	A Hauser and Susan Hester and Huixiao Hong and Patrick Hurban and
	Scott A Jackson and Hanlee Ji and Charles R Knight and Winston P
	Kuo and J. Eugene LeClerc and Shawn Levy and Quan-Zhen Li and Chunmei
	Liu and Ying Liu and Michael J Lombardi and Yunqing Ma and Scott
	R Magnuson and Botoul Maqsodi and Tim McDaniel and Nan Mei and Ola
	Myklebost and Baitang Ning and Natalia Novoradovskaya and Michael
	S Orr and Terry W Osborn and Adam Papallo and Tucker A Patterson
	and Roger G Perkins and Elizabeth H Peters and Ron Peterson and Kenneth
	L Philips and P. Scott Pine and Lajos Pusztai and Feng Qian and Hongzu
	Ren and Mitch Rosen and Barry A Rosenzweig and Raymond R Samaha and
	Mark Schena and Gary P Schroth and Svetlana Shchegrova and Dave D
	Smith and Frank Staedtler and Zhenqiang Su and Hongmei Sun and Zoltan
	Szallasi and Zivana Tezak and Danielle Thierry-Mieg and Karol L Thompson
	and Irina Tikhonova and Yaron Turpaz and Beena Vallanat and Christophe
	Van and Stephen J Walker and Sue Jane Wang and Yonghong Wang and
	Russ Wolfinger and Alex Wong and Jie Wu and Chunlin Xiao and Qian
	Xie and Jun Xu and Wen Yang and Liang Zhang and Sheng Zhong and Yaping
	Zong and William Slikker},
  title = {The {MicroArray} {Quality} {Control} ({MAQC}) project shows inter-
	and intraplatform reproducibility of gene expression measurements},
  journal = {Nat. Biotechnol.},
  year = {2006},
  volume = {24},
  pages = {1151--1161},
  number = {9},
  month = sep,
  abstract = {Over the last decade, the introduction of microarray technology has
	had a profound impact on gene expression research. The publication
	of studies with dissimilar or altogether contradictory results, obtained
	using different microarray platforms to analyze identical RNA samples,
	has raised concerns about the reliability of this technology. The
	MicroArray Quality Control (MAQC) project was initiated to address
	these concerns, as well as other performance and data analysis issues.
	Expression data on four titration pools from two distinct reference
	RNA samples were generated at multiple test sites using a variety
	of microarray-based and alternative technology platforms. Here we
	describe the experimental design and probe mapping efforts behind
	the MAQC project. We show intraplatform consistency across test sites
	as well as a high level of interplatform concordance in terms of
	genes identified as differentially expressed. This study provides
	a resource that represents an important first step toward establishing
	a framework for the use of microarrays in clinical and regulatory
	settings.},
  doi = {10.1038/nbt1239},
  institution = {National Center for Toxicological Research, US Food and Drug Administration,
	Jefferson, Arkansas 72079, USA.},
  keywords = {Equipment Design; Equipment Failure Analysis; Gene Expression Profiling,
	instrumentation/methods; Oligonucleotide Array Sequence Analysis,
	instrumentation; Quality Assurance, Health Care, methods; Quality
	Control; Reproducibility of Results; Sensitivity and Specificity;
	United States},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {nbt1239},
  pmid = {16964229},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/nbt1239}
}

@article{Conte2004Thirty,
  author = {Conte, D. and Foggia, P. and Sansone, C. and Vento, M.},
  title = {Thirty years of graph matching in pattern recognition},
  journal = {Int. J. Pattern Recogn. Artif. Intell.},
  year = {2004},
  volume = {18},
  pages = {265--298},
  number = {3},
  abstract = {A recent paper posed the question: ``Graph Matching: What are we really
	talking about?''. Far from providing a definite answer to that question,
	in this paper we will try to characterize the role that graphs play
	within the Pattern Recognition field. To this aim two taxonomies
	are presented and discussed. The first includes almost all the graph
	matching algorithms proposed from the late seventies, and describes
	the different classes of algorithms. The second taxonomy considers
	the types of common applications of graph-based techniques in the
	Pattern Recognition and Machine Vision field.},
  doi = {10.1142/S0218001404003228},
  owner = {michael},
  timestamp = {2008.10.02},
  url = {http://dx.doi.org/10.1142/S0218001404003228}
}

@article{Cook2010model,
  author = {Peter R Cook},
  title = {A model for all genomes: the role of transcription factories.},
  journal = {J Mol Biol},
  year = {2010},
  volume = {395},
  pages = {1--10},
  number = {1},
  month = jan,
  abstract = {A model for all genomes involving one major architectural motif is
	presented: DNA or chromatin loops are tethered to ``factories'' through
	the transcription machinery---a polymerase (active or inactive) or
	its transcription factors (activators or repressors). These loops
	appear and disappear as polymerases initiate and terminate (and as
	factors bind and dissociate), so the structure is ever-changing and
	self-organizing. This model is parsimonious, detailed (and so easily
	tested), and incorporates elements found in various other models.},
  doi = {10.1016/j.jmb.2009.10.031},
  institution = {Sir William Dunn School of Pathology, University of Oxford, South
	Parks Road, Oxford OX1 3RE, UK. peter.cook@path.ox.ac.uk},
  keywords = {Animals; Bacteria; Eukaryota; Gene Expression Regulation; Genome;
	Humans; Models, Genetic; Transcription, Genetic},
  owner = {phupe},
  pii = {S0022-2836(09)01271-6},
  pmid = {19852969},
  timestamp = {2010.08.27},
  url = {http://dx.doi.org/10.1016/j.jmb.2009.10.031}
}

@article{Cooper2001GlycoSuiteDB,
  author = {Cooper, C. and Harrison, M. and Wilkins, M. and Packer, N.},
  title = {{GlycoSuiteDB}: a new curated relational database of glycoprotein
	glycan structures and their biological sources},
  journal = {Nucleic {Acids} {Res}.},
  year = {2001},
  volume = {29},
  pages = {332--335},
  number = {1},
  pdf = {../local/Cooper2001GlycoSuiteDB.pdf},
  file = {Cooper2001GlycoSuiteDB.pdf:local/Cooper2001GlycoSuiteDB.pdf:PDF},
  owner = {vert},
  url = {http://nar.oxfordjournals.org/cgi/content/full/29/1/332}
}

@inproceedings{Cordella01improved,
  author = {L. P. Cordella and P. Foggia and C. Sansone and M. Vento},
  title = {An improved algorithm for matching large graphs},
  booktitle = {3rd {IAPR-TC15} Workshop on Graph-based Representations in Pattern
	Recognition},
  year = {2001},
  pages = {149--159},
  publisher = {CUEN}
}

@inproceedings{Cordella1999Performance,
  author = {Cordella, L. P. and Foggia, P. and Sansone, C. and Vento, M.},
  title = {Performance Evaluation of the {VF} Graph Matching Algorithm},
  booktitle = {{ICIAP} '99: Proceedings of the 10th International Conference on Image
	Analysis and Processing},
  year = {1999},
  pages = {1172},
  address = {Washington, DC, USA},
  publisher = {IEEE Computer Society},
  isbn = {0-7695-0040-4}
}

@inproceedings{Cordella1996Efficient,
  author = {L. P. Cordella and P. Foggia and C. Sansone and M. Vento},
  title = {An Efficient Algorithm for the Inexact Matching of {ARG} Graphs Using
	a Contextual Transformational Model},
  booktitle = {International Conference on Pattern Recognition},
  year = {1996},
  volume = {3},
  pages = {180},
  address = {Los Alamitos, CA, USA},
  doi = {10.1109/ICPR.1996.546934},
  issn = {1051-4651},
  publisher = {IEEE Computer Society}
}

@inproceedings{Cortes2003Rational,
  author = {Cortes, C. and Haffner, P. and Mohri, M.},
  title = {Rational {Kernels}},
  booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 15},
  year = {2003},
  editor = {Becker, Suzanna and Thrun, Sebastian and Obermayer, Klaus},
  publisher = {MIT Press}
}

@article{Cortes1995Support-Vector,
  author = {Cortes, C. and Vapnik, V.},
  title = {Support-Vector Networks},
  journal = {Machine Learning},
  year = {1995},
  volume = {20},
  pages = {273--297},
  number = {3},
  abstract = {The support-vector network is a new learning machine for two-group
	classification problems. The machine conceptually implements the
	following idea: input vectors are non-linearly mapped to a very high-dimension
	feature space. In this feature space a linear decision surface is
	constructed. Special properties of the decision surface ensures high
	generalization ability of the learning machine. The idea behind the
	support-vector network was previously implemented for the restricted
	case where the training data can be separated without errors. We
	here extend this result to non-separable training data.},
  doi = {10.1023/A:1022627411411},
  issn = {0885-6125},
  keywords = {Computer Science},
  owner = {fantine},
  publisher = {Springer Netherlands},
  timestamp = {2010.10.24},
  url = {http://dx.doi.org/10.1023/A:1022627411411}
}

@article{Coupez2006Docking,
  author = {B. Coupez and R. A. Lewis},
  title = {Docking and scoring---theoretically easy, practically impossible?},
  journal = {Curr. Med. Chem.},
  year = {2006},
  volume = {13},
  pages = {2995--3003},
  number = {25},
  abstract = {Structure-based Drug Design (SBDD) is an essential part of the modern
	medicinal chemistry, and has led to the acceleration of many projects,
	and even to drugs on the market. Programs that perform docking and
	scoring of ligands to receptors are powerful tools in the drug designer's
	armoury that enhance the process of SBDD. They are even deployed
	on the desktop of many bench chemists. It is timely to review the
	state of the art, to understand how good our docking programs are,
	and what are the issues. In this review we would like to provide
	a guide around the reliable aspects of docking and scoring and the
	associated pitfalls aiming at an audience of medicinal chemists rather
	than modellers. For convenience, we will divide the review into two
	parts: docking and scoring. Docking concerns the preparation of the
	receptor and the ligand(s), the sampling of conformational space
	and stereochemistry (if appropriate). Scoring concerns the evaluation
	of all of the ligand-receptor poses generated by docking. The two
	processes are not truly independent, and this will be discussed here
	in detail. The preparation of the receptor and ligand(s) before docking
	requires great care. For the receptor, issues of protonation, tautomerisation
	and hydration are key, and we will discuss current approaches to
	these issues. Even more important is the degree of sampling: can
	the algorithms reproduce what is observed experimentally? If they
	can, are the scoring algorithms good enough to recognise this pose
	as the best? Do the scores correlate with observed binding affinity?
	How does local knowledge of the target (for example hinge-binding
	to a kinase) affect the accuracy of the predictions? We will review
	the key findings from several evaluation studies and present conclusions
	about when and how to interpret and trust the results of docking
	and scoring. Finally, we will present an outline of some of the latest
	developments in the area of scoring functions.},
  institution = {Computer-Aided Drug Discovery, Novartis Institute for Biomedical
	Research, Switzerland.},
  keywords = {Cluster Analysis; Computational Biology, methods; Computer Simulation;
	Computer-Aided Design; Databases, Factual; Drug Design; Ligands;
	Models, Chemical; Software; Structure-Activity Relationship},
  owner = {bricehoffmann},
  pmid = {17073642},
  timestamp = {2009.02.13}
}

@inproceedings{Cour2007Recognizing,
  author = {Cour, T. and Shi, J.},
  title = {Recognizing objects by piecing together the Segmentation Puzzle},
  booktitle = {Proc. IEEE Conference on Computer Vision and Pattern Recognition
	CVPR '07},
  year = {2007},
  pages = {1--8},
  month = "17--22~" # jun,
  doi = {10.1109/CVPR.2007.383051},
  owner = {michael},
  timestamp = {2009.11.10}
}

@inproceedings{Cour2006Balanced,
  author = {Cour, T. and Srinivasan, P. and Shi, J.},
  title = {Balanced Graph Matching},
  booktitle = {Advances in Neural Information Processing Systems},
  year = {2006},
  citeulike-article-id = {1072329},
  keywords = {graph-matching, spectral, spectral-matching},
  posted-at = {2007-01-28 07:49:33},
  priority = {5}
}

@article{Cover1996Universal,
  author = {Cover, T. M. and Ordentlich, E.},
  title = {Universal portfolios with side information},
  journal = {{IEEE} {Trans}. {Inform}. {Theory}},
  year = {1996},
  volume = {42},
  pages = {348--363},
  number = {2},
  month = mar,
  abstract = {We present a sequential investment algorithm, the $\mu$-weighted universal
	portfolio with side information, which achieves, to first order in
	the exponent, the same wealth as the best side-information dependent
	investment strategy (the best state-constant rebalanced portfolio)
	determined in hindsight from observed market and side-information
	outcomes. {T}his is an individual sequence result which shows the
	difference between the exponential growth wealth of the best state-constant
	rebalanced portfolio and the universal portfolio with side information
	is uniformly less than (d/(2n))log (n+1)+(k/n)log 2 for every stock
	market and side-information sequence and for all time n. {H}ere d=k(m-1)
	is the number of degrees of freedom in the state-constant rebalanced
	portfolio with k states of side information and m stocks. {T}he proof
	of this result establishes a close connection between universal investment
	and universal data compression },
  pdf = {../local/Cover1996Universal.pdf},
  file = {Cover1996Universal.pdf:local/Cover1996Universal.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@book{Cover1990Elements,
  title = {Elements of {Information} {Theory}},
  publisher = {John Wiley},
  year = {1990},
  author = {Cover, T. M. and Thomas, J. A.},
  address = {New York},
  owner = {vert}
}

@article{Cowell2007application,
  author = {Cowell, J. K. and Hawthorn, L.},
  title = {The application of microarray technology to the analysis of the cancer
	genome},
  journal = {Curr. Mol. Med.},
  year = {2007},
  volume = {7},
  pages = {103--120},
  number = {1},
  month = feb,
  abstract = {The identification of genetic events that are involved in the development
	of human cancer has been facilitated through the development and
	application of a diverse series of high resolution, high throughput
	microarray platforms. Essentially there are two types of array; those
	that carry PCR products from cloned nucleic acids (e.g. cDNA, BACs,
	cosmids) and those that use oligonucleotides. Each has advantages
	and disadvantages but it is now possible to survey genome wide DNA
	copy number abnormalities and expression levels to allow correlations
	between losses, gains and amplifications in tumor cells with genes
	that are over- and under-expressed in the same samples. The gene
	expression arrays that provide estimates of mRNA levels in tumors
	have given rise to exon-specific arrays that can identify both gene
	expression levels, alternative splicing events and mRNA processing
	alterations. Oligonucleotide arrays are also being used to interrogate
	single nucleotide polymorphisms (SNPs) throughout the genome for
	linkage and association studies and these have been adapted to quantify
	copy number abnormalities and loss of heterozygosity events. To identify
	as yet unknown transcripts tiling arrays across the genome have been
	developed which can also identify DNA methylation changes and be
	used to identify DNA-protein interactions using ChIP on Chip protocols.
	Ultimately DNA sequencing arrays will allow resequencing of chromosome
	regions and whole genomes. With all of these capabilities becoming
	routine in genomics laboratories, the idea of a systematic characterization
	of the sum genetic events that give rise to a cancer cell is rapidly
	becoming a reality.},
  institution = {Department of Cancer Genetics, Roswell Park Cancer Institute, Buffalo,
	New York 14263, USA. john.cowell@roswellpark.org},
  keywords = {csbcbook, csbcbook-ch2},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {17311536},
  timestamp = {2009.10.18}
}

@article{Cox1990Asymptotic,
  author = {Cox, D. and O'Sullivan, F.},
  title = {Asymptotic analysis of penalized likelihood and related estimators},
  journal = {Ann. {Stat}.},
  year = {1990},
  volume = {18},
  pages = {1676--1695},
  pdf = {../local/Cox1990Asymptotic.pdf},
  file = {Cox1990Asymptotic.pdf:local/Cox1990Asymptotic.pdf:PDF},
  owner = {jeanphilippevert}
}

@article{Cox2003Functional,
  author = {David D Cox and Robert L Savoy},
  title = {Functional magnetic resonance imaging ({fMRI}) ``brain reading'': detecting
	and classifying distributed patterns of {fMRI} activity in human
	visual cortex.},
  journal = {Neuroimage},
  year = {2003},
  volume = {19},
  pages = {261--270},
  number = {2 Pt 1},
  month = jun,
  abstract = {Traditional (univariate) analysis of functional {MRI} (f{MRI}) data
	relies exclusively on the information contained in the time course
	of individual voxels. {M}ultivariate analyses can take advantage
	of the information contained in activity patterns across space, from
	multiple voxels. {S}uch analyses have the potential to greatly expand
	the amount of information extracted from f{MRI} data sets. {I}n the
	present study, multivariate statistical pattern recognition methods,
	including linear discriminant analysis and support vector machines,
	were used to classify patterns of f{MRI} activation evoked by the
	visual presentation of various categories of objects. {C}lassifiers
	were trained using data from voxels in predefined regions of interest
	during a subset of trials for each subject individually. {C}lassification
	of subsequently collected f{MRI} data was attempted according to
	the similarity of activation patterns to prior training examples.
	{C}lassification was done using only small amounts of data (20 s
	worth) at a time, so such a technique could, in principle, be used
	to extract information about a subject's percept on a near real-time
	basis. {C}lassifiers trained on data acquired during one session
	were equally accurate in classifying data collected within the same
	session and across sessions separated by more than a week, in the
	same subject. {A}lthough the highest classification accuracies were
	obtained using patterns of activity including lower visual areas
	as input, classification accuracies well above chance were achieved
	using regions of interest restricted to higher-order object-selective
	visual areas. {I}n contrast to typical f{MRI} data analysis, in which
	hours of data across many subjects are averaged to reveal slight
	differences in activation, the use of pattern recognition methods
	allows a subtle 10-way discrimination to be performed on an essentially
	trial-by-trial basis within individuals, demonstrating that f{MRI}
	data contain far more information than is typically appreciated.},
  pii = {S1053811903000491}
}

@article{Crick1970Central,
  author = {Crick, F.},
  title = {Central Dogma of Molecular Biology},
  journal = {Nature},
  year = {1970},
  volume = {227},
  pages = {561--563},
  abstract = {The central dogma of molecular biology deals with the detailed
	residue-by-residue transfer of sequential information. It states
	that such information cannot be transferred from protein to either
	protein or nucleic acid.},
  pdf = {../local/Crick1970Central.pdf},
  file = {Crick1970Central.pdf:Crick1970Central.pdf:PDF},
  keywords = {csbcbook},
  owner = {jp},
  url = {http://profiles.nlm.nih.gov/SC/B/C/C/H/_/scbcch.pdf}
}

@book{Cristianini2000introduction,
  author = {Cristianini, N. and Shawe-Taylor, J.},
  title = {An introduction to {S}upport {V}ector {M}achines and other kernel-based
	learning methods},
  publisher = {Cambridge University Press},
  year = {2000},
  subject = {kernel},
  url = {http://www.support-vector.net}
}

@article{Cristianini2002Latent,
  author = {Cristianini, N. and Shawe-Taylor, J. and Lodhi, H.},
  title = {Latent semantic kernels},
  journal = {J. {I}ntell. {I}nform. {S}yst.},
  year = {2002},
  volume = {18},
  pages = {127--152},
  number = {2--3},
  doi = {10.1023/A:1013625426931},
  pdf = {../local/Cristianini2002Latent.pdf},
  file = {Cristianini2002Latent.pdf:local/Cristianini2002Latent.pdf:PDF},
  url = {http://dx.doi.org/10.1023/A:1013625426931}
}

@article{Croce2009Causes,
  author = {Croce, Carlo M.},
  title = {Causes and consequences of {microRNA} dysregulation in cancer},
  journal = {Nat Rev Genet},
  year = {2009},
  month = oct,
  volume = {10},
  number = {10},
  pages = {704--714},
  issn = {1471-0056},
  doi = {10.1038/nrg2634},
  url = {http://dx.doi.org/10.1038/nrg2634},
  pdf = {../local/Croce2009Causes.pdf},
  file = {Croce2009Causes.pdf:Croce2009Causes.pdf:PDF},
  keywords = {csbcbook},
  owner = {phupe},
  timestamp = {2009.10.15}
}

@article{Croce2008Oncogenes,
  author = {Croce, C. M.},
  title = {Oncogenes and cancer},
  journal = {N. Engl. J. Med.},
  year = {2008},
  volume = {358},
  pages = {502--511},
  number = {5},
  month = jan,
  doi = {10.1056/NEJMra072367},
  pdf = {../local/Croce2008Oncogenes.pdf},
  file = {Croce2008Oncogenes.pdf:Croce2008Oncogenes.pdf:PDF},
  institution = {Department of Molecular Virology, Immunology, and Medical Genetics
	and the Human Cancer Genetics Program, Ohio State University Medical
	Center, Columbus, OH 43210, USA. carlo.croce@osumc.edu},
  keywords = {csbcbook},
  owner = {jp},
  pii = {358/5/502},
  pmid = {18234754},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1056/NEJMra072367}
}

@book{Csorgo1998Limit,
  title = {Limit theorems in change-point analysis},
  publisher = {John Wiley},
  year = {1998},
  author = {Cs{\"o}rg{\H{o}}, M. and Horv{\'a}th, L.},
  address = {New York},
  owner = {jp},
  timestamp = {2012.10.02}
}

@article{Csorgo1978Strong,
  author = {Cs{\"o}rg{\H{o}}, M. and R{\'e}v{\'e}sz, P.},
  title = {Strong {A}pproximations of the {Q}uantile {P}rocess},
  journal = {Ann. {S}tat.},
  year = {1978},
  volume = {6},
  pages = {882--894},
  number = {4},
  month = jul,
  booktitle = {Annals of {S}tatistics},
  pdf = {../local/Csorgo1978Strong.pdf},
  file = {Csorgo1978Strong.pdf:local/Csorgo1978Strong.pdf:PDF},
  url = {http://links.jstor.org/sici?sici=0090-5364%28197807%296%3A4%3C882%3ASAOTQP%3E2.0.CO%3B2-H}
}

@article{Cucker2002Best,
  author = {Cucker, F. and Smale, S.},
  title = {Best choices for regularization parameters in learning theory: on
	the bias-variance problem},
  journal = {Foundations of {C}omputational {M}athematics},
  year = {2002},
  volume = {2},
  pages = {413--428},
  number = {4},
  doi = {10.1007/s102080010030},
  pdf = {../local/Cucker2002Best.pdf},
  file = {Cucker2002Best.pdf:local/Cucker2002Best.pdf:PDF},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1007/s102080010030}
}

@article{Cucker2002On,
  author = {Cucker, F. and Smale, S.},
  title = {On the mathematical foundations of learning},
  journal = {Bull. {A}mer. {M}ath. {S}oc.},
  year = {2002},
  volume = {39},
  pages = {1--49},
  number = {1},
  pdf = {../local/Cucker2002On.pdf},
  file = {Cucker2002On.pdf:local/Cucker2002On.pdf:PDF},
  owner = {jeanphilippevert}
}

@article{Cuff1999Evaluation,
  author = {Cuff, J. A. and Barton, G. J.},
  title = {Evaluation and improvement of multiple sequence methods for protein
	secondary structure prediction},
  journal = {Proteins: {S}truct. {F}unct. {G}enet.},
  year = {1999},
  volume = {34},
  pages = {508--519},
  pdf = {../local/cuff99.pdf},
  file = {cuff99.pdf:local/cuff99.pdf:PDF},
  subject = {biocasp},
  url = {http://www3.interscience.wiley.com/cgi-bin/fulltext/65000270/FILE?TPL=ft_start}
}

@article{Cui2008Two,
  author = {Cui, J. and Chen, C. and Lu, H. and Sun, T. and Shen, P.},
  title = {Two Independent Positive Feedbacks and Bistability in the {Bcl-2} Apoptotic
	Switch},
  journal = {PLoS ONE},
  year = {2008},
  volume = {3},
  pages = {e1469},
  number = {1},
  month = jan,
  abstract = {Background - The complex interplay between B-cell lymphoma 2 (Bcl-2)
	family proteins constitutes a crucial checkpoint in apoptosis. Its
	detailed molecular mechanism remains controversial. Our former modeling
	studies have selected the `Direct Activation Model' as a better explanation
	for experimental observations. In this paper, we continue to extend
	this model by adding interactions according to updating experimental
	findings. Methodology/Principal Findings - Through mathematical simulation
	we found bistability, a kind of switch, can arise from a positive
	(double negative) feedback in the Bcl-2 interaction network established
	by anti-apoptotic group of Bcl-2 family proteins. Moreover, Bax/Bak
	auto-activation as an independent positive feedback can enforce the
	bistability, and make it more robust to parameter variations. By
	ensemble stochastic modeling, we also elucidated how intrinsic noise
	can change ultrasensitive switches into gradual responses. Our modeling
	result agrees well with recent experimental data where bimodal Bax
	activation distributions in cell population were found. Conclusions/Significance
	- Along with the growing experimental evidences, our studies successfully
	elucidate the switch mechanism embedded in the Bcl-2 interaction
	network and provide insights into pharmacological manipulation of
	Bcl-2 apoptotic switch as further cancer therapies.},
  doi = {10.1371/journal.pone.0001469},
  pdf = {../local/Cui2008Two.pdf},
  file = {Cui2008Two.pdf:Cui2008Two.pdf:PDF},
  keywords = {csbcbook},
  publisher = {Public Library of Science},
  url = {http://dx.plos.org/10.1371/journal.pone.0001469}
}

@article{Cui2004Esub8,
  author = {Cui, Q. and Jiang, T. and Liu, B. and Ma, S.},
  title = {Esub8: {A} novel tool to predict protein subcellular localizations
	in eukaryotic organisms},
  journal = {B{MC} {B}ioinformatics},
  year = {2004},
  volume = {5},
  pages = {66},
  abstract = {Background {S}ubcellular localization of a new protein sequence is
	very important and fruitful for understanding its function. {A}s
	the number of new genomes has dramatically increased over recent
	years, a reliable and efficient system to predict protein subcellular
	location is urgently needed. {R}esults {E}sub8 was developed to predict
	protein subcellular localizations for eukaryotic proteins based on
	amino acid composition. {I}n this research, the proteins are classified
	into the following eight groups: chloroplast, cytoplasm, extracellular,
	{G}olgi apparatus, lysosome, mitochondria, nucleus and peroxisome.
	{W}e know subcellular localization is a typical classification problem;
	consequently, a one-against-one (1-v-1) multi-class support vector
	machine was introduced to construct the classifier. {U}nlike previous
	methods, ours considers the order information of protein sequences
	by a different method. {O}ur method is tested in three subcellular
	localization predictions for prokaryotic proteins and four subcellular
	localization predictions for eukaryotic proteins on {R}einhardt's
	dataset. {T}he results are then compared to several other methods.
	{T}he total prediction accuracies of two tests are both 100\% by a
	self-consistency test, and are 92.9\% and 84.14\% by the jackknife
	test, respectively. {E}sub8 also provides excellent results: the
	total prediction accuracies are 100\% by a self-consistency test and
	87\% by the jackknife test. {C}onclusions {O}ur method represents
	a different approach for predicting protein subcellular localization
	and achieved a satisfactory result; furthermore, we believe {E}sub8
	will be a useful tool for predicting protein subcellular localizations
	in eukaryotic organisms.},
  doi = {10.1186/1471-2105-5-66},
  pdf = {../local/Cui2004Esub8.pdf},
  file = {Cui2004Esub8.pdf:local/Cui2004Esub8.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://www.biomedcentral.com/1471-2105/5/66}
}

@article{Curran1995molecular,
  author = {Curran, M. E. and Splawski, I. and Timothy, K. W. and Vincent, G.
	M. and Green, E. D. and Keating, M. T.},
  title = {A molecular basis for cardiac arrhythmia: {HERG} mutations cause
	long {QT} syndrome},
  journal = {Cell},
  year = {1995},
  volume = {80},
  pages = {795--803},
  number = {5},
  month = mar,
  abstract = {To identify genes involved in cardiac arrhythmia, we investigated
	patients with long QT syndrome (LQT), an inherited disorder causing
	sudden death from a ventricular tachyarrythmia, torsade de pointes.
	We previously mapped LQT loci on chromosomes 11 (LQT1), 7 (LQT2),
	and 3 (LQT3). Here, linkage and physical mapping place LQT2 and a
	putative potassium channel gene, HERG, on chromosome 7q35-36. Single
	strand conformation polymorphism and DNA sequence analyses reveal
	HERG mutations in six LQT families, including two intragenic deletions,
	one splice-donor mutation, and three missense mutations. In one kindred,
	the mutation arose de novo. Northern blot analyses show that HERG
	is strongly expressed in the heart. These data indicate that HERG
	is LQT2 and suggest a likely cellular mechanism for torsade de pointes.},
  keywords = {herg},
  pmid = {7889573},
  timestamp = {2006.10.05}
}

@article{Curtis2012genomic,
  author = {Curtis, Christina and Shah, Sohrab P. and Chin, Suet-Feung and Turashvili,
	Gulisa and Rueda, Oscar M. and Dunning, Mark J. and Speed, Doug and
	Lynch, Andy G. and Samarajiwa, Shamith and Yuan, Yinyin and Gr{\"a}f,
	Stefan and Ha, Gavin and Haffari, Gholamreza and Bashashati, Ali
	and Russell, Roslin and McKinney, Steven and {METABRIC Group}
	and Langer{\o}d, Anita and Green, Andrew and Provenzano,
	Elena and Wishart, Gordon and Pinder, Sarah and Watson, Peter and
	Markowetz, Florian and Murphy, Leigh and Ellis, Ian and Purushotham,
	Arnie and B{\o}rresen-Dale, Anne-Lise and Brenton, James D. and Tavar{\'e},
	Simon and Caldas, Carlos and Aparicio, Samuel},
  title = {The genomic and transcriptomic architecture of 2,000 breast tumours
	reveals novel subgroups},
  journal = {Nature},
  year = {2012},
  volume = {486},
  pages = {346--352},
  number = {7403},
  month = jun,
  abstract = {The elucidation of breast cancer subgroups and their molecular drivers
	requires integrated views of the genome and transcriptome from representative
	numbers of patients. We present an integrated analysis of copy number
	and gene expression in a discovery and validation set of 997 and
	995 primary breast tumours, respectively, with long-term clinical
	follow-up. Inherited variants (copy number variants and single nucleotide
	polymorphisms) and acquired somatic copy number aberrations (CNAs)
	were associated with expression in $\sim$40\% of genes, with the landscape
	dominated by cis- and trans-acting CNAs. By delineating expression
	outlier genes driven in cis by CNAs, we identified putative cancer
	genes, including deletions in PPP2R2A, MTAP and MAP2K4. Unsupervised
	analysis of paired DNA--RNA profiles revealed novel subgroups with
	distinct clinical outcomes, which reproduced in the validation cohort.
	These include a high-risk, oestrogen-receptor-positive 11q13/14 cis-acting
	subgroup and a favourable prognosis subgroup devoid of CNAs. Trans-acting
	aberration hotspots were found to modulate subgroup-specific gene
	networks, including a TCR deletion-mediated adaptive immune response
	in the `CNA-devoid' subgroup and a basal-specific chromosome 5 deletion-associated
	mitotic network. Our results provide a novel molecular stratification
	of the breast cancer population, derived from the impact of somatic
	CNAs on the transcriptome.},
  doi = {10.1038/nature10983},
  pdf = {../local/Curtis2012genomic.pdf},
  file = {Curtis2012genomic.pdf:Curtis2012genomic.pdf:PDF},
  institution = {Department of Oncology, University of Cambridge, Hills Road, Cambridge
	CB2 2XZ, UK.},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {nature10983},
  pmid = {22522925},
  timestamp = {2012.09.03},
  url = {http://dx.doi.org/10.1038/nature10983}
}

@article{Curtis2005,
  author = {Curtis, Keira R. and Oresic, Matej and Vidal-Puig, Antonio},
  title = {Pathways to the analysis of microarray data},
  journal = {Trends in {B}iotechnology},
  year = {2005},
  volume = {23},
  number = {8},
  pages = {429--435},
  doi = {10.1016/j.tibtech.2005.05.011},
  url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=pubmed\&dopt=Abstract\&list_uids=15950303},
  abstract = {The development of microarray technology allows the simultaneous measurement
	of the expression of many thousands of genes. {T}he information gained
	offers an unprecedented opportunity to fully characterize biological
	processes. {H}owever, this challenge will only be successful if new
	tools for the efficient integration and interpretation of large datasets
	are available. {O}ne of these tools, pathway analysis, involves looking
	for consistent but subtle changes in gene expression by incorporating
	either pathway or functional annotations. {W}e review several methods
	of pathway analysis and compare the performance of three, the binomial
	distribution, z scores, and gene set enrichment analysis, on two
	microarray datasets. {P}athway analysis is a promising tool to identify
	the mechanisms that underlie diseases, adaptive physiological compensatory
	responses and new avenues for investigation.},
  citeulike-article-id = {270629},
  keywords = {networks pathways},
  priority = {4}
}

@article{Cuturi2005Semigroupa,
  author = {Cuturi, M. and Fukumizu, K. and Vert, J.-P.},
  title = {Semigroup kernels on measures},
  journal = {J. Mach. Learn. Res.},
  year = {2005},
  volume = {6},
  pages = {1169--1198},
  pdf = {../local/Cuturi2005Semigroupa.pdf},
  file = {Cuturi2005Semigroupa.pdf:Cuturi2005Semigroupa.pdf:PDF},
  keywords = {kernel-theory},
  owner = {mahe},
  timestamp = {2006.08.09},
  url = {http://jmlr.csail.mit.edu/papers/v6/cuturi05a.html}
}

@inproceedings{Cuturi2007kernel,
  author = {Cuturi, M. and Vert, J. P. and Birkenes, O. and Matsui, T.},
  title = {A kernel for time series based on global alignment},
  booktitle = {Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2007)},
  year = {2007},
  volume = {2},
  pages = {II-413--II-416},
  doi = {10.1109/ICASSP.2007.366260},
  url = {http://dx.doi.org/10.1109/ICASSP.2007.366260},
  pdf = {../local/Cuturi2007kernel.pdf},
  file = {Cuturi2007kernel.pdf:Cuturi2007kernel.pdf:PDF},
  owner = {jp},
  timestamp = {2008.11.01}
}

@inproceedings{Cuturi2005Semigroup,
  author = {Cuturi, M. and Vert, J.-P.},
  title = {Semigroup kernels on finite sets},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2005},
  editor = {Lawrence K. Saul and Yair Weiss and L{\'e}on Bottou},
  volume = {17},
  pages = {329--336},
  publisher = {MIT Press},
  address = {Cambridge, MA},
  pdf = {../local/cuturi_version_finale.pdf},
  file = {cuturi_version_finale.pdf:http\://cg.ensmp.fr/~vert/publi/04nips_marco/cuturi_version_finale.pdf:PDF},
  owner = {vert}
}

@article{Cuturi2005context-tree,
  author = {Cuturi, M. and Vert, J.-P.},
  title = {The context-tree kernel for strings},
  journal = {Neural {N}etwork.},
  year = {2005},
  volume = {18},
  pages = {1111--1123},
  number = {4},
  abstract = {We propose a new kernel for strings which borrows ideas and techniques
	from information theory and data compression. {T}his kernel can be
	used in combination with any kernel method, in particular {S}upport
	{V}ector {M}achines for string classification, with notable applications
	in proteomics. {B}y using a {B}ayesian averaging framework with conjugate
	priors on a class of {M}arkovian models known as probabilistic
	suffix trees or context-trees, we compute the value of this kernel
	in linear time and space while only using the information contained
	in the spectrum of the considered strings. {T}his is ensured through
	an adaptation of a compression method known as the context-tree weighting
	algorithm. {E}ncouraging classification results are reported on a
	standard protein homology detection experiment, showing that the
	context-tree kernel performs well with respect to other state-of-the-art
	methods while using no biological prior knowledge.},
  doi = {10.1016/j.neunet.2005.07.010},
  pdf = {../local/Cuturi2005context-tree.pdf},
  file = {Cuturi2005context-tree.pdf:local/Cuturi2005context-tree.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1016/j.neunet.2005.07.010}
}

@inproceedings{Cuturi2004mutual,
  author = {Cuturi, M. and Vert, J.-P.},
  title = {A mutual information kernel for strings},
  booktitle = {Proceedings of {IJCNN} 2004},
  year = {2004},
  pages = {1904--1910},
  pdf = {../local/ijcnn04.pdf},
  file = {ijcnn04.pdf:http\://cg.ensmp.fr/~vert/publi/04ijcnn/ijcnn04.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@inproceedings{Curturi2009White,
  author = {Cuturi, M. and Vert, J.-P. and D'Aspremont, A.},
  title = {White Functionals for Anomaly Detection in Dynamical Systems},
  booktitle = {Advances in Neural Information Processing Systems 22},
  editor = {Bengio, Y. and Schuurmans, D. and Lafferty, J. and Williams, C. K. I.
	and Culotta, A.},
  year = {2009},
  pages = {432--440},
  url = {http://books.nips.cc/papers/files/nips22/NIPS2009_1195.pdf},
  pdf = {../local/Curturi2009White.pdf},
  file = {Curturi2009White.pdf:Curturi2009White.pdf:PDF},
  owner = {jp},
  timestamp = {2009.12.21}
}

@article{Doennes2002Prediction,
  author = {D{\"o}nnes, Pierre and Elofsson, Arne},
  title = {Prediction of {MHC} class {I} binding peptides, using {SVMHC}},
  journal = {BMC Bioinformatics},
  year = {2002},
  volume = {3},
  pages = {25},
  month = sep,
  abstract = {BACKGROUND: T-cells are key players in regulating a specific immune
	response. Activation of cytotoxic T-cells requires recognition of
	specific peptides bound to Major Histocompatibility Complex (MHC)
	class I molecules. MHC-peptide complexes are potential tools for
	diagnosis and treatment of pathogens and cancer, as well as for the
	development of peptide vaccines. Only one in 100 to 200 potential
	binders actually binds to a certain MHC molecule, therefore a good
	prediction method for MHC class I binding peptides can reduce the
	number of candidate binders that need to be synthesized and tested.
	RESULTS: Here, we present a novel approach, SVMHC, based on support
	vector machines to predict the binding of peptides to MHC class I
	molecules. This method seems to perform slightly better than two
	profile based methods, SYFPEITHI and HLA\_BIND. The implementation
	of SVMHC is quite simple and does not involve any manual steps, therefore
	as more data become available it is trivial to provide prediction
	for more MHC types. SVMHC currently contains prediction for 26 MHC
	class I types from the MHCPEP database or alternatively 6 MHC class
	I types from the higher quality SYFPEITHI database. The prediction
	models for these MHC types are implemented in a public web service
	available at http://www.sbc.su.se/svmhc/. CONCLUSIONS: Prediction
	of MHC class I binding peptides using Support Vector Machines, shows
	high performance and is easy to apply to a large number of MHC class
	I types. As more peptide data are put into MHC databases, SVMHC can
	easily be updated to give prediction for additional MHC class I types.
	We suggest that the number of binding peptides needed for SVM training
	is at least 20 sequences.},
  keywords = {Animals; Artificial Intelligence; Comparative Study; Computational
	Biology; Databases, Protein; Epitopes, T-Lymphocyte; HLA Antigens;
	Histocompatibility Antigens Class I; Humans; Peptides; Predictive
	Value of Tests; Protein Binding; Research Support, Non-U.S. Gov't;
	Sensitivity and Specificity},
  owner = {jacob},
  pmid = {12225620},
  timestamp = {2006.08.30}
}

@article{Dahlquist2002GenMAPP,
  author = {Dahlquist, K. D. and Salomonis, N. and Vranizan, K. and Lawlor, S.
	C. and Conklin, B. R.},
  title = {{GenMAPP}, a new tool for viewing and analyzing microarray data on
	biological pathways},
  journal = {Nat. Genet.},
  year = {2002},
  volume = {31},
  pages = {19--20},
  number = {1},
  month = may,
  doi = {10.1038/ng0502-19},
  pdf = {../local/Dahlquist2002GenMAPP.pdf},
  file = {Dahlquist2002GenMAPP.pdf:Dahlquist2002GenMAPP.pdf:PDF},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {ng0502-19},
  pmid = {11984561},
  timestamp = {2011.10.04},
  url = {http://dx.doi.org/10.1038/ng0502-19}
}

@article{Dai2005Evolving,
  author = {Dai, M. and Wang, P. and Boyd, A. D. and Kostov, G. and Athey, B.
	and Jones, E. G. and Bunney, W. E. and Myers, R. M. and Speed, T. P.
	and Akil, H. and others},
  title = {Evolving gene/transcript definitions significantly alter the interpretation
	of {GeneChip} data},
  journal = {Nucleic Acids Research},
  year = {2005},
  volume = {33},
  pages = {e175},
  number = {20},
  publisher = {Oxford Univ Press}
}

@article{Daily2007Distinct,
  author = {Daily, J. P. and Scanfeld, D. and Pochet, N. and Le Roch, K. and
	Plouffe, D. and Kamal, M. and Sarr, O. and Mboup, S. and Ndir, O.
	and Wypij, D. and Levasseur, K. and Thomas, E. and Tamayo, P. and
	Dong, C. and Zhou, Y. and Lander, E. S. and Ndiaye, D. and Wirth,
	D. and Winzeler, E. A. and Mesirov, J. P. and Regev, A.},
  title = {Distinct physiological states of {Plasmodium} falciparum in malaria-infected
	patients},
  journal = {Nature},
  year = {2007},
  volume = {450},
  pages = {1091--1095},
  number = {7172},
  month = dec,
  abstract = {Infection with the malaria parasite Plasmodium falciparum leads to
	widely different clinical conditions in children, ranging from mild
	flu-like symptoms to coma and death. Despite the immense medical
	implications, the genetic and molecular basis of this diversity remains
	largely unknown. Studies of in vitro gene expression have found few
	transcriptional differences between different parasite strains. Here
	we present a large study of in vivo expression profiles of parasites
	derived directly from blood samples from infected patients. The in
	vivo expression profiles define three distinct transcriptional states.
	The biological basis of these states can be interpreted by comparison
	with an extensive compendium of expression data in the yeast Saccharomyces
	cerevisiae. The three states in vivo closely resemble, first, active
	growth based on glycolytic metabolism, second, a starvation response
	accompanied by metabolism of alternative carbon sources, and third,
	an environmental stress response. The glycolytic state is highly
	similar to the known profile of the ring stage in vitro, but the
	other states have not been observed in vitro. The results reveal
	a previously unknown physiological diversity in the in vivo biology
	of the malaria parasite, in particular evidence for a functional
	mitochondrion in the asexual-stage parasite, and indicate in vivo
	and in vitro studies to determine how this variation may affect disease
	manifestations and treatment.},
  doi = {10.1038/nature06311},
  pdf = {../local/Daily2007Distinct.pdf},
  file = {Daily2007Distinct.pdf:Daily2007Distinct.pdf:PDF},
  institution = {Department of Immunology and Infectious Disease, Harvard School
	of Public Health, 665 Huntington Avenue, Boston, Massachusetts 02115,
	USA.},
  keywords = {plasmodium},
  owner = {jp},
  pii = {nature06311},
  pmid = {18046333},
  timestamp = {2009.04.28},
  url = {http://dx.doi.org/10.1038/nature06311}
}

@article{Dalca2010VARiD,
  author = {Dalca, A. V. and Rumble, S. M. and Levy, S. and Brudno, M.},
  title = {{VARiD}: a variation detection framework for color-space and letter-space
	platforms},
  journal = {Bioinformatics},
  year = {2010},
  volume = {26},
  pages = {i343--i349},
  number = {12},
  month = jun,
  abstract = {High-throughput sequencing (HTS) technologies are transforming the
	study of genomic variation. The various HTS technologies have different
	sequencing biases and error rates, and while most HTS technologies
	sequence the residues of the genome directly, generating base calls
	for each position, the Applied Biosystem's SOLiD platform generates
	dibase-coded (color space) sequences. While combining data from the
	various platforms should increase the accuracy of variation detection,
	to date there are only a few tools that can identify variants from
	color space data, and none that can analyze color space and regular
	(letter space) data together. We present VARiD--a probabilistic method
	for variation detection from both letter- and color-space reads simultaneously.
	VARiD is based on a hidden Markov model and uses the forward-backward
	algorithm to accurately identify heterozygous, homozygous and tri-allelic
	SNPs, as well as micro-indels. Our analysis shows that VARiD performs
	better than the AB SOLiD toolset at detecting variants from color-space
	data alone, and improves the calls dramatically when letter- and
	color-space reads are combined. The toolset is freely available at
	http://compbio.cs.utoronto.ca/varid.},
  doi = {10.1093/bioinformatics/btq184},
  pdf = {../local/Dalca2010VARiD.pdf},
  file = {Dalca2010VARiD.pdf:Dalca2010VARiD.pdf:PDF},
  institution = {Department of Electrical Engineering and Computer Science, Massachusetts
	Institute of Technology, Cambridge, MA, USA. varid@cs.toronto.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btq184},
  pmid = {20529926},
  timestamp = {2011.10.27},
  url = {http://dx.doi.org/10.1093/bioinformatics/btq184}
}

@article{Dalma-Weiszhausz2006Affymetrix,
  author = {Dennise D Dalma-Weiszhausz and Janet Warrington and Eugene Y Tanimoto
	and C. Garrett Miyada},
  title = {The {Affymetrix} {GeneChip} platform: an overview},
  journal = {Methods Enzymol.},
  year = {2006},
  volume = {410},
  pages = {3--28},
  abstract = {The intent of this chapter is to provide the reader with a review
	of GeneChip technology and the complete system it represents, including
	its versatility, components, and the exciting applications that are
	enabled by this platform. The following aspects of the technology
	are reviewed: array design and manufacturing, target preparation,
	instrumentation, data analysis, and both current and future applications.
	There are key differentiators between Affymetrix' GeneChip technology
	and other microarray-based methods. The most distinguishing feature
	of GeneChip microarrays is that their manufacture is directed by
	photochemical synthesis. Because of this manufacturing technology,
	more than a million different probes can be synthesized on an array
	roughly the size of a thumbnail. These numbers allow the inclusion
	of multiple probes to interrogate the same target sequence, providing
	statistical rigor to data interpretation. Over the years the GeneChip
	platform has proven to be a reliable and robust system, enabling
	many new discoveries and breakthroughs to be made by the scientific
	community.},
  doi = {10.1016/S0076-6879(06)10001-4},
  pdf = {../local/Dalma-Weiszhausz2006Affymetrix.pdf},
  file = {Dalma-Weiszhausz2006Affymetrix.pdf:Dalma-Weiszhausz2006Affymetrix.pdf:PDF},
  institution = {Expression Product Development, AFFYMETRIX, INC., Santa Clara, California,
	USA.},
  keywords = {Animals; Humans; Oligonucleotide Array Sequence Analysis, instrumentation/methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {S0076-6879(06)10001-4},
  pmid = {16938544},
  timestamp = {2010.08.01},
  url = {http://dx.doi.org/10.1016/S0076-6879(06)10001-4}
}

@article{Dalton2007Evaluation,
  author = {James A R Dalton and Richard M Jackson},
  title = {An evaluation of automated homology modelling methods at low target
	template sequence similarity},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {1901--1908},
  number = {15},
  month = aug,
  abstract = {MOTIVATION: There are two main areas of difficulty in homology modelling
	that are particularly important when sequence identity between target
	and template falls below 50\%: sequence alignment and loop building.
	These problems become magnified with automatic modelling processes,
	as there is no human input to correct mistakes. As such we have benchmarked
	several stand-alone strategies that could be implemented in a workflow
	for automated high-throughput homology modelling. These include three
	new sequence-structure alignment programs: 3D-Coffee, Staccato and
	SAlign, plus five homology modelling programs and their respective
	loop building methods: Builder, Nest, Modeller, SegMod/ENCAD and
	Swiss-Model. The SABmark database provided 123 targets with at least
	five templates from the same SCOP family and sequence identities
	[...]},
  doi = {10.1093/bioinformatics/btm262},
  url = {http://dx.doi.org/10.1093/bioinformatics/btm262}
}

@article{Darbellay2004Solid,
  author = {Georges A Darbellay and Rebecca Duff and Jean-Marc Vesin and Paul-Andr{\'e}
	Despland and Dirk W Droste and Carlos Molina and Joachim Serena and
	Roman Sztajzel and Patrick Ruchat and Theodoros Karapanayiotides
	and Afksendyios Kalangos and Julien Bogousslavsky and Erich B Ringelstein
	and G{\'e}rald Devuyst},
  title = {Solid or gaseous circulating brain emboli: are they separable by
	transcranial ultrasound?},
  journal = {J {C}ereb {B}lood {F}low {M}etab},
  year = {2004},
  volume = {24},
  pages = {860--868},
  number = {8},
  month = aug,
  abstract = {High-intensity transient signals ({HITS}) detected by transcranial
	{D}oppler ({TCD}) ultrasound may correspond to artifacts or to microembolic
	signals, the latter being either solid or gaseous emboli. {T}he goal
	of this study was to assess what can be achieved with an automatic
	signal processing system for artifact/microembolic signals and solid/gas
	differentiation in different clinical situations. {T}he authors studied
	3,428 {HITS} in vivo in a multicenter study, i.e., 1,608 artifacts
	in healthy subjects, 649 solid emboli in stroke patients with a carotid
	stenosis, and 1,171 gaseous emboli in stroke patients with patent
	foramen ovale. {T}hey worked with the dual-gate {TCD} combined to
	three types of statistical classifiers: binary decision trees ({BDT}),
	artificial neural networks ({ANN}), and support vector machines ({SVM}).
	{T}he sensitivity and specificity to separate artifacts from microembolic
	signals by {BDT} reached was 94\% and 97\%, respectively. {F}or the
	discrimination between solid and gaseous emboli, the classifier achieved
	a sensitivity and specificity of 81\% and 81\% for {BDT}, 84\% and
	84\% for {ANN}, and 86\% and 86\% for {SVM}, respectively. {T}he
	current results for artifact elimination and solid/gas differentiation
	are already useful to extract data for future prospective clinical
	studies.},
  keywords = {Air, Algorithms, Amino Acids, Animals, Artifacts, Atrial, Carotid
	Stenosis, Cerebrovascular Accident, Cerebrovascular Circulation,
	Comparative Study, Cysteine, Decision Trees, Disulfides, Doppler,
	Embolism, Heart Septal Defects, Humans, Intracranial Embolism, Models,
	Molecular, Neural Networks (Computer), Non-U.S. Gov't, Oxidation-Reduction,
	Protein Binding, Protein Folding, Proteins, Research Support, Sensitivity
	and Specificity, Transcranial, Ultrasonography},
  pmid = {15362716}
}

@article{Daubechies1988Orthonormal,
  author = {Daubechies, I.},
  title = {Orthonormal bases of compactly supported wavelets},
  journal = {Comm. Pure Appl. Math.},
  year = {1988},
  volume = {41},
  pages = {909--996},
  pdf = {../local/Daubechies1988Orthonormal.pdf},
  file = {Daubechies1988Orthonormal.pdf:Daubechies1988Orthonormal.pdf:PDF},
  owner = {jp},
  timestamp = {2012.12.13}
}

@article{Daubechies2004iterative,
  author = {Daubechies, Ingrid and Defrise, Michel and De Mol, Christine},
  title = {An iterative thresholding algorithm for linear inverse problems with
	a sparsity constraint},
  journal = {Communications on {P}ure and {A}pplied {M}athematics},
  year = {2004},
  volume = {57},
  pages = {1413--1457},
  number = {11}
}

@inproceedings{Daume2009Bayesian,
  author = {Daum{\'e}, III, H.},
  title = {{Bayesian} Multitask Learning with Latent Hierarchies},
  booktitle = {25th Conference on Uncertainty in Artificial Intelligence},
  year = {2009}
}

@article{David2010Cancer:,
  author = {A. Rosalie David and Michael R Zimmerman},
  title = {Cancer: an old disease, a new disease or something in between?},
  journal = {Nat Rev Cancer},
  year = {2010},
  volume = {10},
  pages = {728--733},
  number = {10},
  month = {Oct},
  abstract = {In industrialized societies, cancer is second only to cardiovascular
	disease as a cause of death. The history of this disorder has the
	potential to improve our understanding of disease prevention, aetiology,
	pathogenesis and treatment. A striking rarity of malignancies in
	ancient physical remains might indicate that cancer was rare in antiquity,
	and so poses questions about the role of carcinogenic environmental
	factors in modern societies. Although the rarity of cancer in antiquity
	remains undisputed, the first published histological diagnosis of
	cancer in an Egyptian mummy demonstrates that new evidence is still
	forthcoming.},
  doi = {10.1038/nrc2914},
  institution = {KNH Centre of Biomedical Egyptology, The University of Manchester,
	3.614 Stopford Building, Oxford Road, Manchester M13 9PT, UK. rosalie.david@manchester.ac.uk},
  keywords = {Animals; Art; Fossils; Hominidae; Humans; Neoplasms; Paleopathology},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {nrc2914},
  pmid = {20814420},
  timestamp = {2011.05.31},
  url = {http://dx.doi.org/10.1038/nrc2914}
}

@article{Davies2001Discrete,
  author = {Davies, E. B. and Gladwell, G. M. L. and Leydold, J. and Stadler,
	P. F.},
  title = {Discrete {N}odal {D}omain {T}heorems},
  journal = {Lin. {A}lg. {A}ppl.},
  year = {2001},
  volume = {336},
  pages = {51--60},
  pdf = {../local/davi01.pdf},
  file = {davi01.pdf:local/davi01.pdf:PDF},
  subject = {net},
  url = {http://citeseer.nj.nec.com/davies01discrete.html}
}

@article{Davies2007Poisson,
  author = {Davies, J. R. and Jackson, R. M. and Mardia, K. V. and Taylor, C. C.},
  title = {The Poisson Index: a new probabilistic model for protein ligand binding
	site similarity.},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {3001--3008},
  number = {22},
  month = {Nov},
  abstract = {MOTIVATION: The large-scale comparison of protein-ligand binding sites
	is problematic, in that measures of structural similarity are difficult
	to quantify and are not easily understood in terms of statistical
	similarity that can ultimately be related to structure and function.
	We present a binding site matching score the Poisson Index (PI) based
	upon a well-defined statistical model. PI requires only the number
	of matching atoms between two sites and the size of the two sites-the
	same information used by the Tanimoto Index (TI), a comparable and
	widely used measure for molecular similarity. We apply PI and TI
	to a previously automatically extracted set of binding sites to determine
	the robustness and usefulness of both scores. RESULTS: We found that
	PI outperforms TI; moreover, site similarity is poorly defined for
	TI at values around the 99.5\% confidence level for which PI is well
	defined. A difference map at this confidence level shows that PI
	gives much more meaningful information than TI. We show individual
	examples where TI fails to distinguish either a false or a true site
	pairing in contrast to PI, which performs much better. TI cannot handle
	large or small sites very well, or the comparison of large and small
	sites, in contrast to PI that is shown to be much more robust. Despite
	the difficulty of determining a biological 'ground truth' for binding
	site similarity we conclude that PI is a suitable measure of binding
	site similarity and could form the basis for a binding site classification
	scheme comparable to existing protein domain classification schema.},
  owner = {vero},
  pmid = {17893083},
  timestamp = {2009.02.04}
}

@article{Davies2005Array,
  author = {Davies, J. J. and Wilson, I. M. and Lam, W. L.},
  title = {Array {CGH} technologies and their applications to cancer genomes},
  journal = {Chromosome Res.},
  year = {2005},
  volume = {13},
  pages = {237--248},
  number = {3},
  abstract = {Cancer is a disease characterized by genomic instability. Comparative
	genomic hybridization (CGH) is a technique designed for detecting
	segmental genomic alterations. Recent advances in array-based CGH
	technology have enabled examination of chromosomal regions in unprecedented
	detail, revolutionizing our understanding of tumour genomes. A number
	of array-based technologies have been developed, aiming to improve
	the resolution of CGH, enabling researchers to refine and define
	regions in the genome that may be causal to cancer, and facilitating
	gene discovery at a rapid rate. This article reviews the various
	array CGH platforms and their use in the study of cancer genomes.
	In addition, the need for high-resolution analysis is discussed as
	well as the importance of studying early-stage disease to discover
	genetic alterations that may be causal to cancer progression and
	aetiology.},
  doi = {10.1007/s10577-005-2168-x},
  pdf = {../local/Davies2005Array.pdf},
  file = {Davies2005Array.pdf:Davies2005Array.pdf:PDF},
  institution = {British Columbia Cancer Research Centre, 675 W 10th Ave., Vancouver
	BC, V5Z 1L3, Canada. jdavies@bccrc.ca},
  keywords = {csbcbook, csbcbook-ch2},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {15868418},
  timestamp = {2009.10.18},
  url = {http://dx.doi.org/10.1007/s10577-005-2168-x}
}

@article{Davies2007Harnessing,
  author = {Matthew N Davies and Darren R Flower},
  title = {Harnessing bioinformatics to discover new vaccines.},
  journal = {Drug Discov Today},
  year = {2007},
  volume = {12},
  pages = {389--395},
  number = {9-10},
  month = {May},
  abstract = {Vaccine design is highly suited to the application of in silico techniques,
	for both the discovery and development of new and existing vaccines.
	Here, we discuss computational contributions to epitope mapping and
	reverse vaccinology, two techniques central to the new discipline
	of immunomics. Also discussed are methods to improve the efficiency
	of vaccination, such as codon optimization and adjuvant discovery
	in addition to the identification of allergenic proteins. We also
	review current software developed to facilitate vaccine design.},
  doi = {10.1016/j.drudis.2007.03.010},
  keywords = {Animals; Computational Biology; Drug Design; Epitope Mapping; Humans;
	Software Design; Vaccination; Vaccines},
  owner = {laurent},
  pii = {S1359-6446(07)00135-3},
  pmid = {17467575},
  timestamp = {2007.08.23},
  url = {http://dx.doi.org/10.1016/j.drudis.2007.03.010}
}

@article{Davis2006Reliable,
  author = {Davis, C. A. and Gerick, F. and Hintermair, V. and Friedel, C. C. and
	Fundel, K. and K{\"u}ffner, R. and Zimmer, R.},
  title = {Reliable gene signatures for microarray classification: assessment
	of stability and performance},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {2356--2363},
  number = {19},
  publisher = {Oxford Univ Press}
}

@article{Davisson1973Universal,
  author = {Davisson, L.},
  title = {Universal noiseless coding},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1973},
  volume = {19},
  pages = {783--795},
  number = {6},
  month = {Nov},
  abstract = { {U}niversal coding is any asymptotically optimum method of block-to-block
	memoryless source coding for sources with unknown parameters. {T}his
	paper considers noiseless coding for such sources, primarily in terms
	of variable-length coding, with performance measured as a function
	of the coding redundancy relative to the per-letter conditional source
	entropy given the unknown parameter. {I}t is found that universal
	(i.e., zero redundancy) coding in a weighted sense is possible if
	and only if the per-letter average mutual information between the
	parameter space and the message space is zero. {U}niversal coding
	is possible in a maximin sense if and only if the channel capacity
	between the two spaces is zero. {U}niversal coding is possible in
	a minimax sense if and only if a probability mass function exists,
	independent of the unknown parameter, for which the relative entropy
	of the known conditional-probability mass-function is zero. {S}everal
	examples are given to illustrate the ideas. {P}articular attention
	is given to sources that are stationary and ergodic for any fixed
	parameter although the whole ensemble is not. {F}or such sources,
	weighted universal codes always exist if the alphabet is finite,
	or more generally if the entropy is finite. {M}inimax universal codes
	result if an additional entropy stability constraint is applied.
	{A} discussion of fixed-rate universal coding is also given briefly
	with performance measured by a probability of error.},
  pdf = {../local/Davisson1973Universal.pdf},
  file = {Davisson1973Universal.pdf:local/Davisson1973Universal.pdf:PDF},
  keywords = {information-theory universal-coding},
  owner = {vert}
}

@article{Bie2007Kernel-based,
  author = {De Bie, T. and Tranchevent, L.-C. and van Oeffelen, L. M. M. and
	Moreau, Y.},
  title = {Kernel-based data fusion for gene prioritization},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {i125--i132},
  number = {13},
  month = {Jul},
  abstract = {MOTIVATION: Hunting disease genes is a problem of primary importance
	in biomedical research. Biologists usually approach this problem
	in two steps: first a set of candidate genes is identified using
	traditional positional cloning or high-throughput genomics techniques;
	second, these genes are further investigated and validated in the
	wet lab, one by one. To speed up discovery and limit the number of
	costly wet lab experiments, biologists must test the candidate genes
	starting with the most probable candidates. So far, biologists have
	relied on literature studies, extensive queries to multiple databases
	and hunches about expected properties of the disease gene to determine
	such an ordering. Recently, we have introduced the data mining tool
	ENDEAVOUR (Aerts et al., 2006), which performs this task automatically
	by relying on different genome-wide data sources, such as Gene Ontology,
	literature, microarray, sequence and more. RESULTS: In this article,
	we present a novel kernel method that operates in the same setting:
	based on a number of different views on a set of training genes,
	a prioritization of test genes is obtained. We furthermore provide
	a thorough learning theoretical analysis of the method's guaranteed
	performance. Finally, we apply the method to the disease data sets
	on which ENDEAVOUR (Aerts et al., 2006) has been benchmarked, and
	report a considerable improvement in empirical performance. AVAILABILITY:
	The MATLAB code used in the empirical results will be made publicly
	available.},
  doi = {10.1093/bioinformatics/btm187},
  pdf = {../local/Bie2007Kernel-based.pdf},
  file = {Bie2007Kernel-based.pdf:Bie2007Kernel-based.pdf:PDF},
  institution = {Department of Engineering Mathematics, University of Bristol, University
	Walk, BS8 1TR, Bristol, UK. tijl.debie@gmail.com},
  owner = {jp},
  pii = {23/13/i125},
  pmid = {17646288},
  timestamp = {2009.03.18},
  url = {http://dx.doi.org/10.1093/bioinformatics/btm187}
}

@book{Degroot1970Optimal,
  title = {Optimal Statistical Decisions},
  publisher = {McGraw-Hill},
  year = {1970},
  author = {De Groot, Morris H.},
  address = {New York},
  pages = {xvi, 489}
}

@article{Deb2003Reliable,
  author = {Kalyanmoy Deb and A. Raji Reddy},
  title = {Reliable classification of two-class cancer data using evolutionary
	algorithms.},
  journal = {Biosystems},
  year = {2003},
  volume = {72},
  pages = {111--129},
  number = {1-2},
  month = {Nov},
  abstract = {In the area of bioinformatics, the identification of gene subsets
	responsible for classifying available disease samples to two or more
	of its variants is an important task. {S}uch problems have been solved
	in the past by means of unsupervised learning methods (hierarchical
	clustering, self-organizing maps, k-mean clustering, etc.) and supervised
	learning methods (weighted voting approach, k-nearest neighbor method,
	support vector machine method, etc.). {S}uch problems can also be
	posed as optimization problems of minimizing gene subset size to
	achieve reliable and accurate classification. {T}he main difficulties
	in solving the resulting optimization problem are the availability
	of only a few samples compared to the number of genes in the samples
	and the exorbitantly large search space of solutions. {A}lthough
	there exist a few applications of evolutionary algorithms ({EA}s)
	for this task, here we treat the problem as a multiobjective optimization
	problem of minimizing the gene subset size and minimizing the number
	of misclassified samples. {M}oreover, for a more reliable classification,
	we consider multiple training sets in evaluating a classifier. {C}ontrary
	to the past studies, the use of a multiobjective {EA} ({NSGA}-{II})
	has enabled us to discover a smaller gene subset size (such as four
	or five) to correctly classify 100\% or near 100\% samples for three
	cancer samples ({L}eukemia, {L}ymphoma, and {C}olon). {W}e have also
	extended the {NSGA}-{II} to obtain multiple non-dominated solutions
	discovering as much as 352 different three-gene combinations providing
	a 100\% correct classification to the {L}eukemia data. {I}n order
	to have further confidence in the identification task, we have also
	introduced a prediction strength threshold for determining a sample's
	belonging to one class or the other. {A}ll simulation results show
	consistent gene subset identifications on three disease samples and
	exhibit the flexibilities and efficacies in using a multiobjective
	{EA} for the gene subset identification task.},
  doi = {10.1016/S0303-2647(03)00138-2},
  pii = {S0303264703001382},
  url = {http://dx.doi.org/10.1016/S0303-2647(03)00138-2}
}

@article{Debnath1991Structure-Activity,
  author = {Debnath, A. K. and {Lopez de Compadre}, R. L. and Debnath, G. and Shusterman,
	A. J. and Hansch, C.},
  title = {Structure-Activity Relationship of Mutagenic Aromatic and Heteroaromatic
	Nitro compounds. Correlation with molecular orbital energies and
	hydrophobicity},
  journal = {J. Med. Chem.},
  year = {1991},
  volume = {34},
  pages = {786--797},
  number = {2},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.07.31}
}

@article{Debouck1999DNA,
  author = {C. Debouck and P. N. Goodfellow},
  title = {{DNA} microarrays in drug discovery and development.},
  journal = {Nat. Genet.},
  year = {1999},
  volume = {21},
  pages = {48--50},
  number = {1 Suppl},
  month = {Jan},
  abstract = {DNA microarrays can be used to measure the expression patterns of
	thousands of genes in parallel, generating clues to gene function
	that can help to identify appropriate targets for therapeutic intervention.
	They can also be used to monitor changes in gene expression in response
	to drug treatments. Here, we discuss the different ways in which
	microarray analysis is likely to affect drug discovery.},
  doi = {10.1038/4475},
  keywords = {Agricultural, Alleles, Alternaria, Amino Acid, Amino Acid Chloromethyl
	Ketones, Amino Acid Sequence, Animal, Animals, Apoptosis, Asthma,
	Bacteria, Base Sequence, Binding Sites, Biotechnology, Blotting,
	Bone Density, Bone Matrix, Bone and Bones, CCR5, Camptothecin, Caspases,
	Cathepsins, Cell Surface, Central America, Chloroplast, Chondrocytes,
	Chromosome Mapping, Chromosomes, Cloning, Cluster Analysis, Collagen,
	Comparative Study, Coumarins, Crops, Crystallography, DNA, DNA Primers,
	Dipeptides, Disease, Disease Models, Drug Design, Drug Evaluation,
	Drug Industry, Enzyme Activation, Enzyme Inhibitors, Escherichia
	coli, Evolution, Exons, Expressed Sequence Tags, Female, Fetus, Fluorescent
	Dyes, Food Microbiology, Founder Effect, GTP-Binding Proteins, Gene
	Expression, Gene Frequency, Gene Library, Genes, Genetic, Genetic
	Predisposition to Disease, Genome, Geography, Growth Plate, Haplotypes,
	Hordeum, Human, Humans, Inclusion Bodies, Injections, Intraperitoneal,
	Introns, Isatin, Knockout, Male, Membrane Proteins, Messenger, Mice,
	Models, Molecular, Molecular Sequence Data, Molecular Structure,
	Mutation, Mycotoxins, Neutrophils, Non-U.S. Gov't, Northern, Oligonucleotide
	Array Sequence Analysis, Osteoarthritis, Osteochondrodysplasias,
	Osteoclasts, Osteopetrosis, Pair 15, Phaseolus, Polymorphism, Preclinical,
	Pregnancy, Promoter Regions (Genetics), Protein Precursors, Proteomics,
	RNA, Receptors, Recombinant Fusion Proteins, Recombinant Proteins,
	Research Support, Restriction Fragment Length, Ribosomal Proteins,
	Sequence Alignment, Sequence Analysis, Sequence Homology, South America,
	Species Specificity, Splenomegaly, Sulfonamides, Synteny, Tissue
	Distribution, Transcription, Trichothecenes, X-Ray, 9915501},
  owner = {piedro},
  pmid = {9915501},
  timestamp = {2006.08.11},
  url = {http://dx.doi.org/10.1038/4475}
}

@inproceedings{Decatur1997PAC,
  author = {Decatur, S. E.},
  title = {{PAC} Learning with Constant-Partition Classification Noise and Applications
	to Decision Tree Induction},
  booktitle = {Proceedings of the Fourteenth International Conference on Machine
	Learning},
  year = {1997},
  series = {ICML '97},
  pages = {83--91},
  address = {San Francisco, CA, USA},
  publisher = {Morgan Kaufmann Publishers Inc.},
  acmid = {657273},
  isbn = {1-55860-486-3},
  numpages = {9},
  owner = {mordelet},
  timestamp = {2010.12.08},
  url = {http://portal.acm.org/citation.cfm?id=645526.657273}
}

@article{Declercq2009RIP,
  author = {Declercq, W. and Vanden Berghe, T. and Vandenabeele, P.},
  title = {{RIP} Kinases at the Crossroads of Cell Death and Survival},
  journal = {Cell},
  year = {2009},
  volume = {138},
  pages = {229--232},
  number = {2},
  doi = {10.1016/j.cell.2009.07.006},
  pdf = {../local/Declercq2009RIP.pdf},
  file = {Declercq2009RIP.pdf:Declercq2009RIP.pdf:PDF},
  keywords = {csbcbook},
  url = {http://dx.doi.org/10.1016/j.cell.2009.07.006}
}

@article{Decoste2002Training,
  author = {Decoste, D. and Sch{\"o}lkopf, B.},
  title = {Training Invariant Support Vector Machines},
  journal = {Mach. Learn.},
  year = {2002},
  volume = {46},
  pages = {161--190},
  number = {1-3},
  pdf = {../local/Decoste2002Training.pdf},
  file = {Decoste2002Training.pdf:Decoste2002Training.pdf:PDF},
  owner = {jp},
  timestamp = {2008.12.22}
}

@article{Degroeve2002Feature,
  author = {Degroeve, S. and De Baets, B. and {Van de Peer}, Y. and Rouz{\'e}, P.},
  title = {Feature subset selection for splice site prediction},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {S75--S83},
  number = {Suppl. 2},
  abstract = {Motivation: {T}he large amount of available annotated {A}rabidopsis
	thaliana sequences allows the induction of splice site prediction
	models with supervised learning algorithms (see {H}aussler (1998)
	for a review and references). {T}hese algorithms need information
	sources or features from which the models can be computed. {F}or
	splice site prediction, the features we consider in this study are
	the presence or absence of certain nucleotides in close proximity
	to the splice site. {S}ince it is not known how many and which nucleotides
	are relevant for splice site prediction, the set of features is chosen
	large enough such that the probability that all relevant information
	sources are in the set is very high. {U}sing only those features
	that are relevant for constructing a splice site prediction system
	might improve the system and might also provide us with useful biological
	knowledge. {U}sing fewer features will of course also improve the
	prediction speed of the system. {R}esults: {A} wrapper-based feature
	subset selection algorithm using a support vector machine or a naive
	{B}ayes prediction method was evaluated against the traditional method
	for selecting features relevant for splice site prediction. {O}ur
	results show that this wrapper approach selects features that improve
	the performance against the use of all features and against the use
	of the features selected by the traditional method. {A}vailability:
	{T}he data and additional interactive graphs on the selected feature
	subsets are available at http://www.psb.rug.ac.be/gps {C}ontact:
	svgro@gengenp.rug.ac.be yvdp@gengenp.rug.ac.be},
  pdf = {../local/Degroeve2002Feature.pdf},
  file = {Degroeve2002Feature.pdf:local/Degroeve2002Feature.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/18/suppl_2/S75}
}

@article{Degroeve2005SpliceMachine,
  author = {Degroeve, S. and Saeys, Y. and De Baets, B. and Rouz{\'e}, P. and
	{Van de Peer}, Y.},
  title = {{{S}plice{M}achine}: predicting splice sites from high-dimensional
	local context representations},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1332--1338},
  abstract = {Motivation: {I}n this age of complete genome sequencing, finding the
	location and structure of genes is crucial for further molecular
	research. {T}he accurate prediction of intron boundaries largely
	facilitates the correct prediction of gene structure in nuclear genomes.
	{M}any tools for localizing these boundaries on {DNA} sequences have
	been developed and are available to researchers through the internet.
	{N}evertheless, these tools still make many false positive predictions. {R}esults:
	{T}his manuscript presents a novel publicly available splice site
	prediction tool named {S}plice{M}achine that (i) shows state-of-the-art
	prediction performance on {A}rabidopsis thaliana and human sequences,
	(ii) performs a computationally fast annotation, and (iii) can be
	trained by the user on its own data. {A}vailability: {R}esults, figures
	and software are available at http://bioinformatics.psb.ugent.be/supplementary_data/.},
  doi = {10.1093/bioinformatics/bti166},
  pdf = {../local/Degroeve2005SpliceMachine.pdf},
  file = {Degroeve2005SpliceMachine.pdf:local/Degroeve2005SpliceMachine.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/bti166v1}
}

@article{Dekker2002Capturing,
  author = {Job Dekker and Karsten Rippe and Martijn Dekker and Nancy Kleckner},
  title = {Capturing chromosome conformation.},
  journal = {Science},
  year = {2002},
  volume = {295},
  pages = {1306--1311},
  number = {5558},
  month = {Feb},
  abstract = {We describe an approach to detect the frequency of interaction between
	any two genomic loci. Generation of a matrix of interaction frequencies
	between sites on the same or different chromosomes reveals their
	relative spatial disposition and provides information about the physical
	properties of the chromatin fiber. This methodology can be applied
	to the spatial organization of entire genomes in organisms from bacteria
	to human. Using the yeast Saccharomyces cerevisiae, we could confirm
	known qualitative features of chromosome organization within the
	nucleus and dynamic changes in that organization during meiosis.
	We also analyzed yeast chromosome III at the G1 stage of the cell
	cycle. We found that chromatin is highly flexible throughout. Furthermore,
	functionally distinct AT- and GC-rich domains were found to exhibit
	different conformations, and a population-average 3D model of chromosome
	III could be determined. Chromosome III emerges as a contorted ring.},
  doi = {10.1126/science.1067799},
  institution = {Department of Molecular and Cellular Biology, Harvard University,
	Cambridge, MA 02138, USA. jdekker@fas.harvard.edu},
  keywords = {AT Rich Sequence; Cell Fractionation; Cell Nucleus; Centromere; Chromatin;
	Chromosomes, Fungal; Cross-Linking Reagents; Deoxyribonuclease EcoRI;
	Formaldehyde; G1 Phase; GC Rich Sequence; Genome, Fungal; Mathematics;
	Meiosis; Mitosis; Polymerase Chain Reaction; Protein Conformation;
	Saccharomyces cerevisiae; Telomere},
  owner = {phupe},
  pii = {295/5558/1306},
  pmid = {11847345},
  timestamp = {2010.08.11},
  url = {http://dx.doi.org/10.1126/science.1067799}
}

@article{Demvsar2006Statistical,
  author = {Dem{\v{s}}ar, J.},
  title = {Statistical {C}omparisons of {C}lassifiers over {M}ultiple {D}ata
	{S}ets},
  journal = {J. {M}ach. {L}earn. {R}es.},
  year = {2006},
  volume = {7},
  pages = {1--30},
  owner = {mahe},
  timestamp = {2006.08.09},
  url = {http://jmlr.csail.mit.edu/papers/v7/demsar06a.html}
}

@article{Dempster1972Covariance,
  author = {Dempster, A. P.},
  title = {Covariance selection},
  journal = {Biometrics},
  year = {1972},
  volume = {28},
  pages = {157--175},
  owner = {jp},
  timestamp = {2012.03.20},
  url = {http://www.jstor.org/stable/2528966}
}

@article{Deng2001Unsupervised,
  author = {Deng, Y. and Manjunath, B. S.},
  title = {Unsupervised segmentation of color-texture regions in images and
	video},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {2001},
  volume = {23},
  pages = {800--810},
  number = {8},
  month = {Aug},
  abstract = {A method for unsupervised segmentation of color-texture regions in
	images and video is presented. This method, which we refer to as
	JSEG, consists of two independent steps: color quantization and spatial
	segmentation. In the first step, colors in the image are quantized
	to several representative classes that can be used to differentiate
	regions in the image. The image pixels are then replaced by their
	corresponding color class labels, thus forming a class-map of the
	image. The focus of this work is on spatial segmentation, where a
	criterion for "good" segmentation using the class-map is proposed.
	Applying the criterion to local windows in the class-map results
	in the "J-image," in which high and low values correspond to possible
	boundaries and interiors of color-texture regions. A region growing
	method is then used to segment the image based on the multiscale
	J-images. A similar approach is applied to video sequences. An additional
	region tracking scheme is embedded into the region growing process
	to achieve consistent segmentation and tracking results, even for
	scenes with nonrigid object motion. Experiments show the robustness
	of the JSEG algorithm on real images and video.},
  doi = {10.1109/34.946985},
  pdf = {../local/Deng2001Unsupervised.pdf},
  file = {Deng2001Unsupervised.pdf:local/Deng2001Unsupervised.pdf:PDF},
  timestamp = {2008.07.29},
  url = {http://dx.doi.org/10.1109/34.946985}
}

@inproceedings{Denis1998PAC,
  author = {Denis, F.},
  title = {{PAC} Learning from Positive Statistical Queries},
  booktitle = {Proceedings of the 9th International Conference on Algorithmic Learning
	Theory},
  year = {1998},
  series = {ALT '98},
  pages = {112--126},
  address = {London, UK},
  publisher = {Springer-Verlag},
  acmid = {757188},
  isbn = {3-540-65013-X},
  numpages = {15},
  owner = {mordelet},
  timestamp = {2010.11.03},
  url = {http://portal.acm.org/citation.cfm?id=647716.757188}
}

@inproceedings{Denis2003Text,
  author = {Denis, F. and Gilleron, R. and Laurent, A. and Tommasi, M.},
  title = {Text Classification and Co-Training from Positive and Unlabeled Examples},
  booktitle = {Proceedings of the ICML 2003 Workshop: The Continuum from Labeled
	to Unlabeled Data},
  year = {2003},
  owner = {fantine},
  timestamp = {2009.06.09},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.58.7101}
}

@article{Denis2005Learning,
  author = {Denis, F. and Gilleron, R. and Letouzey, F.},
  title = {Learning from positive and unlabeled examples},
  journal = {Theor. Comput. Sci.},
  year = {2005},
  volume = {348},
  pages = {70--83},
  number = {1},
  abstract = {In many machine learning settings, labeled examples are difficult
	to collect while unlabeled data are abundant. Also, for some binary
	classification problems, positive examples which are elements of
	the target concept are available. Can these additional data be used
	to improve accuracy of supervised learning algorithms? We investigate
	in this paper the design of learning algorithms from positive and
	unlabeled data only. Many machine learning and data mining algorithms,
	such as decision tree induction algorithms and naive Bayes algorithms,
	use examples only to evaluate statistical queries (SQ-like algorithms).
	Kearns designed the statistical query learning model in order to
	describe these algorithms. Here, we design an algorithm scheme which
	transforms any SQ-like algorithm into an algorithm based on positive
	statistical queries (estimate for probabilities over the set of positive
	instances) and instance statistical queries (estimate for probabilities
	over the instance space). We prove that any class learnable in the
	statistical query learning model is learnable from positive statistical
	queries and instance statistical queries only if a lower bound on
	the weight of any target concept f can be estimated in polynomial
	time. Then, we design a decision tree induction algorithm POSC4.5,
	based on C4.5, that uses only positive and unlabeled examples and
	we give experimental results for this algorithm. In the case of imbalanced
	classes in the sense that one of the two classes (say the positive
	class) is heavily underrepresented compared to the other class, the
	learning problem remains open. This problem is challenging because
	it is encountered in many real-world applications.},
  doi = {10.1016/j.tcs.2005.09.007},
  keywords = {PUlearning},
  owner = {jp},
  timestamp = {2010.01.29}
}

@inproceedings{Denis2002Text,
  author = {Denis, F. and Gilleron, R. and Tommasi, M.},
  title = {Text Classification from Positive and Unlabeled Examples},
  booktitle = {Proc. of the 9th International Conference on Information Processing
	and Management of Uncertainty in Knowledge-Based Systems},
  year = {2002},
  owner = {fantine},
  timestamp = {2009.07.02},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.8.5291}
}

@article{Dennis2005Hunting,
  author = {Jayne L Dennis and Karin A Oien},
  title = {Hunting the primary: novel strategies for defining the origin of
	tumours.},
  journal = {J {P}athol},
  year = {2005},
  volume = {205},
  pages = {236--247},
  number = {2},
  month = {Jan},
  abstract = {In 1995, two methods of genome-wide expression profiling were first
	described: expression microarrays and serial analysis of gene expression
	({SAGE}). {I}n the subsequent 10 years, many hundreds of papers have
	been published describing the application of these technologies to
	a wide spectrum of biological and clinical questions. {C}ommon to
	all of this research is a basic process of data gathering and analysis.
	{T}he techniques and statistical and bio-informatic tools involved
	in this process are reviewed. {T}he processes of class discovery
	(using clustering and self-organizing maps), class prediction (weighted
	voting, k nearest neighbour, support vector machines, and artificial
	neural networks), target identification (fold change, discriminant
	analysis, and principal component analysis), and target validation
	({RT}-{PCR} and tissue microarrays) are described. {F}inally, the
	diagnostic problem of adenocarcinomas that present as metastases
	of unknown origin is reviewed, and it is demonstrated how integration
	of expression profiling techniques promises to throw new light on
	this important clinical challenge.},
  doi = {10.1002/path.1702},
  pdf = {../local/Dennis2005Hunting.pdf},
  file = {Dennis2005Hunting.pdf:local/Dennis2005Hunting.pdf:PDF},
  url = {http://dx.doi.org/10.1002/path.1702}
}

@inproceedings{Deodhar2007framework,
  author = {Deodhar, Meghana and Ghosh, Joydeep},
  title = {A framework for simultaneous co-clustering and learning from complex
	data},
  booktitle = {KDD '07: Proceedings of the 13th ACM SIGKDD international conference
	on Knowledge discovery and data mining},
  year = {2007},
  pages = {250--259},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1281192.1281222},
  isbn = {978-1-59593-609-7},
  location = {San Jose, California, USA}
}

@article{DeRisi1997Exploring,
  author  = {DeRisi, J. L. and Iyer, V. R. and Brown, P. O.},
  title   = {Exploring the metabolic and genetic control of gene expression on
	a genomic scale},
  journal = {Science},
  volume  = {278},
  number  = {5338},
  pages   = {680--686},
  year    = {1997},
  url     = {http://www.sciencemag.org/cgi/reprint/278/5338/680.pdf},
  pdf     = {../local/deri97.pdf},
  file    = {deri97.pdf:local/deri97.pdf:PDF},
  subject = {microarray}
}

@article{Deshpande2006Targeting,
  author = {Deshpande, D. A. and Penn, R. B.},
  title = {Targeting {G} protein-coupled receptor signaling in asthma.},
  journal = {Cell. Signal.},
  year = {2006},
  volume = {18},
  pages = {2105--2120},
  number = {12},
  month = dec,
  abstract = {The complex disease asthma, an obstructive lung disease in which excessive
	airway smooth muscle (ASM) contraction as well as increased ASM mass
	reduces airway lumen size and limits airflow, can be viewed as a
	consequence of aberrant airway G protein-coupled receptor (GPCR)
	function. The central role of GPCRs in determining airway resistance
	is underscored by the fact that almost every drug used in the treatment
	of asthma directly or indirectly targets either GPCR-ligand interaction,
	GPCR signaling, or processes that produce GPCR agonists. Although
	many airway cells contribute to the regulation of airway resistance
	and architecture, ASM properties and functions have the greatest
	impact on airway homeostasis. The theme of this review is that GPCR-mediated
	regulation of ASM tone and ASM growth is a major determinant of the
	acute and chronic features of asthma, and multiple strategies targeting
	GPCR signaling may be employed to prevent or manage these features.},
  doi = {10.1016/j.cellsig.2006.04.008},
  owner = {laurent},
  pii = {S0898-6568(06)00130-6},
  pmid = {16828259},
  timestamp = {2008.01.16},
  url = {http://dx.doi.org/10.1016/j.cellsig.2006.04.008}
}

@inproceedings{Deshpande2002Automated,
  author = {Deshpande, M. and Karypis, G.},
  title = {Automated {A}pproaches for {C}lassifying {S}tructures},
  booktitle = {Proceedings of the 2nd Workshop on Data Mining in Bioinformatics
	(BIOKDD '02)},
  year = {2002},
  owner = {mahe},
  timestamp = {2006.09.26}
}

@inproceedings{Deshpande2002Evaluation,
  author    = {Deshpande, M. and Karypis, G.},
  title     = {Evaluation of {T}echniques for {C}lassifying {B}iological {S}equences},
  booktitle = {P{AKDD} '02: {P}roceedings of the 6th {P}acific-{A}sia {C}onference
	on {A}dvances in {K}nowledge {D}iscovery and {D}ata {M}ining},
  pages     = {417--431},
  publisher = {Springer Verlag},
  year      = {2002},
  keywords  = {biosvm},
  abstract  = {In recent years we have witnessed an exponential increase in the amount
	of biological information, either {DNA} or protein sequences, that
	has become available in public databases. {T}his has been followed
	by an increased interest in developing computational techniques to
	automatically classify these large volumes of sequence data into
	various categories corresponding to either their role in the chromosomes,
	their structure, and/or their function. {I}n this paper we evaluate
	some of the widely-used sequence classification algorithms and develop
	a framework for modeling sequences in a fashion so that traditional
	machine learning algorithms, such as support vector machines, can
	be applied easily. {O}ur detailed experimental evaluation shows that
	the {SVM}-based approaches are able to achieve higher classification
	accuracy compared to the more traditional sequence classification
	algorithms such as {M}arkov model based techniques and {K}-nearest
	neighbor based approaches.},
  pdf       = {../local/Deshpande2002Evaluation.pdf},
  file      = {Deshpande2002Evaluation.pdf:local/Deshpande2002Evaluation.pdf:PDF},
  owner     = {jeanphilippevert}
}

@article{Deshpande2005Frequent,
  author = {Deshpande, M. and Kuramochi, M. and Wale, N. and Karypis, G.},
  title = {Frequent {S}ubstructure-{B}ased {A}pproaches for {C}lassifying {C}hemical
	{C}ompounds},
  journal = {IEEE T. Knowl. Data. En.},
  year = {2005},
  volume = {17},
  pages = {1036--1050},
  number = {8},
  month = aug,
  doi = {10.1109/TKDE.2005.127},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.04}
}

@article{Desmedt2008Biological,
  author = {Desmedt, C. and Haibe-Kains, B. and Wirapati, P. and Buyse, M. and
	Larsimont, D. and Bontempi, G. and Delorenzi, M. and Piccart, M.
	and Sotiriou, C.},
  title = {Biological processes associated with breast cancer clinical outcome
	depend on the molecular subtypes},
  journal = {Clin. Cancer Res.},
  year = {2008},
  volume = {14},
  pages = {5158--5165},
  number = {16},
  month = aug,
  abstract = {Recently, several prognostic gene expression signatures have been
	identified; however, their performance has never been evaluated according
	to the previously described molecular subtypes based on the estrogen
	receptor (ER) and human epidermal growth factor receptor 2 (HER2),
	and their biological meaning has remained unclear. Here we aimed
	to perform a comprehensive meta-analysis integrating both clinicopathologic
	and gene expression data, focusing on the main molecular subtypes. We
	developed gene expression modules related to key biological processes
	in breast cancer such as tumor invasion, immune response, angiogenesis,
	apoptosis, proliferation, and ER and HER2 signaling, and then analyzed
	these modules together with clinical variables and several prognostic
	signatures on publicly available microarray studies (>2,100 patients). Multivariate
	analysis showed that in the ER+/HER2- subgroup, only the proliferation
	module and the histologic grade were significantly associated with
	clinical outcome. In the ER-/HER2- subgroup, only the immune response
	module was associated with prognosis, whereas in the HER2+ tumors,
	the tumor invasion and immune response modules displayed significant
	association with survival. Proliferation was identified as the most
	important component of several prognostic signatures, and their performance
	was limited to the ER+/HER2- subgroup. Although proliferation is the
	strongest parameter predicting clinical outcome in the ER+/HER2-
	subtype and the common denominator of most prognostic gene signatures,
	immune response and tumor invasion seem to be the main molecular
	processes associated with prognosis in the ER-/HER2- and HER2+ subgroups,
	respectively. These findings may help to define new clinicogenomic
	models and to identify new therapeutic strategies in the specific
	molecular subgroups.},
  doi = {10.1158/1078-0432.CCR-07-4756},
  pdf = {../local/Desmedt2008Biological.pdf},
  file = {Desmedt2008Biological.pdf:Desmedt2008Biological.pdf:PDF},
  institution = {Medical Oncology Department, Jules Bordet Institute.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {14/16/5158},
  pmid = {18698033},
  timestamp = {2011.06.01},
  url = {http://dx.doi.org/10.1158/1078-0432.CCR-07-4756}
}

@article{Desobry2005online,
  author    = {Desobry, F. and Davy, M. and Doncarli, C.},
  title     = {An online kernel change detection algorithm},
  journal   = {IEEE T. Signal. Proces.},
  volume    = {53},
  number    = {8},
  pages     = {2961--2974},
  year      = {2005},
  doi       = {10.1109/TSP.2005.851098},
  url       = {http://dx.doi.org/10.1109/TSP.2005.851098},
  pdf       = {../local/Desobry2005online.pdf},
  file      = {Desobry2005online.pdf:Desobry2005online.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.04.13}
}

@article{Deupi2007Structural,
  author = {Deupi, X. and D{\"o}lker, N. and L{\'o}pez-Rodr{\'\i}guez, M. L. and Campillo,
	M. and Ballesteros, J. A. and Pardo, L.},
  title = {Structural models of class {A} {G} protein-coupled receptors as a tool
	for drug design: insights on transmembrane bundle plasticity.},
  journal = {Curr. Top. Med. Chem.},
  year = {2007},
  volume = {7},
  pages = {991--998},
  number = {10},
  abstract = {G protein-coupled receptors (GPCRs) interact with an extraordinary
	diversity of ligands by means of their extracellular domains and/or
	the extracellular part of the transmembrane (TM) segments. Each receptor
	subfamily has developed specific sequence motifs to adjust the structural
	characteristics of its cognate ligands to a common set of conformational
	rearrangements of the TM segments near the G protein binding domains
	during the activation process. Thus, GPCRs have fulfilled this adaptation
	during their evolution by customizing a preserved 7TM scaffold through
	conformational plasticity. We use this term to describe the structural
	differences near the binding site crevices among different receptor
	subfamilies, responsible for the selective recognition of diverse
	ligands among different receptor subfamilies. By comparing the sequence
	of rhodopsin at specific key regions of the TM bundle with the sequences
	of other GPCRs we have found that the extracellular region of TMs
	2 and 3 provides a remarkable example of conformational plasticity
	within Class A GPCRs. Thus, rhodopsin-based molecular models need
	to include the plasticity of the binding sites among GPCR families,
	since the "quality" of these homology models is intimately linked
	with the success in the processes of rational drug-design or virtual
	screening of chemical databases.},
  keywords = {chemogenomics},
  owner = {laurent},
  pmid = {17508932},
  timestamp = {2008.07.21}
}

@book{Devillers1996Neural,
  title = {Neural Networks in {QSAR} and Drug Design},
  publisher = {Academic Press},
  year = {1996},
  author = {Devillers, J.},
  address = {London},
  owner = {mahe},
  timestamp = {2006.09.06}
}

@book{DeVore1993Constructive,
  author    = {DeVore, R. A. and Lorentz, G. G.},
  title     = {Constructive {A}pproximation},
  series    = {Springer Grundlehren der Mathematischen Wissenschaften},
  publisher = {Springer Verlag},
  year      = {1993}
}

@article{Devos2004Classification,
  author = {Devos, A. and Lukas, L. and Suykens, J. A. K. and Vanhamme, L. and
	Tate, A. R. and Howe, F. A. and Maj{\'o}s, C. and Moreno-Torres, A.
	and van der Graaf, M. and Ar{\'u}s, C. and Van Huffel, S.},
  title = {Classification of brain tumours using short echo time 1{H} {MR} spectra.},
  journal = {J Magn Reson},
  year = {2004},
  volume = {170},
  pages = {164--175},
  number = {1},
  month = sep,
  abstract = {The purpose was to objectively compare the application of several
	techniques and the use of several input features for brain tumour
	classification using {M}agnetic {R}esonance {S}pectroscopy ({MRS}).
	{S}hort echo time 1{H} {MRS} signals from patients with glioblastomas
	(n = 87), meningiomas (n = 57), metastases (n = 39), and astrocytomas
	grade {II} (n = 22) were provided by six centres in the {E}uropean
	{U}nion funded {INTERPRET} project. {L}inear discriminant analysis,
	least squares support vector machines ({LS}-{SVM}) with a linear
	kernel and {LS}-{SVM} with radial basis function kernel were applied
	and evaluated over 100 stratified random splittings of the dataset
	into training and test sets. {T}he area under the receiver operating
	characteristic curve ({AUC}) was used to measure the performance
	of binary classifiers, while the percentage of correct classifications
	was used to evaluate the multiclass classifiers. {T}he influence
	of several factors on the classification performance has been tested:
	{L}2- vs. water normalization, magnitude vs. real spectra and baseline
	correction. {T}he effect of input feature reduction was also investigated
	by using only the selected frequency regions containing the most
	discriminatory information, and peak integrated values. {U}sing {L}2-normalized
	complete spectra the automated binary classifiers reached a mean
	test {AUC} of more than 0.95, except for glioblastomas vs. metastases.
	{S}imilar results were obtained for all classification techniques
	and input features except for water normalized spectra, where classification
	performance was lower. {T}his indicates that data acquisition and
	processing can be simplified for classification purposes, excluding
	the need for separate water signal acquisition, baseline correction
	or phasing.},
  doi = {10.1016/j.jmr.2004.06.010},
  pdf = {../local/Devos2004Classification.pdf},
  file = {Devos2004Classification.pdf:local/Devos2004Classification.pdf:PDF},
  pii = {S1090-7807(04)00181-8},
  url = {http://dx.doi.org/10.1016/j.jmr.2004.06.010}
}

@article{Devos2005use,
  author = {Devos, A. and Simonetti, A. W. and van der Graaf, M. and Lukas, L.
	and Suykens, J. A. K. and Vanhamme, L. and Buydens, L. M. C. and
	Heerschap, A. and Van Huffel, S.},
  title = {The use of multivariate {MR} imaging intensities versus metabolic
	data from {MR} spectroscopic imaging for brain tumour classification.},
  journal = {J Magn Reson},
  year = {2005},
  volume = {173},
  pages = {218--228},
  number = {2},
  month = apr,
  abstract = {This study investigated the value of information from both magnetic
	resonance imaging and magnetic resonance spectroscopic imaging ({MRSI})
	to automated discrimination of brain tumours. {T}he influence of
	imaging intensities and metabolic data was tested by comparing the
	use of {MR} spectra from {MRSI}, {MR} imaging intensities, peak integration
	values obtained from the {MR} spectra and a combination of the latter
	two. {T}hree classification techniques were objectively compared:
	linear discriminant analysis, least squares support vector machines
	({LS}-{SVM}) with a linear kernel as linear techniques and {LS}-{SVM}
	with radial basis function kernel as a nonlinear technique. {C}lassifiers
	were evaluated over 100 stratified random splittings of the dataset
	into training and test sets. {T}he area under the receiver operating
	characteristic ({ROC}) curve ({AUC}) was used as a global performance
	measure on test data. {I}n general, all techniques obtained a high
	performance when using peak integration values with or without {MR}
	imaging intensities. {F}or example for low- versus high-grade tumours,
	low- versus high-grade gliomas and gliomas versus meningiomas, the
	mean test {AUC} was higher than 0.91, 0.94, and 0.99, respectively,
	when both {MR} imaging intensities and peak integration values were
	used. {T}he use of metabolic data from {MRSI} significantly improved
	automated classification of brain tumour types compared to the use
	of {MR} imaging intensities solely.},
  doi = {10.1016/j.jmr.2004.12.007},
  pdf = {../local/Devos2005use.pdf},
  file = {Devos2005use.pdf:local/Devos2005use.pdf:PDF},
  pii = {S1090-7807(04)00415-X},
  url = {http://dx.doi.org/10.1016/j.jmr.2004.12.007}
}

@article{Devroye1988Automatic,
  author = {Devroye, L.},
  title = {Automatic pattern recognition: a study of the probability of error},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {1988},
  volume = {10},
  pages = {530--543},
  number = {4},
  month = jul,
  abstract = {A test sequence is used to select the best rule from a class of discrimination
	rules defined in terms of the training sequence. {T}he {V}apnik-{C}hervonenkis
	and related inequalities are used to obtain distribution-free bounds
	on the difference between the probability of error of the selected
	rule and the probability of error of the best rule in the given class.
	{T}he bounds are used to prove the consistency and asymptotic optimality
	for several popular classes, including linear discriminators, nearest-neighbor
	rules, kernel-based rules, histogram rules, binary tree classifiers,
	and {F}ourier series classifiers. {I}n particular, the method can
	be used to choose the smoothing parameter in kernel-based rules,
	to choose k in the k-nearest neighbor rule, and to choose between
	parametric and nonparametric rules.},
  pdf = {../local/Devroye1988Automatic.pdf},
  file = {Devroye1988Automatic.pdf:local/Devroye1988Automatic.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@book{Devroye1996Probabilistic,
  author    = {Devroye, L. and Gy{\"o}rfi, L. and Lugosi, G.},
  title     = {A {P}robabilistic {T}heory of {P}attern {R}ecognition},
  series    = {Applications of Mathematics},
  volume    = {31},
  publisher = {Springer},
  year      = {1996}
}

@book{Devroye2000Combinatorial,
  title = {Combinatorial {M}ethods in {D}ensity {E}stimation},
  publisher = {Springer},
  year = {2000},
  author = {Devroye, L. and Lugosi, G.},
  series = {Springer Series in Statistics}
}

@article{Dhingra2005Substantial,
  author = {Dhingra, Vikas and Gupta, Mukta and Andacht, Tracy and Fu, Zhen F.},
  title = {New frontiers in proteomics research: a perspective.},
  journal = {Int. J. Pharm.},
  year = {2005},
  volume = {299},
  pages = {1--18},
  number = {1-2},
  month = aug,
  abstract = {Substantial advances have been made in the fundamental understanding
	of human biology, ranging from DNA structure to identification of
	diseases associated with genetic abnormalities. Genome sequence information
	is becoming available in unprecedented amounts. The absence of a
	direct functional correlation between gene transcripts and their
	corresponding proteins, however, represents a significant roadblock
	for improving the efficiency of biological discoveries. The success
	of proteomics depends on the ability to identify and analyze protein
	products in a cell or tissue and, this is reliant on the application
	of several key technologies. Proteomics is in its exponential growth
	phase. Two-dimensional electrophoresis complemented with mass spectrometry
	provides a global view of the state of the proteins from the sample.
	Proteins identification is a requirement to understand their functional
	diversity. Subtle difference in protein structure and function can
	contribute to complexity and diversity of life. This review focuses
	on the progress and the applications of proteomics science with special
	reference to integration of the evolving technologies involved to
	address biological questions.},
  doi = {10.1016/j.ijpharm.2005.04.010},
  institution = {Department of Pathology, University of Georgia, Athens, GA 30602,
	USA. vdhingra@vet.uga.edu},
  keywords = {Computational Biology; Electrophoresis, Gel, Two-Dimensional; Humans;
	Peptide Mapping; Protein Interaction Mapping; Proteomics; Spectrometry,
	Mass, Matrix-Assisted Laser Desorption-Ionization},
  owner = {ljacob},
  pii = {S0378-5173(05)00226-7},
  pmid = {15979831},
  timestamp = {2009.09.14},
  url = {http://dx.doi.org/10.1016/j.ijpharm.2005.04.010}
}

@article{Dias2008Molecular,
  author = {Dias, Raquel and de Azevedo, Walter Filgueira},
  title = {Molecular docking algorithms.},
  journal = {Curr. Drug Targets},
  year = {2008},
  volume = {9},
  pages = {1040--1047},
  number = {12},
  month = dec,
  abstract = {By means of virtual screening of small molecules databases it is possible
	to identify new potential inhibitors against a target of interest.
	Molecular docking is a computer simulation procedure to predict the
	conformation of a receptor-ligand complex. Each docking program makes
	use of one or more specific search algorithms, which are the methods
	used to predict the possible conformations of a binary complex. In
	the present review we describe several molecular-docking search algorithms,
	and the programs which apply such methodologies. We also discuss
	how virtual screening can be optimized, describing methods that may
	increase accuracy of the simulation process, with relatively fast
	docking algorithms.},
  institution = {Faculdade de Bioci{\^e}ncias, Laborat{\'o}rio de Bioqu{\'\i}mica Estrutural,
	Pontif{\'\i}cia Universidade Cat{\'o}lica do Rio Grande do Sul, Porto
	Alegre, RS, Brazil.},
  keywords = {Algorithms; Database Management Systems; Information Storage and Retrieval;
	Models, Molecular; Molecular Conformation; Monte Carlo Method},
  language = {eng},
  medline-pst = {ppublish},
  owner = {bricehoffmann},
  pmid = {19128213},
  timestamp = {2009.08.25}
}

@article{Didiano2008Molecular,
  author = {Didiano, Dominic and Hobert, Oliver},
  title = {Molecular architecture of a {miRNA}-regulated 3' {UTR}.},
  journal = {RNA},
  year = {2008},
  volume = {14},
  pages = {1297--1317},
  number = {7},
  month = jul,
  abstract = {Animal genomes contain hundreds of microRNAs (miRNAs), small regulatory
	RNAs that control gene expression by binding to complementary sites
	in target mRNAs. Some rules that govern miRNA/target interaction
	have been elucidated but their general applicability awaits further
	experimentation on a case-by-case basis. We use here an assay system
	in transgenic nematodes to analyze the interaction of the Caenorhabditis
	elegans lsy-6 miRNA with 3' UTR sequences. In contrast to many previously
	described assay systems used to analyze miRNA/target interactions,
	our assay system operates within the cellular context in which lsy-6
	normally functions, a single neuron in the nervous system of C. elegans.
	Through extensive mutational analysis, we define features in the
	known and experimentally validated target of lsy-6, the 3' UTR of
	the cog-1 homeobox gene, that are required for a functional miRNA/target
	interaction. We describe that both in the context of the cog-1 3'
	UTR and in the context of heterologous 3' UTRs, one or more seed
	matches are not a reliable predictor for a functional miRNA/target
	interaction. We rather find that two nonsequence specific contextual
	features beyond miRNA target sites are critical determinants of miRNA-mediated
	3' UTR regulation. The contextual features reside 3' of lsy-6 binding
	sites in the 3' UTR and act in a combinatorial manner; mutation of
	each results in limited defects in 3' UTR regulation, but a combinatorial
	deletion results in complete loss of 3' UTR regulation. Together
	with two lsy-6 sites, these two contextual features are capable of
	imparting regulation on a heterologous 3' UTR. Moreover, the contextual
	features need to be present in a specific configuration relative
	to miRNA binding sites and could either represent protein binding
	sites or provide an appropriate structural context. We conclude that
	a given target site resides in a 3' UTR context that evolved beyond
	target site complementarity to support regulation by a specific miRNA.
	The large number of 3' UTRs that we analyzed in this study will also
	be useful to computational biologists in designing the next generation
	of miRNA/target prediction algorithms.},
  doi = {10.1261/rna.1082708},
  pdf = {../local/Didiano2008Molecular.pdf},
  file = {Didiano2008Molecular.pdf:Didiano2008Molecular.pdf:PDF},
  institution = {Department of Biochemistry and Molecular Biophysics, Howard Hughes
	Medical Institute, Columbia University Medical Center, New York,
	New York 10032, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {rna.1082708},
  pmid = {18463285},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1261/rna.1082708}
}

@article{Didiano2006Perfect,
  author = {Didiano, Dominic and Hobert, Oliver},
  title = {Perfect seed pairing is not a generally reliable predictor for {miRNA}-target
	interactions.},
  journal = {Nat Struct Mol Biol},
  year = {2006},
  volume = {13},
  pages = {849--851},
  number = {9},
  month = sep,
  abstract = {We use Caenorhabditis elegans to test proposed general rules for microRNA
	(miRNA)-target interactions. We show that G.U base pairing is tolerated
	in the 'seed' region of the lsy-6 miRNA interaction with its in vivo
	target cog-1, and that 6- to 8-base-pair perfect seed pairing is
	not a generally reliable predictor for an interaction of lsy-6 with
	a 3' untranslated region (UTR). Rather, lsy-6 can functionally interact
	with its target site only in specific 3' UTR contexts. Our findings
	illustrate the difficulty of establishing generalizable rules of
	miRNA-target interactions.},
  doi = {10.1038/nsmb1138},
  pdf = {../local/Didiano2006Perfect.pdf},
  file = {Didiano2006Perfect.pdf:Didiano2006Perfect.pdf:PDF},
  institution = {Department of Biochemistry and Molecular Biophysics, Columbia University
	Medical Center, Howard Hughes Medical Institute, 701 W. 168th Street,
	New York, New York 10032, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nsmb1138},
  pmid = {16921378},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1038/nsmb1138}
}

@article{Diekman2003Hybrid,
  author = {Diekman, Casey and He, Wei and Prabhu, Nagabhushana and Cramer, Harvey},
  title = {Hybrid methods for automated diagnosis of breast tumors.},
  journal = {Anal Quant Cytol Histol},
  year = {2003},
  volume = {25},
  pages = {183--190},
  number = {4},
  month = aug,
  abstract = {O{BJECTIVE}: {T}o design and analyze a new family of hybrid methods
	for the diagnosis of breast tumors using fine needle aspirates. {STUDY}
	{DESIGN}: {W}e present a radically new approach to the design of
	diagnosis systems. {I}n the new approach, a nonlinear classifier
	with high sensitivity but low specificity is hybridized with a linear
	classifier having low sensitivity but high specificity. {D}ata from
	the {W}isconsin {B}reast {C}ancer {D}atabase are used to evaluate,
	computationally, the performance of the hybrid classifiers. {RESULTS}:
	{T}he diagnosis scheme obtained by hybridizing the nonlinear classifier
	ellipsoidal multisurface method ({EMSM}) with the linear classifier
	proximal support vector machine ({PSVM}) was found to have a mean
	sensitivity of 97.36\% and a mean specificity of 95.14\% and was
	found to yield a 2.44\% improvement in the reliability of positive
	diagnosis over that of {EMSM} at the expense of 0.4\% degradation
	in the reliability of negative diagnosis, again compared to {EMSM}.
	{A}t the 95\% confidence level we can trust the hybrid method to
	be 96.19-98.53\% correct in its malignant diagnosis of new tumors
	and 93.57-96.71\% correct in its benign diagnosis. {CONCLUSION}:
	{H}ybrid diagnosis schemes represent a significant paradigm shift
	and provide a promising new technique to improve the specificity
	of nonlinear classifiers without seriously affecting the high sensitivity
	of nonlinear classifiers.},
  keywords = {Algorithms, Amino Acid Sequence, Amino Acids, Anion Exchange Resins,
	Antigen-Antibody Complex, Artificial Intelligence, Automated, Automatic
	Data Processing, Benchmarking, Biological, Biological Markers, Biopsy,
	Blood Cells, Blood Proteins, Breast Neoplasms, Cell Line, Cellular
	Structures, Chemical, Chromatography, Chromosome Aberrations, Cluster
	Analysis, Colonic Neoplasms, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, Computing Methodologies,
	DNA, Data Interpretation, Databases, Decision Making, Decision Trees,
	Diagnosis, Diffusion Magnetic Resonance Imaging, Disease, English
	Abstract, Epitopes, Expert Systems, Factual, Female, Fine-Needle,
	Fusion, Fuzzy Logic, Gene Expression Profiling, Gene Expression Regulation,
	Gene Targeting, Genetic, Genome, Histocompatibility Antigens Class
	I, Humans, Hydrogen Bonding, Hydrophobicity, Image Interpretation,
	Image Processing, In Vitro, Indicators and Reagents, Information
	Storage and Retrieval, Ion Exchange, Least-Squares Analysis, Leiomyosarcoma,
	Liver Cirrhosis, Lung Neoplasms, Magnetic Resonance Imaging, Male,
	Mass, Mathematical Computing, Matrix-Assisted Laser Desorption-Ionization,
	Models, Molecular, Molecular Sequence Data, Neoplasm Proteins, Neoplasms,
	Neoplastic, Nephroblastoma, Neural Networks (Computer), Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Nucleic Acid Conformation, Nucleic Acid Hybridization,
	Oligonucleotide Array Sequence Analysis, Oncogene Proteins, Ovarian
	Neoplasms, P.H.S., Pattern Recognition, Predictive Value of Tests,
	Proteome, Prostatic Neoplasms, Protein, Protein Binding, Protein Interaction
	Mapping, Protein Structure, Proteins, Quantitative Structure-Activity
	Relationship, RNA, ROC Curve, Reproducibility of Results, Research
	Support, Rhabdomyosarcoma, Secondary, Sensitivity and Specificity,
	Sequence Alignment, Sequence Analysis, Severity of Illness Index,
	Software, Solubility, Spectrometry, Statistical, Structure-Activity
	Relationship, Subcellular Fractions, Subtraction Technique, T-Lymphocyte,
	Tissue Distribution, Transcription Factors, Transfer, Treatment Outcome,
	Tumor, Tumor Markers, U.S. Gov't, User-Computer Interface},
  pmid = {12961824}
}

@book{Diestel2000Graph,
  author    = {R. Diestel},
  title     = {Graph theory},
  publisher = {Springer-Verlag},
  year      = {2000}
}

@article{Dieterle2003Urinary,
  author = {Dieterle, Frank and M{\"u}ller-Hagedorn, Silvia and Liebich, Hartmut M.
	and Gauglitz, G{\"u}nter},
  title = {Urinary nucleosides as potential tumor markers evaluated by learning
	vector quantization.},
  journal = {Artif. Intell. Med.},
  year = {2003},
  volume = {28},
  pages = {265--279},
  number = {3},
  month = jul,
  abstract = {Modified nucleosides were recently presented as potential tumor markers
	for breast cancer. {T}he patterns of the levels of urinary nucleosides
	are different for tumor bearing individuals and for healthy individuals.
	{T}hus, a powerful pattern recognition method is needed. {A}lthough
	backpropagation ({BP}) neural networks are becoming increasingly
	common in medical literature for pattern recognition, it has been
	shown that often-superior methods exist like learning vector quantization
	({LVQ}) and support vector machines ({SVM}). {T}he aim of this feasibility
	study is to get an indication of the performance of urinary nucleoside
	levels evaluated by {LVQ} in contrast to the evaluation the popular
	{BP} and {SVM} networks. {U}rine samples were collected from female
	breast cancer patients and from healthy females. {T}welve different
	ribonucleosides were isolated and quantified by a high performance
	liquid chromatography ({HPLC}) procedure. {LVQ}, {SVM} and {BP} networks
	were trained and the performance was evaluated by the classification
	of the test sets into the categories "cancer" and "healthy". {A}ll
	methods showed a good classification with a sensitivity ranging from
	58.8 to 70.6\% at a specificity of 88.4-94.2\% for the test patterns.
	{A}lthough the classification performance of all methods is comparable,
	the {LVQ} implementations are superior in terms of more qualitative
	features: the results of {LVQ} networks are more reproducible, as
	the initialization is deterministic. {T}he {LVQ} networks can be
	trained by unbalanced sizes of the different classes. {LVQ} networks
	are fast during training, need only few parameters adjusted for training
	and can be retrained by patterns of "local individuals". {A}s at
	least some of these features play an important role in an implementation
	into a medical decision support system, it is recommended to use
	{LVQ} for an extended study.},
  doi = {10.1016/S0933-3657(03)00058-7},
  pdf = {../local/Dieterle2003Urinary.pdf},
  file = {Dieterle2003Urinary.pdf:local/Dieterle2003Urinary.pdf:PDF},
  keywords = {80 and over, Adnexal Diseases, Adult, Aged, Algorithms, Artificial
	Intelligence, Automated, Bayes Theorem, Biological, Breast Neoplasms,
	Case-Control Studies, Chromatography, Comparative Study, Computational
	Biology, Computer-Assisted, Diagnosis, Differential, Feasibility
	Studies, Female, High Pressure Liquid, Humans, Logistic Models, Middle
	Aged, Neural Networks (Computer), Non-U.S. Gov't, Nucleosides, Ovarian
	Neoplasms, Pattern Recognition, Predictive Value of Tests, ROC Curve,
	Reproducibility of Results, Research Support, Sensitivity and Specificity,
	Tumor Markers},
  pii = {S0933365703000587},
  pmid = {12927336},
  url = {http://dx.doi.org/10.1016/S0933-3657(03)00058-7}
}

% Dietterich1998Experimental: the key keeps the preprint year, but the
% journal version (the one the DOI resolves to) is Mach. Learn. 40(2), 2000.
@article{Dietterich1998Experimental,
  author = {Dietterich, T.},
  title = {An Experimental Comparison of Three Methods for Constructing Ensembles
	of Decision Trees: Bagging, Boosting, and Randomization},
  journal = {Mach. Learn.},
  year = {2000},
  volume = {40},
  pages = {139--157},
  number = {2},
  doi = {10.1023/A:1007607513941},
  pdf = {../local/Dietterich1998Experimental.pdf},
  file = {Dietterich1998Experimental.pdf:Dietterich1998Experimental.pdf:PDF},
  owner = {jp},
  timestamp = {2012.03.21},
  url = {http://dx.doi.org/10.1023/A:1007607513941}
}

@article{Dietterich1997Solving,
  author = {Dietterich, T. G. and Lathrop, R. H. and Lozano-Perez, T.},
  title = {Solving the {M}ultiple {I}nstance {P}roblem with {A}xis-{P}arallel
	{R}ectangles},
  journal = {Artificial Intelligence},
  year = {1997},
  volume = {89},
  pages = {31--71},
  number = {1--2},
  citeseerurl = {http://citeseer.ist.psu.edu/dietterich97solving.html},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@incollection{Dietterich2002Machine,
  author = {Dietterich, T. G.},
  title = {Machine {L}earning for {S}equential {D}ata: {A} {R}eview},
  booktitle = {Structural, {S}yntactic, and {S}tatistical {P}attern {R}ecognition},
  publisher = {Springer-Verlag},
  year = {2002},
  editor = {Caelli, T.},
  volume = {2396},
  series = {Lecture {N}otes in {C}omputer {S}cience},
  pages = {15--30},
  pdf = {../local/Dietterich2002Machine.pdf},
  file = {Dietterich2002Machine.pdf:local/Dietterich2002Machine.pdf:PDF},
  keywords = {conditional-random-field},
  owner = {vert}
}

@article{DiMasi2003price,
  author = {DiMasi, J. A. and Hansen, R. W. and Grabowski, H. G.},
  title = {{T}he price of innovation: new estimates of drug development costs.},
  journal = {J Health Econ},
  year = {2003},
  month = {Mar},
  volume = {22},
  number = {2},
  pages = {151--185},
  abstract = {The research and development costs of 68 randomly selected new drugs
	were obtained from a survey of 10 pharmaceutical firms. These data
	were used to estimate the average pre-tax cost of new drug development.
	The costs of compounds abandoned during testing were linked to the
	costs of compounds that obtained marketing approval. The estimated
	average out-of-pocket cost per new drug is 403 million US dollars
	(2000 dollars). Capitalizing out-of-pocket costs to the point of
	marketing approval at a real discount rate of 11\% yields a total
	pre-approval cost estimate of 802 million US dollars (2000 dollars).
	When compared to the results of an earlier study with a similar methodology,
	total capitalized costs were shown to have increased at an annual
	rate of 7.4\% above general price inflation.},
  keywords = {Capital Expenditures, Costs and Cost Analysis, Data Collection, Drug
	Approval, Drug Evaluation, Drug Industry, Drugs, Economic, Humans,
	Inflation, Investigational, Organizational Innovation, Preclinical,
	Research Support, United States, 16087260},
  owner = {mahe},
  pii = {S0167629602001261},
  pmid = {16087260},
  timestamp = {2006.08.12}
}

@manual{Dimitriadou2011e1071,
  author = {Dimitriadou, E. and Hornik, K. and Leisch, F. and Meyer, D.
	and Weingessel, A.},
  title = {e1071: Misc Functions of the Department of Statistics (e1071),
	TU Wien},
  year = {2011},
  note = {R package version 1.6},
  url = {http://CRAN.R-project.org/package=e1071},
  owner = {jp},
  timestamp = {2012.07.31}
}

@article{Ding2001Multi-class,
  author = {Ding, C. H. Q. and Dubchak, I.},
  title = {Multi-class protein fold recognition using support vector machines
	and neural networks},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {349--358},
  number = {4},
  abstract = {Motivation: {P}rotein fold recognition is an important approach to
	structure discovery without relying on sequence similarity. {W}e
	study this approach with new multi-class classification methods and
	examined many issues important for a practical recognition system.
	{R}esults: {M}ost current discriminative methods for protein fold
	prediction use the one-against-others method, which has the well-known
	``{F}alse {P}ositives'' problem. {W}e investigated two new methods:
	the unique one-against-others and the all-against-all methods. {B}oth
	improve prediction accuracy by 14--110\% on a dataset containing 27
	{SCOP} folds. {W}e used the {S}upport {V}ector {M}achine ({SVM})
	and the {N}eural {N}etwork ({NN}) learning methods as base classifiers.
	{SVM}s converges fast and leads to high accuracy. {W}hen scores of
	multiple parameter datasets are combined, majority voting reduces
	noise and increases recognition accuracy. {W}e examined many issues
	involved with large number of classes, including dependencies of
	prediction accuracy on the number of folds and on the number of representatives
	in a fold. {O}verall, recognition systems achieve 56\% fold prediction
	accuracy on a protein test dataset, where most of the proteins have
	below 25\% sequence identity with the proteins used in training. {S}upplementary
	information: {T}he protein parameter datasets used in this paper
	are available online (http://www.nersc.gov/~cding/protein).},
  pdf = {../local/Ding2001Multi-class.pdf},
  file = {Ding2001Multi-class.pdf:local/Ding2001Multi-class.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://bioinformatics.oupjournals.org/cgi/reprint/17/4/349.pdf}
}

@article{Ding2005Minimum,
  author = {Ding, Chris and Peng, Hanchuan},
  title = {Minimum redundancy feature selection from microarray gene expression
	data.},
  journal = {J {B}ioinform {C}omput {B}iol},
  year = {2005},
  volume = {3},
  pages = {185--205},
  number = {2},
  month = {Apr},
  abstract = {How to selecting a small subset out of the thousands of genes in microarray
	data is important for accurate classification of phenotypes. {W}idely
	used methods typically rank genes according to their differential
	expressions among phenotypes and pick the top-ranked genes. {W}e
	observe that feature sets so obtained have certain redundancy and
	study methods to minimize it. {W}e propose a minimum redundancy -
	maximum relevance ({MRMR}) feature selection framework. {G}enes selected
	via {MRMR} provide a more balanced coverage of the space and capture
	broader characteristics of phenotypes. {T}hey lead to significantly
	improved class predictions in extensive experiments on 6 gene expression
	data sets: {NCI}, {L}ymphoma, {L}ung, {C}hild {L}eukemia, {L}eukemia,
	and {C}olon. {I}mprovements are observed consistently among 4 classification
	methods: {N}aive {B}ayes, {L}inear discriminant analysis, {L}ogistic
	regression, and {S}upport vector machines. {SUPPLIMENTARY}: {T}he
	top 60 {MRMR} genes for each of the datasets are listed in http://crd.lbl.gov/~cding/{MRMR}/.
	{M}ore information related to {MRMR} methods can be found at http://www.hpeng.net/.},
  keywords = {Adult, Aged, Aging, Algorithms, Animals, Apoptosis, Artificial Intelligence,
	Automated, Biological, Bone Marrow, Breast Neoplasms, Classification,
	Cluster Analysis, Comparative Study, Computer Simulation, Computer-Assisted,
	Diagnosis, Dose-Response Relationship, Drug, Female, Foot, Gait,
	Gene Expression Profiling, Gene Expression Regulation, Gene Silencing,
	Genetic Vectors, Humans, Image Interpretation, Information Storage
	and Retrieval, Kidney, Liver, Logistic Models, Male, Messenger, Models,
	Myocardium, Neoplasms, Non-U.S. Gov't, Oligonucleotide Array Sequence
	Analysis, Pattern Recognition, Pharmaceutical Preparations, Polymerase
	Chain Reaction, Principal Component Analysis, Proteins, RNA, Rats,
	Reproducibility of Results, Research Support, Sensitivity and Specificity,
	Small Interfering, Sprague-Dawley, Statistical, Subcellular Fractions,
	Unknown Primary, 15852500},
  pii = {S0219720005001004}
}

@article{Ding2003statistical,
  author = {Ding, Y. and Lawrence, C. E.},
  title = {A statistical sampling algorithm for {RNA} secondary structure prediction.},
  journal = {Nucleic {A}cids {R}es.},
  year = {2003},
  volume = {31},
  pages = {7280--7301},
  number = {24},
  month = {Dec},
  abstract = {An {RNA} molecule, particularly a long-chain m{RNA}, may exist as
	a population of structures. {F}urther more, multiple structures have
	been demonstrated to play important functional roles. {T}hus, a representation
	of the ensemble of probable structures is of interest. {W}e present
	a statistical algorithm to sample rigorously and exactly from the
	{B}oltzmann ensemble of secondary structures. {T}he forward step
	of the algorithm computes the equilibrium partition functions of
	{RNA} secondary structures with recent thermodynamic parameters.
	{U}sing conditional probabilities computed with the partition functions
	in a recursive sampling process, the backward step of the algorithm
	quickly generates a statistically representative sample of structures.
	{W}ith cubic run time for the forward step, quadratic run time in
	the worst case for the sampling step, and quadratic storage, the
	algorithm is efficient for broad applicability. {W}e demonstrate
	that, by classifying sampled structures, the algorithm enables a
	statistical delineation and representation of the {B}oltzmann ensemble.
	{A}pplications of the algorithm show that alternative biological
	structures are revealed through sampling. {S}tatistical sampling
	provides a means to estimate the probability of any structural motif,
	with or without constraints. {F}or example, the algorithm enables
	probability profiling of single-stranded regions in {RNA} secondary
	structure. {P}robability profiling for specific loop types is also
	illustrated. {B}y overlaying probability profiles, a mutual accessibility
	plot can be displayed for predicting {RNA}:{RNA} interactions. {B}oltzmann
	probability-weighted density of states and free energy distributions
	of sampled structures can be readily computed. {W}e show that a sample
	of moderate size from the ensemble of an enormous number of possible
	structures is sufficient to guarantee statistical reproducibility
	in the estimates of typical sampling statistics. {O}ur applications
	suggest that the sampling algorithm may be well suited to prediction
	of m{RNA} structure and target accessibility. {T}he algorithm is
	applicable to the rational design of small interfering {RNA}s (si{RNA}s),
	antisense oligonucleotides, and trans-cleaving ribozymes in gene
	knock-down studies.}
}

@article{Ding2004Sfold,
  author = {Ding, Y. and Chan, C. Y. and Lawrence, C. E.},
  title = {{S}fold web server for statistical folding and rational design of
	nucleic acids.},
  journal = {Nucleic Acids Res.},
  year = {2004},
  volume = {32},
  pages = {W135--W141},
  number = {Web Server issue},
  month = {Jul},
  abstract = {The Sfold web server provides user-friendly access to Sfold, a recently
	developed nucleic acid folding software package, via the World Wide
	Web (WWW). The software is based on a new statistical sampling paradigm
	for the prediction of RNA secondary structure. One of the main objectives
	of this software is to offer computational tools for the rational
	design of RNA-targeting nucleic acids, which include small interfering
	RNAs (siRNAs), antisense oligonucleotides and trans-cleaving ribozymes
	for gene knock-down studies. The methodology for siRNA design is
	based on a combination of RNA target accessibility prediction, siRNA
	duplex thermodynamic properties and empirical design rules. Our approach
	to target accessibility evaluation is an original extension of the
	underlying RNA folding algorithm to account for the likely existence
	of a population of structures for the target mRNA. In addition to
	the application modules Sirna, Soligo and Sribo for siRNAs, antisense
	oligos and ribozymes, respectively, the module Srna offers comprehensive
	features for statistical representation of sampled structures. Detailed
	output in both graphical and text formats is available for all modules.
	The Sfold server is available at http://sfold.wadsworth.org and http://www.bioinfo.rpi.edu/applications/sfold.},
  doi = {10.1093/nar/gkh449},
  keywords = {sirna},
  owner = {vert},
  pii = {32/suppl_2/W135},
  pmid = {15215366},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1093/nar/gkh449}
}

@article{Dixon2012Topological,
  author = {Dixon, J. R. and Selvaraj, S. and Yue, F. and Kim, A. and Li, Y.
	and Shen, Y. and Hu, M. and Liu, J. S. and Ren, B.},
  title = {Topological domains in mammalian genomes identified by analysis of
	chromatin interactions.},
  journal = {Nature},
  year = {2012},
  volume = {485},
  pages = {376--380},
  number = {7398},
  doi = {10.1038/nature11082},
  pdf = {../local/Dixon2012Topological.pdf},
  file = {Dixon2012Topological.pdf:Dixon2012Topological.pdf:PDF},
  keywords = {ngs, hic},
  owner = {nelle},
  timestamp = {2013.03.30},
  url = {http://dx.doi.org/10.1038/nature11082}
}

@article{Djordjevic2003biophysical,
  author = {Djordjevic, Marko and Sengupta, Anirvan M and Shraiman, Boris I},
  title = {A biophysical approach to transcription factor binding site discovery.},
  journal = {Genome {R}es.},
  year = {2003},
  volume = {13},
  pages = {2381--2390},
  number = {11},
  month = {Nov},
  abstract = {Identification of transcription factor binding sites within regulatory
	segments of genomic {DNA} is an important step toward understanding
	of the regulatory circuits that control expression of genes. {H}ere,
	we describe a novel bioinformatics method that bases classification
	of potential binding sites explicitly on the estimate of sequence-specific
	binding energy of a given transcription factor. {T}he method also
	estimates the chemical potential of the factor that defines the threshold
	of binding. {I}n contrast with the widely used information-theoretic
	weight matrix method, the new approach correctly describes saturation
	in the transcription factor/{DNA} binding probability. {T}his results
	in a significant improvement in the number of expected false positives,
	particularly in the ubiquitous case of low-specificity factors. {I}n
	the strong binding limit, the algorithm is related to the "support
	vector machine" approach to pattern recognition. {T}he new method
	is used to identify likely genomic binding sites for the {E}. coli
	transcription factors collected in the {DPI}nteract database. {I}n
	addition, for {CRP} (a global regulatory factor), the likely regulatory
	modality (i.e., repressor or activator) of predicted binding sites
	is determined.},
  doi = {10.1101/gr.1271603},
  pdf = {../local/Djordjevic2003biophysical.pdf},
  file = {Djordjevic2003biophysical.pdf:local/Djordjevic2003biophysical.pdf:PDF},
  pii = {13/11/2381},
  url = {http://dx.doi.org/10.1101/gr.1271603}
}

@article{Do2006Normalization,
  author = {Do, J. H. and Choi, D. K.},
  title = {Normalization of microarray data: single-labeled and dual-labeled
	arrays},
  journal = {Molecules and Cells},
  year = {2006},
  volume = {22},
  pages = {254--261},
  owner = {philippe},
  timestamp = {2010.08.04}
}

@article{Dobson2005Predicting,
  author = {Dobson, P. D. and Doig, A. J.},
  title = {Predicting enzyme class from protein structure without alignments},
  journal = {J. {M}ol. {B}iol.},
  year = {2005},
  volume = {345},
  pages = {187--199},
  number = {1},
  month = {Jan},
  abstract = {Methods for predicting protein function from structure are becoming
	more important as the rate at which structures are solved increases
	more rapidly than experimental knowledge. {A}s a result, protein
	structures now frequently lack functional annotations. {T}he majority
	of methods for predicting protein function are reliant upon identifying
	a similar protein and transferring its annotations to the query protein.
	{T}his method fails when a similar protein cannot be identified,
	or when any similar proteins identified also lack reliable annotations.
	{H}ere, we describe a method that can assign function from structure
	without the use of algorithms reliant upon alignments. {U}sing simple
	attributes that can be calculated from any crystal structure, such
	as secondary structure content, amino acid propensities, surface
	properties and ligands, we describe each enzyme in a non-redundant
	set. {T}he set is split according to {E}nzyme {C}lassification ({EC})
	number. {W}e combine the predictions of one-class versus one-class
	support vector machine models to make overall assignments of {EC}
	number to an accuracy of 35\% with the top-ranked prediction, rising
	to 60\% accuracy with the top two ranks. {I}n doing so we demonstrate
	the utility of simple structural attributes in protein function prediction
	and shed light on the link between structure and function. {W}e apply
	our methods to predict the function of every currently unclassified
	protein in the {P}rotein {D}ata {B}ank.},
  doi = {10.1016/j.jmb.2004.10.024},
  pdf = {../local/Dobson2005Predicting.pdf},
  file = {Dobson2005Predicting.pdf:local/Dobson2005Predicting.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.jmb.2004.10.024}
}

@article{Dobson2003Distinguishing,
  author = {Dobson, P. D. and Doig, A. J.},
  title = {Distinguishing enzyme structures from non-enzymes without alignments},
  journal = {J. {M}ol. {B}iol.},
  year = {2003},
  volume = {330},
  pages = {771--783},
  number = {4},
  abstract = {The ability to predict protein function from structure is becoming
	increasingly important as the number of structures resolved is growing
	more rapidly than our capacity to study function. {C}urrent methods
	for predicting protein function are mostly reliant on identifying
	a similar protein of known function. {F}or proteins that are highly
	dissimilar or are only similar to proteins also lacking functional
	annotations, these methods fail. {H}ere, we show that protein function
	can be predicted as enzymatic or not without resorting to alignments.
	{W}e describe 1178 high-resolution proteins in a structurally non-redundant
	subset of the {P}rotein {D}ata {B}ank using simple features such
	as secondary-structure content, amino acid propensities, surface
	properties and ligands. {T}he subset is split into two functional
	groupings, enzymes and non-enzymes. {W}e use the support vector machine-learning
	algorithm to develop models that are capable of assigning the protein
	class. {V}alidation of the method shows that the function can be
	predicted to an accuracy of 77\% using 52 features to describe each
	protein. {A}n adaptive search of possible subsets of features produces
	a simplified model based on 36 features that predicts at an accuracy
	of 80\%. {W}e compare the method to sequence-based methods that also
	avoid calculating alignments and predict a recently released set
	of unrelated proteins. {T}he most useful features for distinguishing
	enzymes from non-enzymes are secondary-structure content, amino acid
	frequencies, number of disulphide bonds and size of the largest cleft.
	{T}his method is applicable to any structure as it does not require
	the identification of sequence or structural similarity to a protein
	of known function.},
  doi = {10.1016/S0022-2836(03)00628-4},
  pdf = {../local/Dobson2003Distinguishing.pdf},
  file = {Dobson2003Distinguishing.pdf:local/Dobson2003Distinguishing.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0022-2836(03)00628-4}
}

@inproceedings{Doi2000Hybrid,
  author = {Doi, A. and Matsuno, H. and Nagasaki, M. and Miyano, S.},
  title = {Hybrid {P}etri net representation of gene regulatory network},
  booktitle = {Proceedings of the {P}acific {S}ymposium on {B}iocomputing},
  volume = {5},
  pages = {341--352},
  year = {2000},
  url = {http://helix-web.stanford.edu/psb00/matsuno.pdf},
  pdf = {../local/Doi2000Hybrid.pdf},
  file = {Doi2000Hybrid.pdf:local/Doi2000Hybrid.pdf:PDF},
  owner = {vert}
}

@article{Domon2010Options,
  author = {Domon, Bruno and Aebersold, Ruedi},
  title = {Options and considerations when selecting a quantitative proteomics
	strategy.},
  journal = {Nat Biotechnol},
  year = {2010},
  month = {Jul},
  volume = {28},
  number = {7},
  pages = {710--721},
  abstract = {The vast majority of proteomic studies to date have relied on mass
	spectrometric techniques to identify, and in some cases quantify,
	peptides that have been generated by proteolysis. Current approaches
	differ in the types of instrument used, their performance profiles,
	the manner in which they interface with biological research strategies,
	and their reliance on and use of prior information. Here, we consider
	the three main mass spectrometry (MS)-based proteomic approaches
	used today: shotgun (or discovery), directed and targeted strategies.
	We discuss the principles of each technique, their strengths and
	weaknesses and the dependence of their performance profiles on the
	composition of the biological sample. Our goal is to provide a rational
	framework for selecting strategies optimally suited to address the
	specific research issue under consideration.},
  doi = {10.1038/nbt.1661},
  institution = {Institute of Molecular Systems Biology, ETH Zurich, Zurich, Switzerland.
	bruno.domon@crp-santa.lu},
  owner = {phupe},
  pii = {nbt.1661},
  pmid = {20622845},
  timestamp = {2010.08.19},
  url = {http://dx.doi.org/10.1038/nbt.1661}
}

@article{Donaldson2003PreBIND,
  author = {Donaldson, I. and Martin, J. and de Bruijn, B. and Wolting, C. and
	Lay, V. and Tuekam, B. and Zhang, S. and Baskin, B. and Bader, G. D.
	and Michalickova, K. and Pawson, T. and Hogue, C. W. V.},
  title = {{PreBIND} and {T}extomy - mining the biomedical literature for
	protein-protein interactions using a support vector machine},
  journal = {B{MC} {B}ioinformatics},
  year = {2003},
  volume = {4},
  pages = {11},
  number = {1},
  month = {Mar},
  abstract = {Background {T}he majority of experimentally verified molecular interaction
	and biological pathway data are present in the unstructured text
	of biomedical journal articles where they are inaccessible to computational
	methods. {T}he {B}iomolecular interaction network database ({BIND})
	seeks to capture these data in a machine-readable format. {W}e hypothesized
	that the formidable task-size of backfilling the database could be
	reduced by using {S}upport {V}ector {M}achine technology to first
	locate interaction information in the literature. {W}e present an
	information extraction system that was designed to locate protein-protein
	interaction data in the literature and present these data to curators
	and the public for review and entry into {BIND}. {R}esults {C}ross-validation
	estimated the support vector machine's test-set precision, accuracy
	and recall for classifying abstracts describing interaction information
	was 92\%, 90\% and 92\% respectively. {W}e estimated that the system
	would be able to recall up to 60\% of all non-high throughput interactions
	present in another yeast-protein interaction database. {F}inally,
	this system was applied to a real-world curation problem and its
	use was found to reduce the task duration by 70\% thus saving 176
	days. {C}onclusions {M}achine learning methods are useful as tools
	to direct interaction and pathway database back-filling; however,
	this potential can only be realized if these techniques are coupled
	with human review and entry into a factual database such as {BIND}.
	{T}he {P}re{BIND} system described here is available to the public
	at http://bind.ca. {C}urrent capabilities allow searching for human,
	mouse and yeast protein-interaction information.},
  doi = {10.1186/1471-2105-4-11},
  pdf = {../local/Donaldson2003PreBIND.pdf},
  file = {Donaldson2003PreBIND.pdf:local/Donaldson2003PreBIND.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://www.biomedcentral.com/1471-2105/4/11/abstract}
}

@article{Dong2005Prediction,
  author = {Dong, Hai-Long and Sui, Yan-Fang},
  title = {Prediction of {HLA}-{A2}-restricted {CTL} epitope specific to {HCC}
	by {SYFPEITHI} combined with polynomial method.},
  journal = {World J Gastroenterol},
  year = {2005},
  month = {Jan},
  volume = {11},
  number = {2},
  pages = {208--211},
  abstract = {AIM: To predict the HLA-A2-restricted CTL epitopes of tumor antigens
	associated with hepatocellular carcinoma (HCC). METHODS: MAGE-1,
	MAGE-3, MAGE-8, P53 and AFP were selected as objective antigens in
	this study for the close association with HCC. The HLA-A*0201 restricted
	CTL epitopes of objective tumor antigens were predicted by SYFPEITHI
	prediction method combined with the polynomial quantitative motifs
	method. The threshold of polynomial scores was set to -24. RESULTS:
	The SYFPEITHI prediction values of all possible nonamers of a given
	protein sequence were added together and the ten high-scoring peptides
	of each protein were chosen for further analysis in primary prediction.
	Thirty-five candidates of CTL epitopes (nonamers) derived from the
	primary prediction results were selected by analyzing with the polynomial
	method and compared with reported CTL epitopes. CONCLUSION: The combination
	of SYFPEITHI prediction method and polynomial method can improve
	the prediction efficiency and accuracy. These nonamers may be useful
	in the design of therapeutic peptide vaccine for HCC and as immunotherapeutic
	strategies against HCC after identified by immunology experiment.},
  keywords = {Amino Acid Sequence; Carcinoma, Hepatocellular; Databases, Protein;
	Epitopes; HLA-A2 Antigen; Humans; Liver Neoplasms; Major Histocompatibility
	Complex; Research Support, Non-U.S. Gov't; T-Lymphocytes, Cytotoxic},
  owner = {jacob},
  pmid = {15633217},
  timestamp = {2006.08.30}
}

@article{Dong2005Fast,
  author = {Dong, Jian-xiong and Krzyzak, Adam and Suen, Ching Y},
  title = {Fast {SVM} training algorithm with decomposition on very large data
	sets.},
  journal = {I{EEE} {T}rans {P}attern {A}nal {M}ach {I}ntell},
  year = {2005},
  volume = {27},
  pages = {603--618},
  number = {4},
  month = {Apr},
  abstract = {Training a support vector machine on a data set of huge size with
	thousands of classes is a challenging problem. {T}his paper proposes
	an efficient algorithm to solve this problem. {T}he key idea is to
	introduce a parallel optimization step to quickly remove most of
	the nonsupport vectors, where block diagonal matrices are used to
	approximate the original kernel matrix so that the original problem
	can be split into hundreds of subproblems which can be solved more
	efficiently. {I}n addition, some effective strategies such as kernel
	caching and efficient computation of kernel matrix are integrated
	to speed up the training process. {O}ur analysis of the proposed
	algorithm shows that its time complexity grows linearly with the
	number of classes and size of the data set. {I}n the experiments,
	many appealing properties of the proposed algorithm have been investigated
	and the results show that the proposed algorithm has a much better
	scaling capability than {L}ibsvm, {SVM}light, and {SVMT}orch. {M}oreover,
	the good generalization performances on several large databases have
	also been achieved.},
  keywords = {Algorithms, Animals, Antibiotics, Antineoplastic, Artificial Intelligence,
	Automated, Automatic Data Processing, Butadienes, Chloroplasts, Comparative
	Study, Computer Simulation, Computer-Assisted, Database Management
	Systems, Databases, Diagnosis, Disinfectants, Dose-Response Relationship,
	Drug, Drug Toxicity, Electrodes, Electroencephalography, Ethylamines,
	Expert Systems, Factual, Feedback, Fungicides, Gene Expression Profiling,
	Genes, Genetic Markers, Humans, Image Enhancement, Image Interpretation,
	Implanted, Industrial, Information Storage and Retrieval, Kidney,
	Kidney Tubules, MEDLINE, Male, Mercuric Chloride, Microarray Analysis,
	Molecular Biology, Motor Cortex, Movement, Natural Language Processing,
	Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Numerical
	Analysis, Pattern Recognition, Plant Proteins, Predictive Value of
	Tests, Proteins, Proteome, Proximal, Puromycin Aminonucleoside, Rats,
	Reproducibility of Results, Research Support, Sensitivity and Specificity,
	Signal Processing, Sprague-Dawley, Subcellular Fractions, Terminology,
	Therapy, Time Factors, Toxicogenetics, U.S. Gov't, User-Computer
	Interface, 15794164}
}

@article{Doniger2002Predicting,
  author = {Doniger, S. and Hofmann, T. and Yeh, J.},
  title = {Predicting {CNS} permeability of drug molecules: comparison of neural
	network and support vector machine algorithms},
  journal = {J. {C}omput. {B}iol.},
  year = {2002},
  volume = {9},
  pages = {849--864},
  number = {6},
  abstract = {Two different machine-learning algorithms have been used to predict
	the blood-brain barrier permeability of different classes of molecules,
	to develop a method to predict the ability of drug compounds to penetrate
	the {CNS}. {T}he first algorithm is based on a multilayer perceptron
	neural network and the second algorithm uses a support vector machine.
	{B}oth algorithms are trained on an identical data set consisting
	of 179 {CNS} active molecules and 145 {CNS} inactive molecules. {T}he
	training parameters include molecular weight, lipophilicity, hydrogen
	bonding, and other variables that govern the ability of a molecule
	to diffuse through a membrane. {T}he results show that the support
	vector machine outperforms the neural network. {B}ased on over 30
	different validation sets, the {SVM} can predict up to 96\% of the
	molecules correctly, averaging 81.5\% over 30 test sets, which comprised
	of equal numbers of {CNS} positive and negative molecules. {T}his
	is quite favorable when compared with the neural network's average
	performance of 75.7\% with the same 30 test sets. {T}he results of
	the {SVM} algorithm are very encouraging and suggest that a classification
	tool like this one will prove to be a valuable prediction approach.},
  doi = {10.1089/10665270260518317},
  pdf = {../local/Doniger2002Predicting.pdf},
  file = {Doniger2002Predicting.pdf:local/Doniger2002Predicting.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

% Donoho1994Denoising: the key keeps the preprint year; volume 41, issue 3
% of the journal was published in 1995.
@article{Donoho1994Denoising,
  author = {Donoho, David L.},
  title = {De-Noising By Soft-Thresholding},
  journal = {{IEEE} {T}ransactions on {I}nformation {T}heory},
  year = {1995},
  volume = {41},
  pages = {613--627},
  number = {3}
}

@article{Donoho2003Hessian,
  author = {Donoho, D. L. and Grimes, C.},
  title = {Hessian eigenmaps: {L}ocally linear embedding techniques for high-dimensional
	data},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2003},
  volume = {100},
  pages = {5591--5596},
  number = {10},
  doi = {10.1073/pnas.1031596100},
  pdf = {../local/5591.pdf},
  file = {5591.pdf:http\://www.pnas.org/cgi/reprint/100/10/5591.pdf:PDF},
  keywords = {dimred},
  url = {http://www.pnas.org/cgi/content/abstract/100/10/5591}
}

@article{Donoho2000High,
  author = {Donoho, D. L. and Johnstone, I. and Stine, B. and Piatetsky-Shapiro,
	G.},
  title = {High-Dimensional Data Analysis: The Curses and Blessings of Dimensionality},
  journal = {Statistics},
  year = {2000},
  pages = {1--33},
  owner = {jp},
  timestamp = {2012.03.08}
}

@article{Doolan2003Identification,
  author    = {Denise L Doolan and Scott Southwood and Daniel A Freilich and John
	Sidney and Norma L Graber and Lori Shatney and Lolita Bebris and
	Laurence Florens and Carlota Dobano and Adam A Witney and Ettore
	Appella and Stephen L Hoffman and John R Yates and Daniel J Carucci
	and Alessandro Sette},
  title     = {{I}dentification of {P}lasmodium falciparum antigens by antigenic
	analysis of genomic and proteomic data.},
  journal   = {Proc. Natl. Acad. Sci. U. S. A.},
  year      = {2003},
  month     = {Aug},
  volume    = {100},
  number    = {17},
  pages     = {9952--9957},
  doi       = {10.1073/pnas.1633254100},
  url       = {http://dx.doi.org/10.1073/pnas.1633254100},
  pmid      = {12886016},
  pii       = {1633254100},
  keywords  = {plasmodium},
  pdf       = {../local/Doolan2003Identification.pdf},
  file      = {Doolan2003Identification.pdf:local/Doolan2003Identification.pdf:PDF},
  timestamp = {2006.04.13},
  abstract  = {The recent explosion in genomic sequencing has made available a wealth
	of data that can now be analyzed to identify protein antigens, potential
	targets for vaccine development. Here we present, in the context
	of Plasmodium falciparum, a strategy that rapidly identifies target
	antigens from large and complex genomes. Sixteen antigenic proteins
	recognized by volunteers immunized with radiation-attenuated P. falciparum
	sporozoites, but not by mock immunized controls, were identified.
	Several of these were more antigenic than previously identified and
	well characterized P. falciparum-derived protein antigens. The data
	suggest that immune responses to Plasmodium are dispersed on a relatively
	large number of parasite antigens. These studies have implications
	for our understanding of immunodominance and breadth of responses
	to complex pathogens.}
}

@article{Dostie2006Chromosome,
  author = {Jos{\'e}e Dostie and Todd A Richmond and Ramy A Arnaout and Rebecca R
	Selzer and William L Lee and Tracey A Honan and Eric D Rubio and
	Anton Krumm and Justin Lamb and Chad Nusbaum and Roland D Green and
	Job Dekker},
  title = {Chromosome Conformation Capture Carbon Copy ({5C}): a massively parallel
	solution for mapping interactions between genomic elements.},
  journal = {Genome Res},
  year = {2006},
  volume = {16},
  pages = {1299--1309},
  number = {10},
  month = {Oct},
  abstract = {Physical interactions between genetic elements located throughout
	the genome play important roles in gene regulation and can be identified
	with the Chromosome Conformation Capture (3C) methodology. 3C converts
	physical chromatin interactions into specific ligation products,
	which are quantified individually by PCR. Here we present a high-throughput
	3C approach, 3C-Carbon Copy (5C), that employs microarrays or quantitative
	DNA sequencing using 454-technology as detection methods. We applied
	5C to analyze a 400-kb region containing the human beta-globin locus
	and a 100-kb conserved gene desert region. We validated 5C by detection
	of several previously identified looping interactions in the beta-globin
	locus. We also identified a new looping interaction in K562 cells
	between the beta-globin Locus Control Region and the gamma-beta-globin
	intergenic region. Interestingly, this region has been implicated
	in the control of developmental globin gene switching. 5C should
	be widely applicable for large-scale mapping of cis- and trans- interaction
	networks of genomic elements and for the study of higher-order chromosome
	structure.},
  doi = {10.1101/gr.5571506},
  pdf = {../local/Dostie2006Chromosome.pdf},
  file = {Dostie2006Chromosome.pdf:Dostie2006Chromosome.pdf:PDF},
  institution = {Program in Gene Function and Expression and Department of Biochemistry
	and Molecular Pharmacology, University of Massachusetts Medical School,
	Worcester, Massachusetts 01605-0103, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {gr.5571506},
  pmid = {16954542},
  timestamp = {2010.08.11},
  url = {http://dx.doi.org/10.1101/gr.5571506}
}

@article{Dostie2007Chromosome,
  author = {Jos{\'e}e Dostie and Ye Zhan and Job Dekker},
  title = {Chromosome conformation capture carbon copy technology.},
  journal = {Curr Protoc Mol Biol},
  year = {2007},
  volume = {Chapter 21},
  pages = {Unit 21.14},
  month = {Oct},
  abstract = {Chromosome conformation capture (3C) is used to quantify physical
	DNA contacts in vivo at high resolution. 3C was first used in yeast
	to map the spatial chromatin organization of chromosome III, and
	in higher eukaryotes to demonstrate that genomic DNA elements regulate
	target genes by physically interacting with them. 3C has been widely
	adopted for small-scale analysis of functional chromatin interactions
	along (cis) or between (trans) chromosomes. For larger-scale applications,
	chromosome conformation capture carbon copy (5C) combines 3C with
	ligation-mediated amplification (LMA) to simultaneously quantify
	hundreds of thousands of physical DNA contacts by microarray or ultra-high-throughput
	DNA sequencing. 5C allows the mapping of extensive networks of physical
	interactions among large sets of genomic elements throughout the
	genome. Such networks can provide important biological insights,
	e.g., by identifying relationships between regulatory elements and
	their target genes. This unit describes 5C for large-scale analysis
	of cis- and trans-chromatin interactions in mammalian cells.},
  doi = {10.1002/0471142727.mb2114s80},
  institution = {University of Massachusetts Medical School, Worcester, Massachusetts,
	USA.},
  keywords = {Chromosomes, Artificial, Bacterial; Chromosomes, chemistry; DNA Primers,
	metabolism; Molecular Biology, methods; Nucleic Acid Conformation;
	Oligonucleotide Array Sequence Analysis; Polymerase Chain Reaction;
	Sequence Analysis, DNA; Templates, Genetic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {18265398},
  timestamp = {2010.08.11},
  url = {http://dx.doi.org/10.1002/0471142727.mb2114s80}
}

@article{Doubet1992CarbBank,
  author   = {S. Doubet and P. Albersheim},
  title    = {Carb{B}ank.},
  journal  = {Glycobiology},
  year     = {1992},
  month    = {Dec},
  volume   = {2},
  number   = {6},
  pages    = {505},
  keywords = {glycans}
}

@article{Doubet1989Complex,
  author = {S. Doubet and K. Bock and D. Smith and A. Darvill and P. Albersheim},
  title = {The {C}omplex {C}arbohydrate {S}tructure {D}atabase.},
  journal = {Trends {B}iochem {S}ci},
  year = {1989},
  volume = {14},
  pages = {475--477},
  number = {12},
  month = {Dec},
  abstract = {The {C}omplex {C}arbohydrate {S}tructure {D}atabase ({CCSD}) and {C}arb{B}ank,
	an {IBM} {PC}/{AT} (or compatible) database management system, were
	created to provide an information system to meet the needs of people
	interested in carbohydrate science. {T}he {CCSD}, which presently
	contains more than 2000 citations, is expected to double in size
	in the next two years and to include, soon thereafter, all of the
	published structures of carbohydrates larger than disaccharides.},
  keywords = {Carbohydrate Sequence, Carbohydrates, Databases, Factual, Information
	Systems, Molecular Structure},
  pmid = {2623761}
}

@article{Dover2002Methylation,
  author = {Jim Dover and Jessica Schneider and Mary Anne Tawiah-Boateng and
	Adam Wood and Kimberly Dean and Mark Johnston and Ali Shilatifard},
  title = {Methylation of histone {H3} by {COMPASS} requires ubiquitination of histone
	{H2B} by {Rad6}.},
  journal = {J Biol Chem},
  year = {2002},
  volume = {277},
  pages = {28368--28371},
  number = {32},
  month = {Aug},
  abstract = {The DNA of eukaryotes is wrapped around nucleosomes and packaged into
	chromatin. Covalent modifications of the histone proteins that comprise
	the nucleosome alter chromatin structure and have major effects on
	gene expression. Methylation of lysine 4 of histone H3 by COMPASS
	is required for silencing of genes located near chromosome telomeres
	and within the rDNA (Krogan, N. J, Dover, J., Khorrami, S., Greenblatt,
	J. F., Schneider, J., Johnston, M., and Shilatifard, A. (2002) J.
	Biol. Chem. 277, 10753-10755; Briggs, S. D., Bryk, M., Strahl, B.
	D., Cheung, W. L., Davie, J. K., Dent, S. Y., Winston, F., and Allis,
	C. D. (2001) Genes. Dev. 15, 3286-3295). To learn about the mechanism
	of histone methylation, we surveyed the genome of the yeast Saccharomyces
	cerevisiae for genes necessary for this process. By analyzing approximately
	4800 mutant strains, each deleted for a different non-essential gene,
	we discovered that the ubiquitin-conjugating enzyme Rad6 is required
	for methylation of lysine 4 of histone H3. Ubiquitination of histone
	H2B on lysine 123 is the signal for the methylation of histone H3,
	which leads to silencing of genes located near telomeres.},
  doi = {10.1074/jbc.C200348200},
  institution = {Department of Biochemistry, Saint Louis University School of Medicine,
	St. Louis, Missouri 63104, USA.},
  keywords = {DNA, Ribosomal, metabolism; Electrophoresis, Polyacrylamide Gel; Gene
	Silencing; Histones, metabolism; Ligases, metabolism; Lysine, metabolism;
	Methylation; Models, Biological; Mutation; Saccharomyces cerevisiae
	Proteins; Saccharomyces cerevisiae, genetics; Ubiquitin, metabolism;
	Ubiquitin-Conjugating Enzymes},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {C200348200},
  pmid = {12070136},
  timestamp = {2010.11.23},
  url = {http://dx.doi.org/10.1074/jbc.C200348200}
}

@article{Doyle2005PlosBiol,
  author      = {John Doyle and Marie Csete},
  title       = {Motifs, control, and stability.},
  journal     = {PLoS Biol},
  year        = {2005},
  month       = {Nov},
  volume      = {3},
  number      = {11},
  pages       = {e392},
  doi         = {10.1371/journal.pbio.0030392},
  url         = {http://dx.doi.org/10.1371/journal.pbio.0030392},
  pmid        = {16277557},
  pii         = {05-PLBI-P-0948},
  institution = {Department of Control and Dynamical Systems, California Institute
	of Technology, Pasadena, California, United States of America. doyle@caltech.edu},
  keywords    = {Amino Acid Motifs; Bacterial Physiological Phenomena; Bacterial Proteins,
	chemistry; Escherichia coli, metabolism; Genes, Bacterial; Genes,
	Plant; Glycolysis; Heat-Shock Proteins, chemistry; Models, Biological;
	Models, Theoretical; Molecular Chaperones, chemistry; Plant Proteins,
	chemistry; Protein Interaction Mapping; Protein Structure, Tertiary;
	Transcription Factors, chemistry; Transcription, Genetic},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {Andrei Zinovyev},
  timestamp   = {2011.04.08}
}

@article{Doytchinova2004Identifying,
  author    = {Irini A Doytchinova and Pingping Guan and Darren R Flower},
  title     = {Identifying human {MHC} supertypes using bioinformatic methods.},
  journal   = {J. Immunol.},
  year      = {2004},
  month     = {Apr},
  volume    = {172},
  number    = {7},
  pages     = {4314--4323},
  pmid      = {15034046},
  keywords  = {Alleles; Amino Acid Motifs; Binding Sites; Computational Biology;
	DNA Fingerprinting; HLA Antigens; HLA-A Antigens; HLA-B Antigens;
	HLA-C Antigens; Histocompatibility Antigens Class I; Histocompatibility
	Testing; Humans; Multigene Family; Protein Interaction Mapping},
  owner     = {laurent},
  timestamp = {2007.01.03},
  abstract  = {Classification of MHC molecules into supertypes in terms of peptide-binding
	specificities is an important issue, with direct implications for
	the development of epitope-based vaccines with wide population coverage.
	In view of extremely high MHC polymorphism (948 class I and 633 class
	II HLA alleles) the experimental solution of this task is presently
	impossible. In this study, we describe a bioinformatics strategy
	for classifying MHC molecules into supertypes using information drawn
	solely from three-dimensional protein structure. Two chemometric
	techniques-hierarchical clustering and principal component analysis-were
	used independently on a set of 783 HLA class I molecules to identify
	supertypes based on structural similarities and molecular interaction
	fields calculated for the peptide binding site. Eight supertypes
	were defined: A2, A3, A24, B7, B27, B44, C1, and C4. The two techniques
	gave 77\% consensus, i.e., 605 HLA class I alleles were classified
	in the same supertype by both methods. The proposed strategy allowed
	"supertype fingerprints" to be identified. Thus, the A2 supertype
	fingerprint is Tyr(9)/Phe(9), Arg(97), and His(114) or Tyr(116);
	the A3-Tyr(9)/Phe(9)/Ser(9), Ile(97)/Met(97) and Glu(114) or Asp(116);
	the A24-Ser(9) and Met(97); the B7-Asn(63) and Leu(81); the B27-Glu(63)
	and Leu(81); for B44-Ala(81); the C1-Ser(77); and the C4-Asn(77).}
}

@article{Doytchinova2005Towards,
  author = {Irini A Doytchinova and Valerie Walshe and Persephone Borrow and
	Darren R Flower},
  title = {Towards the chemometric dissection of peptide--{HLA-A*0201} binding
	affinity: comparison of local and global {QSAR} models.},
  journal = {J Comput Aided Mol Des},
  year = {2005},
  volume = {19},
  pages = {203--212},
  number = {3},
  month = {Mar},
  abstract = {The affinities of 177 nonameric peptides binding to the HLA-A*0201
	molecule were measured using a FACS-based MHC stabilisation assay
	and analysed using chemometrics. Their structures were described
	by global and local descriptors, QSAR models were derived by genetic
	algorithm, stepwise regression and PLS. The global molecular descriptors
	included molecular connectivity chi indices, kappa shape indices,
	E-state indices, molecular properties like molecular weight and log
	P, and three-dimensional descriptors like polarizability, surface
	area and volume. The local descriptors were of two types. The first
	used a binary string to indicate the presence of each amino acid
	type at each position of the peptide. The second was also position-dependent
	but used five z-scales to describe the main physicochemical properties
	of the amino acids forming the peptides. The models were developed
	using a representative training set of 131 peptides and validated
	using an independent test set of 46 peptides. It was found that the
	global descriptors could not explain the variance in the training
	set nor predict the affinities of the test set accurately. Both types
	of local descriptors gave QSAR models with better explained variance
	and predictive ability. The results suggest that, in their interactions
	with the MHC molecule, the peptide acts as a complicated ensemble
	of multiple amino acids mutually potentiating each other.},
  doi = {10.1007/s10822-005-3993-x},
  keywords = {Algorithms; Amino Acid Sequence; Binding Sites; HLA-A Antigens; Models,
	Theoretical; Oligopeptides; Quantitative Structure-Activity Relationship;
	Regression Analysis},
  owner = {laurent},
  pmid = {16059672},
  timestamp = {2007.08.27},
  url = {http://dx.doi.org/10.1007/s10822-005-3993-x}
}

@article{Dreiseitl2001comparison,
  author = {S. Dreiseitl and L. Ohno-Machado and H. Kittler and S. Vinterbo and
	H. Billhardt and M. Binder},
  title = {A comparison of machine learning methods for the diagnosis of pigmented
	skin lesions.},
  journal = {J {B}iomed {I}nform},
  year = {2001},
  volume = {34},
  pages = {28--36},
  number = {1},
  month = {Feb},
  abstract = {We analyze the discriminatory power of k-nearest neighbors, logistic
	regression, artificial neural networks ({ANN}s), decision tress,
	and support vector machines ({SVM}s) on the task of classifying pigmented
	skin lesions as common nevi, dysplastic nevi, or melanoma. {T}hree
	different classification tasks were used as benchmarks: the dichotomous
	problem of distinguishing common nevi from dysplastic nevi and melanoma,
	the dichotomous problem of distinguishing melanoma from common and
	dysplastic nevi, and the trichotomous problem of correctly distinguishing
	all three classes. {U}sing {ROC} analysis to measure the discriminatory
	power of the methods shows that excellent results for specific classification
	problems in the domain of pigmented skin lesions can be achieved
	with machine-learning methods. {O}n both dichotomous and trichotomous
	tasks, logistic regression, {ANN}s, and {SVM}s performed on about
	the same level, with k-nearest neighbors and decision trees performing
	worse.},
  doi = {10.1006/jbin.2001.1004},
  pdf = {../local/Dreiseitl2001comparison.pdf},
  file = {Dreiseitl2001comparison.pdf:local/Dreiseitl2001comparison.pdf:PDF},
  keywords = {Algorithms, Amino Acid Sequence, Artificial Intelligence, Biological,
	Cell Compartmentation, Comparative Study, Computer Simulation, Computer-Assisted,
	Decision Trees, Diagnosis, Discriminant Analysis, Humans, Logistic
	Models, Melanoma, Models, Neural Networks (Computer), Nevus, Non-U.S.
	Gov't, Organelles, P.H.S., Pigmented, Predictive Value of Tests,
	Proteins, Reproducibility of Results, Research Support, Skin Diseases,
	Skin Neoplasms, Skin Pigmentation, U.S. Gov't},
  pmid = {11376540},
  url = {http://dx.doi.org/10.1006/jbin.2001.1004}
}

@article{Drews2000Drug,
  author = {J. Drews},
  title = {Drug {D}iscovery: {A} {H}istorical {P}erspective},
  journal = {Science},
  year = {2000},
  volume = {287},
  pages = {1960--1964},
  number = {5460},
  month = {Mar},
  doi = {10.1126/science.287.5460.1960},
  pdf = {../local/Drews2000Drug.pdf},
  file = {Drews2000Drug.pdf:Drews2000Drug.pdf:PDF},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.15},
  url = {http://dx.doi.org/10.1126/science.287.5460.1960}
}

@article{Driel2006text-mining,
  author = {van Driel, M. A. and Bruggeman, J. and Vriend, G. and Brunner, H. G.
	and Leunissen, J. A. M.},
  title = {A text-mining analysis of the human phenome.},
  journal = {Eur. J. Hum. Genet.},
  year = {2006},
  volume = {14},
  pages = {535--542},
  number = {5},
  month = {May},
  abstract = {A number of large-scale efforts are underway to define the relationships
	between genes and proteins in various species. But, few attempts
	have been made to systematically classify all such relationships
	at the phenotype level. Also, it is unknown whether such a phenotype
	map would carry biologically meaningful information. We have used
	text mining to classify over 5000 human phenotypes contained in the
	Online Mendelian Inheritance in Man database. We find that similarity
	between phenotypes reflects biological modules of interacting functionally
	related genes. These similarities are positively correlated with
	a number of measures of gene function, including relatedness at the
	level of protein sequence, protein motifs, functional annotation,
	and direct protein-protein interaction. Phenotype grouping reflects
	the modular nature of human disease genetics. Thus, phenotype mapping
	may be used to predict candidate genes for diseases as well as functional
	relations between genes and proteins. Such predictions will further
	improve if a unified system of phenotype descriptors is developed.
	The phenotype similarity data are accessible through a web interface
	at http://www.cmbi.ru.nl/MimMiner/.},
  doi = {10.1038/sj.ejhg.5201585},
  institution = {Centre for Molecular and Biomolecular Informatics, Radboud University
	Nijmegen, Toernooiveld 1, 6525ED Nijmegen, the Netherlands.},
  keywords = {Chromosome Mapping; Databases, Genetic; Genetic Predisposition to
	Disease; Genetic Vectors; Genome, Human; Genotype; Humans; Models,
	Genetic; Models, Statistical; Multigene Family; Phenotype},
  owner = {mordelet},
  pii = {5201585},
  pmid = {16493445},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1038/sj.ejhg.5201585}
}

@article{Driel2003new,
  author = {van Driel, M. and Cuelenaere, K. and Kemmeren, P. P. C. W. and Leunissen,
	J. A. M. and Brunner, H. G.},
  title = {A new web-based data mining tool for the identification of candidate
	genes for human genetic disorders.},
  journal = {Eur. J. Hum. Genet.},
  year = {2003},
  volume = {11},
  pages = {57--63},
  number = {1},
  month = {Jan},
  abstract = {To identify the gene underlying a human genetic disorder can be difficult
	and time-consuming. Typically, positional data delimit a chromosomal
	region that contains between 20 and 200 genes. The choice then lies
	between sequencing large numbers of genes, or setting priorities
	by combining positional data with available expression and phenotype
	data, contained in different internet databases. This process of
	examining positional candidates for possible functional clues may
	be performed in many different ways, depending on the investigator's
	knowledge and experience. Here, we report on a new tool called the
	GeneSeeker, which gathers and combines positional data and expression/phenotypic
	data in an automated way from nine different web-based databases.
	This results in a quick overview of interesting candidate genes in
	the region of interest. The GeneSeeker system is built in a modular
	fashion allowing for easy addition or removal of databases if required.
	Databases are searched directly through the web, which obviates the
	need for data warehousing. In order to evaluate the GeneSeeker tool,
	we analysed syndromes with known genesis. For each of 10 syndromes
	the GeneSeeker programme generated a shortlist that contained a significantly
	reduced number of candidate genes from the critical region, yet still
	contained the causative gene. On average, a list of 163 genes based
	on position alone was reduced to a more manageable list of 22 genes
	based on position and expression or phenotype information. We are
	currently expanding the tool by adding other databases. The GeneSeeker
	is available via the web-interface (http://www.cmbi.kun.nl/GeneSeeker/).},
  doi = {10.1038/sj.ejhg.5200918},
  institution = {Centre for Molecular and Biomolecular Informatics, University of
	Nijmegen, The Netherlands. M.vanDriel@cmbi.kun.nl},
  keywords = {Computational Biology; Databases, Genetic; Databases, Nucleic Acid;
	Gene Expression; Genetic Diseases, Inborn; Humans; Internet; Noonan
	Syndrome; Software},
  owner = {mordelet},
  pii = {5200918},
  pmid = {12529706},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1038/sj.ejhg.5200918}
}

@article{Drier2011two,
  author = {Drier, Y. and Domany, E.},
  title = {Do two machine-learning based prognostic signatures for breast cancer
	capture the same biological processes?},
  journal = {PLoS ONE},
  year = {2011},
  volume = {6},
  pages = {e17795},
  number = {3},
  doi = {10.1371/journal.pone.0017795},
  publisher = {Public Library of Science}
}

@article{Dror2005Accurate,
  author = {Dror, G. and Sorek, R. and Shamir, R.},
  title = {Accurate identification of alternatively spliced exons using support
	vector machine},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {897--901},
  number = {7},
  month = {Apr},
  abstract = {Motivation: {A}lternative splicing is a major component of the regulation
	acting on mammalian transcriptomes. {I}t is estimated that over half
	of all human genes have more than one splice variant. {P}revious
	studies have shown that alternatively spliced exons possess several
	features that distinguish them from constitutively spliced ones.
	{R}ecently, we have demonstrated that such features can be used to
	distinguish alternative from constitutive exons. {I}n the current
	study we use advanced machine learning methods to generate robust
	alternative exons classifier. {R}esults: {W}e extracted several hundred
	local sequence features of constitutive as well as alternative exons.
	{U}sing feature selection methods we find seven attributes that are
	dominant for the task of classification. {S}everal less informative
	features help to slightly increase the performance of the classifier.
	{T}he classifier achieves a true positive rate of 50\% for a false
	positive rate of 0.5\%. {T}his result enables one to reliably identify
	alternatively spliced exons in exon databases that are believed to
	be dominated by constitutive exons. {A}vailability: {U}pon request
	from the authors.},
  doi = {10.1093/bioinformatics/bti132},
  pdf = {../local/Dror2005Accurate.pdf},
  file = {Dror2005Accurate.pdf:local/Dror2005Accurate.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/bti132v1}
}

@article{Dubey2005Support,
  author = {Anshul Dubey and Matthew J Realff and Jay H Lee and Andreas S Bommarius},
  title = {Support vector machines for learning to identify the critical positions
	of a protein.},
  journal = {J {T}heor {B}iol},
  year = {2005},
  volume = {234},
  pages = {351--361},
  number = {3},
  month = {Jun},
  abstract = {A method for identifying the positions in the amino acid sequence,
	which are critical for the catalytic activity of a protein using
	support vector machines ({SVM}s) is introduced and analysed. {SVM}s
	are supported by an efficient learning algorithm and can utilize
	some prior knowledge about the structure of the problem. {T}he amino
	acid sequences of the variants of a protein, created by inducing
	mutations, along with their fitness are required as input data by
	the method to predict its critical positions. {T}o investigate the
	performance of this algorithm, variants of the beta-lactamase enzyme
	were created in silico using simulations of both mutagenesis and
	recombination protocols. {R}esults from literature on beta-lactamase
	were used to test the accuracy of this method. {I}t was also compared
	with the results from a simple search algorithm. {T}he algorithm
	was also shown to be able to predict critical positions that can
	tolerate two different amino acids and retain function.},
  doi = {10.1016/j.jtbi.2004.11.037},
  pdf = {../local/Dubey2005Support.pdf},
  file = {Dubey2005Support.pdf:local/Dubey2005Support.pdf:PDF},
  keywords = {biosvm},
  pii = {S0022-5193(04)00585-5},
  url = {http://dx.doi.org/10.1016/j.jtbi.2004.11.037}
}

@article{Dubus2006In,
  author = {Dubus, E. and Ijjaali, I. and Petitet, F. and Michel, A.},
  title = {{I}n {S}ilico {C}lassification of h{ERG} {C}hannel {B}lockers: a
	{K}nowledge-{B}ased {S}trategy.},
  journal = {ChemMedChem},
  year = {2006},
  volume = {1},
  pages = {622--630},
  number = {6},
  month = {Jun},
  abstract = {The blockage of the hERG potassium channel by a wide number of diverse
	compounds has become a major pharmacological safety concern as it
	can lead to sudden cardiac death. In silico models can be potent
	tools to screen out potential hERG blockers as early as possible
	during the drug-discovery process. In this study, predictive models
	developed using the recursive partitioning method and created using
	diverse datasets from 203 molecules tested on the hERG channel are
	described. The first model was built with hERG compounds grouped
	into two classes, with a separation limit set at an IC(50) value
	of 1 $\mu$M, and reaches an overall accuracy of 81 \%. The misclassification
	of molecules having a range of activity between 1 and 10 $\mu$M led
	to the generation of a tri-class model able to correctly classify
	high, moderate, and weak hERG blockers with an overall accuracy of
	90 \%. Another model, constructed with the high and weak hERG-blocker
	categories, successfully increases the accuracy to 96 \%. The results
	reported herein indicate that a combination of precise, knowledge
	management resources and powerful modeling tools are invaluable to
	assessing potential cardiotoxic side effects related to hERG blockage.},
  doi = {10.1002/cmdc.200500099},
  keywords = {herg chemoinformatics},
  pmid = {16892402},
  timestamp = {2006.10.06},
  url = {http://dx.doi.org/10.1002/cmdc.200500099}
}

@inproceedings{Duchenne2009Tensor,
  author = {Duchenne, O. and Bach, F. and Kweon, I. and Ponce, J.},
  title = {A tensor-based algorithm for high-order graph matching},
  booktitle = {Proc. IEEE Conference on Computer Vision and Pattern Recognition
	CVPR 2009},
  year = {2009},
  pages = {1980--1987},
  month = {Jun},
  doi = {10.1109/CVPRW.2009.5206619},
  owner = {michael},
  timestamp = {2009.11.17}
}

@inproceedings{Duchi2008Efficient,
  author = {J. Duchi and S. Shalev-Shwartz and Y. Singer and T. Chandra},
  title = {Efficient projections onto the {L1}-ball for learning in high dimensions},
  booktitle = {Proceedings of the 25th {A}nnual {I}nternational {C}onference on
	{M}achine {L}earning ({ICML} 2008)},
  year = {2008},
  editor = {Andrew McCallum and Sam Roweis},
  pages = {272--279},
  publisher = {Omnipress},
  location = {Helsinki, Finland}
}

@book{Duda2001Pattern,
  author    = {R. O. Duda and P. E. Hart and D. G. Stork},
  title     = {Pattern Classification},
  publisher = {Wiley-Interscience},
  year      = {2001},
  owner     = {mahe},
  timestamp = {2006.09.07}
}

@article{Dudoit2002Comparison,
  author    = {Dudoit, S. and Fridlyand, J. and Speed, T.},
  title     = {Comparison of discrimination methods for classification of tumors
	using gene expression data},
  journal   = {J. Am. Stat. Assoc.},
  year      = {2002},
  volume    = {97},
  pages     = {77--87},
  owner     = {jp},
  timestamp = {2012.03.04}
}

@article{Dudoit2002Statistical,
  author    = {Dudoit, S. and Yang, Y. H. and Callow, M. J. and Speed, T. P.},
  title     = {Statistical methods for identifying differentially expressed genes
	in replicated {cDNA} microarray experiments},
  journal   = {Statistica Sinica},
  year      = {2002},
  volume    = {12},
  pages     = {111--139},
  pdf       = {../local/Dudoit2002Statistical.pdf},
  file      = {Dudoit2002Statistical.pdf:Dudoit2002Statistical.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.10.03}
}

@article{Dunson2008The,
  author = {Dunson, D. and Xue, Y. and Carin, L.},
  title = {The Matrix Stick-Breaking Process: Flexible {Bayes} Meta-Analysis},
  journal = {Journal of the {A}merican {S}tatistical {A}ssociation},
  year = {2008},
  volume = {103},
  pages = {317--327},
  number = {481},
  month = {Mar},
  abstract = {In analyzing data from multiple related studies, it often is of interest
	to borrow information across studies and to cluster similar studies.
	Although parametric hierarchical models are commonly used, of concern
	is sensitivity to the form chosen for the random-effects distribution.
	A Dirichlet process (DP) prior can allow the distribution to be unknown,
	while clustering studies; however, the DP does not allow local clustering
	of studies with respect to a subset of the coefficients without making
	independence assumptions. Motivated by this problem, we propose a
	matrix stick-breaking process (MSBP) as a prior for a matrix of random
	probability measures. Properties of the MSBP are considered, and
	methods are developed for posterior computation using Markov chain
	Monte Carlo. Using the MSBP as a prior for a matrix of study-specific
	regression coefficients, we demonstrate advantages over parametric
	modeling in simulated examples. The methods are further illustrated
	using a multinational uterotrophic bioassay study.},
  url = {http://www.ingentaconnect.com/content/asa/jasa/2008/00000103/00000481/art00036}
}

@book{Durbin1998Biological,
  author    = {Durbin, R. and Eddy, S. and Krogh, A. and Mitchison, G.},
  title     = {Biological {S}equence {A}nalysis: {P}robabilistic {M}odels of {P}roteins
	and {N}ucleic {A}cids},
  publisher = {Cambridge University Press},
  year      = {1998}
}

@article{Donnes2002Prediction,
  author = {D{\"o}nnes, P. and Elofsson, A.},
  title = {Prediction of {MHC} class {I} binding peptides, using {SVMHC}},
  journal = {B{MC} {B}ioinformatics},
  year = {2002},
  volume = {3},
  pages = {25},
  number = {1},
  month = {Sep},
  abstract = {Background {T}-cells are key players in regulating a specific immune
	response. {A}ctivation of cytotoxic {T}-cells requires recognition
	of specific peptides bound to {M}ajor {H}istocompatibility {C}omplex
	({MHC}) class {I} molecules. {MHC}-peptide complexes are potential
	tools for diagnosis and treatment of pathogens and cancer, as well
	as for the development of peptide vaccines. {O}nly one in 100 to
	200 potential binders actually binds to a certain {MHC} molecule,
	therefore a good prediction method for {MHC} class {I} binding peptides
	can reduce the number of candidate binders that need to be synthesized
	and tested. {R}esults {H}ere, we present a novel approach, {SVMHC},
	based on support vector machines to predict the binding of peptides
	to {MHC} class {I} molecules. {T}his method seems to perform slightly
	better than two profile based methods, {SYFPEITHI} and {HLA}\_{BIND}.
	{T}he implementation of {SVMHC} is quite simple and does not involve
	any manual steps, therefore as more data become available it is trivial
	to provide prediction for more {MHC} types. {SVMHC} currently contains
	prediction for 26 {MHC} class {I} types from the {MHCPEP} database
	or alternatively 6 {MHC} class {I} types from the higher quality
	{SYFPEITHI} database. {T}he prediction models for these {MHC} types
	are implemented in a public web service available at http://www.sbc.su.se/svmhc/.
	{C}onclusions {P}rediction of {MHC} class {I} binding peptides using
	{S}upport {V}ector {M}achines, shows high performance and is easy
	to apply to a large number of {MHC} class {I} types. {A}s more peptide
	data are put into {MHC} databases, {SVMHC} can easily be updated
	to give prediction for additional {MHC} class {I} types. {W}e suggest
	that the number of binding peptides needed for {SVM} training is
	at least 20 sequences.},
  doi = {10.1186/1471-2105-3-25},
  pdf = {../local/Donnes2002Prediction.pdf},
  file = {Donnes2002Prediction.pdf:local/Donnes2002Prediction.pdf:PDF},
  keywords = {biosvm immunoinformatics},
  owner = {vert},
  url = {http://www.biomedcentral.com/1471-2105/3/25/abstract}
}

@article{Donnes2005Integrated,
  author = {D{\"o}nnes, P. and Kohlbacher, O.},
  title = {Integrated modeling of the major events in the {MHC} class {I} antigen
	processing pathway.},
  journal = {Protein {S}ci.},
  year = {2005},
  volume = {14},
  pages = {2132--2140},
  month = {Jun},
  abstract = {Rational design of epitope-driven vaccines is a key goal of immunoinformatics.
	{T}ypically, candidate selection relies on the prediction of {MHC}-peptide
	binding only, as this is known to be the most selective step in the
	{MHC} class {I} antigen processing pathway. {H}owever, proteasomal
	cleavage and transport by the transporter associated with antigen
	processing ({TAP}) are essential steps in antigen processing as well.
	{W}hile prediction methods exist for the individual steps, no method
	has yet offered an integrated prediction of all three major processing
	events. {H}ere we present {WAPP}, a method combining prediction of
	proteasomal cleavage, {TAP} transport, and {MHC} binding into a single
	prediction system. {T}he proteasomal cleavage site prediction employs
	a new matrix-based method that is based on experimentally verified
	proteasomal cleavage sites. {S}upport vector regression is used for
	predicting peptides transported by {TAP}. {MHC} binding is the last
	step in the antigen processing pathway and was predicted using a
	support vector machine method, {SVMHC}. {T}he individual methods
	are combined in a filtering approach mimicking the natural processing
	pathway. {WAPP} thus predicts peptides that are cleaved by the proteasome
	at the {C} terminus, transported by {TAP}, and show significant affinity
	to {MHC} class {I} molecules. {T}his results in a decrease in false
	positive rates compared to {MHC} binding prediction alone. {C}ompared
	to prediction of {MHC} binding only, we report an increased overall
	accuracy and a lower rate of false positive predictions for the {HLA}-{A}*0201,
	{HLA}-{B}*2705, {HLA}-{A}*01, and {HLA}-{A}*03 alleles using {WAPP}.
	{T}he method is available online through our prediction server at
	http://www-bs.informatik.uni-tuebingen.de/{WAPP}.},
  doi = {10.1110/ps.051352405},
  pdf = {../local/Donnes2005Integrated.pdf},
  file = {Donnes2005Integrated.pdf:local/Donnes2005Integrated.pdf:PDF},
  keywords = {biosvm immunoinformatics},
  pii = {ps.051352405},
  url = {http://dx.doi.org/10.1110/ps.051352405}
}

@article{Diaz-Uriarte2006Gene,
  author = {D{\'\i}az-Uriarte, R. and {Alvarez de Andr{\'e}s}, S.},
  title = {Gene selection and classification of microarray data using random
	forest},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {3},
  number = {1},
  doi = {10.1186/1471-2105-7-3},
  publisher = {BioMed Central Ltd}
}

@article{Early1998Polychemotherapy,
  author = {{Early Breast Cancer Trialists' Collaborative Group}},
  title = {Polychemotherapy for early breast cancer: an overview of the randomised
	trials},
  journal = {Lancet},
  year = {1998},
  volume = {352},
  pages = {930--942},
  number = {9132},
  month = {Sep},
  abstract = {There have been many randomised trials of adjuvant prolonged polychemotherapy
	among women with early breast cancer, and an updated overview of
	their results is presented. In 1995, information was sought on each
	woman in any randomised trial that began before 1990 and involved
	treatment groups that differed only with respect to the chemotherapy
	regimens that were being compared. Analyses involved about 18,000
	women in 47 trials of prolonged polychemotherapy versus no chemotherapy,
	about 6000 in 11 trials of longer versus shorter polychemotherapy,
	and about 6000 in 11 trials of anthracycline-containing regimens
	versus CMF (cyclophosphamide, methotrexate, and fluorouracil). For
	recurrence, polychemotherapy produced substantial and highly significant
	proportional reductions both among women aged under 50 at randomisation
	(35\% [SD 4] reduction; 2p<0.00001) and among those aged 50-69 (20\%
	[SD 3] reduction; 2p<0.00001); few women aged 70 or over had been
	studied. For mortality, the reductions were also significant both
	among women aged under 50 (27\% [SD 5] reduction; 2p<0.00001) and
	among those aged 50-69 (11\% [SD 3] reduction; 2p=0.0001). The recurrence
	reductions emerged chiefly during the first 5 years of follow-up,
	whereas the difference in survival grew throughout the first 10 years.
	After standardisation for age and time since randomisation, the proportional
	reductions in risk were similar for women with node-negative and
	node-positive disease. Applying the proportional mortality reduction
	observed in all women aged under 50 at randomisation would typically
	change a 10-year survival of 71\% for those with node-negative disease
	to 78\% (an absolute benefit of 7\%), and of 42\% for those with
	node-positive disease to 53\% (an absolute benefit of 11\%). The
	smaller proportional mortality reduction observed in all women aged
	50-69 at randomisation would translate into smaller absolute benefits,
	changing a 10-year survival of 67\% for those with node-negative
	disease to 69\% (an absolute gain of 2\%) and of 46\% for those with
	node-positive disease to 49\% (an absolute gain of 3\%). The age-specific
	benefits of polychemotherapy appeared to be largely irrespective
	of menopausal status at presentation, oestrogen receptor status of
	the primary tumour, and of whether adjuvant tamoxifen had been given.
	In terms of other outcomes, there was a reduction of about one-fifth
	(2p=0.05) in contralateral breast cancer, which has already been
	included in the analyses of recurrence, and no apparent adverse effect
	on deaths from causes other than breast cancer (death rate ratio
	0.89 [SD 0.09]). The directly randomised comparisons of longer versus
	shorter durations of polychemotherapy did not indicate any survival
	advantage with the use of more than about 3-6 months of polychemotherapy.
	By contrast, directly randomised comparisons did suggest that, compared
	with CMF alone, the anthracycline-containing regimens studied produced
	somewhat greater effects on recurrence (2p=0.006) and mortality (69\%
	vs 72\% 5-year survival; log-rank 2p=0.02). But this comparison is
	one of many that could have been selected for emphasis, the 99\%
	CI reaches zero, and the results of several of the relevant trials
	are not yet available. Some months of adjuvant polychemotherapy (eg,
	with CMF or an anthracycline-containing regimen) typically produces
	an absolute improvement of about 7-11\% in 10-year survival for women
	aged under 50 at presentation with early breast cancer, and of about
	2-3\% for those aged 50-69 (unless their prognosis is likely to be
	extremely good even without such treatment). Treatment decisions
	involve consideration not only of improvements in cancer recurrence
	and survival but also of adverse side-effects of treatment, and this
	report makes no recommendations as to who should or should not be
	treated.},
  doi = {10.1016/S0140-6736(98)03301-7},
  keywords = {Adult; Aged; Antineoplastic Combined Chemotherapy Protocols, therapeutic
	use; Breast Neoplasms, chemistry/drug therapy/mortality; Chemotherapy,
	Adjuvant; Drug Administration Schedule; Female; Humans; Lymphatic
	Metastasis; Menopause; Middle Aged; Neoplasm Recurrence, Local; Randomized
	Controlled Trials as Topic; Receptors, Estrogen, analysis; Tamoxifen,
	administration \& dosage},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0140673698033017},
  pmid = {9752815},
  timestamp = {2012.03.01}
}

@article{Edwards2000Escherichia,
  author = {Edwards, J. S. and Palsson, B. O.},
  title = {The \emph{{E}scherichia coli} {MG1655} in silico metabolic genotype:
	its definition, characteristics, and capabilities.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2000},
  volume = {97},
  pages = {5528--5533},
  number = {10},
  month = {May},
  abstract = {The Escherichia coli MG1655 genome has been completely sequenced.
	The annotated sequence, biochemical information, and other information
	were used to reconstruct the E. coli metabolic map. The stoichiometric
	coefficients for each metabolic enzyme in the E. coli metabolic map
	were assembled to construct a genome-specific stoichiometric matrix.
	The E. coli stoichiometric matrix was used to define the system's
	characteristics and the capabilities of E. coli metabolism. The effects
	of gene deletions in the central metabolic pathways on the ability
	of the in silico metabolic network to support growth were assessed,
	and the in silico predictions were compared with experimental observations.
	It was shown that based on stoichiometric and capacity constraints
	the in silico analysis was able to qualitatively predict the growth
	potential of mutant strains in 86\% of the cases examined. Herein,
	it is demonstrated that the synthesis of in silico metabolic genotypes
	based on genomic, biochemical, and strain-specific information is
	possible, and that systems analysis methods are available to analyze
	and interpret the metabolic phenotype.},
  doi = {10.1073/pnas.97.10.5528},
  pdf = {../local/Edwards2000Escherichia.pdf},
  file = {Edwards2000Escherichia.pdf:Edwards2000Escherichia.pdf:PDF},
  institution = {Department of Bioengineering, University of California, San Diego,
	La Jolla, CA 92093-0412, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {97/10/5528},
  pmid = {10805808},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1073/pnas.97.10.5528}
}

@article{Efcavitch2010Single-molecule,
  author = {Efcavitch, J. William and Thompson, John F.},
  title = {Single-molecule {DNA} analysis.},
  journal = {Annu Rev Anal Chem (Palo Alto Calif)},
  year = {2010},
  volume = {3},
  pages = {109--128},
  abstract = {The ability to detect single molecules of DNA or RNA has led to an
	extremely rich area of exploration of the single most important biomolecule
	in nature. In cases in which the nucleic acid molecules are tethered
	to a solid support, confined to a channel, or simply allowed to diffuse
	into a detection volume, novel techniques have been developed to
	manipulate the DNA and to examine properties such as structural dynamics
	and protein-DNA interactions. Beyond the analysis of the properties
	of nucleic acids themselves, single-molecule detection has enabled
	dramatic improvements in the throughput of DNA sequencing and holds
	promise for continuing progress. Both optical and nonoptical detection
	methods that use surfaces, nanopores, and zero-mode waveguides have
	been attempted, and one optically based instrument is already commercially
	available. The breadth of literature related to single-molecule DNA
	analysis is vast; this review focuses on a survey of efforts in molecular
	dynamics and nucleic acid sequencing.},
  doi = {10.1146/annurev.anchem.111808.073558},
  institution = {Helicos BioSciences Corporation, Cambridge, Massachusetts 02139,
	USA. jwefcavitch@helicosbio.com},
  owner = {phupe},
  pmid = {20636036},
  timestamp = {2010.08.20},
  url = {http://dx.doi.org/10.1146/annurev.anchem.111808.073558}
}

@article{Efron1979Bootstrap,
  author = {Efron, B.},
  title = {Bootstrap methods: another look at the jackknife},
  journal = {Ann. Stat.},
  publisher = {Institute of Mathematical Statistics},
  volume = {7},
  number = {1},
  pages = {1--26},
  year = {1979},
  file = {Efron1979Bootstrap.pdf:Efron1979Bootstrap.pdf:PDF},
  pdf = {../local/Efron1979Bootstrap.pdf}
}

@article{Efron2004Least,
  author = {Efron, B. and Hastie, T. and Johnstone, I. and Tibshirani, R.},
  title = {Least angle regression},
  journal = {Ann. Stat.},
  volume = {32},
  number = {2},
  pages = {407--499},
  year = {2004},
  file = {Efron2004Least.pdf:Efron2004Least.pdf:PDF},
  pdf = {../local/Efron2004Least.pdf},
  timestamp = {2006.07.08}
}

@article{Efroni2007Identification,
  author = {Efroni, S. and Schaefer, C. F. and Buetow, K. H.},
  title = {Identification of key processes underlying cancer phenotypes using
	biologic pathway analysis.},
  journal = {PLoS One},
  volume = {2},
  number = {5},
  pages = {e425},
  year = {2007},
  doi = {10.1371/journal.pone.0000425},
  url = {http://dx.doi.org/10.1371/journal.pone.0000425},
  pmid = {17487280},
  institution = {National Cancer Institute Center for Bioinformatics, Rockville, Maryland,
	United States of America.},
  language = {eng},
  medline-pst = {epublish},
  abstract = {Cancer is recognized to be a family of gene-based diseases whose causes
	are to be found in disruptions of basic biologic processes. An increasingly
	deep catalogue of canonical networks details the specific molecular
	interaction of genes and their products. However, mapping of disease
	phenotypes to alterations of these networks of interactions is accomplished
	indirectly and non-systematically. Here we objectively identify pathways
	associated with malignancy, staging, and outcome in cancer through
	application of an analytic approach that systematically evaluates
	differences in the activity and consistency of interactions within
	canonical biologic processes. Using large collections of publicly
	accessible genome-wide gene expression, we identify small, common
	sets of pathways - Trka Receptor, Apoptosis response to DNA Damage,
	Ceramide, Telomerase, CD40L and Calcineurin - whose differences robustly
	distinguish diverse tumor types from corresponding normal samples,
	predict tumor grade, and distinguish phenotypes such as estrogen
	receptor status and p53 mutation state. Pathways identified through
	this analysis perform as well or better than phenotypes used in the
	original studies in predicting cancer outcome. This approach provides
	a means to use genome-wide characterizations to map key biological
	processes to important clinical features in disease.},
  file = {Efroni2007Identification.pdf:Efroni2007Identification.pdf:PDF},
  pdf = {../local/Efroni2007Identification.pdf},
  owner = {jp},
  timestamp = {2011.10.05}
}

@article{Egan2000Prediction,
  author = {Egan, W. J. and Merz, K. M. and Baldwin, J. J.},
  title = {Prediction of drug absorption using multivariate statistics},
  journal = {J. Med. Chem.},
  year = {2000},
  volume = {43},
  pages = {3867--3877},
  number = {21},
  month = {Oct},
  abstract = {Literature data on compounds both well- and poorly-absorbed in humans
	were used to build a statistical pattern recognition model of passive
	intestinal absorption. Robust outlier detection was utilized to analyze
	the well-absorbed compounds, some of which were intermingled with
	the poorly-absorbed compounds in the model space. Outliers were identified
	as being actively transported. The descriptors chosen for inclusion
	in the model were PSA and AlogP98, based on consideration of the
	physical processes involved in membrane permeability and the interrelationships
	and redundancies between available descriptors. These descriptors
	are quite straightforward for a medicinal chemist to interpret, enhancing
	the utility of the model. Molecular weight, while often used in passive
	absorption models, was shown to be superfluous, as it is already
	a component of both PSA and AlogP98. Extensive validation of the
	model on hundreds of known orally delivered drugs, ``drug-like'' molecules,
	and Pharmacopeia, Inc. compounds, which had been assayed for Caco-2
	cell permeability, demonstrated a good rate of successful predictions
	(74-92\%, depending on the dataset and exact criterion used).},
  doi = {10.1021/jm000292e},
  keywords = {chemogenomics},
  owner = {laurent},
  pii = {jm000292e},
  pmid = {11052792},
  timestamp = {2008.07.16}
}

@article{Egolf1993Prediction,
  author = {Egolf, L. M. and Jurs, P. C.},
  title = {Prediction of {B}oiling {P}oints of {O}rganic {H}eterocyclic {C}ompounds
	{U}sing {R}egression and {N}eural {N}etworks {T}echniques},
  journal = {J Chem Inf Comput Sci},
  year = {1993},
  volume = {33},
  pages = {616--635},
  owner = {mahe},
  timestamp = {2006.09.07}
}

@article{Ehlers2005NBS1,
  author = {Ehlers, Justis P. and Harbour, J. William},
  title = {{NBS1} expression as a prognostic marker in uveal melanoma.},
  journal = {Clin. {C}ancer {R}es.},
  year = {2005},
  volume = {11},
  pages = {1849--1853},
  number = {5},
  month = {Mar},
  abstract = {P{URPOSE}: {U}p to half of uveal melanoma patients die of metastatic
	disease. {T}reatment of the primary eye tumor does not improve survival
	in high-risk patients due to occult micrometastatic disease, which
	is present at the time of eye tumor diagnosis but is not detected
	and treated until months to years later. {H}ere, we use microarray
	gene expression data to identify a new prognostic marker. {EXPERIMENTAL}
	{DESIGN}: {M}icroarray gene expression profiles were analyzed in
	25 primary uveal melanomas. {T}umors were ranked by support vector
	machine ({SVM}) and by cytologic severity. {N}bs1 protein expression
	was assessed by quantitative immunohistochemistry in 49 primary uveal
	melanomas. {S}urvival was assessed using {K}aplan-{M}eier life-table
	analysis. {RESULTS}: {E}xpression of the {N}ijmegen breakage syndrome
	({NBS}1) gene correlated strongly with {SVM} and cytologic tumor
	rankings ({P} < 0.0001). {F}urther, immunohistochemistry expression
	of the {N}bs1 protein correlated strongly with both {SVM} and cytologic
	rankings ({P} < 0.0001). {T}he 6-year actuarial survival was 100\%
	in patients with low immunohistochemistry expression of {N}bs1 and
	22\% in those with high {N}bs1 expression ({P} = 0.01). {CONCLUSIONS}:
	{NBS}1 is a strong predictor of uveal melanoma survival and potentially
	could be used as a clinical marker for guiding clinical management.},
  doi = {10.1158/1078-0432.CCR-04-2054},
  pdf = {../local/Ehlers2005NBS1.pdf},
  file = {Ehlers2005NBS1.pdf:local/Ehlers2005NBS1.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acid Sequence, Amino Acids,
	Analysis of Variance, Animals, Area Under Curve, Artifacts, Automated,
	Bacteriophage T4, Base Sequence, Biological, Birefringence, Brain
	Chemistry, Brain Neoplasms, Cell Cycle Proteins, Comparative Study,
	Computational Biology, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Databases, Decision Trees, Diagnosis, Diagnostic Imaging,
	Diagnostic Techniques, Discriminant Analysis, Evolution, Extramural,
	Face, Female, Gene Expression Profiling, Genetic, Glaucoma, Humans,
	Immunohistochemistry, Intraocular Pressure, Lasers, Least-Squares
	Analysis, Likelihood Functions, Magnetic Resonance Imaging, Magnetic
	Resonance Spectroscopy, Male, Markov Chains, Melanoma, Middle Aged,
	Models, Molecular, Mutation, N.I.H., Nerve Fibers, Non-P.H.S., Non-U.S.
	Gov't, Nuclear Proteins, Nucleic Acid, Nucleic Acid Conformation,
	Numerical Analysis, Oligonucleotide Array Sequence Analysis, Ophthalmological,
	Optic Nerve Diseases, Optical Coherence, P.H.S., Pattern Recognition,
	Photic Stimulation, Polymorphism, Prognosis, Prospective Studies,
	Protein, Protein Structure, Proteins, RNA, ROC Curve, Regression
	Analysis, Reproducibility of Results, Research Support, Retinal Ganglion
	Cells, Secondary, Sensitivity and Specificity, Sequence Analysis,
	Single Nucleotide, Single-Stranded Conformational, Software, Statistics,
	Survival Analysis, Tertiary, Tomography, Tumor Markers, U.S. Gov't,
	Untranslated, Uveal Neoplasms, Visual Fields, beta-Lactamases, 15756009},
  pii = {11/5/1849},
  url = {http://clincancerres.aacrjournals.org/cgi/content/abstract/11/5/1849}
}

@article{Eid2009Real,
  author = {John Eid and Adrian Fehr and Jeremy Gray and Khai Luong and John
	Lyle and Geoff Otto and Paul Peluso and David Rank and Primo Baybayan
	and Brad Bettman and Arkadiusz Bibillo and Keith Bjornson and Bidhan
	Chaudhuri and Frederick Christians and Ronald Cicero and Sonya Clark
	and Ravindra Dalal and Alex Dewinter and John Dixon and Mathieu Foquet
	and Alfred Gaertner and Paul Hardenbol and Cheryl Heiner and Kevin
	Hester and David Holden and Gregory Kearns and Xiangxu Kong and Ronald
	Kuse and Yves Lacroix and Steven Lin and Paul Lundquist and Congcong
	Ma and Patrick Marks and Mark Maxham and Devon Murphy and Insil Park
	and Thang Pham and Michael Phillips and Joy Roy and Robert Sebra
	and Gene Shen and Jon Sorenson and Austin Tomaney and Kevin Travers
	and Mark Trulson and John Vieceli and Jeffrey Wegener and Dawn Wu
	and Alicia Yang and Denis Zaccarin and Peter Zhao and Frank Zhong
	and Jonas Korlach and Stephen Turner},
  title = {Real-time {DNA} sequencing from single polymerase molecules.},
  journal = {Science},
  year = {2009},
  volume = {323},
  pages = {133--138},
  number = {5910},
  month = {Jan},
  abstract = {We present single-molecule, real-time sequencing data obtained from
	a DNA polymerase performing uninterrupted template-directed synthesis
	using four distinguishable fluorescently labeled deoxyribonucleoside
	triphosphates (dNTPs). We detected the temporal order of their enzymatic
	incorporation into a growing DNA strand with zero-mode waveguide
	nanostructure arrays, which provide optical observation volume confinement
	and enable parallel, simultaneous detection of thousands of single-molecule
	sequencing reactions. Conjugation of fluorophores to the terminal
	phosphate moiety of the dNTPs allows continuous observation of DNA
	synthesis over thousands of bases without steric hindrance. The data
	report directly on polymerase dynamics, revealing distinct polymerization
	states and pause sites corresponding to DNA secondary structure.
	Sequence data were aligned with the known reference sequence to assay
	biophysical parameters of polymerization for each template position.
	Consensus sequences were generated from the single-molecule reads
	at 15-fold coverage, showing a median accuracy of 99.3\%, with no
	systematic error beyond fluorophore-dependent error rates.},
  doi = {10.1126/science.1162986},
  institution = {Pacific Biosciences, 1505 Adams Drive, Menlo Park, CA 94025, USA.},
  keywords = {Base Sequence; Consensus Sequence; DNA, Circular, chemistry; DNA,
	Single-Stranded, chemistry; DNA, biosynthesis; DNA-Directed DNA Polymerase,
	metabolism; Deoxyribonucleotides, metabolism; Enzymes, Immobilized;
	Fluorescent Dyes; Kinetics; Nanostructures; Sequence Analysis, DNA,
	methods; Spectrometry, Fluorescence},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {1162986},
  pmid = {19023044},
  timestamp = {2010.07.28},
  url = {http://dx.doi.org/10.1126/science.1162986}
}

@article{Ein-Dor2005Outcome,
  author = {Ein-Dor, L. and Kela, I. and Getz, G. and Givol, D. and Domany, E.},
  title = {Outcome signature genes in breast cancer: is there a unique set?},
  journal = {Bioinformatics},
  volume = {21},
  number = {2},
  pages = {171--178},
  month = {Jan},
  year = {2005},
  doi = {10.1093/bioinformatics/bth469},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth469},
  pii = {bth469},
  pmid = {15308542},
  institution = {Department of Physics of Complex Systems, Weizmann Institute of Science
	Rehovot 76100, Israel.},
  keywords = {breastcancer, microarray, featureselection},
  language = {eng},
  medline-pst = {ppublish},
  abstract = {MOTIVATION: Predicting the metastatic potential of primary malignant
	tissues has direct bearing on the choice of therapy. Several microarray
	studies yielded gene sets whose expression profiles successfully
	predicted survival. Nevertheless, the overlap between these gene
	sets is almost zero. Such small overlaps were observed also in other
	complex diseases, and the variables that could account for the differences
	had evoked a wide interest. One of the main open questions in this
	context is whether the disparity can be attributed only to trivial
	reasons such as different technologies, different patients and different
	types of analyses. RESULTS: To answer this question, we concentrated
	on a single breast cancer dataset, and analyzed it by a single method,
	the one which was used by van't Veer et al. to produce a set of outcome-predictive
	genes. We showed that, in fact, the resulting set of genes is not
	unique; it is strongly influenced by the subset of patients used
	for gene selection. Many equally predictive lists could have been
	produced from the same analysis. Three main properties of the data
	explain this sensitivity: (1) many genes are correlated with survival;
	(2) the differences between these correlations are small; (3) the
	correlations fluctuate strongly when measured over different subsets
	of patients. A possible biological explanation for these properties
	is discussed. CONTACT: eytan.domany@weizmann.ac.il SUPPLEMENTARY
	INFORMATION: http://www.weizmann.ac.il/physics/complex/compphys/downloads/liate/},
  file = {Ein-Dor2005Outcome.pdf:Ein-Dor2005Outcome.pdf:PDF},
  pdf = {../local/Ein-Dor2005Outcome.pdf},
  owner = {jp},
  timestamp = {2010.10.12}
}

@article{Ein-Dor2006Thousands,
  author = {Ein-Dor, L. and Zuk, O. and Domany, E.},
  title = {Thousands of samples are needed to generate a robust gene list for
	predicting outcome in cancer},
  journal = {Proc. Natl. Acad. Sci. USA},
  volume = {103},
  number = {15},
  pages = {5923--5928},
  year = {2006},
  doi = {10.1073/pnas.0601231103},
  url = {http://dx.doi.org/10.1073/pnas.0601231103},
  file = {Ein-Dor2006Thousands.pdf:Ein-Dor2006Thousands.pdf:PDF},
  pdf = {../local/Ein-Dor2006Thousands.pdf},
  owner = {jp},
  timestamp = {2011.01.12}
}

@article{Einmahl1992Generalized,
  author = {Einmahl, J. H. J. and Mason, D. M.},
  title = {Generalized {Q}uantile {P}rocesses},
  journal = {Ann. {S}tat.},
  year = {1992},
  volume = {20},
  pages = {1062--1078},
  number = {2},
  month = {Jun},
  pdf = {../local/Einmahl1992Generalized.pdf},
  file = {Einmahl1992Generalized.pdf:local/Einmahl1992Generalized.pdf:PDF},
  url = {http://links.jstor.org/sici?sici=0090-5364%28199206%2920%3A2%3C1062%3AGQP%3E2.0.CO%3B2-6}
}

@article{Eisen1998Cluster,
  author = {Eisen, M. B. and Spellman, P. T. and Brown, P. O. and Botstein, D.},
  title = {Cluster analysis and display of genome-wide expression patterns},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {1998},
  volume = {95},
  pages = {14863--14868},
  number = {25},
  month = {Dec},
  doi = {10.1073/pnas.95.25.14863},
  pdf = {../local/Eisen1998Cluster.pdf},
  file = {Eisen1998Cluster.pdf:Eisen1998Cluster.pdf:PDF},
  subject = {microarray},
  url = {http://www.pnas.org/cgi/reprint/95/25/14863.pdf}
}

@article{Eissing2004Bistability,
  author = {Eissing, T. and Conzelmann, H. and Gilles, E. D. and Allg{\"o}wer, F.
	and Bullinger, E. and Scheurich, P.},
  title = {Bistability Analyses of a Caspase Activation Model for Receptor-induced
	Apoptosis},
  journal = {J. Biol. Chem.},
  year = {2004},
  volume = {279},
  pages = {36892--36897},
  number = {35},
  abstract = {Apoptosis is an important physiological process crucially involved
	in development and homeostasis of multicellular organisms. Although
	the major signaling pathways have been unraveled, a detailed mechanistic
	understanding of the complex underlying network remains elusive.
	We have translated here the current knowledge of the molecular mechanisms
	of the death-receptor-activated caspase cascade into a mathematical
	model. A reduction down to the apoptotic core machinery enables the
	application of analytical mathematical methods to evaluate the system
	behavior within a wide range of parameters. Using parameter values
	from the literature, the model reveals an unstable status of survival
	indicating the need for further control. Based on recent publications
	we tested one additional regulatory mechanism at the level of initiator
	caspase activation and demonstrated that the resulting system displays
	desired characteristics such as bistability. In addition, the results
	from our model studies allowed us to reconcile the fast kinetics
	of caspase 3 activation observed at the single cell level with the
	much slower kinetics found at the level of a cell population.},
  doi = {10.1074/jbc.M404893200},
  eprint = {http://www.jbc.org/content/279/35/36892.full.pdf+html},
  pdf = {../local/Eissing2004Bistability.pdf},
  file = {Eissing2004Bistability.pdf:Eissing2004Bistability.pdf:PDF},
  keywords = {csbcbook},
  url = {http://www.jbc.org/content/279/35/36892.abstract}
}

@article{Eissing2007Response,
  author = {Eissing, T. and Waldherr, S. and Allg{\"o}wer, F. and Scheurich, P. and
	Bullinger, E.},
  title = {Response to Bistability in Apoptosis: Roles of {Bax}, {Bcl-2}, and Mitochondrial
	Permeability Transition Pores},
  journal = {Biophys. J.},
  year = {2007},
  volume = {92},
  pages = {3332--3334},
  number = {9},
  doi = {10.1529/biophysj.106.100362},
  pdf = {../local/Eissing2007Response.pdf},
  file = {Eissing2007Response.pdf:Eissing2007Response.pdf:PDF},
  issn = {0006-3495},
  keywords = {csbcbook},
  url = {http://www.sciencedirect.com/science/article/B94RW-4TR4KB1-1B/2/8c4f12571fa01055b7dea12984318e9f}
}

@article{Eissing2007Responsea,
  author = {Eissing, T. and Waldherr, S. and Allg{\"o}wer, F. and Scheurich, P. and
	Bullinger, E.},
  title = {Response to Bistability in Apoptosis: Roles of {Bax}, {Bcl-2}, and Mitochondrial
	Permeability Transition Pores},
  journal = {Biophys. J.},
  year = {2007},
  volume = {92},
  pages = {3332--3334},
  number = {9},
  doi = {10.1529/biophysj.106.100362},
  issn = {0006-3495},
  keywords = {csbcbook},
  owner = {jp},
  timestamp = {2012.05.11},
  url = {http://www.sciencedirect.com/science/article/B94RW-4TR4KB1-1B/2/8c4f12571fa01055b7dea12984318e9f}
}

@article{Ekins2003In,
  author = {Ekins, S.},
  title = {{I}n silico approaches to predicting drug metabolism, toxicology
	and beyond.},
  journal = {Biochem. Soc. Trans.},
  year = {2003},
  volume = {31},
  pages = {611--614},
  number = {Pt 3},
  month = {Jun},
  abstract = {The discovery and optimization of new drug candidates is becoming
	increasingly reliant upon the combination of experimental and computational
	approaches related to drug metabolism, toxicology and general biopharmaceutical
	properties. With the considerable output of high-throughput assays
	for cytochrome-P450-mediated drug-drug interactions, metabolic stability
	and assays for toxicology, we have orders of magnitude more data
	that will facilitate model building. A recursive partitioning model
	for human liver microsomal metabolic stability based on over 800
	structurally diverse molecules was used to predict molecules with
	known log in vitro clearance data (Spearman's rho -0.64, P <0.0001).
	In addition, with solely published data, a quantitative structure-activity
	relationship for 66 inhibitors of the potassium channel human ether-a-gogo
	(hERG) that has been implicated in the failure of a number of recent
	drugs has been generated. This model has been validated with further
	published data for 25 molecules (Spearman's rho 0.83, P <0.0001).
	If continued value is to be realized from these types of computational
	models, there needs to be some applied research on their validation
	and optimization with new data. Some relatively simple approaches
	may have value when it comes to combining data from multiple models
	in order to improve and focus drug discovery on the molecules most
	likely to succeed.},
  doi = {10.1042/bst0310611},
  keywords = {herg},
  pmid = {12773166},
  timestamp = {2007.03.27},
  url = {http://dx.doi.org/10.1042/bst0310611}
}

@article{Ekins2002Towards,
  author = {Ekins, S. and Boulanger, B. and Swaan, P. W. and Hupcey, M. A. Z.},
  title = {{T}owards a new age of virtual {ADME}/{TOX} and multidimensional
	drug discovery.},
  journal = {J Comput Aided Mol Des},
  year = {2002},
  volume = {16},
  pages = {381--401},
  number = {5--6},
  abstract = {With the continual pressure to ensure follow-up molecules to billion
	dollar blockbuster drugs, there is a hurdle in profitability and
	growth for pharmaceutical companies in the next decades. With each
	success and failure we increasingly appreciate that a key to the
	success of synthesized molecules through the research and development
	process is the possession of drug-like properties. These properties
	include an adequate bioactivity as well as adequate solubility, an
	ability to cross critical membranes (intestinal and sometimes blood-brain
	barrier), reasonable metabolic stability and of course safety in
	humans. Dependent on the therapeutic area being investigated it might
	also be desirable to avoid certain enzymes or transporters to circumvent
	potential drug-drug interactions. It may also be important to limit
	the induction of these same proteins that can result in further toxicities.
	We have clearly moved the assessment of in vitro absorption, distribution,
	metabolism, excretion and toxicity (ADME/TOX) parameters much earlier
	in the discovery organization than a decade ago with the inclusion
	of higher throughput systems. We are also now faced with huge amounts
	of ADME/TOX data for each molecule that need interpretation and also
	provide a valuable resource for generating predictive computational
	models for future drug discovery. The present review aims to show
	what tools exist today for visualizing and modeling ADME/TOX data,
	what tools need to be developed, and how both the present and future
	tools are valuable for virtual filtering using ADME/TOX and bioactivity
	properties in parallel as a viable addition to present practices.},
  keywords = {ATP-Binding Cassette Transporters, Algorithms, Animals, Biological,
	Biological Availability, Computer Simulation, Drug Design, Drug Evaluation,
	Drug Industry, Gene Expression Profiling, Humans, Models, Organic
	Anion Transporters, P.H.S., Pharmaceutical, Pharmaceutical Preparations,
	Pharmacogenetics, Pharmacokinetics, Preclinical, Proteomics, Research
	Support, Software, Systems Biology, Technology, Toxicity Tests, U.S.
	Gov't, 12489686},
  owner = {mahe},
  pmid = {12489686},
  timestamp = {2006.08.16}
}

@article{Ekins2002Three-dimensional,
  author    = {Ekins, S. and Crumb, W. J. and Sarazan, R. D. and Wikel, J. H. and
	Wrighton, S. A.},
  title     = {{T}hree-dimensional quantitative structure-activity relationship
	for inhibition of human ether-a-go-go-related gene potassium channel.},
  journal   = {J. Pharmacol. Exp. Ther.},
  year      = {2002},
  month     = {May},
  volume    = {301},
  number    = {2},
  pages     = {427--434},
  abstract  = {The protein product of the human ether-a-go-go gene (hERG) is a potassium
	channel that when inhibited by some drugs may lead to cardiac arrhythmia.
	Previously, a three-dimensional quantitative structure-activity relationship
	(3D-QSAR) pharmacophore model was constructed using Catalyst with
	in vitro inhibition data for antipsychotic agents. The rationale
	of the current study was to use a combination of in vitro and in
	silico technologies to further test the pharmacophore model and qualitatively
	predict whether molecules are likely to inhibit this potassium channel.
	These predictions were assessed with the experimental data using
	the Spearman's rho rank correlation. The antipsychotic-based hERG
	inhibitor model produced a statistically significant Spearman's rho
	of 0.71 for 11 molecules. In addition, 15 molecules from the literature
	were used as a further test set and were also well ranked by the
	same model with a statistically significant Spearman's rho value
	of 0.76. A Catalyst General hERG pharmacophore model was generated
	with these literature molecules, which contained four hydrophobic
	features and one positive ionizable feature. Linear regression of
	log-transformed observed versus predicted IC(50) values for this
	training set resulted in an r(2) value of 0.90. The model based on
	literature data was evaluated with the in vitro data generated for
	the original 22 molecules (including the antipsychotics) and illustrated
	a significant Spearman's rho of 0.77. Thus, the Catalyst 3D-QSAR
	approach provides useful qualitative predictions for test set molecules.
	The model based on literature data therefore provides a potentially
	valuable tool for discovery chemistry as future molecules may be
	synthesized that are less likely to inhibit hERG based on information
	provided by a pharmacophore for the inhibition of this potassium
	channel.},
  keywords  = {herg},
  pmid      = {11961040},
  timestamp = {2007.03.27}
}

@article{El-Naqa2004similarity,
  author = {El-Naqa, I. and Yang, Y. and Galatsanos, N. P. and Nishikawa, R.
	M. and Wernick, M. N.},
  title = {A similarity learning approach to content-based image retrieval:
	application to digital mammography.},
  journal = {I{EEE} {T}rans {M}ed {I}maging},
  year = {2004},
  volume = {23},
  pages = {1233--1244},
  number = {10},
  month = {Oct},
  abstract = {In this paper, we describe an approach to content-based retrieval
	of medical images from a database, and provide a preliminary demonstration
	of our approach as applied to retrieval of digital mammograms. {C}ontent-based
	image retrieval ({CBIR}) refers to the retrieval of images from a
	database using information derived from the images themselves, rather
	than solely from accompanying text indices. {I}n the medical-imaging
	context, the ultimate aim of {CBIR} is to provide radiologists with
	a diagnostic aid in the form of a display of relevant past cases,
	along with proven pathology and other suitable information. {CBIR}
	may also be useful as a training tool for medical students and residents.
	{T}he goal of information retrieval is to recall from a database
	information that is relevant to the user's query. {T}he most challenging
	aspect of {CBIR} is the definition of relevance (similarity), which
	is used to guide the retrieval machine. {I}n this paper, we pursue
	a new approach, in which similarity is learned from training examples
	provided by human observers. {S}pecifically, we explore the use of
	neural networks and support vector machines to predict the user's
	notion of similarity. {W}ithin this framework we propose using a
	hierarchal learning approach, which consists of a cascade of a binary
	classifier and a regression module to optimize retrieval effectiveness
	and efficiency. {W}e also explore how to incorporate online human
	interaction to achieve relevance feedback in this learning framework.
	{O}ur experiments are based on a database consisting of 76 mammograms,
	all of which contain clustered microcalcifications ({MC}s). {O}ur
	goal is to retrieve mammogram images containing similar {MC} clusters
	to that in a query. {T}he performance of the retrieval system is
	evaluated using precision-recall curves computed using a cross-validation
	procedure. {O}ur experimental results demonstrate that: 1) the learning
	framework can accurately predict the perceptual similarity reported
	by human observers, thereby serving as a basis for {CBIR}; 2) the
	learning-based framework can significantly outperform a simple distance-based
	similarity metric; 3) the use of the hierarchical two-stage network
	can improve retrieval performance; and 4) relevance feedback can
	be effectively incorporated into this learning framework to achieve
	improvement in retrieval precision based on online interaction with
	users; and 5) the retrieved images by the network can have predicting
	value for the disease condition of the query.}
}

@article{El-Naqa2002support,
  author = {El-Naqa, I. and Yang, Y. and Wernick, M. N. and Galatsanos, N. P.
	and Nishikawa, R. M.},
  title = {A support vector machine approach for detection of microcalcifications.},
  journal = {I{EEE} {T}rans {M}ed {I}maging},
  year = {2002},
  volume = {21},
  pages = {1552--1563},
  number = {12},
  month = {Dec},
  abstract = {In this paper, we investigate an approach based on support vector
	machines ({SVM}s) for detection of microcalcification ({MC}) clusters
	in digital mammograms, and propose a successive enhancement learning
	scheme for improved performance. {SVM} is a machine-learning method,
	based on the principle of structural risk minimization, which performs
	well when applied to data outside the training set. {W}e formulate
	{MC} detection as a supervised-learning problem and apply {SVM} to
	develop the detection algorithm. {W}e use the {SVM} to detect at
	each location in the image whether an {MC} is present or not. {W}e
	tested the proposed method using a database of 76 clinical mammograms
	containing 1120 {MC}s. {W}e use free-response receiver operating
	characteristic curves to evaluate detection performance, and compare
	the proposed algorithm with several existing methods. {I}n our experiments,
	the proposed {SVM} framework outperformed all the other methods tested.
	{I}n particular, a sensitivity as high as 94\% was achieved by the
	{SVM} method at an error rate of one false-positive cluster per image.
	{T}he ability of {SVM} to out perform several well-known methods
	developed for the widely studied problem of {MC} detection suggests
	that {SVM} is a promising technique for object detection in a medical
	imaging application.}
}

@article{Elbashir2001Duplexes,
  author    = {Elbashir, S. M. and Harborth, J. and Lendeckel, W. and Yalcin, A.
	and Weber, K. and Tuschl, T.},
  title     = {{D}uplexes of 21-nucleotide {RNA}s mediate {RNA} interference in
	cultured mammalian cells.},
  journal   = {Nature},
  year      = {2001},
  month     = {May},
  volume    = {411},
  number    = {6836},
  pages     = {494--498},
  abstract  = {RNA interference (RNAi) is the process of sequence-specific, post-transcriptional
	gene silencing in animals and plants, initiated by double-stranded
	RNA (dsRNA) that is homologous in sequence to the silenced gene.
	The mediators of sequence-specific messenger RNA degradation are
	21- and 22-nucleotide small interfering RNAs (siRNAs) generated by
	ribonuclease III cleavage from longer dsRNAs. Here we show that 21-nucleotide
	siRNA duplexes specifically suppress expression of endogenous and
	heterologous genes in different mammalian cell lines, including human
	embryonic kidney (293) and HeLa cells. Therefore, 21-nucleotide siRNA
	duplexes provide a new tool for studying gene function in mammalian
	cells and may eventually be used as gene-specific therapeutics.},
  doi       = {10.1038/35078107},
  url       = {http://dx.doi.org/10.1038/35078107},
  pii       = {35078107},
  pmid      = {11373658},
  pdf       = {../local/Elbashir2001Duplexes.pdf},
  file      = {Elbashir2001Duplexes.pdf:Elbashir2001Duplexes.pdf:PDF},
  keywords  = {sirna},
  owner     = {vert},
  timestamp = {2006.03.28}
}

@article{Elbashir2001RNA,
  author = {Elbashir, S. M. and Lendeckel, W. and Tuschl, T.},
  title = {{RNA} interference is mediated by 21- and 22-nucleotide {RNA}s.},
  journal = {Genes {D}ev.},
  year = {2001},
  volume = {15},
  pages = {188--200},
  number = {2},
  month = {Jan},
  abstract = {Double-stranded {RNA} (ds{RNA}) induces sequence-specific posttranscriptional
	gene silencing in many organisms by a process known as {RNA} interference
	({RNA}i). {U}sing a {D}rosophila in vitro system, we demonstrate
	that 21- and 22-nt {RNA} fragments are the sequence-specific mediators
	of {RNA}i. {T}he short interfering {RNA}s (si{RNA}s) are generated
	by an {RN}ase {III}-like processing reaction from long ds{RNA}. {C}hemically
	synthesized si{RNA} duplexes with overhanging 3' ends mediate efficient
	target {RNA} cleavage in the lysate, and the cleavage site is located
	near the center of the region spanned by the guiding si{RNA}. {F}urthermore,
	we provide evidence that the direction of ds{RNA} processing determines
	whether sense or antisense target {RNA} can be cleaved by the si{RNA}-protein
	complex.},
  keywords = {sirna}
}

@article{Elfilali2006ITTACA,
  author      = {Elfilali, A. and Lair, S. and Verbeke, C. and La Rosa, P. and Radvanyi,
	F. and Barillot, E.},
  title       = {{ITTACA}: a new database for integrated tumor transcriptome array
	and clinical data analysis.},
  journal     = {Nucleic Acids Res.},
  year        = {2006},
  month       = {Jan},
  volume      = {34},
  number      = {Database issue},
  pages       = {D613--D616},
  abstract    = {Transcriptome microarrays have become one of the tools of choice for
	investigating the genes involved in tumorigenesis and tumor progression,
	as well as finding new biomarkers and gene expression signatures
	for the diagnosis and prognosis of cancer. Here, we describe a new
	database for Integrated Tumor Transcriptome Array and Clinical data
	Analysis (ITTACA). ITTACA centralizes public datasets containing
	both gene expression and clinical data. ITTACA currently focuses
	on the types of cancer that are of particular interest to research
	teams at Institut Curie: breast carcinoma, bladder carcinoma and
	uveal melanoma. A web interface allows users to carry out different
	class comparison analyses, including the comparison of expression
	distribution profiles, tests for differential expression and patient
	survival analyses. ITTACA is complementary to other databases, such
	as GEO and SMD, because it offers a better integration of clinical
	data and different functionalities. It also offers more options for
	class comparison analyses when compared with similar projects such
	as Oncomine. For example, users can define their own patient groups
	according to clinical data or gene expression levels. This added
	flexibility and the user-friendly web interface makes ITTACA especially
	useful for comparing personal results with the results in the existing
	literature. ITTACA is accessible online at http://bioinfo.curie.fr/ittaca.},
  doi         = {10.1093/nar/gkj022},
  url         = {http://dx.doi.org/10.1093/nar/gkj022},
  pii         = {34/suppl_1/D613},
  pmid        = {16381943},
  institution = {Institut Curie, Service Bioinformatique, 26 rue d'Ulm, Paris, 75248
	cedex 05, France.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2010.10.13}
}

@inproceedings{Elkan2008Learning,
  author    = {Elkan, C. and Noto, K.},
  title     = {Learning classifiers from only positive and unlabeled data},
  booktitle = {KDD '08: Proceeding of the 14th ACM SIGKDD international conference
	on Knowledge discovery and data mining},
  year      = {2008},
  pages     = {213--220},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/1401890.1401920},
  url       = {http://dx.doi.org/10.1145/1401890.1401920},
  ee        = {http://doi.acm.org/10.1145/1401890.1401920},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  pdf       = {../local/Elkan2008Learning.pdf},
  file      = {Elkan2008Learning.pdf:Elkan2008Learning.pdf:PDF},
  keywords  = {PUlearning},
  owner     = {fantine},
  timestamp = {2009.06.09}
}

@article{Elkon2003Genome-wide,
  author      = {Elkon, R. and Linhart, C. and Sharan, R. and Shamir, R. and Shiloh,
	Y.},
  title       = {Genome-wide in silico identification of transcriptional regulators
	controlling the cell cycle in human cells.},
  journal     = {Genome Res.},
  year        = {2003},
  month       = {May},
  volume      = {13},
  number      = {5},
  pages       = {773--780},
  abstract    = {Dissection of regulatory networks that control gene transcription
	is one of the greatest challenges of functional genomics. Using human
	genomic sequences, models for binding sites of known transcription
	factors, and gene expression data, we demonstrate that the reverse
	engineering approach, which infers regulatory mechanisms from gene
	expression patterns, can reveal transcriptional networks in human
	cells. To date, such methodologies were successfully demonstrated
	only in prokaryotes and low eukaryotes. We developed computational
	methods for identifying putative binding sites of transcription factors
	and for evaluating the statistical significance of their prevalence
	in a given set of promoters. Focusing on transcriptional mechanisms
	that control cell cycle progression, our computational analyses revealed
	eight transcription factors whose binding sites are significantly
	overrepresented in promoters of genes whose expression is cell-cycle-dependent.
	The enrichment of some of these factors is specific to certain phases
	of the cell cycle. In addition, several pairs of these transcription
	factors show a significant co-occurrence rate in cell-cycle-regulated
	promoters. Each such pair indicates functional cooperation between
	its members in regulating the transcriptional program associated
	with cell cycle progression. The methods presented here are general
	and can be applied to the analysis of transcriptional networks controlling
	any biological process.},
  doi         = {10.1101/gr.947203},
  url         = {http://dx.doi.org/10.1101/gr.947203},
  pii         = {13/5/773},
  pmid        = {12727897},
  institution = {The David and Inez Myers Laboratory for Genetic Research, Department
	of Human Genetics, Sackler School of Medicine, and School of Computer
	Science, Tel Aviv University, Tel Aviv 69978, Israel.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2011.09.21}
}

@article{Elliott2009Current,
  author = {Elliott, M. and Parker, C. and Smith, D. and Borchers, C. H.},
  title = {Current Trends in Quantitative Proteomics},
  journal = {The Journal of Mass Spectrometry},
  year = {2009},
  volume = {44},
  pages = {1637--1660},
  owner = {phupe},
  timestamp = {2010.08.19}
}

@article{Ellis1992Pathological,
  author = {Ellis, I. O. and Galea, M. and Broughton, N. and Locker, A. and Blamey,
	R. W. and Elston, C. W.},
  title = {Pathological prognostic factors in breast cancer. II. Histological
	type. Relationship with survival in a large study with long-term
	follow-up.},
  journal = {Histopathology},
  year = {1992},
  volume = {20},
  pages = {479--489},
  number = {6},
  month = {Jun},
  abstract = {The histological tumour type determined by current criteria has been
	investigated in a consecutive series of 1621 women with primary operable
	breast carcinoma, presenting between 1973 and 1987. All women underwent
	definitive surgery with node biopsy and none received adjuvant systemic
	therapy. Special types, tubular, invasive cribriform and mucinous,
	with a very favourable prognosis can be identified. A common type
	of tumour recognized by our group and designated tubular mixed carcinoma
	is shown to be prognostically distinct from carcinomas of no special
	type; it has a characteristic histological appearance and is the
	third most common type in this series. Analysis of subtypes of lobular
	carcinoma confirms differing prognoses. The classical, tubulo-lobular
	and lobular mixed types are associated with a better prognosis than
	carcinomas of no special type; this is not so for the solid variant.
	Tubulo-lobular carcinoma in particular has an extremely good prognosis
	similar to tumours included in the 'special type' category above.
	Neither medullary carcinoma nor atypical medullary carcinoma are
	found to carry a survival advantage over carcinomas of no special
	type. The results confirm that histological typing of human breast
	carcinoma can provide useful prognostic information.},
  doi = {10.1111/j.1365-2559.1992.tb01032.x},
  pdf = {../local/Ellis1992Pathological.pdf},
  file = {Ellis1992Pathological.pdf:Ellis1992Pathological.pdf:PDF},
  institution = {Department of Histopathology, City Hospital, Nottingham, UK.},
  keywords = {csbcbook, csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {1607149},
  timestamp = {2009.10.18},
  url = {http://dx.doi.org/10.1111/j.1365-2559.1992.tb01032.x}
}

@article{Elston1991Pathological,
  author = {Elston, C. W. and Ellis, I. O.},
  title = {Pathological prognostic factors in breast cancer. I. The value of
	histological grade in breast cancer: experience from a large study
	with long-term follow-up.},
  journal = {Histopathology},
  year = {1991},
  volume = {19},
  pages = {403--410},
  number = {5},
  month = {Nov},
  abstract = {Morphological assessment of the degree of differentiation has been
	shown in numerous studies to provide useful prognostic information
	in breast cancer, but until recently histological grading has not
	been accepted as a routine procedure, mainly because of perceived
	problems with reproducibility and consistency. In the Nottingham/Tenovus
	Primary Breast Cancer Study the most commonly used method, described
	by Bloom \& Richardson, has been modified in order to make the criteria
	more objective. The revised technique involves semiquantitative evaluation
	of three morphological features--the percentage of tubule formation,
	the degree of nuclear pleomorphism and an accurate mitotic count
	using a defined field area. A numerical scoring system is used and
	the overall grade is derived from a summation of individual scores
	for the three variables: three grades of differentiation are used.
	Since 1973, over 2200 patients with primary operable breast cancer
	have been entered into a study of multiple prognostic factors. Histological
	grade, assessed in 1831 patients, shows a very strong correlation
	with prognosis; patients with grade I tumours have a significantly
	better survival than those with grade II and III tumours (P less
	than 0.0001). These results demonstrate that this method for histological
	grading provides important prognostic information and, if the grading
	protocol is followed consistently, reproducible results can be obtained.
	Histological grade forms part of the multifactorial Nottingham prognostic
	index, together with tumour size and lymph node stage, which is used
	to stratify individual patients for appropriate therapy.},
  doi = {10.1111/j.1365-2559.1991.tb00229.x},
  pdf = {../local/Elston1991Pathological.pdf},
  file = {Elston1991Pathological.pdf:Elston1991Pathological.pdf:PDF},
  institution = {Department of Histopathology, City Hospital, Nottingham, UK.},
  keywords = {csbcbook, csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {1757079},
  timestamp = {2009.10.18},
  url = {http://dx.doi.org/10.1111/j.1365-2559.1991.tb00229.x}
}

@book{Emsley1998Elements,
  title = {The {E}lements},
  publisher = {Oxford University Press},
  year = {1998},
  author = {Emsley, John},
  edition = {Third},
  owner = {mahe},
  timestamp = {2006.02.03}
}

@article{Engelhardt2005Protein,
  author    = {Engelhardt, B. E. and Jordan, M. I. and Muratore, K. E. and Brenner,
	S. E.},
  title     = {Protein {M}olecular {F}unction {P}rediction by {B}ayesian {P}hylogenomics.},
  journal   = {P{L}o{S} {C}omput. {B}iol.},
  year      = {2005},
  month     = {Oct},
  volume    = {1},
  number    = {5},
  pages     = {e45},
  abstract  = {We present a statistical graphical model to infer specific molecular
	function for unannotated protein sequences using homology. {B}ased
	on phylogenomic principles, {SIFTER} ({S}tatistical {I}nference of
	{F}unction {T}hrough {E}volutionary {R}elationships) accurately predicts
	molecular function for members of a protein family given a reconciled
	phylogeny and available function annotations, even when the data
	are sparse or noisy. {O}ur method produced specific and consistent
	molecular function predictions across 100 {P}fam families in comparison
	to the {G}ene {O}ntology annotation database, {BLAST}, {GO}tcha,
	and {O}rthostrapper. {W}e performed a more detailed exploration of
	functional predictions on the adenosine-5'-monophosphate/adenosine
	deaminase family and the lactate/malate dehydrogenase family, in
	the former case comparing the predictions against a gold standard
	set of published functional characterizations. {G}iven function annotations
	for 3\% of the proteins in the deaminase family, {SIFTER} achieves
	96\% accuracy in predicting molecular function for experimentally
	characterized proteins as reported in the literature. {T}he accuracy
	of {SIFTER} on this dataset is a significant improvement over other
	currently available methods such as {BLAST} (75\%), {G}ene{Q}uiz
	(64\%), {GO}tcha (89\%), and {O}rthostrapper (11\%). {W}e also experimentally
	characterized the adenosine deaminase from {P}lasmodium falciparum,
	confirming {SIFTER}'s prediction. {T}he results illustrate the predictive
	power of exploiting a statistical model of function evolution in
	phylogenomic problems. {A} software implementation of {SIFTER} is
	available from the authors.},
  doi       = {10.1371/journal.pcbi.0010045},
  url       = {http://dx.doi.org/10.1371/journal.pcbi.0010045},
  pmid      = {16217548},
  pdf       = {../local/Engelhardt2005Protein.pdf},
  file      = {Engelhardt2005Protein.pdf:local/Engelhardt2005Protein.pdf:PDF},
  keywords  = {biogm},
  owner     = {vert},
  timestamp = {2006.01.18}
}

@article{Eroes2004Comparison,
  author = {Er{\"o}s, D. and K{\'e}ri, G. and K{\"o}vesdi, I. and Sz{\'a}ntai-Kis, C. and
	M{\'e}sz{\'a}ros, G. and Orfi, L.},
  title = {{C}omparison of predictive ability of water solubility {QSPR} models
	generated by {MLR}, {PLS} and {ANN} methods.},
  journal = {Mini Rev Med Chem},
  year = {2004},
  volume = {4},
  pages = {167--177},
  number = {2},
  month = {Feb},
  abstract = {ADME/Tox computational screening is one of the most hot topics of
	modern drug research. About one half of the potential drug candidates
	fail because of poor ADME/Tox properties. Since the experimental
	determination of water solubility is time-consuming also, reliable
	computational predictions are needed for the pre-selection of acceptable
	"drug-like" compounds from diverse combinatorial libraries. Recently
	many successful attempts were made for predicting water solubility
	of compounds. A comprehensive review of previously developed water
	solubility calculation methods is presented here, followed by the
	description of the solubility prediction method designed and used
	in our laboratory. We have selected carefully 1381 compounds from
	scientific publications in a unified database and used this dataset
	in the calculations. The externally validated models were based on
	calculated descriptors only. The aim of model optimization was to
	improve repeated evaluations statistics of the predictions and effective
	descriptor scoring functions were used to facilitate quick generation
	of multiple linear regression analysis (MLR), partial least squares
	method (PLS) and artificial neural network (ANN) models with optimal
	predicting ability. Standard error of prediction of the best model
	generated with ANN (with 39-7-1 network structure) was 0.72 in logS
	units while the cross validated squared correlation coefficient (Q(2))
	was better than 0.85. These values give a good chance for successful
	pre-selection of screening compounds from virtual libraries, based
	on the predicted water solubility.},
  keywords = {Chemical, Chemistry, Comparative Study, Cytochrome P-450 Enzyme System,
	Estradiol, Least-Squares Analysis, Ligands, Linear Models, Models,
	Molecular, Naphthalenes, Neural Networks (Computer), Non-U.S. Gov't,
	Physical, Quantitative Structure-Activity Relationship, Reproducibility
	of Results, Research Support, Solubility, Spectrum Analysis, Statistical,
	Water, 14965289},
  owner = {mahe},
  pmid = {14965289},
  timestamp = {2006.09.07}
}

@article{Erhan2006Collaborative,
  author    = {Erhan, D. and L'heureux, P.-J. and Yue, S. Y. and Bengio, Y.},
  title     = {Collaborative filtering on a family of biological targets.},
  journal   = {J. Chem. Inf. Model.},
  year      = {2006},
  volume    = {46},
  number    = {2},
  pages     = {626--635},
  abstract  = {Building a QSAR model of a new biological target for which few screening
	data are available is a statistical challenge. However, the new target
	may be part of a bigger family, for which we have more screening
	data. Collaborative filtering or, more generally, multi-task learning,
	is a machine learning approach that improves the generalization performance
	of an algorithm by using information from related tasks as an inductive
	bias. We use collaborative filtering techniques for building predictive
	models that link multiple targets to multiple examples. The more
	commonalities between the targets, the better the multi-target model
	that can be built. We show an example of a multi-target neural network
	that can use family information to produce a predictive model of
	an undersampled target. We evaluate JRank, a kernel-based method
	designed for collaborative filtering. We show their performance on
	compound prioritization for an HTS campaign and the underlying shared
	representation between targets. JRank outperformed the neural network
	both in the single- and multi-target models.},
  doi       = {10.1021/ci050367t},
  url       = {http://dx.doi.org/10.1021/ci050367t},
  pmid      = {16562992},
  pdf       = {../local/Erhan2006Collaborative.pdf},
  file      = {Erhan2006Collaborative.pdf:Erhan2006Collaborative.pdf:PDF},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2007.10.11}
}

@article{Ernst2010Discovery,
  author      = {Ernst, J. and Kellis, M.},
  title       = {Discovery and characterization of chromatin states for systematic
	annotation of the human genome.},
  journal     = {Nat. Biotechnol.},
  year        = {2010},
  month       = {Aug},
  volume      = {28},
  number      = {8},
  pages       = {817--825},
  abstract    = {A plethora of epigenetic modifications have been described in the
	human genome and shown to play diverse roles in gene regulation,
	cellular differentiation and the onset of disease. Although individual
	modifications have been linked to the activity levels of various
	genetic functional elements, their combinatorial patterns are still
	unresolved and their potential for systematic de novo genome annotation
	remains untapped. Here, we use a multivariate Hidden Markov Model
	to reveal 'chromatin states' in human T cells, based on recurrent
	and spatially coherent combinations of chromatin marks. We define
	51 distinct chromatin states, including promoter-associated, transcription-associated,
	active intergenic, large-scale repressed and repeat-associated states.
	Each chromatin state shows specific enrichments in functional annotations,
	sequence motifs and specific experimentally observed characteristics,
	suggesting distinct biological roles. This approach provides a complementary
	functional annotation of the human genome that reveals the genome-wide
	locations of diverse classes of epigenetic function.},
  doi         = {10.1038/nbt.1662},
  url         = {http://dx.doi.org/10.1038/nbt.1662},
  pii         = {nbt.1662},
  pmid        = {20657582},
  institution = {MIT Computer Science and Artificial Intelligence Laboratory, Cambridge,
	Massachusetts, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2010.11.23}
}

@inproceedings{Eskin2000Protein,
  author = {Eskin, E. and Grundy, W. N. and Singer, Y.},
  title = {Protein family classification using sparse {M}arkov transducers},
  booktitle = {Proceedings of the {E}ighth {I}nternational {C}onference on {I}ntelligent
	{S}ystems for {M}olecular {B}iology ({ISMB} 2000)},
  year = {2000},
  pages = {134--145},
  owner = {vert}
}

@article{Esquela-Kerscher2006Oncomirs,
  author = {Esquela-Kerscher, A. and Slack, F. J.},
  title = {Oncomirs --- micro{RNA}s with a role in cancer.},
  journal = {Nat. Rev. Cancer},
  year = {2006},
  volume = {6},
  pages = {259--269},
  number = {4},
  month = {Apr},
  abstract = {MicroRNAs (miRNAs) are an abundant class of small non-protein-coding
	RNAs that function as negative gene regulators. They regulate diverse
	biological processes, and bioinformatic data indicates that each
	miRNA can control hundreds of gene targets, underscoring the potential
	influence of miRNAs on almost every genetic pathway. Recent evidence
	has shown that miRNA mutations or mis-expression correlate with various
	human cancers and indicates that miRNAs can function as tumour suppressors
	and oncogenes. miRNAs have been shown to repress the expression of
	important cancer-related genes and might prove useful in the diagnosis
	and treatment of cancer.},
  doi = {10.1038/nrc1840},
  pdf = {../local/Esquela-Kerscher2006Oncomirs.pdf},
  file = {Esquela-Kerscher2006Oncomirs.pdf:Esquela-Kerscher2006Oncomirs.pdf:PDF},
  institution = {Developmental Biology, 266 Whitney Avenue, New Haven, Connecticut
	06520, USA.},
  keywords = {csbcbook},
  owner = {jp},
  pii = {nrc1840},
  pmid = {16557279},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1038/nrc1840}
}

@article{Esteller2008Epigenetics,
  author      = {Esteller, M.},
  title       = {Epigenetics in cancer},
  journal     = {N. Engl. J. Med.},
  year        = {2008},
  volume      = {358},
  pages       = {1148--1159},
  number      = {11},
  month       = {Mar},
  doi         = {10.1056/NEJMra072067},
  pdf         = {../local/Esteller2008Epigenetics.pdf},
  file        = {Esteller2008Epigenetics.pdf:Esteller2008Epigenetics.pdf:PDF},
  institution = {Cancer Epigenetics Laboratory, Spanish National Cancer Research Center,
    Madrid, Spain. mesteller@cnio.es},
  keywords    = {csbcbook},
  owner       = {jp},
  pii         = {358/11/1148},
  pmid        = {18337604},
  timestamp   = {2009.10.09},
  url         = {http://dx.doi.org/10.1056/NEJMra072067}
}

@article{Esteller2007Cancer,
  author      = {Esteller, M.},
  title       = {Cancer epigenomics: {DNA} methylomes and histone-modification maps},
  journal     = {Nat. Rev. Genet.},
  year        = {2007},
  volume      = {8},
  pages       = {286--298},
  number      = {4},
  month       = {Apr},
  abstract    = {An altered pattern of epigenetic modifications is central to many
    common human diseases, including cancer. Many studies have explored
    the mosaic patterns of DNA methylation and histone modification in
    cancer cells on a gene-by-gene basis; among their results has been
    the seminal finding of transcriptional silencing of tumour-suppressor
    genes by CpG-island-promoter hypermethylation. However, recent technological
    advances are now allowing cancer epigenetics to be studied genome-wide
    - an approach that has already begun to provide both biological insight
    and new avenues for translational research. It is time to 'upgrade'
    cancer epigenetics research and put together an ambitious plan to
    tackle the many unanswered questions in this field using epigenomics
    approaches.},
  doi         = {10.1038/nrg2005},
  pdf         = {../local/Esteller2007Cancer.pdf},
  file        = {Esteller2007Cancer.pdf:Esteller2007Cancer.pdf:PDF},
  institution = {Cancer Epigenetics Laboratory, Spanish National Cancer Centre (CNIO),
    Melchor Fernandez Almagro 3, 28029 Madrid, Spain. mesteller@cnio.es},
  keywords    = {csbcbook},
  owner       = {jp},
  pii         = {nrg2005},
  pmid        = {17339880},
  timestamp   = {2009.10.09},
  url         = {http://dx.doi.org/10.1038/nrg2005}
}

@phdthesis{Estrach2012Scattering,
  author = {Bruna Estrach, J.},
  title = {Scattering representations for recognition},
  school = {{\'E}cole Polytechnique},
  year = {2012},
  pdf = {../local/Estrach2012Scattering.pdf},
  file = {Estrach2012Scattering.pdf:Estrach2012Scattering.pdf:PDF},
  owner = {jp},
  timestamp = {2013.03.29}
}

@article{Eulalio2008Getting,
  author = {Ana Eulalio and Eric Huntzinger and Elisa Izaurralde},
  title = {Getting to the root of {miRNA}-mediated gene silencing.},
  journal = {Cell},
  year = {2008},
  volume = {132},
  pages = {9--14},
  number = {1},
  month = {Jan},
  abstract = {MicroRNAs are approximately 22 nucleotide-long RNAs that silence gene
	expression posttranscriptionally by binding to the 3' untranslated
	regions of target mRNAs. Although much is known about their biogenesis
	and biological functions, the mechanisms allowing miRNAs to silence
	gene expression in animal cells are still under debate. Here, we
	discuss current models for miRNA-mediated gene silencing and formulate
	a hypothesis to reconcile differences.},
  doi = {10.1016/j.cell.2007.12.024},
  institution = {Max-Planck-Institute for Developmental Biology, Spemannstrasse 35,
	D-72076 T{\"u}bingen, Germany.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0092-8674(07)01697-2},
  pmid = {18191211},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1016/j.cell.2007.12.024}
}

@article{Evers2005Structure-based,
  author    = {Evers, A. and Klabunde, T.},
  title     = {Structure-based drug discovery using {GPCR} homology modeling: successful
    virtual screening for antagonists of the {alpha1A} adrenergic receptor.},
  journal   = {J. Med. Chem.},
  year      = {2005},
  volume    = {48},
  pages     = {1088--1097},
  number    = {4},
  month     = {Feb},
  abstract  = {In this paper, we describe homology modeling of the alpha1A receptor
    based on the X-ray structure of bovine rhodopsin. The protein model
    has been generated by applying ligand-supported homology modeling,
    using mutational and ligand SAR data to guide the protein modeling
    procedure. We performed a virtual screening of the company's compound
    collection to test how well this model is suited to identify alpha1A
    antagonists. We applied a hierarchical virtual screening procedure
    guided by 2D filters and three-dimensional pharmacophore models.
    The ca. 23,000 filtered compounds were docked into the alpha1A homology
    model with GOLD and scored with PMF. From the top-ranked compounds,
    80 diverse compounds were tested in a radioligand displacement assay.
    37 compounds revealed K(i) values better than 10 microM; the most
    active compound binds with 1.4 nM to the alpha1A receptor. Our findings
    suggest that rhodopsin-based homology models may be used as the structural
    basis for GPCR lead finding and compound optimization.},
  doi       = {10.1021/jm0491804},
  keywords  = {chemogenomics},
  owner     = {laurent},
  pmid      = {15715476},
  timestamp = {2008.01.16},
  url       = {http://dx.doi.org/10.1021/jm0491804}
}

@article{Evgeniou2005Learning,
  author = {Evgeniou, T. and Micchelli, C. and Pontil, M.},
  title = {Learning multiple tasks with kernel methods},
  journal = {J. Mach. Learn. Res.},
  year = {2005},
  volume = {6},
  pages = {615--637},
  abstract = {We study the problem of learning many related tasks simultaneously
	using kernel methods and regularization. The standard single-task
	kernel methods, such as support vector machines and regularization
	networks, are extended to the case of multi-task learning. Our analysis
	shows that the problem of estimating many task functions with regularization
	can be cast as a single task learning problem if a family of multi-task
	kernel functions we define is used. These kernels model relations
	among the tasks and are derived from a novel form of regularizers.
	Specific kernels that can be used for multi-task learning are provided
	and experimentally tested on two real data sets. In agreement with
	past empirical work on multi-task learning, the experiments show
	that learning multiple related tasks simultaneously using the proposed
	approach can significantly outperform standard single-task learning
	particularly when there are many related tasks but few data per task.},
  timestamp = {2006.05.18},
  url = {http://jmlr.csail.mit.edu/papers/volume6/evgeniou05a}
}

@inproceedings{Evgeniou2004Regularized,
  author = {Evgeniou, Theodoros and Pontil, Massimiliano},
  title = {Regularized multi-task learning},
  booktitle = {KDD '04: Proceedings of the tenth ACM SIGKDD international conference
	on Knowledge discovery and data mining},
  year = {2004},
  pages = {109--117},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1014052.1014067},
  isbn = {1-58113-888-1},
  location = {Seattle, WA, USA}
}

@article{Evgeniou2000Regularization,
  author  = {Evgeniou, T. and Pontil, M. and Poggio, T.},
  title   = {Regularization {N}etworks and {S}upport {V}ector {M}achines},
  journal = {Adv. {C}omput. {M}ath.},
  year    = {2000},
  volume  = {13},
  pages   = {1--50},
  number  = {1},
  doi     = {10.1023/A:1018946025316},
  pdf     = {../local/Evgeniou2000Regularization.pdf},
  file    = {Evgeniou2000Regularization.pdf:local/Evgeniou2000Regularization.pdf:PDF},
  url     = {http://dx.doi.org/10.1023/A:1018946025316}
}

@article{Fabbri2008MicroRNAs,
  author      = {Fabbri, M. and Croce, C. M. and Calin, G. A.},
  title       = {{MicroRNAs}},
  journal     = {Cancer J.},
  year        = {2008},
  volume      = {14},
  pages       = {1--6},
  number      = {1},
  abstract    = {MicroRNAs (miRNAs) are small, noncoding RNAs with regulatory functions,
    which play an important role in many human diseases, including cancer.
    An emerging number of studies show that miRNAs can act either as
    oncogenes or as tumor suppressor genes or sometimes as both. Germline,
    somatic mutations and polymorphisms can contribute to cancer predisposition.
    miRNA expression levels have diagnostic and prognostic implications,
    and their roles as anticancer therapeutic agents is promising and
    currently under investigation.},
  doi         = {10.1097/PPO.0b013e318164145e},
  institution = {Human Cancer Genetics, Molecular Virology, Immunology and Medical
    Genetics, Ohio State University, Columbus, OH, USA.},
  keywords    = {csbcbook},
  owner       = {jp},
  pii         = {00130404-200801000-00001},
  pmid        = {18303474},
  timestamp   = {2009.10.09},
  url         = {http://dx.doi.org/10.1097/PPO.0b013e318164145e}
}

@article{Theres2006Structural,
  author = {Theres Fagerberg and Jean-Charles Cerottini and Olivier Michielin},
  title = {{S}tructural prediction of peptides bound to {MHC} class {I}.},
  journal = {J. Mol. Biol.},
  year = {2006},
  volume = {356},
  pages = {521--546},
  number = {2},
  month = {Feb},
  abstract = {An ab initio structure prediction approach adapted to the peptide-major
	histocompatibility complex (MHC) class I system is presented. Based
	on structure comparisons of a large set of peptide-MHC class I complexes,
	a molecular dynamics protocol is proposed using simulated annealing
	(SA) cycles to sample the conformational space of the peptide in
	its fixed MHC environment. A set of 14 peptide-human leukocyte antigen
	(HLA) A0201 and 27 peptide-non-HLA A0201 complexes for which X-ray
	structures are available is used to test the accuracy of the prediction
	method. For each complex, 1000 peptide conformers are obtained from
	the SA sampling. A graph theory clustering algorithm based on heavy
	atom root-mean-square deviation (RMSD) values is applied to the sampled
	conformers. The clusters are ranked using cluster size, mean effective
	or conformational free energies, with solvation free energies computed
	using Generalized Born MV 2 (GB-MV2) and Poisson-Boltzmann (PB) continuum
	models. The final conformation is chosen as the center of the best-ranked
	cluster. With conformational free energies, the overall prediction
	success is 83\% using a 1.00 Angstroms crystal RMSD criterion for
	main-chain atoms, and 76\% using a 1.50 Angstroms RMSD criterion
	for heavy atoms. The prediction success is even higher for the set
	of 14 peptide-HLA A0201 complexes: 100\% of the peptides have main-chain
	RMSD values $\leq$ 1.00 Angstroms and 93\% of the peptides have heavy
	atom RMSD values $\leq$ 1.50 Angstroms. This structure prediction
	method can be applied to complexes of natural or modified antigenic
	peptides in their MHC environment with the aim to perform rational
	structure-based optimizations of tumor vaccines.},
  doi = {10.1016/j.jmb.2005.11.059},
  keywords = {Algorithms, Amino Acid Sequence, Antibodies, Artificial Intelligence,
	Automated, Binding Sites, Chemical, Computer Simulation, Databases,
	Epitope Mapping, Genes, HLA-A Antigens, HLA-DQ Antigens, Histocompatibility
	Antigens Class I, Humans, Immunoassay, Immunological, MHC Class I,
	Models, Molecular, Molecular Sequence Data, Pattern Recognition,
	Peptides, Protein, Protein Binding, Protein Conformation, Protein
	Interaction Mapping, Protein Structure, Sequence Alignment, Sequence
	Analysis, Software, Tertiary, Water},
  pii = {S0022-2836(05)01462-2},
  pmid = {16368108},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1016/j.jmb.2005.11.059}
}

@article{Faith2008Many,
  author = {Faith, J. J. and Driscoll, M. E. and Fusaro, V. A. and Cosgrove, E. J.
	and Hayete, B. and Juhn, F. S. and Schneider, S. J. and Gardner, T. S.},
  title = {Many Microbe Microarrays Database: uniformly normalized Affymetrix
	compendia with structured experimental metadata.},
  journal = {Nucleic Acids Res.},
  year = {2008},
  volume = {36},
  pages = {D866--D870},
  number = {Database issue},
  month = {Jan},
  abstract = {Many Microbe Microarrays Database (M3D) is designed to facilitate
	the analysis and visualization of expression data in compendia compiled
	from multiple laboratories. M3D contains over a thousand Affymetrix
	microarrays for Escherichia coli, Saccharomyces cerevisiae and Shewanella
	oneidensis. The expression data is uniformly normalized to make the
	data generated by different laboratories and researchers more comparable.
	To facilitate computational analyses, M3D provides raw data (CEL
	file) and normalized data downloads of each compendium. In addition,
	web-based construction, visualization and download of custom datasets
	are provided to facilitate efficient interrogation of the compendium
	for more focused analyses. The experimental condition metadata in
	M3D is human curated with each chemical and growth attribute stored
	as a structured and computable set of experimental features with
	consistent naming conventions and units. All versions of the normalized
	compendia constructed for each species are maintained and accessible
	in perpetuity to facilitate the future interpretation and comparison
	of results published on M3D data. M3D is accessible at http://m3d.bu.edu/.},
  doi = {10.1093/nar/gkm815},
  pdf = {../local/Faith2008Many.pdf},
  file = {Faith2008Many.pdf:Faith2008Many.pdf:PDF},
  institution = {Program in Bioinformatics, Boston University, 24 Cummington St. and
	Department of Biomedical Engineering, Boston University, 44 Cummington
	St., Boston, Massachusetts, 02215, USA.},
  owner = {mordelet},
  pii = {gkm815},
  pmid = {17932051},
  timestamp = {2010.07.16},
  url = {http://dx.doi.org/10.1093/nar/gkm815}
}

@article{Faith2007Large-scale,
  author    = {Faith, J. J. and Hayete, B. and Thaden, J. T. and Mogno, I. and Wierzbowski,
    J. and Cottarel, G. and Kasif, S. and Collins, J. J. and Gardner,
    T. S.},
  title     = {Large-scale mapping and validation of {E}scherichia coli transcriptional
    regulation from a compendium of expression profiles},
  journal   = {PLoS Biol.},
  year      = {2007},
  volume    = {5},
  pages     = {e8},
  number    = {1},
  month     = {Jan},
  abstract  = {Machine learning approaches offer the potential to systematically
    identify transcriptional regulatory interactions from a compendium
    of microarray expression profiles. However, experimental validation
    of the performance of these methods at the genome scale has remained
    elusive. Here we assess the global performance of four existing classes
    of inference algorithms using 445 Escherichia coli Affymetrix arrays
    and 3,216 known E. coli regulatory interactions from RegulonDB. We
    also developed and applied the context likelihood of relatedness
    (CLR) algorithm, a novel extension of the relevance networks class
    of algorithms. CLR demonstrates an average precision gain of 36\%
    relative to the next-best performing algorithm. At a 60\% true positive
    rate, CLR identifies 1,079 regulatory interactions, of which 338
    were in the previously known network and 741 were novel predictions.
    We tested the predicted interactions for three transcription factors
    with chromatin immunoprecipitation, confirming 21 novel interactions
    and verifying our RegulonDB-based performance estimates. CLR also
    identified a regulatory link providing central metabolic control
    of iron transport, which we confirmed with real-time quantitative
    PCR. The compendium of expression data compiled in this study, coupled
    with RegulonDB, provides a valuable model system for further improvement
    of network inference algorithms using experimental data.},
  doi       = {10.1371/journal.pbio.0050008},
  pdf       = {../local/Faith2007Large-scale.pdf},
  file      = {Faith2007Large-scale.pdf:Faith2007Large-scale.pdf:PDF},
  pii       = {06-PLBI-RA-0740R3},
  pmid      = {17214507},
  timestamp = {2008.02.03},
  url       = {http://dx.doi.org/10.1371/journal.pbio.0050008}
}

@article{Faloutsos1999On,
  author = {Faloutsos, M. and Faloutsos, P. and Faloutsos, C.},
  title = {On power-law relationships of the {I}nternet topology},
  journal = {Comput. {C}omm. {R}ev.},
  year = {1999},
  volume = {29},
  pages = {251--262},
  pdf = {../local/falo99.pdf},
  file = {falo99.pdf:local/falo99.pdf:PDF},
  subject = {compnet},
  url = {http://www.acm.org/sigcomm/sigcomm99/papers/session7-2.html}
}

@inproceedings{Fan2001Stock,
  author = {Fan, A. and Palaniswami, M.},
  title = {Stock selection using support vector machines},
  booktitle = {Proc. Int. Joint Conf. Neural Networks IJCNN '01},
  year = {2001},
  volume = {3},
  pages = {1793--1798},
  abstract = {We used the support vector machines (SVM) in a classification approach
	to `beat the market'. Given the fundamental accounting and price
	information of stocks trading on the Australian Stock Exchange, we
	attempt to use SVM to identify stocks that are likely to outperform
	the market by having exceptional returns. The equally weighted portfolio
	formed by the stocks selected by SVM has a total return of 208\% over
	a five years period, significantly outperformed the benchmark of
	71\%. We also give a new perspective with a class sensitivity tradeoff,
	whereby the output of SVM is interpreted as a probability measure
	and ranked, such that the stocks selected can be fixed to the top
	25\%},
  doi = {10.1109/IJCNN.2001.938434},
  pdf = {../local/Fan2001Stock.pdf},
  file = {Fan2001Stock.pdf:Fan2001Stock.pdf:PDF},
  owner = {jp},
  timestamp = {2011.04.08}
}

@article{Fan2006Concordance,
  author = {Fan, C. and Oh, D. S. and Wessels, L. and Weigelt, B. and Nuyten,
	D. S. A. and Nobel, A. B. and van't Veer, L. J. and Perou, C. M.},
  title = {Concordance among gene-expression-based predictors for breast cancer},
  journal = {N. Engl. J. Med.},
  year = {2006},
  volume = {355},
  pages = {560--569},
  number = {6},
  doi = {10.1056/NEJMoa052933},
  pdf = {../local/Fan2006Concordance.pdf},
  file = {Fan2006Concordance.pdf:Fan2006Concordance.pdf:PDF},
  keywords = {breastcancer, microarray},
  owner = {jp},
  publisher = {Mass Med Soc},
  timestamp = {2011.01.13},
  url = {http://dx.doi.org/10.1056/NEJMoa052933}
}

@article{Fan2006Illumina,
  author      = {Jian-Bing Fan and Kevin L Gunderson and Marina Bibikova and Joanne
    M Yeakley and Jing Chen and Eliza Wickham Garcia and Lori L Lebruska
    and Marc Laurent and Richard Shen and David Barker},
  title       = {Illumina universal bead arrays.},
  journal     = {Methods Enzymol},
  year        = {2006},
  volume      = {410},
  pages       = {57--73},
  abstract    = {This chapter describes an accurate, scalable, and flexible microarray
    technology. It includes a miniaturized array platform where each
    individual feature is quality controlled and a versatile assay that
    can be adapted for various genetic analyses, such as single nucleotide
    polymorphism genotyping, DNA methylation detection, and gene expression
    profiling. This chapter describes the concept of the BeadArray technology,
    two different Array of Arrays formats, the assay scheme and protocol,
    the performance of the system, and its use in large-scale genetic,
    epigenetic, and expression studies.},
  doi         = {10.1016/S0076-6879(06)10003-8},
  institution = {Illumina, Inc., San Diego, California, USA.},
  keywords    = {Animals; Humans; Microspheres; Oligonucleotide Array Sequence Analysis,
    instrumentation/methods},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  pii         = {S0076-6879(06)10003-8},
  pmid        = {16938546},
  timestamp   = {2010.08.04},
  url         = {http://dx.doi.org/10.1016/S0076-6879(06)10003-8}
}

@article{Fang2011Design,
  author = {Zhide Fang and Xiangqin Cui},
  title = {Design and validation issues in {RNA}-seq experiments.},
  journal = {Brief Bioinform},
  year = {2011},
  volume = {12},
  pages = {280--287},
  number = {3},
  month = {May},
  abstract = {The next-generation sequencing technologies are being rapidly applied
	in biological research. Tens of millions of short sequences generated
	in a single experiment provide us enormous information on genome
	composition, genetic variants, gene expression levels and protein
	binding sites depending on the applications. Various methods are
	being developed for analyzing the data generated by these technologies.
	However, the relevant experimental design issues have rarely been
	discussed. In this review, we use RNA-seq as an example to bring
	this topic into focus and to discuss experimental design and validation
	issues pertaining to next-generation sequencing in the quantification
	of transcripts.},
  doi = {10.1093/bib/bbr004},
  institution = {Assistant Professor, Department of Biostatistics, Section on Statistical
	Genetics, University of Alabama at Birmingham, 327 Ryals Public Health
	Building, 1665 University BLVD, Birmingham, AL 35294, USA. xcui@uab.edu.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {bbr004},
  pmid = {21498551},
  timestamp = {2011.06.01},
  url = {http://dx.doi.org/10.1093/bib/bbr004}
}

@article{Farago1993Strong,
  author = {Farago, A. and Lugosi, G.},
  title = {Strong universal consistency of neural network classifiers},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1993},
  volume = {39},
  pages = {1146--1151},
  number = {4},
  month = {Jul},
  abstract = {In statistical pattern recognition, a classifier is called universally
	consistent if its error probability converges to the {B}ayes-risk
	as the size of the training data grows for all possible distributions
	of the random variable pair of the observation vector and its class.
	{I}t is proven that if a one-layered neural network with properly
	chosen number of nodes is trained to minimize the empirical risk
	on the training data, then a universally consistent classifier results.
	{I}t is shown that the exponent in the rate of convergence does not
	depend on the dimension if certain smoothness conditions on the distribution
	are satisfied. {T}hat is, this class of universally consistent classifiers
	does not suffer from the curse of dimensionality. {A} training algorithm
	is presented that finds the optimal set of parameters in polynomial
	time if the number of nodes and the space dimension is fixed and
	the amount of training data grows},
  pdf = {../local/Farago1993Strong.pdf},
  file = {Farago1993Strong.pdf:local/Farago1993Strong.pdf:PDF},
  owner = {vert}
}

@article{Fare2003Effects,
  author = {Thomas L Fare and Ernest M Coffey and Hongyue Dai and Yudong D He
	and Deborah A Kessler and Kristopher A Kilian and John E Koch and
	Eric LeProust and Matthew J Marton and Michael R Meyer and Roland
	B Stoughton and George Y Tokiwa and Yanqun Wang},
  title = {Effects of atmospheric ozone on microarray data quality.},
  journal = {Anal Chem},
  year = {2003},
  volume = {75},
  pages = {4672--4675},
  number = {17},
  month = {Sep},
  abstract = {A data anomaly was observed that affected the uniformity and reproducibility
	of fluorescent signal across DNA microarrays. Results from experimental
	sets designed to identify potential causes (from microarray production
	to array scanning) indicated that the anomaly was linked to a batch
	process; further work allowed us to localize the effect to the posthybridization
	array stringency washes. Ozone levels were monitored and highly correlated
	with the batch effect. Controlled exposures of microarrays to ozone
	confirmed this factor as the root cause, and we present data that
	show susceptibility of a class of cyanine dyes (e.g., Cy5, Alexa
	647) to ozone levels as low as 5-10 ppb for periods as short as 10-30
	s. Other cyanine dyes (e.g., Cy3, Alexa 555) were not significantly
	affected until higher ozone levels ($>$ 100 ppb). To address this environmental
	effect, laboratory ozone levels should be kept below 2 ppb (e.g.,
	with filters in HVAC) to achieve high quality microarray data.},
  institution = {Rosetta Inpharmatics LLC, 12040 115th Avenue NE, Kirkland, Washington
	98034, USA.},
  keywords = {Artifacts; Atmosphere, chemistry; Carbocyanines, chemistry; Desiccation;
	Fluorescence; Oligonucleotide Array Sequence Analysis, instrumentation/standards;
	Ozone, analysis/chemistry; Quality Control; Reproducibility of Results},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {14632079},
  timestamp = {2012.02.29}
}

@article{Farid2006New,
  author    = {Farid, R. and Day, T. and Friesner, R. A. and Pearlstein, R. A.},
  title     = {{N}ew insights about {HERG} blockade obtained from protein modeling,
    potential energy mapping, and docking studies.},
  journal   = {Bioorg. Med. Chem.},
  year      = {2006},
  volume    = {14},
  pages     = {3160--3173},
  number    = {9},
  month     = {May},
  abstract  = {We created a homology model of the homo-tetrameric pore domain of
    HERG using the crystal structure of the bacterial potassium channel,
    KvAP, as a template. We docked a set of known blockers with well-characterized
    effects on channel function into the lumen of the pore between the
    selectivity filter and extracellular entrance using a novel docking
    and refinement procedure incorporating Glide and Prime. Key aromatic
    groups of the blockers are predicted to form multiple simultaneous
    ring stacking and hydrophobic interactions among the eight aromatic
    residues lining the pore. Furthermore, each blocker can achieve these
    interactions via multiple docking configurations. To further interpret
    the docking results, we mapped hydrophobic and hydrophilic potentials
    within the lumen of each refined docked complex. Hydrophilic iso-potential
    contours define a 'propeller-shaped' volume at the selectivity filter
    entrance. Hydrophobic contours define a hollow 'crown-shaped' volume
    located above the 'propeller', whose hydrophobic 'rim' extends along
    the pore axis between Tyr652 and Phe656. Blockers adopt conformations/binding
    orientations that closely mimic the shapes and properties of these
    contours. Blocker basic groups are localized in the hydrophilic 'propeller',
    forming electrostatic interactions with Ser624 rather than a generally
    accepted pi-cation interaction with Tyr652. Terfenadine, cisapride,
    sertindole, ibutilide, and clofilium adopt similar docked poses,
    in which their N-substituents bridge radially across the hollow interior
    of the 'crown' (analogous to the hub and spokes of a wheel), and
    project aromatic/hydrophobic portions into the hydrophobic 'rim'.
    MK-499 docks with its longitudinal axis parallel to the axis of the
    pore and 'crown', and its hydrophobic groups buried within the hydrophobic
    'rim'.},
  doi       = {10.1016/j.bmc.2005.12.032},
  pdf       = {../local/Farid2006New.pdf},
  file      = {Farid2006New.pdf:local/Farid2006New.pdf:PDF},
  keywords  = {chemoinformatics herg},
  pii       = {S0968-0896(05)01214-9},
  pmid      = {16413785},
  timestamp = {2007.02.03},
  url       = {http://dx.doi.org/10.1016/j.bmc.2005.12.032}
}

@article{Faugeras2004Variational,
  author = {Olivier Faugeras and Geoffray Adde and Guillaume Charpiat and Christophe
	Chefd'hotel and Maureen Clerc and Thomas Deneux and Rachid Deriche
	and Gerardo Hermosillo and Renaud Keriven and Pierre Kornprobst and
	Jan Kybic and Christophe Lenglet and Lucero Lopez-Perez and Th{\'e}o
	Papadopoulo and Jean-Philippe Pons and Florent Segonne and Bertrand
	Thirion and David Tschumperl{\'e} and Thierry Vi{\'e}ville and Nicolas
	Wotawa},
  title = {Variational, geometric, and statistical methods for modeling brain
	anatomy and function.},
  journal = {Neuroimage},
  year = {2004},
  volume = {23 Suppl 1},
  pages = {S46--S55},
  abstract = {We survey the recent activities of the {O}dyss{\'e}e {L}aboratory in
	the area of the application of mathematics to the design of models
	for studying brain anatomy and function. {W}e start with the problem
	of reconstructing sources in {MEG} and {EEG}, and discuss the variational
	approach we have developed for solving these inverse problems. {T}his
	motivates the need for geometric models of the head. {W}e present
	a method for automatically and accurately extracting surface meshes
	of several tissues of the head from anatomical magnetic resonance
	({MR}) images. {A}natomical connectivity can be extracted from diffusion
	tensor magnetic resonance images but, in the current state of the
	technology, it must be preceded by a robust estimation and regularization
	stage. {W}e discuss our work based on variational principles and
	show how the results can be used to track fibers in the white matter
	({WM}) as geodesics in some {R}iemannian space. {W}e then go to the
	statistical modeling of functional magnetic resonance imaging (f{MRI})
	signals from the viewpoint of their decomposition in a pseudo-deterministic
	and stochastic part that we then use to perform clustering of voxels
	in a way that is inspired by the theory of support vector machines
	and in a way that is grounded in information theory. {M}ultimodal
	image matching is discussed next in the framework of image statistics
	and partial differential equations ({PDE}s) with an eye on registering
	f{MRI} to the anatomy. {T}he paper ends with a discussion of a new
	theory of random shapes that may prove useful in building anatomical
	and functional atlases.},
  doi = {10.1016/j.neuroimage.2004.07.015},
  pdf = {../local/Faugeras2004Variational.pdf},
  file = {Faugeras2004Variational.pdf:local/Faugeras2004Variational.pdf:PDF},
  keywords = {Adolescent, Adult, Algorithms, Anatomic, Bacterial Proteins, Brain,
	Brain Mapping, Comparative Study, Computer Simulation, Computer-Assisted,
	Diffusion Magnetic Resonance Imaging, Facial Asymmetry, Facial Expression,
	Facial Paralysis, Female, Gene Expression Profiling, Gram-Negative
	Bacteria, Gram-Positive Bacteria, Humans, Image Interpretation, Magnetoencephalography,
	Male, Middle Aged, Models, Motion, Neural Pathways, Non-U.S. Gov't,
	Photography, Protein, Proteome, Research Support, Retina, Sequence
	Alignment, Sequence Analysis, Severity of Illness Index, Software,
	Statistical, Subcellular Fractions},
  pii = {S1053-8119(04)00380-5},
  pmid = {15501100},
  url = {http://dx.doi.org/10.1016/j.neuroimage.2004.07.015}
}

@techreport{Fawcett2003ROC,
  author      = {Fawcett, T.},
  title       = {R{OC} graphs: notes and practical considerations for data mining
    researchers},
  institution = {HP Laboratories},
  year        = {2003},
  number      = {2003-4},
  address     = {Palo Alto, CA, USA},
  owner       = {vert},
  timestamp   = {2006.01.19}
}

@inproceedings{Fazel2001rank,
  author = {Fazel, M. and Hindi, H. and Boyd, S.},
  title = {A rank minimization heuristic with application to minimum order system
	approximation},
  booktitle = {Proceedings of the 2001 American Control Conference},
  year = {2001},
  volume = {6},
  pages = {4734--4739},
  doi = {10.1109/ACC.2001.945730}
}

@article{Feder1986Maximum,
  author = {Feder, M.},
  title = {Maximum entropy as a special case of the minimum description length
	criterion},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1986},
  volume = {32},
  pages = {847--849},
  number = {6},
  month = {Nov},
  abstract = {The {M}aximum {E}ntropy ({ME}) and {M}aximum {L}ikelihood ({ML}) criteria
	are the bases for two approaches to statistical inference problems.
	{A} new criterion, called the {M}inimum {D}escription {L}ength ({MDL}),
	has been recently introduced. {T}his criterion generalizes the {ML}
	method so it can be applied to more general situations, e.g., when
	the number of parameters is unknown. {I}t is shown that {ME} is also
	a special case of the {MDL} criterion; maximizing the entropy subject
	to some constraints on the underlying probability function is identical
	to minimizing the code length required to represent all possible
	i.i.d. realizations of the random variable such that the sample frequencies
	(or histogram) satisfy those given constraints.},
  pdf = {../local/Feder1986Maximum.pdf},
  file = {Feder1986Maximum.pdf:local/Feder1986Maximum.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Feder1996Hierarchical,
  author = {Feder, M. and Merhav, N.},
  title = {Hierarchical universal coding},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1996},
  volume = {42},
  pages = {1354--1364},
  number = {5},
  month = {Sep},
  abstract = {In an earlier paper, we proved a strong version of the redundancy-capacity
	converse theorem of universal coding, stating that for ``most'' sources
	in a given class, the universal coding redundancy is essentially
	lower-bounded by the capacity of the channel induced by this class.
	{S}ince this result holds for general classes of sources, it extends
	{R}issanen's (1986) strong converse theorem for parametric families.
	{W}hile our earlier result has established strong optimality only
	for mixture codes weighted by the capacity-achieving prior, our first
	result herein extends this finding to a general prior. {F}or some
	cases our technique also leads to a simplified proof of the above
	mentioned strong converse theorem. {T}he major interest in this paper,
	however, is in extending the theory of universal coding to hierarchical
	structures of classes, where each class may have a different capacity.
	{I}n this setting, one wishes to incur redundancy essentially as
	small as that corresponding to the active class, and not the union
	of classes. {O}ur main result is that the redundancy of a code based
	on a two-stage mixture (first, within each class, and then over the
	classes), is no worse than that of any other code for ``most'' sources
	of ``most'' classes. {I}f, in addition, the classes can be efficiently
	distinguished by a certain decision rule, then the best attainable
	redundancy is given explicitly by the capacity of the active class
	plus the normalized negative logarithm of the prior probability assigned
	to this class. {T}hese results suggest some interesting guidelines
	as for the choice of the prior. {W}e also discuss some examples with
	a natural hierarchical partition into classes},
  pdf = {../local/Feder1996Hierarchical.pdf},
  file = {Feder1996Hierarchical.pdf:local/Feder1996Hierarchical.pdf:PDF},
  keywords = {information-theory source-coding},
  owner = {vert}
}

@article{Feder1994Relations,
  author = {Feder, M. and Merhav, N.},
  title = {Relations between entropy and error probability},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1994},
  volume = {40},
  pages = {259--266},
  number = {1},
  month = {Jan},
  abstract = {The relation between the entropy of a discrete random variable and
	the minimum attainable probability of error made in guessing its
	value is examined. {W}hile {F}ano's inequality provides a tight lower
	bound on the error probability in terms of the entropy, the present
	authors derive a converse result---a tight upper bound on the minimal
	error probability in terms of the entropy. {B}oth bounds are sharp,
	and can draw a relation, as well, between the error probability for
	the maximum a posteriori ({MAP}) rule, and the conditional entropy
	(equivocation), which is a useful uncertainty measure in several
	applications. {C}ombining this relation and the classical channel
	coding theorem, the authors present a channel coding theorem for
	the equivocation which, unlike the channel coding theorem for error
	probability, is meaningful at all rates. {T}his theorem is proved
	directly for {DMC}s, and from this proof it is further concluded
	that for $R \geq C$ the equivocation achieves its minimal value
	of $R - C$ at the rate of $n^{1/2}$ where $n$ is the block length},
  pdf = {../local/Feder1994Relations.pdf},
  file = {Feder1994Relations.pdf:local/Feder1994Relations.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Feder1992Universal,
  author = {Feder, M. and Merhav, N. and Gutman, M.},
  title = {Universal prediction of individual sequences},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1992},
  volume = {38},
  pages = {1258--1270},
  number = {4},
  month = {Jul},
  abstract = {The problem of predicting the next outcome of an individual binary
	sequence using finite memory is considered. {T}he finite-state predictability
	of an infinite sequence is defined as the minimum fraction of prediction
	errors that can be made by any finite-state ({FS}) predictor. {I}t
	is proven that this {FS} predictability can be achieved by universal
	sequential prediction schemes. {A}n efficient prediction procedure
	based on the incremental parsing procedure of the {L}empel-{Z}iv
	data compression algorithm is shown to achieve asymptotically the
	{FS} predictability. {S}ome relations between compressibility and
	predictability are discussed, and the predictability is proposed
	as an additional measure of the complexity of a sequence},
  pdf = {../local/Feder1992Universal.pdf},
  file = {Feder1992Universal.pdf:local/Feder1992Universal.pdf:PDF},
  keywords = {information-theory universal-coding},
  owner = {vert}
}

@inproceedings{Feder1998Universal,
  author = {Feder, M. and Singer, A. C.},
  title = {Universal data compression and linear prediction},
  booktitle = {Data Compression Conference},
  year = {1998},
  abstract = {The relationship between prediction and data compression can be extended
	to universal prediction schemes and universal data compression. {P}revious
	work shows that minimizing the sequential squared prediction error
	for individual sequences can be achieved using the same strategies
	which minimize the sequential code length for data compression of
	individual sequences. {D}efining a ``probability'' as an exponential
	function of sequential loss, results from universal data compression
	can be used to develop universal linear prediction algorithms. {S}pecifically,
	we present an algorithm for linear prediction of individual sequences
	which is twice-universal, over parameters and model orders},
  pdf = {../local/Feder1998Universal.pdf},
  file = {Feder1998Universal.pdf:local/Feder1998Universal.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Felsenstein1981Evolutionary,
  author = {Felsenstein, J.},
  title = {Evolutionary trees from {DNA} sequences: a maximum likelihood approach},
  journal = {Journal of {M}olecular {E}volution},
  volume = {17},
  pages = {368--376},
  year = {1981},
  subject = {bio}
}

@article{Feng2005Boosting,
  author = {Kai-Yan Feng and Yu-Dong Cai and Kuo-Chen Chou},
  title = {Boosting classifier for predicting protein domain structural class.},
  journal = {Biochem {B}iophys {R}es {C}ommun},
  year = {2005},
  volume = {334},
  pages = {213--217},
  number = {1},
  month = {Aug},
  abstract = {A novel classifier, the so-called "{L}ogit{B}oost" classifier, was
	introduced to predict the structural class of a protein domain according
	to its amino acid sequence. {L}ogit{B}oost is featured by introducing
	a log-likelihood loss function to reduce the sensitivity to noise
	and outliers, as well as by performing classification via combining
	many weak classifiers together to build up a very strong and robust
	classifier. {I}t was demonstrated thru jackknife cross-validation
	tests that {L}ogit{B}oost outperformed other classifiers including
	"support vector machine," a very powerful classifier widely used
	in biological literatures. {I}t is anticipated that {L}ogit{B}oost
	can also become a useful vehicle in classifying other attributes
	of proteins according to their sequences, such as subcellular localization
	and enzyme family class, among many others.},
  doi = {10.1016/j.bbrc.2005.06.075},
  pdf = {../local/Feng2005Boosting.pdf},
  file = {Feng2005Boosting.pdf:local/Feng2005Boosting.pdf:PDF},
  keywords = {Archaeal, Artificial Intelligence, Bacterial, Cytomegalovirus, Gene
	Transfer, Genome, Genomics, Horizontal, Non-U.S. Gov't, Research
	Support, Viral, 15993842},
  pii = {S0006-291X(05)01299-4},
  url = {http://dx.doi.org/10.1016/j.bbrc.2005.06.075}
}

@article{Ferea1999Systematic,
  author = {Ferea, T. L. and Botstein, D. and Brown, P. O. and Rosenzweig, R.
	F.},
  title = {Systematic changes in gene expression patterns following adaptive
	evolution in yeast},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  volume = {96},
  number = {17},
  pages = {9721--9726},
  year = {1999},
  url = {http://www.pnas.org/cgi/reprint/96/17/9721.pdf},
  subject = {microarray},
  pdf = {../local/fere99.pdf},
  file = {fere99.pdf:local/fere99.pdf:PDF}
}

@article{Ferrer2005Offline,
  author = {Miguel A Ferrer and Jes{\'u}s B Alonso and Carlos M Travieso},
  title = {Offline geometric parameters for automatic signature verification
	using fixed-point arithmetic.},
  journal = {I{EEE} {T}rans {P}attern {A}nal {M}ach {I}ntell},
  year = {2005},
  volume = {27},
  pages = {993--997},
  number = {6},
  month = {Jun},
  abstract = {This paper presents a set of geometric signature features for offline
	automatic signature verification based on the description of the
	signature envelope and the interior stroke distribution in polar
	and {C}artesian coordinates. {T}he features have been calculated
	using 16 bits fixed-point arithmetic and tested with different classifiers,
	such as hidden {M}arkov models, support vector machines, and {E}uclidean
	distance classifier. {T}he experiments have shown promising results
	in the task of discriminating random and simple forgeries.}
}

@article{Fiebitz2008High-throughput,
  author = {Fiebitz, Andrea and Nyarsik, Lajos and Haendler, Bernard and
	Hu, Yu-Hui and Wagner, Florian and Thamm, Sabine and Lehrach, Hans and
	Janitz, Michal and Vanhecke, Dominique},
  title = {High-throughput mammalian two-hybrid screening for protein-protein
	interactions using transfected cell arrays.},
  journal = {BMC Genomics},
  volume = {9},
  pages = {68},
  year = {2008},
  abstract = {BACKGROUND: Most of the biological processes rely on the formation
	of protein complexes. Investigation of protein-protein interactions
	(PPI) is therefore essential for understanding of cellular functions.
	It is advantageous to perform mammalian PPI analysis in mammalian
	cells because the expressed proteins can then be subjected to essential
	post-translational modifications. Until now mammalian two-hybrid
	assays have been performed on individual gene scale. We here describe
	a new and cost-effective method for the high-throughput detection
	of protein-protein interactions in mammalian cells that combines
	the advantages of mammalian two-hybrid systems with those of DNA
	microarrays. RESULTS: In this cell array protein-protein interaction
	assay (CAPPIA), mixtures of bait and prey expression plasmids together
	with an auto-fluorescent reporter are immobilized on glass slides
	in defined array formats. Adherent cells that grow on top of the
	micro-array will become fluorescent only if the expressed proteins
	interact and subsequently trans-activate the reporter. Using known
	interaction partners and by screening 160 different combinations
	of prey and bait proteins associated with the human androgen receptor
	we demonstrate that this assay allows the quantitative detection
	of specific protein interactions in different types of mammalian
	cells and under the influence of different compounds. Moreover, different
	strategies in respect to bait-prey combinations are presented. CONCLUSION:
	We demonstrate that the CAPPIA assay allows the quantitative detection
	of specific protein interactions in different types of mammalian
	cells and under the influence of different compounds. The high number
	of preys that can be tested per slide together with the flexibility
	to interrogate any bait of interest and the small amounts of reagents
	that are required makes this assay currently one of the most economical
	high-throughput detection assays for protein-protein interactions
	in mammalian cells.},
  doi = {10.1186/1471-2164-9-68},
  url = {http://dx.doi.org/10.1186/1471-2164-9-68},
  pmid = {18254948},
  pii = {1471-2164-9-68},
  institution = {Max Planck Institute for Molecular Genetics, Department Vertebrate
	Genomics, Fabeckstr, 60-62, 14195 Berlin, Germany. fiebitz@molgen.mpg.de},
  pdf = {../local/Fiebitz2008High-throughput.pdf},
  file = {Fiebitz2008High-throughput.pdf:Fiebitz2008High-throughput.pdf:PDF},
  owner = {phupe},
  timestamp = {2010.08.31}
}

@article{Fields1999Functional,
  author = {Fields, S. and Kohara, Y. and Lockhart, D. J.},
  title = {Functional genomics},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {1999},
  volume = {96},
  pages = {8825--8826},
  number = {16},
  month = {Aug},
  pdf = {../local/fiel99.pdf},
  file = {fiel99.pdf:local/fiel99.pdf:PDF},
  subject = {bio},
  url = {http://www.pnas.org/cgi/reprint/96/16/8825.pdf}
}

@article{Fields1989novel,
  author = {Fields, S. and Song, O.},
  title = {A novel genetic system to detect protein-protein interactions},
  journal = {Nature},
  year = {1989},
  month = {Jul},
  volume = {340},
  number = {6230},
  pages = {245--246},
  doi = {10.1038/340245a0},
  url = {http://dx.doi.org/10.1038/340245a0},
  pmid = {2547163},
  comment = {The paper that introduces the yeast two-hybrid system for detection
	of protein-protein interactions},
  abstract = {Protein-protein interactions between two proteins have generally been
	studied using biochemical techniques such as crosslinking, co-immunoprecipitation
	and co-fractionation by chromatography. We have generated a novel
	genetic system to study these interactions by taking advantage of
	the properties of the GAL4 protein of the yeast Saccharomyces cerevisiae.
	This protein is a transcriptional activator required for the expression
	of genes encoding enzymes of galactose utilization. It consists of
	two separable and functionally essential domains: an N-terminal domain
	which binds to specific DNA sequences (UASG); and a C-terminal domain
	containing acidic regions, which is necessary to activate transcription.
	We have generated a system of two hybrid proteins containing parts
	of GAL4: the GAL4 DNA-binding domain fused to a protein 'X' and a
	GAL4 activating region fused to a protein 'Y'. If X and Y can form
	a protein-protein complex and reconstitute proximity of the GAL4
	domains, transcription of a gene regulated by UASG occurs. We have
	tested this system using two yeast proteins that are known to interact--SNF1
	and SNF4. High transcriptional activity is obtained only when both
	hybrids are present in a cell. This system may be applicable as a
	general method to identify proteins that interact with a known protein
	by the use of a simple galactose selection.},
  institution = {Department of Microbiology, State University of New York at Stony
	Brook, Stony Brook 11794.},
  owner = {jp},
  timestamp = {2008.10.02}
}

@inproceedings{Filatov1995Graph,
  author = {Filatov, A. and Gitis, A. and Kil, I.},
  title = {Graph-based handwritten digit string recognition},
  booktitle = {ICDAR '95: Proceedings of the Third International Conference on Document
	Analysis and Recognition (Volume 2)},
  publisher = {IEEE Computer Society},
  address = {Washington, DC, USA},
  year = {1995},
  pages = {845},
  isbn = {0-8186-7128-9}
}

@article{Filipowicz2008Mechanisms,
  author = {Filipowicz, W. and Bhattacharyya, S. N. and Sonenberg, N.},
  title = {Mechanisms of post-transcriptional regulation by micro{RNA}s: are
	the answers in sight?},
  journal = {Nat. Rev. Genet.},
  year = {2008},
  month = {Feb},
  volume = {9},
  number = {2},
  pages = {102--114},
  doi = {10.1038/nrg2290},
  url = {http://dx.doi.org/10.1038/nrg2290},
  pmid = {18197166},
  pii = {nrg2290},
  abstract = {MicroRNAs constitute a large family of small, approximately 21-nucleotide-long,
	non-coding RNAs that have emerged as key post-transcriptional regulators
	of gene expression in metazoans and plants. In mammals, microRNAs
	are predicted to control the activity of approximately 30\% of all
	protein-coding genes, and have been shown to participate in the regulation
	of almost every cellular process investigated so far. By base pairing
	to mRNAs, microRNAs mediate translational repression or mRNA degradation.
	This Review summarizes the current understanding of the mechanistic
	aspects of microRNA-induced repression of translation and discusses
	some of the controversies regarding different modes of microRNA function.},
  institution = {Friedrich Miescher Institute for Biomedical Research, 4002 Basel,
	Switzerland. witold.filipowicz@fmi.ch},
  keywords = {csbcbook},
  pdf = {../local/Filipowicz2008Mechanisms.pdf},
  file = {Filipowicz2008Mechanisms.pdf:Filipowicz2008Mechanisms.pdf:PDF},
  owner = {jp},
  timestamp = {2009.10.09}
}

@article{Fine2001Efficient,
  author = {Fine, S. and Scheinberg, K.},
  title = {Efficient {SVM} training using low-rank kernel representations},
  journal = {J. Mach. Learn. Res.},
  volume = {2},
  pages = {243--264},
  year = {2001}
}

@article{Finetti2008Sixteen-kinase,
  author = {Finetti, P. and Cervera, N. and Charafe-Jauffret, E. and Chabannon,
	C. and Charpin, C. and Chaffanet, M. and Jacquemier, J. and Viens,
	P. and Birnbaum, D. and Bertucci, F.},
  title = {Sixteen-kinase gene expression identifies luminal breast cancers
	with poor prognosis},
  journal = {Cancer Res.},
  year = {2008},
  volume = {68},
  pages = {767--776},
  number = {3},
  month = {Feb},
  abstract = {Breast cancer is a heterogeneous disease made of various molecular
	subtypes with different prognosis. However, evolution remains difficult
	to predict within some subtypes, such as luminal A, and treatment
	is not as adapted as it should be. Refinement of prognostic classification
	and identification of new therapeutic targets are needed. Using oligonucleotide
	microarrays, we profiled 227 breast cancers. We focused our analysis
	on two major breast cancer subtypes with opposite prognosis, luminal
	A (n = 80) and basal (n = 58), and on genes encoding protein kinases.
	Whole-kinome expression separated luminal A and basal tumors. The
	expression (measured by a kinase score) of 16 genes encoding serine/threonine
	kinases involved in mitosis distinguished two subgroups of luminal
	A tumors: Aa, of good prognosis and Ab, of poor prognosis. This classification
	and its prognostic effect were validated in 276 luminal A cases from
	three independent series profiled across different microarray platforms.
	The classification outperformed the current prognostic factors in
	univariate and multivariate analyses in both training and validation
	sets. The luminal Ab subgroup, characterized by high mitotic activity
	compared with luminal Aa tumors, displayed clinical characteristics
	and a kinase score intermediate between the luminal Aa subgroup and
	the luminal B subtype, suggesting a continuum in luminal tumors.
	Some of the mitotic kinases of the signature represent therapeutic
	targets under investigation. The identification of luminal A cases
	of poor prognosis should help select appropriate treatment, whereas
	the identification of a relevant kinase set provides potential targets.},
  doi = {10.1158/0008-5472.CAN-07-5516},
  pdf = {../local/Finetti2008Sixteen-kinase.pdf},
  file = {Finetti2008Sixteen-kinase.pdf:Finetti2008Sixteen-kinase.pdf:PDF},
  institution = {UMR599 Inserm, Institut Paoli-Calmettes, Laboratoire d'Oncologie
	Mol{\'e}culaire, Centre de Recherche en Canc{\'e}rologie de Marseille, Marseille,
	France.},
  keywords = {csbcbook, csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {68/3/767},
  pmid = {18245477},
  timestamp = {2009.10.18},
  url = {http://dx.doi.org/10.1158/0008-5472.CAN-07-5516}
}

@article{Finn1998Pharmacophore,
  author = {Finn, P. and Muggleton, S. and Page, D. and Srinivasan, A.},
  title = {Pharmacophore discovery using the inductive logic programming language
	{P}rogol},
  journal = {Machine {L}earning},
  year = {1998},
  volume = {30},
  pages = {241--270},
  citeseerurl = {http://citeseer.ist.psu.edu/finn98pharmacophore.html},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.02.03}
}

@article{Fire1998Potent,
  author = {Fire, A. and Xu, S. and Montgomery, M. K. and Kostas, S. A. and Driver,
	S. E. and Mello, C. C.},
  title = {Potent and specific genetic interference by double-stranded {RNA}
	in {Caenorhabditis} elegans.},
  journal = {Nature},
  year = {1998},
  volume = {391},
  pages = {806--811},
  number = {6669},
  month = {Feb},
  abstract = {Experimental introduction of RNA into cells can be used in certain
	biological systems to interfere with the function of an endogenous
	gene. Such effects have been proposed to result from a simple antisense
	mechanism that depends on hybridization between the injected RNA
	and endogenous messenger RNA transcripts. RNA interference has been
	used in the nematode Caenorhabditis elegans to manipulate gene expression.
	Here we investigate the requirements for structure and delivery of
	the interfering RNA. To our surprise, we found that double-stranded
	RNA was substantially more effective at producing interference than
	was either strand individually. After injection into adult animals,
	purified single strands had at most a modest effect, whereas double-stranded
	mixtures caused potent and specific interference. The effects of
	this interference were evident in both the injected animals and their
	progeny. Only a few molecules of injected double-stranded RNA were
	required per affected cell, arguing against stoichiometric interference
	with endogenous mRNA and suggesting that there could be a catalytic
	or amplification component in the interference process.},
  doi = {10.1038/35888},
  pdf = {../local/Fire1998Potent.pdf},
  file = {Fire1998Potent.pdf:Fire1998Potent.pdf:PDF},
  keywords = {sirna},
  owner = {vert},
  pmid = {9486639},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1038/35888}
}

@incollection{Fischetti02Generalized,
  author = {Fischetti, M. and Salazar-Gonzalez, J. J. and Toth, P.},
  title = {The Generalized Travelling Salesman and Orienteering Problems},
  booktitle = {The Travelling Salesman Problem and Its Variations},
  editor = {Gutin, G. and Punnen, A. P.},
  publisher = {Kluwer Academic Publishers},
  year = {2002},
  pages = {609--662}
}

@article{Fischler1981Random,
  author = {Fischler, M. A. and Bolles, R. C.},
  title = {Random Sample Consensus: A Paradigm for Model Fitting with Applications
	to Image Analysis and Automated Cartography},
  journal = {Commun. of the ACM},
  year = {1981},
  volume = {24},
  pages = {381--395},
  number = {6},
  abstract = {A new paradigm, Random Sample Consensus (RANSAC), for fitting a model
	to experimental data is introduced. RANSAC is capable of interpreting/smoothing
	data containing a significant percentage of gross errors, and is
	thus ideally suited for applications in automated image analysis
	where interpretation is based on the data provided by error-prone
	feature detectors. A major portion of this paper describes the application
	of RANSAC to the Location Determination Problem (LDP): Given an image
	depicting a set of landmarks with known locations, determine that
	point in space from which the image was obtained. In response to
	a RANSAC requirement, new results are derived on the minimum number
	of landmarks needed to obtain a solution, and algorithms are presented
	for computing these minimum-landmark solutions in closed form. These
	results provide the basis for an automatic system that can solve
	the LDP under difficult viewing.}
}

@article{Fisher1950Gene,
  author = {Fisher, R. A.},
  title = {Gene frequencies in a cline determined by selection and diffusion},
  journal = {Biometrics},
  year = {1950},
  volume = {6},
  pages = {353--361},
  number = {4},
  publisher = {JSTOR}
}

@article{Flannick2006Graemlin,
  author = {Flannick, J. and Novak, A. and Srinivasan, B. S. and McAdams, H. H.
	and Batzoglou, S.},
  title = {Graemlin: general and robust alignment of multiple large interaction
	networks},
  journal = {Genome Res.},
  year = {2006},
  volume = {16},
  pages = {1169--1181},
  number = {9},
  month = {Sep},
  abstract = {The recent proliferation of protein interaction networks has motivated
	research into network alignment: the cross-species comparison of
	conserved functional modules. Previous studies have laid the foundations
	for such comparisons and demonstrated their power on a select set
	of sparse interaction networks. Recently, however, new computational
	techniques have produced hundreds of predicted interaction networks
	with interconnection densities that push existing alignment algorithms
	to their limits. To find conserved functional modules in these new
	networks, we have developed Graemlin, the first algorithm capable
	of scalable multiple network alignment. Graemlin's explicit model
	of functional evolution allows both the generalization of existing
	alignment scoring schemes and the location of conserved network topologies
	other than protein complexes and metabolic pathways. To assess Graemlin's
	performance, we have developed the first quantitative benchmarks
	for network alignment, which allow comparisons of algorithms in terms
	of their ability to recapitulate the KEGG database of conserved functional
	modules. We find that Graemlin achieves substantial scalability gains
	over previous methods while improving sensitivity.},
  doi = {10.1101/gr.5235706},
  pdf = {../local/Flannick2006Graemlin.pdf},
  file = {Flannick2006Graemlin.pdf:local/Flannick2006Graemlin.pdf:PDF},
  institution = {Department of Computer Science, Stanford University, Stanford, California
	94305, USA.},
  owner = {jp},
  pii = {gr.5235706},
  pmid = {16899655},
  timestamp = {2008.10.03},
  url = {http://dx.doi.org/10.1101/gr.5235706}
}

@article{Fliri2005Analysis,
  author = {Fliri, A. F. and Loging, W. T. and Thadeio, P. F. and Volkmann, R.
	A.},
  title = {Analysis of drug-induced effect patterns to link structure and side
	effects of medicines.},
  journal = {Nat. Chem. Biol.},
  year = {2005},
  month = {Dec},
  volume = {1},
  number = {7},
  pages = {389--397},
  doi = {10.1038/nchembio747},
  url = {http://dx.doi.org/10.1038/nchembio747},
  pmid = {16370374},
  abstract = {The high failure rate of experimental medicines in clinical trials
	accentuates inefficiencies of current drug discovery processes caused
	by a lack of tools for translating the information exchange between
	protein and organ system networks. Recently, we reported that biological
	activity spectra (biospectra), derived from in vitro protein binding
	assays, provide a mechanism for assessing a molecule's capacity to
	modulate the function of protein-network components. Herein we describe
	the translation of adverse effect data derived from 1,045 prescription
	drug labels into effect spectra and show their utility for diagnosing
	drug-induced effects of medicines. In addition, notwithstanding the
	limitation imposed by the quality of drug label information, we show
	that biospectrum analysis, in concert with effect spectrum analysis,
	provides an alignment between preclinical and clinical drug-induced
	effects. The identification of this alignment provides a mechanism
	for forecasting clinical effect profiles of medicines.},
  keywords = {chemoinformatics},
  pdf = {../local/Fliri2005Analysis.pdf},
  file = {Fliri2005Analysis.pdf:Fliri2005Analysis.pdf:PDF},
  owner = {vert},
  timestamp = {2006.11.24}
}

@article{Fliri2005Biological,
  author = {Fliri, A. F. and Loging, W. T. and Thadeio, P. F. and Volkmann, R.
	A.},
  title = {Biological spectra analysis: Linking biological activity profiles
	to molecular structure.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2005},
  month = {Jan},
  volume = {102},
  number = {2},
  pages = {261--266},
  doi = {10.1073/pnas.0407790101},
  url = {http://dx.doi.org/10.1073/pnas.0407790101},
  pmid = {15625110},
  pii = {0407790101},
  abstract = {Establishing quantitative relationships between molecular structure
	and broad biological effects has been a longstanding challenge in
	science. Currently, no method exists for forecasting broad biological
	activity profiles of medicinal agents even within narrow boundaries
	of structurally similar molecules. Starting from the premise that
	biological activity results from the capacity of small organic molecules
	to modulate the activity of the proteome, we set out to investigate
	whether descriptor sets could be developed for measuring and quantifying
	this molecular property. Using a 1,567-compound database, we show
	that percent inhibition values, determined at single high drug concentration
	in a battery of in vitro assays representing a cross section of the
	proteome, provide precise molecular property descriptors that identify
	the structure of molecules. When broad biological activity of molecules
	is represented in spectra form, organic molecules can be sorted by
	quantifying differences between biological spectra. Unlike traditional
	structure-activity relationship methods, sorting of molecules by
	using biospectra comparisons does not require knowledge of a molecule's
	putative drug targets. To illustrate this finding, we selected as
	starting point the biological activity spectra of clotrimazole and
	tioconazole because their putative target, lanosterol demethylase
	(CYP51), was not included in the bioassay array. Spectra similarity
	obtained through profile similarity measurements and hierarchical
	clustering provided an unbiased means for establishing quantitative
	relationships between chemical structures and biological activity
	spectra. This methodology, which we have termed biological spectra
	analysis, provides the capability not only of sorting molecules on
	the basis of biospectra similarity but also of predicting simultaneous
	interactions of new molecules with multiple proteins.},
  keywords = {chemoinformatics},
  pdf = {../local/Fliri2005Biological.pdf},
  file = {Fliri2005Biological.pdf:Fliri2005Biological.pdf:PDF},
  owner = {vert},
  timestamp = {2006.11.24}
}

@article{Fliri2005Biospectra,
  author    = {Fliri, Anton F and Loging, William T and Thadeio, Peter F and
	Volkmann, Robert A},
  title     = {Biospectra analysis: model proteome characterizations for linking
	molecular structure and biological response.},
  journal   = {J. Med. Chem.},
  year      = {2005},
  month     = {Nov},
  volume    = {48},
  number    = {22},
  pages     = {6918--6925},
  doi       = {10.1021/jm050494g},
  url       = {http://dx.doi.org/10.1021/jm050494g},
  pmid      = {16250650},
  pdf       = {../local/Fliri2005Biospectra.pdf},
  file      = {Fliri2005Biospectra.pdf:Fliri2005Biospectra.pdf:PDF},
  keywords  = {chemoinformatics},
  owner     = {vert},
  timestamp = {2006.11.24},
  abstract  = {Establishing quantitative relationships between molecular structure
	and broad biological effects has been a long-standing goal in drug
	discovery. Evaluation of the capacity of molecules to modulate protein
	functions is a prerequisite for understanding the relationship between
	molecular structure and in vivo biological response. A particular
	challenge in these investigations is to derive quantitative measurements
	of a molecule's functional activity pattern across different proteins.
	Herein we describe an operationally simple probabilistic structure-activity
	relationship (SAR) approach, termed biospectra analysis, for identifying
	agonist and antagonist effect profiles of medicinal agents by using
	pattern similarity between biological activity spectra (biospectra)
	of molecules as the determinant. Accordingly, in vitro binding data
	(percent inhibition values of molecules determined at single high
	drug concentration in a battery of assays representing a cross section
	of the proteome) are useful for identifying functional effect profile
	similarity between medicinal agents. To illustrate this finding,
	the relationship between biospectra similarity of 24 molecules, identified
	by hierarchical clustering of a 1567 molecule dataset as being most
	closely aligned with the neurotransmitter dopamine, and their agonist
	or antagonist properties was probed. Distinguishing the results described
	in this study from those obtained with affinity-based methods, the
	observed association between biospectra and biological response profile
	similarity remains intact even upon removal of putative drug targets
	from the dataset (four dopaminergic [D1/D2/D3/D4] and two adrenergic
	[alpha1 and alpha2] receptors). These findings indicate that biospectra
	analysis provides an unbiased new tool for forecasting structure-response
	relationships and for translating broad biological effect information
	into chemical structure design.}
}

@article{Florens2002proteomic,
  author    = {Florens, Laurence and Washburn, Michael P and Raine, J. Dale and
	Anthony, Robert M and Grainger, Munira and Haynes, J. David and
	Moch, J. Kathleen and Muster, Nemone and Sacci, John B and
	Tabb, David L and Witney, Adam A and Wolters, Dirk and Wu, Yimin and
	Gardner, Malcolm J and Holder, Anthony A and Sinden, Robert E and
	Yates, John R and Carucci, Daniel J},
  title     = {{A} proteomic view of the {P}lasmodium falciparum life cycle.},
  journal   = {Nature},
  year      = {2002},
  month     = {Oct},
  volume    = {419},
  number    = {6906},
  pages     = {520--526},
  doi       = {10.1038/nature01107},
  url       = {http://dx.doi.org/10.1038/nature01107},
  pmid      = {12368862},
  pii       = {nature01107},
  pdf       = {../local/Florens2002proteomic.pdf},
  file      = {Florens2002proteomic.pdf:local/Florens2002proteomic.pdf:PDF},
  keywords  = {plasmodium},
  timestamp = {2006.04.13},
  abstract  = {The completion of the Plasmodium falciparum clone 3D7 genome provides
	a basis on which to conduct comparative proteomics studies of this
	human pathogen. Here, we applied a high-throughput proteomics approach
	to identify new potential drug and vaccine targets and to better
	understand the biology of this complex protozoan parasite. We characterized
	four stages of the parasite life cycle (sporozoites, merozoites,
	trophozoites and gametocytes) by multidimensional protein identification
	technology. Functional profiling of over 2,400 proteins agreed with
	the physiology of each stage. Unexpectedly, the antigenically variant
	proteins of var and rif genes, defined as molecules on the surface
	of infected erythrocytes, were also largely expressed in sporozoites.
	The detection of chromosomal clusters encoding co-expressed proteins
	suggested a potential mechanism for controlling gene expression.}
}

@article{Flower1998Properties,
  author = {D. R. Flower},
  title = {On the Properties of Bit String-Based Measures of Chemical Similarity},
  journal = {J Chem Inf Comput Sci},
  year = {1998},
  volume = {38},
  pages = {379--386},
  owner = {mahe},
  timestamp = {2006.09.03}
}

@article{Flusberg2010Direct,
  author = {Benjamin A Flusberg and Dale R Webster and Jessica H Lee and Kevin
	J Travers and Eric C Olivares and Tyson A Clark and Jonas Korlach
	and Stephen W Turner},
  title = {Direct detection of {DNA} methylation during single-molecule, real-time
	sequencing.},
  journal = {Nat Methods},
  year = {2010},
  volume = {7},
  pages = {461--465},
  number = {6},
  month = {Jun},
  abstract = {We describe the direct detection of DNA methylation, without bisulfite
	conversion, through single-molecule, real-time (SMRT) sequencing.
	In SMRT sequencing, DNA polymerases catalyze the incorporation of
	fluorescently labeled nucleotides into complementary nucleic acid
	strands. The arrival times and durations of the resulting fluorescence
	pulses yield information about polymerase kinetics and allow direct
	detection of modified nucleotides in the DNA template, including
	N6-methyladenine, 5-methylcytosine and 5-hydroxymethylcytosine. Measurement
	of polymerase kinetics is an intrinsic part of SMRT sequencing and
	does not adversely affect determination of primary DNA sequence.
	The various modifications affect polymerase kinetics differently,
	allowing discrimination between them. We used these kinetic signatures
	to identify adenine methylation in genomic samples and found that,
	in combination with circular consensus sequencing, they can enable
	single-molecule identification of epigenetic modifications with base-pair
	resolution. This method is amenable to long read lengths and will
	likely enable mapping of methylation patterns in even highly repetitive
	genomic regions.},
  doi = {10.1038/nmeth.1459},
  institution = {Pacific Biosciences, Menlo Park, California, USA.},
  keywords = {DNA Methylation; DNA-Directed DNA Polymerase, metabolism; Escherichia
	coli, genetics; Kinetics; Principal Component Analysis; Sequence
	Analysis, DNA, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nmeth.1459},
  pmid = {20453866},
  timestamp = {2010.07.27},
  url = {http://dx.doi.org/10.1038/nmeth.1459}
}

@article{Fodor1991Light-directed,
  author  = {Fodor, S. P. and Read, J. L. and Pirrung, M. C. and Stryer, L. and
	Lu, A. T. and Solas, D.},
  title   = {Light-directed, spatially addressable parallel chemical synthesis},
  journal = {Science},
  year    = {1991},
  volume  = {251},
  pages   = {767--773},
  url     = {http://links.jstor.org/sici?sici=0036-8075%2819910215%293%3A251%3A4995%3C767%3ALSAPCS%3E2.0.CO%3B2-J},
  pdf     = {../local/Fodor1991Light-directed.pdf},
  file    = {Fodor1991Light-directed.pdf:local/Fodor1991Light-directed.pdf:PDF}
}

@article{Fong2004Predicting,
  author = {Fong, J. H. and Keating, A. E. and Singh, M.},
  title = {Predicting specificity in b{ZIP} coiled-coil protein interactions},
  journal = {Genome {B}iol.},
  year = {2004},
  volume = {5},
  pages = {R11},
  number = {2},
  abstract = {We present a method for predicting protein-protein interactions mediated
	by the coiled-coil motif. {W}hen tested on interactions between nearly
	all human and yeast b{ZIP} proteins, our method identifies 70\% of
	strong interactions while maintaining that 92\% of predictions are
	correct. {F}urthermore, cross-validation testing shows that including
	the b{ZIP} experimental data significantly improves performance.
	{O}ur method can be used to predict b{ZIP} interactions in other
	genomes and is a promising approach for predicting coiled-coil interactions
	more generally.},
  pdf = {../local/Fong2004Predicting.pdf},
  file = {Fong2004Predicting.pdf:local/Fong2004Predicting.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://genomebiology.com/2004/5/2/R11}
}

@article{Formosa2003Changing,
  author = {T. Formosa},
  title = {Changing the {DNA} landscape: putting a {SPN} on chromatin.},
  journal = {Curr Top Microbiol Immunol},
  year = {2003},
  volume = {274},
  pages = {171--201},
  abstract = {In eukaryotic cells, transcription and replication each occur on DNA
	templates that are incorporated into nucleosomes. Formation of chromatin
	generally limits accessibility of specific DNA sequences and inhibits
	progression of polymerases as they copy information from the DNA.
	The processes that select sites for initiating either transcription
	or replication are therefore strongly influenced by factors that
	modulate the properties of chromatin proteins. Further, in order
	to elongate their products, both DNA and RNA polymerases must be
	able to overcome the inhibition presented by chromatin (Lipford and
	Bell 2001; Workman and Kingston 1998). One way to adjust the properties
	of chromatin proteins is to covalently modify them by adding or removing
	chemical moieties. Both histone and non-histone chromatin proteins
	are altered by acetylation, methylation, and other changes, and the
	'nucleosome modifying' complexes that perform these reactions are
	important components of pathways of transcriptional regulation (Cote
	2002; Orphanides and Reinberg 2000; Roth et al. 2001; Strahl and
	Allis 2000; Workman and Kingston 1998). Another way to alter the
	effects of nucleosomes is to change the position of the histone octamers
	relative to specific DNA sequences (Orphanides and Reinberg 2000;
	Verrijzer 2002; Wang 2002; Workman and Kingston 1998). Since the
	ability of a sequence to be bound by specific proteins can vary significantly
	whether the sequence is in the linkers between nucleosomes or at
	various positions within a nucleosome, 'nucleosome remodeling' complexes
	that rearrange nucleosome positioning are also important regulators
	of transcription. Since the DNA replication machinery has to encounter
	many of the same challenges posed by chromatin, it seems likely that
	modifying and remodeling complexes also act during duplication of
	the genome, but most of the current information on these factors
	relates to regulation of transcription. This chapter describes the
	factor known variously as FACT in humans, where it promotes elongation
	of RNA polymerase II on nucleosomal templates in vitro (Orphanides
	et al. 1998, 1999), DUF in frogs, where it is needed for DNA replication
	in oocyte extracts (Okuhara et al. 1999), and CP or SPN in yeast,
	where it is linked in vivo to both transcription and replication
	(Brewster et al. 2001; Formosa et al. 2001). Like the nucleosome
	modifying and remodeling complexes, it is broadly conserved among
	eukaryotes, affects a wide range of processes that utilize chromatin,
	and directly alters the properties of nucleosomes. However, it does
	not have nucleosome modifying or standard ATP-dependent remodeling
	activity, and therefore represents a third class of chromatin modulating
	factors. It is also presently unique in the extensive connections
	it displays with both transcription and replication: FACT/DUF/CP/SPN
	appears to modify nucleosomes in a way that is directly important
	for the efficient functioning of both RNA polymerases and DNA polymerases.
	While less is known about the mechanisms it uses to promote its functions
	than for other factors that affect chromatin, it is clearly an essential
	part of the complex mixture of activities that modulate access to
	DNA within chromatin. Physical and genetic interactions suggest that
	FACT/DUF/CP/SPN affects multiple pathways within replication and
	transcription as a member of several distinct complexes. Some of
	the interactions are easy to assimilate into models for replication
	or transcription, such as direct binding to DNA polymerase alpha
	(Wittmeyer and Formosa 1997; Wittmeyer et al. 1999), association
	with nucleosome modifying complexes (John et al. 2000), and interaction
	with factors that participate in elongation of RNA Polymerase II
	(Gavin et al. 2002; Squazzo et al. 2002). Others are more surprising
	such as an association with the 19S complex that regulates the function
	of the 20S proteasome (Ferdous et al. 2001; Xu et al. 1995), and
	the indication that FACT/DUF/CP/SPN can act as a specificity factor
	for casein kinase II (Keller et al. 2001). This chapter reviews the
	varied approaches that have each revealed different aspects of the
	function of FACT/DUF/CP/SPN, and presents a picture of a factor that
	can both alter nucleosomes and orchestrate the assembly or activity
	of a broad range of complexes that act upon chromatin.},
  institution = {University of Utah, Biochemistry, 20 N 1900 E RM 211, Salt Lake City,
	UT 84132-3201, USA. Tim.Formosa@hsc.utah.edu},
  keywords = {Animals; Cell Cycle Proteins, metabolism; Chromatin, metabolism; DNA,
	metabolism; Eukaryotic Cells, metabolism; Gene Expression Regulation;
	Humans; Saccharomyces cerevisiae Proteins; Transcription Factors,
	metabolism; Transcription, Genetic; Transcriptional Elongation Factors},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {12596908},
  timestamp = {2010.11.23}
}

@article{Forney1973Viterbi,
  author = {G. D. Forney},
  title = {The {Viterbi} algorithm},
  journal = {Proc. IEEE},
  year = {1973},
  volume = {61},
  pages = {268--278},
  number = {3},
  month = {Mar},
  pdf = {../local/Forney1973Viterbi.pdf},
  file = {Forney1973Viterbi.pdf:Forney1973Viterbi.pdf:PDF},
  owner = {michael},
  timestamp = {2008.10.02}
}

@techreport{Fortin1996graph,
  author = {S. Fortin},
  title = {The Graph Isomorphism Problem},
  institution = {Department of Computing Science, University of Alberta},
  year = {1996},
  number = {TR 96-20},
  address = {Edmonton, Alberta, Canada}
}

@article{Foster1994A,
  author    = {Foster, Dean P. and George, Edward I.},
  title     = {The Risk Inflation Criterion for Multiple Regression},
  journal   = {The Annals of Statistics},
  year      = {1994},
  volume    = {22},
  number    = {4},
  pages     = {1947--1975},
  publisher = {Institute of Mathematical Statistics},
  issn      = {00905364},
  doi       = {10.2307/2242493},
  url       = {http://dx.doi.org/10.2307/2242493},
  keywords  = {mdl, regression},
  abstract  = {A new criterion is proposed for the evaluation of variable selection
	procedures in multiple regression. This criterion, which we call
	the risk inflation, is based on an adjustment to the risk. Essentially,
	the risk inflation is the maximum increase in risk due to selecting
	rather than knowing the "correct" predictors. A new variable selection
	procedure is obtained which, in the case of orthogonal predictors,
	substantially improves on AIC, Cp and BIC and is close to optimal.
	In contrast to AIC, Cp and BIC which use dimensionality penalties
	of 2, 2 and log n, respectively, this new procedure uses a penalty
	2 log p, where p is the number of available predictors. For the case
	of nonorthogonal predictors, bounds for the optimal penalty are obtained.}
}

@article{Foucart2009Sparsest,
  author = {Foucart, Simon and Lai, Ming-Jun},
  title = {Sparsest solutions of underdetermined linear systems via {$\ell_q$}-minimization
	for {$0 < q \leq 1$}},
  journal = {Applied and {C}omputational {H}armonic {A}nalysis},
  year = {2009},
  volume = {26},
  pages = {395--407},
  number = {3},
  month = {May},
  doi = {10.1016/j.acha.2008.09.001},
  url = {http://dx.doi.org/10.1016/j.acha.2008.09.001}
}

@article{Frank1956Algorithm,
  author = {M. Frank and P. Wolfe},
  title = {An algorithm for quadratic programming},
  journal = {Naval Research Logistics Quarterly},
  year = {1956},
  volume = {3},
  pages = {95--110},
  publisher = {INFORMS}
}

@article{Franke2006Reconstruction,
  author = {Franke, L. and van Bakel, H. and Fokkens, L. and de Jong, E. D. and
	Egmont-Petersen, M. and Wijmenga, C.},
  title = {Reconstruction of a functional human gene network, with an application
	for prioritizing positional candidate genes.},
  journal = {Am. J. Hum. Genet.},
  year = {2006},
  volume = {78},
  pages = {1011--1025},
  number = {6},
  month = {Jun},
  abstract = {Most common genetic disorders have a complex inheritance and may result
	from variants in many genes, each contributing only weak effects
	to the disease. Pinpointing these disease genes within the myriad
	of susceptibility loci identified in linkage studies is difficult
	because these loci may contain hundreds of genes. However, in any
	disorder, most of the disease genes will be involved in only a few
	different molecular pathways. If we know something about the relationships
	between the genes, we can assess whether some genes (which may reside
	in different loci) functionally interact with each other, indicating
	a joint basis for the disease etiology. There are various repositories
	of information on pathway relationships. To consolidate this information,
	we developed a functional human gene network that integrates information
	on genes and the functional relationships between genes, based on
	data from the Kyoto Encyclopedia of Genes and Genomes, the Biomolecular
	Interaction Network Database, Reactome, the Human Protein Reference
	Database, the Gene Ontology database, predicted protein-protein interactions,
	human yeast two-hybrid interactions, and microarray co-expressions.
	We applied this network to interrelate positional candidate genes
	from different disease loci and then tested 96 heritable disorders
	for which the Online Mendelian Inheritance in Man database reported
	at least three disease genes. Artificial susceptibility loci, each
	containing 100 genes, were constructed around each disease gene,
	and we used the network to rank these genes on the basis of their
	functional interactions. By following up the top five genes per artificial
	locus, we were able to detect at least one known disease gene in
	54\% of the loci studied, representing a 2.8-fold increase over random
	selection. This suggests that our method can significantly reduce
	the cost and effort of pinpointing true disease genes in analyses
	of disorders for which numerous loci have been reported but for which
	most of the genes are unknown.},
  doi = {10.1086/504300},
  pdf = {../local/Franke2006Reconstruction.pdf},
  file = {Franke2006Reconstruction.pdf:Franke2006Reconstruction.pdf:PDF},
  institution = {Complex Genetics Section, Department of Biomedical Genetics-Department
	of Medical Genetics, University Medical Centre Utrecht, Utrecht,
	The Netherlands.},
  owner = {mordelet},
  pii = {S0002-9297(07)63922-6},
  pmid = {16685651},
  timestamp = {2010.09.28},
  url = {http://dx.doi.org/10.1086/504300}
}

@article{Fraunholz2005Systems,
  author    = {Fraunholz, M. J.},
  title     = {{S}ystems biology in malaria research.},
  journal   = {Trends Parasitol.},
  year      = {2005},
  month     = {Sep},
  volume    = {21},
  number    = {9},
  pages     = {393--395},
  doi       = {10.1016/j.pt.2005.07.007},
  url       = {http://dx.doi.org/10.1016/j.pt.2005.07.007},
  pmid      = {16043412},
  pii       = {S1471-4922(05)00194-7},
  pdf       = {../local/Fraunholz2005Systems.pdf},
  file      = {Fraunholz2005Systems.pdf:Fraunholz2005Systems.pdf:PDF},
  keywords  = {plasmodium},
  timestamp = {2006.04.13},
  abstract  = {A recent publication of genome and expression analyses of the murine
	parasites Plasmodium chabaudi chabaudi and Plasmodium berghei presents
	the state of the art in Plasmodium systems biology. By integrating
	genomics, transcriptomics and proteomics, the authors can classify
	and annotate genes by their expression profiles and can even detect
	evidence of posttranscriptional gene silencing in the murine malaria
	species.}
}

@article{Fredholm2007G-protein-coupled,
  author    = {Fredholm, B. B. and H{\"o}kfelt, T. and Milligan, G.},
  title     = {G-protein-coupled receptors: an update.},
  journal   = {Acta Physiol.},
  year      = {2007},
  month     = {May},
  volume    = {190},
  number    = {1},
  pages     = {3--7},
  doi       = {10.1111/j.1365-201X.2007.01689.x},
  url       = {http://dx.doi.org/10.1111/j.1365-201X.2007.01689.x},
  pmid      = {17428227},
  pii       = {APS1689},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.01.16},
  abstract  = {The receptors that couple to G proteins (GPCR) and which span the
	cell membranes seven times (7-TM receptors) were the focus of a symposium
	in Stockholm 2006. The ensemble of GPCR has now been mapped in several
	animal species. They remain a major focus of interest in drug development,
	and their diverse physiological and pathophysiological roles are
	being clarified, i.a. by genetic targeting. Recent developments hint
	at novel levels of complexity. First, many, if not all, GPCRs are
	part of multimeric ensembles, and physiology and pharmacology of
	a given GPCR may be at least partly guided by the partners it was
	formed together with. Secondly, at least some GPCRs may be constitutively
	active. Therefore, drugs that are inverse agonists may prove useful.
	Furthermore, the level of activity may vary in such a profound way
	between cells and tissues that this could offer new ways of achieving
	specificity of drug action. Finally, it is becoming increasingly
	clear that some of these receptors can signal via novel types of
	pathways, and hence that 'GPCRs' may not always be G-protein-coupled.
	Thus there are many challenges for the basic scientist and the drug
	industry.}
}

@article{Freedman2010Lies,
  author = {Freedman, D. H.},
  title = {Lies, damned lies, and medical science},
  journal = {The Atlantic},
  year = {2010},
  volume = {306},
  pages = {76--84},
  number = {4}
}

@article{Freedman2006Statistical,
  author = {Freedman, D. A.},
  title = {Statistical Models for Causation: What Inferential Leverage Do They
	Provide?},
  journal = {Evaluation Review},
  year = {2006},
  volume = {30},
  pages = {691--713},
  number = {6},
  publisher = {Sage Publications}
}

@article{Freeman2006Copy,
  author      = {Freeman, J. L. and Perry, G. H. and Feuk, L. and Redon, R. and McCarroll,
	S. A. and Altshuler, D. M. and Aburatani, H. and Jones, K. W. and
	Tyler-Smith, C. and Hurles, M. E. and Carter, N. P. and Scherer,
	S. W. and Lee, C.},
  title       = {Copy number variation: new insights in genome diversity},
  journal     = {Genome Res},
  year        = {2006},
  month       = {Aug},
  volume      = {16},
  number      = {8},
  pages       = {949--961},
  doi         = {10.1101/gr.3677206},
  url         = {http://dx.doi.org/10.1101/gr.3677206},
  pmid        = {16809666},
  pii         = {gr.3677206},
  pdf         = {../local/Freeman2006Copy.pdf},
  file        = {Freeman2006Copy.pdf:Freeman2006Copy.pdf:PDF},
  institution = {Department of Pathology, Brigham and Women's Hospital, Boston, Massachusetts
	02115, USA.},
  keywords    = {cgh, csbcbook, csbcbook-ch2},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2009.10.18},
  abstract    = {DNA copy number variation has long been associated with specific chromosomal
	rearrangements and genomic disorders, but its ubiquity in mammalian
	genomes was not fully realized until recently. Although our understanding
	of the extent of this variation is still developing, it seems likely
	that, at least in humans, copy number variants (CNVs) account for
	a substantial amount of genetic variation. Since many CNVs include
	genes that result in differential levels of gene expression, CNVs
	may account for a significant proportion of normal phenotypic variation.
	Current efforts are directed toward a more comprehensive cataloging
	and characterization of CNVs that will provide the basis for determining
	how genomic diversity impacts biological function, evolution, and
	common human diseases.}
}

@article{Freier1986Improved,
  author = {Freier, S. M. and Kierzek, R. and Jaeger, J. A. and Sugimoto, N.
	and Caruthers, M. H. and Neilson, T. and Turner, D. H.},
  title = {Improved free-energy parameters for predictions of {RNA} duplex stability.},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {1986},
  volume = {83},
  pages = {9373--9377},
  number = {24},
  month = {Dec},
  abstract = {Thermodynamic parameters for prediction of {RNA} duplex stability
	are reported. {O}ne parameter for duplex initiation and 10 parameters
	for helix propagation are derived from enthalpy and free-energy changes
	for helix formation by 45 {RNA} oligonucleotide duplexes. {T}he oligomer
	sequences were chosen to maximize reliability of secondary structure
	predictions. {E}ach of the 10 nearest-neighbor sequences is well-represented
	among the 45 oligonucleotides, and the sequences were chosen to minimize
	experimental errors in delta {GO} at 37 degrees {C}. {T}hese parameters
	predict melting temperatures of most oligonucleotide duplexes within
	5 degrees {C}. {T}his is about as good as can be expected from the
	nearest-neighbor model. {F}ree-energy changes for helix propagation
	at dangling ends, terminal mismatches, and internal {G} {X} {U} mismatches,
	and free-energy changes for helix initiation at hairpin loops, internal
	loops, or internal bulges are also tabulated.}
}

@article{Freudenberg2002similarity-based,
  author      = {Freudenberg, J. and Propping, P.},
  title       = {A similarity-based method for genome-wide prediction of disease-relevant
	human genes},
  journal     = {Bioinformatics},
  year        = {2002},
  volume      = {18 Suppl 2},
  pages       = {S110--S115},
  pmid        = {12385992},
  pdf         = {../local/Freudenberg2002similarity-based.pdf},
  file        = {Freudenberg2002similarity-based.pdf:Freudenberg2002similarity-based.pdf:PDF},
  institution = {Institute of Human Genetics, Bonn University Hospital, Germany. jan.freudenberg@uni-bonn.de},
  owner       = {jp},
  timestamp   = {2009.03.18},
  abstract    = {MOTIVATION: A method for prediction of disease relevant human genes
	from the phenotypic appearance of a query disease is presented. Diseases
	of known genetic origin are clustered according to their phenotypic
	similarity. Each cluster entry consists of a disease and its underlying
	disease gene. Potential disease genes from the human genome are scored
	by their functional similarity to known disease genes in these clusters,
	which are phenotypically similar to the query disease. RESULTS: For
	assessment of the approach, a leave-one-out cross-validation of 878
	diseases from the OMIM database, using 10672 candidate genes from
	the human genome, is performed. Depending on the applied parameters,
	in roughly one-third of cases the true solution is contained within
	the top scoring 3\% of predictions and in two-third of cases the
	true solution is contained within the top scoring 15\% of predictions.
	The prediction results can either be used to identify target genes,
	when searching for a mutation in monogenic diseases or for selection
	of loci in genotyping experiments in genetically complex diseases.}
}

@article{Freund2003efficient,
  author    = {Freund, Y. and Iyer, R. and Schapire, R. E. and Singer, Y.},
  title     = {An efficient boosting algorithm for combining preferences},
  journal   = {J. Mach. Learn. Res.},
  year      = {2003},
  volume    = {4},
  pages     = {933--969},
  url       = {http://www.ai.mit.edu/projects/jmlr/papers/volume4/freund03a/freund03a.pdf},
  pdf       = {../local/Freund2003efficient.pdf},
  file      = {Freund2003efficient.pdf:Freund2003efficient.pdf:PDF},
  owner     = {jp},
  timestamp = {2013.02.05}
}

@inproceedings{Freund2000Analysis,
  author    = {Freund, Y. and Mansour, Y. and Schapire, R. E.},
  title     = {Analysis of a {P}seudo-{B}ayesian {P}rediction {M}ethod},
  booktitle = {Conference on {I}nformation {S}ciences and {S}ystems, {P}rinceton
	{U}niversity, {M}arch 15-17},
  year      = {2000},
  subject   = {ml},
  pdf       = {../local/freu00.pdf},
  file      = {freu00.pdf:local/freu00.pdf:PDF}
}

@article{Freyhult2005Unbiased,
  author    = {Freyhult, E. and Prusis, P. and Lapinsh, M. and Wikberg, J. E. S.
	and Moulton, V. and Gustafsson, M. G.},
  title     = {Unbiased descriptor and parameter selection confirms the potential
	of proteochemometric modelling.},
  journal   = {BMC Bioinformatics},
  year      = {2005},
  volume    = {6},
  pages     = {50},
  doi       = {10.1186/1471-2105-6-50},
  url       = {http://dx.doi.org/10.1186/1471-2105-6-50},
  pmid      = {15760465},
  pii       = {1471-2105-6-50},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.07.16},
  abstract  = {BACKGROUND: Proteochemometrics is a new methodology that allows prediction
	of protein function directly from real interaction measurement data
	without the need of 3D structure information. Several reported proteochemometric
	models of ligand-receptor interactions have already yielded significant
	insights into various forms of bio-molecular interactions. The proteochemometric
	models are multivariate regression models that predict binding affinity
	for a particular combination of features of the ligand and protein.
	Although proteochemometric models have already offered interesting
	results in various studies, no detailed statistical evaluation of
	their average predictive power has been performed. In particular,
	variable subset selection performed to date has always relied on
	using all available examples, a situation also encountered in microarray
	gene expression data analysis. RESULTS: A methodology for an unbiased
	evaluation of the predictive power of proteochemometric models was
	implemented and results from applying it to two of the largest proteochemometric
	data sets yet reported are presented. A double cross-validation loop
	procedure is used to estimate the expected performance of a given
	design method. The unbiased performance estimates (P2) obtained for
	the data sets that we consider confirm that properly designed single
	proteochemometric models have useful predictive power, but that a
	standard design based on cross validation may yield models with quite
	limited performance. The results also show that different commercial
	software packages employed for the design of proteochemometric models
	may yield very different and therefore misleading performance estimates.
	In addition, the differences in the models obtained in the double
	CV loop indicate that detailed chemical interpretation of a single
	proteochemometric model is uncertain when data sets are small. CONCLUSION:
	The double CV loop employed offer unbiased performance estimates
	about a given proteochemometric modelling procedure, making it possible
	to identify cases where the proteochemometric design does not result
	in useful predictive models. Chemical interpretations of single proteochemometric
	models are uncertain and should instead be based on all the models
	selected in the double CV loop employed here.}
}

@article{Friedel2005Support,
  author = {Friedel, C. C. and Jahn, K. H. V. and Sommer, S. and Rudd, S. and
	Mewes, H. W. and Tetko, I. V.},
  title = {Support vector machines for separation of mixed plant-pathogen {EST}
	collections based on codon usage},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1383--1388},
  abstract = {Motivation: {D}iscovery of host and pathogen genes expressed at the
	plant-pathogen interface often requires the construction of mixed
	libraries that contain sequences from both genomes. {S}equence identification
	requires high-throughput and reliable classification of genome origin.
	{W}hen using single-pass c{DNA} sequences difficulties arise from
	the short sequence length, the lack of sufficient taxonomically relevant
	sequence data in public databases and ambiguous sequence homology
	between plant and pathogen genes. {R}esults: {A} novel method is described,
	which is independent of the availability of homologous genes and
	relies on subtle differences in codon usage between plant and fungal
	genes. {W}e used support vector machines ({SVM}s) to identify the
	probable origin of sequences. {SVM}s were compared to several other
	machine learning techniques and to a probabilistic algorithm ({PF}-{IND},
	{M}aor et al., 2003) for {EST} classification also based on codon
	bias differences. {O}ur software ({ECLAT}) has achieved a classification
	accuracy of 93.1\% on a test set of 3217 {EST} sequences from {H}.
	vulgare and {B}. graminis, which is a significant improvement compared
	to {PF}-{IND} (prediction accuracy of 81.2\% on the same test set).
	{EST} sequences with at least 50 nt of coding sequence can be classified
	by {ECLAT} with high confidence. {ECLAT} allows training of classifiers
	for any host-pathogen combination for which there are sufficient
	classified training sequences. {A}vailability: {ECLAT} is freely available
	on the internet (http://mips.gsf.de/proj/est) or on request as a
	standalone version.},
  doi = {10.1093/bioinformatics/bti200},
  pdf = {../local/Friedel2005Support.pdf},
  file = {Friedel2005Support.pdf:local/Friedel2005Support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/bti200v1}
}

@article{Friedman1993Some,
  author = {Friedman, J.},
  title = {Some geometric aspects of graphs and their eigenfunctions},
  journal = {Duke {M}ath. {J}.},
  year = {1993},
  volume = {69},
  pages = {487--525},
  number = {3},
  month = {Mar},
  subject = {net}
}

@article{Friedman2007Pathwise,
  author = {Friedman, J. and Hastie, T. and H{\"o}fling, H. and Tibshirani, R.},
  title = {Pathwise coordinate optimization},
  journal = {Ann. Appl. Statist.},
  year = {2007},
  volume = {1},
  pages = {302--332},
  number = {2},
  abstract = {We consider "one-at-a-time" coordinate-wise descent algorithms for
	a class of convex optimization problems. An algorithm of this kind
	has been proposed for the L1-penalized regression (lasso) in the
	literature, but it seems to have been largely ignored. Indeed, it
	seems that coordinate-wise algorithms are not often used in convex
	optimization. We show that this algorithm is very competitive with
	the well-known LARS (or homotopy) procedure in large lasso problems,
	and that it can be applied to related methods such as the garotte
	and elastic net. It turns out that coordinate-wise descent does not
	work in the "fused lasso", however, so we derive a generalized algorithm
	that yields the solution in much less time that a standard convex
	optimizer. Finally, we generalize the procedure to the two-dimensional
	fused lasso, and demonstrate its performance on some image smoothing
	problems.},
  doi = {10.1214/07-AOAS131},
  pdf = {../local/Friedman2007Pathwise.pdf},
  file = {Friedman2007Pathwise.pdf:Friedman2007Pathwise.pdf:PDF},
  owner = {jp},
  timestamp = {2008.12.08},
  url = {http://dx.doi.org/10.1214/07-AOAS131}
}

@article{Friedman2008Sparse,
  author = {Friedman, J. and Hastie, T. and Tibshirani, R.},
  title = {Sparse inverse covariance estimation with the graphical lasso},
  journal = {Biostatistics},
  year = {2008},
  volume = {9},
  pages = {432--441},
  number = {3},
  month = {Jul},
  abstract = {We consider the problem of estimating sparse graphs by a lasso penalty
	applied to the inverse covariance matrix. Using a coordinate descent
	procedure for the lasso, we develop a simple algorithm--the graphical
	lasso--that is remarkably fast: It solves a 1000-node problem (approximately
	500,000 parameters) in at most a minute and is 30-4000 times faster
	than competing methods. It also provides a conceptual link between
	the exact problem and the approximation suggested by Meinshausen
	and B{\"u}hlmann (2006). We illustrate the method on some cell-signaling
	data from proteomics.},
  doi = {10.1093/biostatistics/kxm045},
  pdf = {../local/Friedman2008Sparse.pdf},
  file = {Friedman2008Sparse.pdf:Friedman2008Sparse.pdf:PDF},
  institution = {Department of Statistics, Stanford University, CA 94305, USA.},
  owner = {jp},
  pii = {kxm045},
  pmid = {18079126},
  timestamp = {2008.11.27},
  url = {http://dx.doi.org/10.1093/biostatistics/kxm045}
}

@article{Friedman2004Inferring,
  author = {Friedman, N.},
  title = {Inferring cellular networks using probabilistic graphical models},
  journal = {Science},
  year = {2004},
  volume = {303},
  pages = {799--805},
  number = {5659},
  publisher = {AAAS}
}

@article{Friedman2000Using,
  author   = {Friedman, N. and Linial, M. and Nachman, I. and Pe'er, D.},
  title    = {Using {B}ayesian Networks to Analyze Expression Data},
  journal  = {J. Comput. Biol.},
  year     = {2000},
  volume   = {7},
  pages    = {601--620},
  number   = {3-4},
  abstract = {D{NA} hybridization arrays simultaneously measure the expression level
              for thousands of genes. {T}hese measurements provide a "snapshot"
              of transcription levels within the cell. {A} major challenge in computational
              biology is to uncover, from such measurements, gene/protein interactions
              and key biological features of cellular systems. {I}n this paper,
              we propose a new framework for discovering interactions between genes
              based on multiple expression measurements. {T}his framework builds
              on the use of {B}ayesian networks for representing statistical dependencies.
              {A} {B}ayesian network is a graph-based model of joint multivariate
              probability distributions that captures properties of conditional
              independence between variables. {S}uch models are attractive for
              their ability to describe complex stochastic processes and because
              they provide a clear methodology for learning from (noisy) observations.
              {W}e start by showing how {B}ayesian networks can describe interactions
              between genes. {W}e then describe a method for recovering gene interactions
              from microarray data using tools for learning {B}ayesian networks.
              {F}inally, we demonstrate this method on the {S}. cerevisiae cell-cycle
              measurements of {S}pellman et al. (1998).},
  doi      = {10.1089/106652700750050961},
  pdf      = {../local/Friedman2000Using.pdf},
  file     = {Friedman2000Using.pdf:local/Friedman2000Using.pdf:PDF},
  keywords = {biogm},
  subject  = {microarray},
  url      = {http://dx.doi.org/10.1089/106652700750050961}
}

@article{Frigola2006Epigenetic,
  author      = {Frigola, J. and Song, J. and Stirzaker, C. and Hinshelwood, R. A.
                 and Peinado, M. A. and Clark, S. J.},
  title       = {Epigenetic remodeling in colorectal cancer results in coordinate
                 gene suppression across an entire chromosome band},
  journal     = {Nat. Genet.},
  year        = {2006},
  volume      = {38},
  pages       = {540--549},
  number      = {5},
  month       = {May},
  abstract    = {We report a new mechanism in carcinogenesis involving coordinate long-range
                 epigenetic gene silencing. Epigenetic silencing in cancer has always
                 been envisaged as a local event silencing discrete genes. However,
                 in this study of silencing in colorectal cancer, we found common
                 repression of the entire 4-Mb band of chromosome 2q.14.2, associated
                 with global methylation of histone H3 Lys9. DNA hypermethylation
                 within the repressed genomic neighborhood was localized to three
                 separate enriched CpG island 'suburbs', with the largest hypermethylated
                 suburb spanning 1 Mb. These data change our understanding of epigenetic
                 gene silencing in cancer cells: namely, epigenetic silencing can
                 span large regions of the chromosome, and both DNA-methylated and
                 neighboring unmethylated genes can be coordinately suppressed by
                 global changes in histone modification. We propose that loss of gene
                 expression can occur through long-range epigenetic silencing, with
                 similar implications as loss of heterozygosity in cancer.},
  doi         = {10.1038/ng1781},
  pdf         = {../local/Frigola2006Epigenetic.pdf},
  file        = {Frigola2006Epigenetic.pdf:Frigola2006Epigenetic.pdf:PDF},
  institution = {Cancer Program, Garvan Institute of Medical Research, 384 Victoria
                 Street, Darlinghurst, Sydney 2010, New South Wales, Australia.},
  keywords    = {csbcbook},
  owner       = {jp},
  pii         = {ng1781},
  pmid        = {16642018},
  timestamp   = {2009.10.09},
  url         = {http://dx.doi.org/10.1038/ng1781}
}

@article{Frimurer2005physicogenetic,
  author = {Frimurer, T. M. and Ulven, T. and Elling, C. E. and Gerlach, L.-O.
	and Kostenis, E. and H{\"o}gberg, T.},
  title = {A physicogenetic method to assign ligand-binding relationships between
	{7TM} receptors.},
  journal = {Bioorg. Med. Chem. Lett.},
  year = {2005},
  volume = {15},
  pages = {3707--3712},
  number = {16},
  month = {Aug},
  abstract = {A computational protocol has been devised to relate 7TM receptor proteins
	(GPCRs) with respect to physicochemical features of the core ligand-binding
	site as defined from the crystal structure of bovine rhodopsin. The
	identification of such receptors that already are associated with
	ligand information (e.g., small molecule ligands with mutagenesis
	or SAR data) is used to support structure-guided drug design of novel
	ligands. A case targeting the newly identified prostaglandin D2 receptor
	CRTH2 serves as a primary example to illustrate the procedure.},
  doi = {10.1016/j.bmcl.2005.05.102},
  pdf = {../local/Frimurer2005physicogenetic.pdf},
  file = {Frimurer2005physicogenetic.pdf:Frimurer2005physicogenetic.pdf:PDF},
  keywords = {chemogenomics},
  owner = {vert},
  pii = {S0960-894X(05)00704-3},
  pmid = {15993056},
  timestamp = {2007.12.12},
  url = {http://dx.doi.org/10.1016/j.bmcl.2005.05.102}
}

@article{Frith2008Discovering,
  author = {Frith, Martin C. and Saunders, Neil F. W. and Kobe, Bostjan and Bailey,
	Timothy L.},
  title = {Discovering Sequence Motifs with Arbitrary Insertions and Deletions},
  journal = {PLoS Comput. Biol.},
  year = {2008},
  volume = {4},
  pages = {e1000071},
  number = {5},
  month = {May},
  citeulike-article-id = {2771903},
  citeulike-linkout-0 = {http://dx.doi.org/10.1371/journal.pcbi.1000071},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/18437229},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=18437229},
  doi = {10.1371/journal.pcbi.1000071},
  institution = {Computational Biology Research Center, National Institute of Advanced
	Industrial Science and Technology (AIST), Tokyo, Japan.},
  issn = {1553-7358},
  keywords = {glam},
  posted-at = {2009-07-31 17:34:55},
  priority = {1},
  publisher = {Public Library of Science},
  url = {http://dx.doi.org/10.1371/journal.pcbi.1000071}
}

@article{Fritz2002Microarray-based,
  author = {Fritz, B. and Schubert, F. and Wrobel, G. and Schwaenen, C. and Wessendorf,
	S. and Nessling, M. and Korz, C. and Rieker, R. J. and Montgomery,
	K. and Kucherlapati, R. and Mechtersheimer, G. and Eils, R. and Joos,
	S. and Lichter, P.},
  title = {Microarray-based {C}opy {N}umber and {E}xpression {P}rofiling in
	{D}edifferentiated and {P}leomorphic {L}iposarcoma},
  journal = {Cancer {R}es.},
  year = {2002},
  volume = {62},
  pages = {2993--2998},
  number = {11},
  abstract = {Sixteen dedifferentiated and pleomorphic liposarcomas were analyzed
	by comparative genomic hybridization ({CGH}) to genomic microarrays
	(matrix-{CGH}), c{DNA}-derived microarrays for expression profiling,
	and by quantitative {PCR}. {M}atrix-{CGH} revealed copy number gains
	of numerous oncogenes, i.e., {CCND}1, {MDM}2, {GLI}, {CDK}4, {MYB},
	{ESR}1, and {AIB}1, several of which correlate with a high level
	of transcripts from the respective gene. {I}n addition, a number
	of genes were found differentially expressed in dedifferentiated
	and pleomorphic liposarcomas. {A}pplication of dedicated clustering
	algorithms revealed that both tumor subtypes are clearly separated
	by the genomic profiles but only with a lesser power by the expression
	profiles. {U}sing a support vector machine, a subset of five clones
	was identified as "class discriminators." {T}hus, for the distinction
	of these types of liposarcomas, genomic profiling appears to be more
	advantageous than {RNA} expression analysis.},
  pdf = {../local/Fritz2002Microarray-based.pdf},
  file = {Fritz2002Microarray-based.pdf:local/Fritz2002Microarray-based.pdf:PDF},
  keywords = {biosvm, cgh},
  owner = {jeanphilippevert},
  url = {http://cancerres.aacrjournals.org/cgi/content/abstract/62/11/2993}
}

@inproceedings{Frohlich2005Optimal,
  author = {H. Fr{\"o}hlich and J. K. Wegner and F. Sieker and A. Zell},
  title = {Optimal assignment kernels for attributed molecular graphs},
  booktitle = {Proceedings of the 22nd International Conference on Machine Learning},
  year = {2005},
  pages = {225--232},
  address = {New York, NY, USA},
  publisher = {ACM Press},
  doi = {10.1145/1102351.1102380},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.28}
}

@article{Fu2005Image,
  author = {J. C. Fu and S. K. Lee and S. T. C. Wong and J. Y. Yeh and A. H. Wang
	and H. K. Wu},
  title = {Image segmentation feature selection and pattern classification for
	mammographic microcalcifications.},
  journal = {Comput {M}ed {I}maging {G}raph},
  year = {2005},
  month = {Jul},
  abstract = {Since microcalcifications in {X}-ray mammograms are the primary indicator
	of breast cancer, detection of microcalcifications is central to
	the development of an effective diagnostic system. {T}his paper proposes
	a two-stage detection procedure. {I}n the first stage, a data driven,
	closed form mathematical model is used to calculate the location
	and shape of suspected microcalcifications. {W}hen tested on the
	{N}ijmegen {U}niversity {H}ospital ({N}etherlands) database, data
	analysis shows that the proposed model can effectively detect the
	occurrence of microcalcifications. {T}he proposed mathematical model
	not only eliminates the need for system training, but also provides
	information on the borders of suspected microcalcifications for further
	feature extraction. {I}n the second stage, 61 features are extracted
	for each suspected microcalcification, representing texture, the
	spatial domain and the spectral domain. {F}rom these features, a
	sequential forward search ({SFS}) algorithm selects the classification
	input vector, which consists of features sensitive only to microcalcifications.
	{T}wo types of classifiers---a general regression neural network ({GRNN})
	and a support vector machine ({SVM})---are applied, and their classification
	performance is compared using the {A}z value of the {R}eceiver {O}perating
	{C}haracteristic curve. {F}or all 61 features used as input vectors,
	the test data set yielded {A}z values of 97.01\% for the {SVM} and
	96.00\% for the {GRNN}. {W}ith input features selected by {SFS},
	the corresponding {A}z values were 98.00\% for the {SVM} and 97.80\%
	for the {GRNN}. {T}he {SVM} outperformed the {GRNN}, whether or not
	the input vectors first underwent {SFS} feature selection. {I}n both
	cases, feature selection dramatically reduced the dimension of the
	input vectors (82\% for the {SVM} and 59\% for the {GRNN}). {M}oreover,
	{SFS} feature selection improved the classification performance,
	increasing the {A}z value from 97.01 to 98.00\% for the {SVM} and
	from 96.00 to 97.80\% for the {GRNN}.},
  doi = {10.1016/j.compmedimag.2005.03.002},
  keywords = {Archaeal, Artificial Intelligence, Bacterial, Cytomegalovirus, Gene
	Transfer, Genome, Genomics, Horizontal, Non-U.S. Gov't, Research
	Support, Viral},
  pii = {S0895-6111(05)00038-8},
  pmid = {16002263},
  url = {http://dx.doi.org/10.1016/j.compmedimag.2005.03.002}
}

@article{Fu1998Penalized,
  author = {W. Fu},
  title = {Penalized regressions: the {B}ridge versus the {L}asso},
  journal = {Journal of {C}omputational and {G}raphical {S}tatistics},
  year = {1998},
  volume = {7},
  pages = {397--416},
  number = {3}
}

@article{Fu2009DISCOVER,
  author = {Fu, Wenjie and Ray, Pradipta and Xing, Eric P.},
  title = {{DISCOVER}: a feature-based discriminative method for motif search
	in complex genomes.},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  pages = {i321--i329},
  number = {12},
  month = {Jun},
  abstract = {MOTIVATION: Identifying transcription factor binding sites (TFBSs)
	encoding complex regulatory signals in metazoan genomes remains a
	challenging problem in computational genomics. Due to degeneracy
	of nucleotide content among binding site instances or motifs, and
	intricate 'grammatical organization' of motifs within cis-regulatory
	modules (CRMs), extant pattern matching-based in silico motif search
	methods often suffer from impractically high false positive rates,
	especially in the context of analyzing large genomic datasets, and
	noisy position weight matrices which characterize binding sites.
	Here, we try to address this problem by using a framework to maximally
	utilize the information content of the genomic DNA in the region
	of query, taking cues from values of various biologically meaningful
	genetic and epigenetic factors in the query region such as clade-specific
	evolutionary parameters, presence/absence of nearby coding regions,
	etc. We present a new method for TFBS prediction in metazoan genomes
	that utilizes both the CRM architecture of sequences and a variety
	of features of individual motifs. Our proposed approach is based
	on a discriminative probabilistic model known as conditional random
	fields that explicitly optimizes the predictive probability of motif
	presence in large sequences, based on the joint effect of all such
	features. RESULTS: This model overcomes weaknesses in earlier methods
	based on less effective statistical formalisms that are sensitive
	to spurious signals in the data. We evaluate our method on both simulated
	CRMs and real Drosophila sequences in comparison with a wide spectrum
	of existing models, and outperform the state of the art by 22\% in
	F1 score. Availability and Implementation: The code is publicly available
	at http://www.sailing.cs.cmu.edu/discover.html. SUPPLEMENTARY INFORMATION:
	Supplementary data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btp230},
  issn = {1460-2059},
  keywords = {complex, discovery, genomes, motif, tfbs},
  posted-at = {2009-06-17 15:21:34},
  priority = {2},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp230}
}

@techreport{fujibuchi1998,
  author = {W. Fujibuchi and K. Sato and H. Ogata and S. Goto and M. Kanehisa},
  title = {K{EGG} and {DBGET}/{L}ink{DB}: {I}ntegration of biological relationships
	in divergent molecular biology data},
  institution = {AAAI Press},
  year = {1998},
  type = {Knowledge Sharing Across Biological and Medical Knowledge-Based Systems},
  number = {WS-98-04},
  owner = {franck},
  timestamp = {2006.02.22}
}

@article{Fukumizu2008Statistical,
  author = {Fukumizu, K. and Bach, F. R. and Gretton, A.},
  title = {Statistical consistency of kernel canonical correlation analysis},
  journal = {J. Mach. Learn. Res.},
  year = {2007},
  volume = {8},
  pages = {361--383},
  pdf = {../local/Fukumizu2008Statistical.pdf},
  file = {Fukumizu2008Statistical.pdf:Fukumizu2008Statistical.pdf:PDF},
  owner = {jp},
  timestamp = {2009.01.05},
  url = {http://jmlr.csail.mit.edu/papers/volume8/fukumizu07a/fukumizu07a.pdf}
}

@article{Fullwood2010Chromatin,
  author = {Melissa J Fullwood and Yuyuan Han and Chia-Lin Wei and Xiaoan Ruan
	and Yijun Ruan},
  title = {Chromatin interaction analysis using paired-end tag sequencing.},
  journal = {Curr Protoc Mol Biol},
  year = {2010},
  volume = {Chapter 21},
  pages = {Unit 21.15.1--25},
  month = {Jan},
  abstract = {Chromatin Interaction Analysis using Paired-End Tag sequencing (ChIA-PET)
	is a technique developed for large-scale, de novo analysis of higher-order
	chromatin structures. Cells are treated with formaldehyde to cross-link
	chromatin interactions, DNA segments bound by protein factors are
	enriched by chromatin immunoprecipitation, and interacting DNA fragments
	are then captured by proximity ligation. The Paired-End Tag (PET)
	strategy is applied to the construction of ChIA-PET libraries, which
	are sequenced by high-throughput next-generation sequencing technologies.
	Finally, raw PET sequences are subjected to bioinformatics analysis,
	resulting in a genome-wide map of binding sites and chromatin interactions
	mediated by the protein factor under study. This unit describes ChIA-PET
	for genome-wide analysis of chromatin interactions in mammalian cells,
	with the application of Roche/454 and Illumina sequencing technologies.},
  doi = {10.1002/0471142727.mb2115s89},
  institution = {Genome Institute of Singapore, Agency for Science, Technology and
	Research, Singapore.},
  keywords = {Animals; Chromatin; Computational Biology; Databases, Nucleic Acid;
	Genome-Wide Association Study; Humans; Sequence Analysis, DNA},
  owner = {phupe},
  pmid = {20069536},
  timestamp = {2010.08.26},
  url = {http://dx.doi.org/10.1002/0471142727.mb2115s89}
}

@article{Fullwood2009oestrogen-receptor-alpha-bound,
  author      = {Melissa J Fullwood and Mei Hui Liu and You Fu Pan and Jun Liu and
                 Han Xu and Yusoff Bin Mohamed and Yuriy L Orlov and Stoyan Velkov
                 and Andrea Ho and Poh Huay Mei and Elaine G Y Chew and Phillips Yao
                 Hui Huang and Willem-Jan Welboren and Yuyuan Han and Hong Sain Ooi
                 and Pramila N Ariyaratne and Vinsensius B Vega and Yanquan Luo and
                 Peck Yean Tan and Pei Ye Choy and K. D Senali Abayratna Wansa and
                 Bing Zhao and Kar Sian Lim and Shi Chi Leow and Jit Sin Yow and Roy
                 Joseph and Haixia Li and Kartiki V Desai and Jane S Thomsen and Yew
                 Kok Lee and R. Krishna Murthy Karuturi and Thoreau Herve and Guillaume
                 Bourque and Hendrik G Stunnenberg and Xiaoan Ruan and Valere Cacheux-Rataboul
                 and Wing-Kin Sung and Edison T Liu and Chia-Lin Wei and Edwin Cheung
                 and Yijun Ruan},
  title       = {An oestrogen-receptor-alpha-bound human chromatin interactome.},
  journal     = {Nature},
  year        = {2009},
  volume      = {462},
  pages       = {58--64},
  number      = {7269},
  month       = {Nov},
  abstract    = {Genomes are organized into high-level three-dimensional structures,
                 and DNA elements separated by long genomic distances can in principle
                 interact functionally. Many transcription factors bind to regulatory
                 DNA elements distant from gene promoters. Although distal binding
                 sites have been shown to regulate transcription by long-range chromatin
                 interactions at a few loci, chromatin interactions and their impact
                 on transcription regulation have not been investigated in a genome-wide
                 manner. Here we describe the development of a new strategy, chromatin
                 interaction analysis by paired-end tag sequencing (ChIA-PET) for
                 the de novo detection of global chromatin interactions, with which
                 we have comprehensively mapped the chromatin interaction network
                 bound by oestrogen receptor alpha (ER-alpha) in the human genome.
                 We found that most high-confidence remote ER-alpha-binding sites
                 are anchored at gene promoters through long-range chromatin interactions,
                 suggesting that ER-alpha functions by extensive chromatin looping
                 to bring genes together for coordinated transcriptional regulation.
                 We propose that chromatin interactions constitute a primary mechanism
                 for regulating transcription in mammalian genomes.},
  doi         = {10.1038/nature08497},
  institution = {Genome Institute of Singapore, Agency for Science, Technology and
                 Research, Singapore 138672.},
  keywords    = {Binding Sites; Cell Line; Chromatin; Chromatin Immunoprecipitation;
                 Cross-Linking Reagents; Estrogen Receptor alpha; Formaldehyde; Genome,
                 Human; Humans; Promoter Regions, Genetic; Protein Binding; Reproducibility
                 of Results; Sequence Analysis, DNA; Transcription, Genetic; Transcriptional
                 Activation},
  owner       = {phupe},
  pii         = {nature08497},
  pmid        = {19890323},
  timestamp   = {2010.08.26},
  url         = {http://dx.doi.org/10.1038/nature08497}
}

@article{Fullwood2009Next-generation,
  author = {Melissa J Fullwood and Chia-Lin Wei and Edison T Liu and Yijun Ruan},
  title = {Next-generation {DNA} sequencing of paired-end tags ({PET}) for transcriptome
	and genome analyses.},
  journal = {Genome Res},
  year = {2009},
  volume = {19},
  pages = {521--532},
  number = {4},
  month = {Apr},
  abstract = {Comprehensive understanding of functional elements in the human genome
	will require thorough interrogation and comparison of individual
	human genomes and genomic structures. Such an endeavor will require
	improvements in the throughputs and costs of DNA sequencing. Next-generation
	sequencing platforms have impressively low costs and high throughputs
	but are limited by short read lengths. An immediate and widely recognized
	solution to this critical limitation is the paired-end tag (PET)
	sequencing for various applications, collectively called the PET
	sequencing strategy, in which short and paired tags are extracted
	from the ends of long DNA fragments for ultra-high-throughput sequencing.
	The PET sequences can be accurately mapped to the reference genome,
	thus demarcating the genomic boundaries of PET-represented DNA fragments
	and revealing the identities of the target DNA elements. PET protocols
	have been developed for the analyses of transcriptomes, transcription
	factor binding sites, epigenetic sites such as histone modification
	sites, and genome structures. The exclusive advantage of the PET
	technology is its ability to uncover linkages between the two ends
	of DNA fragments. Using this unique feature, unconventional fusion
	transcripts, genome structural variations, and even molecular interactions
	between distant genomic elements can be unraveled by PET analysis.
	Extensive use of PET data could lead to efficient assembly of individual
	human genomes, transcriptomes, and interactomes, enabling new biological
	and clinical insights. With its versatile and powerful nature for
	DNA analysis, the PET sequencing strategy has a bright future ahead.},
  doi = {10.1101/gr.074906.107},
  pdf = {../local/Fullwood2009Next-generation.pdf},
  file = {Fullwood2009Next-generation.pdf:Fullwood2009Next-generation.pdf:PDF},
  institution = {Genome Institute of Singapore, Agency for Science, Technology and
	Research, Singapore 138672, Singapore.},
  owner = {phupe},
  pii = {19/4/521},
  pmid = {19339662},
  timestamp = {2010.08.20},
  url = {http://dx.doi.org/10.1101/gr.074906.107}
}

@article{Fundel2005simple,
  author = {Katrin Fundel and Daniel G{\"u}ttler and Ralf Zimmer and Joannis Apostolakis},
  title = {A simple approach for protein name identification: prospects and
	limits.},
  journal = {B{MC} {B}ioinformatics},
  year = {2005},
  volume = {6 Suppl 1},
  pages = {S15},
  abstract = {B{ACKGROUND}: {S}ignificant parts of biological knowledge are available
	only as unstructured text in articles of biomedical journals. {B}y
	automatically identifying gene and gene product (protein) names and
	mapping these to unique database identifiers, it becomes possible
	to extract and integrate information from articles and various data
	sources. {W}e present a simple and efficient approach that identifies
	gene and protein names in texts and returns database identifiers
	for matches. {I}t has been evaluated in the recent {B}io{C}re{A}t{I}v{E}
	entity extraction and mention normalization task by an independent
	jury. {METHODS}: {O}ur approach is based on the use of synonym lists
	that map the unique database identifiers for each gene/protein to
	the different synonym names. {F}or yeast and mouse, synonym lists
	were used as provided by the organizers who generated them from public
	model organism databases. {T}he synonym list for fly was generated
	directly from the corresponding organism database. {T}he lists were
	then extensively curated in largely automated procedure and matched
	against {MEDLINE} abstracts by exact text matching. {R}ule-based
	and support vector machine-based post filters were designed and applied
	to improve precision. {RESULTS}: {O}ur procedure showed high recall
	and precision with {F}-measures of 0.897 for yeast and 0.764/0.773
	for mouse in the {B}io{C}re{A}t{I}v{E} assessment ({T}ask 1{B}) and
	0.768 for fly in a post-evaluation. {CONCLUSION}: {T}he results were
	close to the best over all submissions. {D}epending on the synonym
	properties it can be crucial to consider context and to filter out
	erroneous matches. {T}his is especially important for fly, which
	has a very challenging nomenclature for the protein name identification
	task. {H}ere, the support vector machine-based post filter proved
	to be very effective.},
  doi = {10.1186/1471-2105-6-S1-S15},
  pdf = {../local/Fundel2005simple.pdf},
  file = {Fundel2005simple.pdf:local/Fundel2005simple.pdf:PDF},
  pii = {1471-2105-6-S1-S15},
  pmid = {15960827},
  url = {http://dx.doi.org/10.1186/1471-2105-6-S1-S15}
}

@article{Furey2000Support,
  author = {Furey, T. S. and Cristianini, N. and Duffy, N. and Bednarski, D.
	W. and Schummer, M. and Haussler, D.},
  title = {Support vector machine classification and validation of cancer tissue
	samples using microarray expression data},
  journal = {Bioinformatics},
  year = {2000},
  volume = {16},
  pages = {906--914},
  number = {10},
  month = {Oct},
  abstract = {Motivation: {DNA} microarray experiments generating thousands of gene
	expression measurements, are being used to gather information from
	tissue and cell samples regarding gene expression differences that
	will be useful in diagnosing disease. {W}e have developed a new method
	to analyse this kind of data using support vector machines ({SVM}s).
	{T}his analysis consists of both classification of the tissue samples,
	and an exploration of the data for mis-labeled or questionable tissue
	results. {R}esults: {W}e demonstrate the method in detail on samples
	consisting of ovarian cancer tissues, normal ovarian tissues, and
	other normal tissues. {T}he dataset consists of expression experiment
	results for 97802 c{DNA}s for each tissue. {A}s a result of computational
	analysis, a tissue sample is discovered and confirmed to be wrongly
	labeled. {U}pon correction of this mistake and the removal of an
	outlier, perfect classification of tissues is achieved, but not with
	high confidence. {W}e identify and analyse a subset of genes from
	the ovarian dataset whose expression is highly differentiated between
	the types of tissues. {T}o show robustness of the {SVM} method, two
	previously published datasets from other types of tissues or cells
	are analysed. {T}he results are comparable to those previously obtained.
	{W}e show that other machine learning methods also perform comparably
	to the {SVM} on many of those datasets. {A}vailability: {T}he {SVM}
	software is available at http://www.cs.columbia.edu/~bgrundy/svm.
	{C}ontact: booch@cse.ucsc.edu},
  pdf = {../local/Furey2000Support.pdf},
  file = {Furey2000Support.pdf:local/Furey2000Support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/16/10/906}
}

@article{Furlanello2003Entropy-based,
  author = {Furlanello, C. and Serafini, M. and Merler, S. and Jurman, G.},
  title = {Entropy-based gene ranking without selection bias for the predictive
	classification of microarray data},
  journal = {B{MC} {B}ioinformatics},
  year = {2003},
  volume = {4},
  pages = {54},
  abstract = {Background {W}e describe the {E}-{RFE} method for gene ranking, which
	is useful for the identification of markers in the predictive classification
	of array data. {T}he method supports a practical modeling scheme
	designed to avoid the construction of classification rules based
	on the selection of too small gene subsets (an effect known as the
	selection bias, in which the estimated predictive errors are too
	optimistic due to testing on samples already considered in the feature
	selection process). {R}esults {W}ith {E}-{RFE}, we speed up the recursive
	feature elimination ({RFE}) with {SVM} classifiers by eliminating
	chunks of uninteresting genes using an entropy measure of the {SVM}
	weights distribution. {A}n optimal subset of genes is selected according
	to a two-strata model evaluation procedure: modeling is replicated
	by an external stratified-partition resampling scheme, and, within
	each run, an internal {K}-fold cross-validation is used for {E}-{RFE}
	ranking. {A}lso, the optimal number of genes can be estimated according
	to the saturation of {Z}ipf's law profiles. {C}onclusions {W}ithout
	a decrease of classification accuracy, {E}-{RFE} allows a speed-up
	factor of 100 with respect to standard {RFE}, while improving on
	alternative parametric {RFE} reduction strategies. {T}hus, a process
	for gene selection and error estimation is made practical, ensuring
	control of the selection bias, and providing additional diagnostic
	indicators of gene importance.},
  doi = {10.1186/1471-2105-4-54},
  pdf = {../local/Furlanello2003Entropy-based.pdf},
  file = {Furlanello2003Entropy-based.pdf:local/Furlanello2003Entropy-based.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://www.biomedcentral.com/1471-2105/4/54}
}

@article{Furnival1974Regressions,
  author = {Furnival, G. M. and Wilson, R. W.},
  title = {Regressions by leaps and bounds},
  journal = {Technometrics},
  year = {1974},
  volume = {16},
  pages = {499--511},
  number = {4},
  publisher = {Taylor \& Francis}
}

@article{Fussenegger2000NatBio,
  author = {Fussenegger, M. and Bailey, J. and Varner, J.},
  title = {A mathematical model of caspase function in apoptosis},
  journal = {Nat. Biotechnol.},
  year = {2000},
  volume = {18},
  pages = {768--774},
  abstract = {Caspases (cysteine-containing aspartate-specific proteases) are at
	the core of the cell's suicide machinery. These enzymes, once activated,
	dismantle the cell by selectively cleaving key proteins after aspartate
	residues. The events culminating in caspase activation are the subject
	of intense study because of their role in cancer, and neurodegenerative
	and autoimmune disorders. Here we present a mechanistic mathematical
	model, formulated on the basis of newly emerging information, describing
	key elements of receptor-mediated and stress-induced caspase activation.
	We have used mass-conservation principles in conjunction with kinetic
	rate laws to formulate ordinary differential equations that describe
	the temporal evolution of caspase activation. Qualitative strategies
	for the prevention of caspase activation are simulated and compared
	with experimental data. We show that model predictions are consistent
	with available information. Thus, the model could aid in better understanding
	caspase activation and identifying therapeutic approaches promoting
	or retarding apoptotic cell death.},
  doi = {10.1038/77589},
  pdf = {../local/Fussenegger2000NatBio.pdf},
  file = {Fussenegger2000NatBio.pdf:Fussenegger2000NatBio.pdf:PDF},
  keywords = {csbcbook}
}

@article{Fuster2005sweet,
  author = {Fuster, M. M. and Esko, J. D.},
  title = {The sweet and sour of cancer: glycans as novel therapeutic targets.},
  journal = {Nat. {R}ev. {C}ancer},
  year = {2005},
  volume = {5},
  pages = {526--542},
  number = {7},
  month = {Jul},
  abstract = {A growing body of evidence supports crucial roles for glycans at various
	pathophysiological steps of tumour progression. {G}lycans regulate
	tumour proliferation, invasion, haematogenous metastasis and angiogenesis,
	and increased understanding of these roles sets the stage for developing
	pharmaceutical agents that target these molecules. {S}uch novel agents
	might be used alone or in combination with operative and/or chemoradiation
	strategies for treating cancer.},
  doi = {10.1038/nrc1649},
  pdf = {../local/Fuster2005sweet.pdf},
  file = {Fuster2005sweet.pdf:Fuster2005sweet.pdf:PDF},
  keywords = {glycans},
  pii = {nrc1649},
  url = {http://dx.doi.org/10.1038/nrc1649}
}

@article{Galluzzi2008Cell,
  author = {Galluzzi, L. and Kroemer, G.},
  title = {Necroptosis: A Specialized Pathway of Programmed Necrosis},
  journal = {Cell},
  year = {2008},
  volume = {135},
  pages = {1161--1163},
  number = {7},
  doi = {10.1016/j.cell.2008.12.004},
  keywords = {csbcbook}
}

@book{Galton1869Hereditary,
  title = {Hereditary genius},
  publisher = {Macmillan and Company},
  year = {1869},
  author = {Galton, Francis}
}

@article{Galan2004Odor-driven,
  author = {Gal{\'a}n, Roberto Fdez and Sachse, Silke and Galizia, C. Giovanni
	and Herz, Andreas V M},
  title = {Odor-driven attractor dynamics in the antennal lobe allow for simple
	and rapid olfactory pattern classification.},
  journal = {Neural {C}omput},
  year = {2004},
  volume = {16},
  pages = {999--1012},
  number = {5},
  month = {May},
  abstract = {The antennal lobe plays a central role for odor processing in insects,
	as demonstrated by electrophysiological and imaging experiments.
	{H}ere we analyze the detailed temporal evolution of glomerular activity
	patterns in the antennal lobe of honeybees. {W}e represent these
	spatiotemporal patterns as trajectories in a multidimensional space,
	where each dimension accounts for the activity of one glomerulus.
	{O}ur data show that the trajectories reach odor-specific steady
	states (attractors) that correspond to stable activity patterns at
	about 1 second after stimulus onset. {A}s revealed by a detailed
	mathematical investigation, the trajectories are characterized by
	different phases: response onset, steady-state plateau, response
	offset, and periods of spontaneous activity. {A}n analysis based
	on support-vector machines quantifies the odor specificity of the
	attractors and the optimal time needed for odor discrimination. {T}he
	results support the hypothesis of a spatial olfactory code in the
	antennal lobe and suggest a perceptron-like readout mechanism that
	is biologically implemented in a downstream network, such as the
	mushroom body.},
  doi = {10.1162/089976604773135078},
  url = {http://dx.doi.org/10.1162/089976604773135078}
}

@article{Gama-Castro2011RegulonDB,
  author = {Gama-Castro, S. and Salgado, H. and Peralta-Gil, M. and Santos-Zavaleta,
	A. and Mu{\~n}iz-Rascado, L. and Solano-Lira, H. and Jimenez-Jacinto,
	V. and Weiss, Verena and Garc{\'i}a-Sotelo, J. S. and L{\'o}pez-Fuentes,
	A. and Porr{\'o}n-Sotelo, L. and Alquicira-Hern{\'a}ndez, S. and
	Medina-Rivera, A. and Mart{\'i}nez-Flores, I. and Alquicira-Hern{\'a}ndez,
	K. and Mart{\'i}nez-Adame, R. and Bonavides-Mart{\'i}nez, C. and
	Miranda-R{\'i}os, J. and Huerta, A. M. and Mendoza-Vargas, A. and
	Collado-Torres, L. and Taboada, B. and Vega-Alvarado, L. and Olvera,
	M. and Olvera, L. and Grande, R. and Morett, E. and Collado-Vides,
	J.},
  title = {{RegulonDB} version 7.0: transcriptional regulation of {E}scherichia
	coli {K-12} integrated within genetic sensory response units (Gensor
	Units)},
  journal = {Nucleic Acids Res.},
  year = {2011},
  volume = {39},
  pages = {D98--D105},
  number = {suppl 1},
  abstract = {RegulonDB (http://regulondb.ccg.unam.mx/) is the primary reference
	database of the best-known regulatory network of any free-living
	organism, that of Escherichia coli K-12. The major conceptual change
	since 3 years ago is an expanded biological context so that transcriptional
	regulation is now part of a unit that initiates with the signal and
	continues with the signal transduction to the core of regulation,
	modifying expression of the affected target genes responsible for
	the response. We call these genetic sensory response units, or Gensor
	Units. We have initiated their high-level curation, with graphic
	maps and superreactions with links to other databases. Additional
	connectivity uses expandable submaps. RegulonDB has summaries for
	every transcription factor (TF) and TF-binding sites with internal
	symmetry. Several DNA-binding motifs and their sizes have been redefined
	and relocated. In addition to data from the literature, we have incorporated
	our own information on transcription start sites (TSSs) and transcriptional
	units (TUs), obtained by using high-throughput whole-genome sequencing
	technologies. A new portable drawing tool for genomic features is
	also now available, as well as new ways to download the data, including
	web services, files for several relational database manager systems
	and text files including BioPAX format.},
  doi = {10.1093/nar/gkq1110},
  eprint = {http://nar.oxfordjournals.org/content/39/suppl_1/D98.full.pdf+html},
  url = {http://nar.oxfordjournals.org/content/39/suppl_1/D98.abstract}
}

@article{Gangal2005Human,
  author = {Gangal, Rajeev and Sharma, Pankaj},
  title = {Human pol {II} promoter prediction: time series descriptors and machine
	learning.},
  journal = {Nucleic {A}cids {R}es},
  year = {2005},
  volume = {33},
  pages = {1332--1336},
  number = {4},
  abstract = {Although several in silico promoter prediction methods have been developed
	to date, they are still limited in predictive performance. {T}he
	limitations are due to the challenge of selecting appropriate features
	of promoters that distinguish them from non-promoters and the generalization
	or predictive ability of the machine-learning algorithms. {I}n this
	paper we attempt to define a novel approach by using unique descriptors
	and machine-learning methods for the recognition of eukaryotic polymerase
	{II} promoters. {I}n this study, non-linear time series descriptors
	along with non-linear machine-learning algorithms, such as support
	vector machine ({SVM}), are used to discriminate between promoter
	and non-promoter regions. {T}he basic idea here is to use descriptors
	that do not depend on the primary {DNA} sequence and provide a clear
	distinction between promoter and non-promoter regions. {T}he classification
	model built on a set of 1000 promoter and 1500 non-promoter sequences,
	showed a 10-fold cross-validation accuracy of 87\% and an independent
	test set had an accuracy >85\% in both promoter and non-promoter
	identification. {T}his approach correctly identified all 20 experimentally
	verified promoters of human chromosome 22. {T}he high sensitivity
	and selectivity indicates that n-mer frequencies along with non-linear
	time series descriptors, such as {L}yapunov component stability and
	{T}sallis entropy, and supervised machine-learning methods, such
	as {SVM}s, can be useful in the identification of pol {II} promoters.},
  doi = {10.1093/nar/gki271},
  pdf = {../local/Gangal2005Human.pdf},
  file = {Gangal2005Human.pdf:local/Gangal2005Human.pdf:PDF},
  keywords = {biosvm},
  pii = {33/4/1332},
  url = {http://dx.doi.org/10.1093/nar/gki271}
}

@article{Gao2005Improving,
  author = {Gao, Yuan and Church, George},
  title = {Improving molecular cancer class discovery through sparse non-negative
	matrix factorization.},
  journal = {Bioinformatics},
  year = {2005},
  month = {Nov},
  volume = {21},
  number = {21},
  pages = {3970--3975},
  doi = {10.1093/bioinformatics/bti653},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti653},
  pii = {21/21/3970},
  pmid = {16244221},
  abstract = {MOTIVATION: Identifying different cancer classes or subclasses with
	similar morphological appearances presents a challenging problem
	and has important implication in cancer diagnosis and treatment.
	Clustering based on gene-expression data has been shown to be a powerful
	method in cancer class discovery. Non-negative matrix factorization
	is one such method and was shown to be advantageous over other clustering
	techniques, such as hierarchical clustering or self-organizing maps.
	In this paper, we investigate the benefit of explicitly enforcing
	sparseness in the factorization process. RESULTS: We report an improved
	unsupervised method for cancer classification by the use of gene-expression
	profile via sparse non-negative matrix factorization. We demonstrate
	the improvement by direct comparison with classic non-negative matrix
	factorization on the three well-studied datasets. In addition, we
	illustrate how to identify a small subset of co-expressed genes that
	may be directly involved in cancer.},
  institution = {Department of Genetics, Harvard Medical School Boston, MA 02115,
	USA. g1m1c1@receptor.med.harvard.edu},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Gao2005Improving.pdf},
  file = {Gao2005Improving.pdf:Gao2005Improving.pdf:PDF},
  owner = {jp},
  timestamp = {2012.02.28}
}

@article{Garber2001Diversity,
  author = {Garber, M. E. and Troyanskaya, O. G. and Schluens, K. and
	Petersen, S. and Thaesler, Z. and Pacyna-Gengelbach, M. and
	{van de Rijn}, M. and Rosen, G. D. and Perou, C. M. and
	Whyte, R. I. and Altman, R. B. and Brown, P. O. and
	Botstein, D. and Petersen, I.},
  title = {Diversity of gene expression in adenocarcinoma of the lung.},
  journal = {Proc Natl Acad Sci U S A},
  year = {2001},
  month = {Nov},
  volume = {98},
  number = {24},
  pages = {13784--13789},
  doi = {10.1073/pnas.241500798},
  url = {http://dx.doi.org/10.1073/pnas.241500798},
  pii = {241500798},
  pmid = {11707590},
  abstract = {The global gene expression profiles for 67 human lung tumors representing
	56 patients were examined by using 24,000-element cDNA microarrays.
	Subdivision of the tumors based on gene expression patterns faithfully
	recapitulated morphological classification of the tumors into squamous,
	large cell, small cell, and adenocarcinoma. The gene expression patterns
	made possible the subclassification of adenocarcinoma into subgroups
	that correlated with the degree of tumor differentiation as well
	as patient survival. Gene expression analysis thus promises to extend
	and refine standard pathologic analysis.},
  institution = {Department of Genetics, Stanford University School of Medicine, Stanford,
	CA 94305, USA.},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Garber2001Diversity.pdf},
  file = {Garber2001Diversity.pdf:Garber2001Diversity.pdf:PDF},
  owner = {jp},
  timestamp = {2012.02.27}
}

@article{Garcia2007Organismal,
  author = {Garcia, Benjamin A and Hake, Sandra B and Diaz, Robert L and
	Kauer, Monika and Morris, Stephanie A and Recht, Judith and
	Shabanowitz, Jeffrey and Mishra, Nilamadhab and Strahl, Brian D and
	Allis, C. David and Hunt, Donald F},
  title = {Organismal differences in post-translational modifications in histones
	H3 and H4.},
  journal = {J Biol Chem},
  year = {2007},
  month = {Mar},
  volume = {282},
  number = {10},
  pages = {7641--7655},
  doi = {10.1074/jbc.M607900200},
  url = {http://dx.doi.org/10.1074/jbc.M607900200},
  pii = {M607900200},
  pmid = {17194708},
  abstract = {Post-translational modifications (PTMs) of histones play an important
	role in many cellular processes, notably gene regulation. Using a
	combination of mass spectrometric and immunobiochemical approaches,
	we show that the PTM profile of histone H3 differs significantly
	among the various model organisms examined. Unicellular eukaryotes,
	such as Saccharomyces cerevisiae (yeast) and Tetrahymena thermophila
	(Tet), for example, contain more activation than silencing marks
	as compared with mammalian cells (mouse and human), which are generally
	enriched in PTMs more often associated with gene silencing. Close
	examination reveals that many of the better-known modified lysines
	(Lys) can be either methylated or acetylated and that the overall
	modification patterns become more complex from unicellular eukaryotes
	to mammals. Additionally, novel species-specific H3 PTMs from wild-type
	asynchronously grown cells are also detected by mass spectrometry.
	Our results suggest that some PTMs are more conserved than previously
	thought, including H3K9me1 and H4K20me2 in yeast and H3K27me1, -me2,
	and -me3 in Tet. On histone H4, methylation at Lys-20 showed a similar
	pattern as H3 methylation at Lys-9, with mammals containing more
	methylation than the unicellular organisms. Additionally, modification
	profiles of H4 acetylation were very similar among the organisms
	examined.},
  institution = {Department of Chemistry, University of Virginia, Charlottesville,
	Virginia 22901, USA.},
  keywords = {Acetylation; Animals; Hela Cells; Histones, chemistry/metabolism;
	Humans; Methylation; Mice; NIH 3T3 Cells; Protein Processing, Post-Translational;
	Saccharomyces cerevisiae, metabolism; Species Specificity; Tandem
	Mass Spectrometry; Tetrahymena, metabolism},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  timestamp = {2010.11.23}
}

@article{Garcia-Gomez2004Benign,
  author = {Garc{\'i}a-G{\'o}mez, Juan M and Vidal, C{\'e}sar and Mart{\'i}-Bonmat{\'i},
	Luis and Galant, Joaqu{\'i}n and Sans, Nicolas and Robles, Montserrat
	and Casacuberta, Francisco},
  title = {Benign/malignant classifier of soft tissue tumors using {MR} imaging.},
  journal = {{MAGMA}},
  year = {2004},
  volume = {16},
  pages = {194--201},
  number = {4},
  month = {Mar},
  abstract = {This article presents a pattern-recognition approach to the soft tissue
	tumors ({STT}) benign/malignant character diagnosis using magnetic
	resonance ({MR}) imaging applied to a large multicenter database.
	{OBJECTIVE}: {T}o develop and test an automatic classifier of {STT}
	into benign or malignant by using classical {MR} imaging findings
	and epidemiological information. {MATERIALS} {AND} {METHODS}: {A}
	database of 430 patients (62\% benign and 38\% malignant) from several
	{E}uropean multicenter registers. {T}here were 61 different histologies
	(36 with benign and 25 with malignant nature). {T}hree pattern-recognition
	methods (artificial neural networks, support vector machine, k-nearest
	neighbor) were applied to learn the discrimination between benignity
	and malignancy based on a defined {MR} imaging findings protocol.
	{A}fter the systems had learned by using training samples (with 302
	cases), the clinical decision support system was tested in the diagnosis
	of 128 new {STT} cases. {RESULTS}: {A}n 88-92\% efficacy was obtained
	in a not-viewed set of tumors using the pattern-recognition techniques.
	{T}he best results were obtained with a back-propagation artificial
	neural network. {CONCLUSION}: {B}enign vs. malignant {STT} discrimination
	is accurate by using pattern-recognition methods based on classical
	{MR} image findings. {T}his objective tool will assist radiologists
	in {STT} grading.},
  doi = {10.1007/s10334-003-0023-7},
  pdf = {../local/Garcia-Gomez2004Benign.pdf},
  file = {Garcia-Gomez2004Benign.pdf:local/Garcia-Gomez2004Benign.pdf:PDF},
  url = {http://dx.doi.org/10.1007/s10334-003-0023-7}
}

@article{Gardner2002Genome,
  author = {Gardner, Malcolm J and Hall, Neil and Fung, Eula and White, Owen and
	Berriman, Matthew and Hyman, Richard W and Carlton, Jane M and
	Pain, Arnab and Nelson, Karen E and Bowman, Sharen and
	Paulsen, Ian T and James, Keith and Eisen, Jonathan A and
	Rutherford, Kim and Salzberg, Steven L and Craig, Alister and
	Kyes, Sue and Chan, Man-Suen and Nene, Vishvanath and
	Shallom, Shamira J and Suh, Bernard and Peterson, Jeremy and
	Angiuoli, Sam and Pertea, Mihaela and Allen, Jonathan and
	Selengut, Jeremy and Haft, Daniel and Mather, Michael W and
	Vaidya, Akhil B and Martin, David M A and Fairlamb, Alan H and
	Fraunholz, Martin J and Roos, David S and Ralph, Stuart A and
	McFadden, Geoffrey I and Cummings, Leda M and Subramanian, G. Mani and
	Mungall, Chris and Venter, J. Craig and Carucci, Daniel J and
	Hoffman, Stephen L and Newbold, Chris and Davis, Ronald W and
	Fraser, Claire M and Barrell, Bart},
  title = {Genome sequence of the human malaria parasite {Plasmodium} falciparum.},
  journal = {Nature},
  year = {2002},
  month = {Oct},
  volume = {419},
  number = {6906},
  pages = {498--511},
  doi = {10.1038/nature01097},
  url = {http://dx.doi.org/10.1038/nature01097},
  pii = {nature01097},
  pmid = {12511928},
  abstract = {The parasite Plasmodium falciparum is responsible for hundreds of
	millions of cases of malaria, and kills more than one million African
	children annually. Here we report an analysis of the genome sequence
	of P. falciparum clone 3D7. The 23-megabase nuclear genome consists
	of 14 chromosomes, encodes about 5,300 genes, and is the most (A
	+ T)-rich genome sequenced to date. Genes involved in antigenic variation
	are concentrated in the subtelomeric regions of the chromosomes.
	Compared to the genomes of free-living eukaryotic microbes, the genome
	of this intracellular parasite encodes fewer enzymes and transporters,
	but a large proportion of genes are devoted to immune evasion and
	host-parasite interactions. Many nuclear-encoded proteins are targeted
	to the apicoplast, an organelle involved in fatty-acid and isoprenoid
	metabolism. The genome sequence provides the foundation for future
	studies of this organism, and is being exploited in the search for
	new drugs and vaccines to fight malaria.},
  keywords = {plasmodium},
  pdf = {../local/Gardner2002Genome.pdf},
  file = {Gardner2002Genome.pdf:local/Gardner2002Genome.pdf:PDF},
  timestamp = {2006.04.13}
}

@article{Gardner2003Inferring,
  author = {Gardner, T. S. and {di Bernardo}, D. and Lorenz, D. and Collins, J. J.},
  title = {Inferring genetic networks and identifying compound mode of action
	via expression profiling},
  journal = {Science},
  year = {2003},
  volume = {301},
  pages = {102--105},
  number = {5629},
  month = {Jul},
  abstract = {The complexity of cellular gene, protein, and metabolite networks
	can hinder attempts to elucidate their structure and function. To
	address this problem, we used systematic transcriptional perturbations
	to construct a first-order model of regulatory interactions in a
	nine-gene subnetwork of the SOS pathway in Escherichia coli. The
	model correctly identified the major regulatory genes and the transcriptional
	targets of mitomycin C activity in the subnetwork. This approach,
	which is experimentally and computationally scalable, provides a
	framework for elucidating the functional properties of genetic networks
	and identifying molecular targets of pharmacological compounds.},
  doi = {10.1126/science.1081900},
  pdf = {../local/Gardner2003Inferring.pdf},
  file = {Gardner2003Inferring.pdf:Gardner2003Inferring.pdf:PDF},
  institution = {Center for BioDynamics and Department of Biomedical Engineering,
	Boston University, 44 Cummington Street, Boston, MA 02215, USA.},
  owner = {fantine},
  pii = {301/5629/102},
  pmid = {12843395},
  timestamp = {2008.01.22},
  url = {http://dx.doi.org/10.1126/science.1081900}
}

@article{Gardner2010Reverse,
  author = {Gardner, T. S. and Faith, J. J.},
  title = {Reverse-engineering transcription control networks.},
  journal = {Phys. Life Rev.},
  year = {2005},
  volume = {2},
  pages = {65--88},
  number = {1},
  month = {Apr},
  abstract = {Microarray technologies, which enable the simultaneous measurement
	of all RNA transcripts in a cell, have spawned the development of
	algorithms for reverse-engineering transcription control networks.
	In this article, we classify the algorithms into two general strategies:
	physical modeling and influence modeling. We discuss the biological
	and computational principles underlying each strategy, and provide
	leading examples of each. We also discuss the practical considerations
	for developing and applying the various methods.},
  doi = {10.1016/j.plrev.2005.01.001},
  pdf = {../local/Gardner2010Reverse.pdf},
  file = {Gardner2010Reverse.pdf:Gardner2010Reverse.pdf:PDF},
  institution = {Department of Biomedical Engineering, Boston University, 44 Cummington
	St., Boston, MA 02215, USA.},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {jp},
  pii = {S1571-0645(05)00003-5},
  pmid = {20416858},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1016/j.plrev.2005.01.001}
}

@article{Gardy2005PSORTb,
  author = {Gardy, J. L. and Laird, M. R. and Chen, F. and Rey, S. and Walsh,
	C. J. and Ester, M. and Brinkman, F. S. L.},
  title = {{{PSORT}b v.2.0}: expanded prediction of bacterial protein subcellular
	localization and insights gained from comparative proteome analysis},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {617--623},
  number = {5},
  month = {Mar},
  abstract = {Motivation: {PSORT}b v.1.1 is the most precise bacterial localization
	prediction tool available. {H}owever the program's predictive coverage
	and recall are low and the method is only applicable to {G}ram-negative
	bacteria. {T}he goals of the present work were: increase {PSORT}b's
	coverage while maintaining the existing precision level, expand it
	to include {G}ram-positive bacteria, and then carry out a comparative
	analysis of localization. {R}esults: {A}n expanded database of proteins
	of known localization and new modules using frequent subsequence-based
	support vector machines were introduced into {PSORT}b v.2.0. {T}he
	program attains a precision of 96\% for {G}ram-positive and {G}ram-negative
	bacteria and predictive coverage comparable to other tools for whole
	proteome analysis. {W}e show that the proportion of proteins at each
	localization is remarkably consistent across species, even in species
	with varying proteome size. {A}vailability: {W}eb-based version: http://www.psort.org/psortb.
	{S}tandalone version: {A}vailable through the website under {GNU}
	{G}eneral {P}ublic {L}icense. {S}upplementary {I}nformation: http://www.psort.org/psortb/supplementaryinfo.html.},
  doi = {10.1093/bioinformatics/bti057},
  pdf = {../local/Gardy2005PSORTb.pdf},
  file = {Gardy2005PSORTb.pdf:local/Gardy2005PSORTb.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti057}
}

@book{Garey1979Computer,
  title = {Computers and Intractability: A Guide to the Theory of {NP}-Completeness},
  publisher = {W. H. Freeman},
  year = {1979},
  author = {Garey, M. R. and Johnson, D. S.},
  address = {San Francisco, CA}
}

@article{Garg2005SVM-based,
  author = {Garg, A. and Bhasin, M. and Raghava, G. P.},
  title = {{SVM}-based method for subcellular localization of human proteins
	using amino acid compositions, their order and similarity search},
  journal = {J. {B}iol. {C}hem.},
  year = {2005},
  volume = {280},
  pages = {14427--14432},
  number = {15},
  month = {Apr},
  abstract = {Here we report a systematic approach for predicting subcellular localization
	(cytoplasm, mitochondrial, nuclear and plasma membrane) of human
	proteins. {F}irstly, {SVM} based modules for predicting subcellular
	localization using traditional amino acid and dipeptide (i+1) composition
	achieved overall accuracy of 76.6\% and 77.8\%, respectively. {PSI}-{BLAST}
	when carried out using similarity-based search against non-redundant
	database of experimentally annotated proteins yielded 73.3\% accuracy.
	{T}o gain further insight, hybrid module (hybrid1) was developed
	based on amino acid composition, dipeptide composition, and similarity
	information and attained better accuracy of 84.9\%. {I}n addition,
	{SVM} module based on different higher order dipeptide i.e. i+2,
	i+3, and i+4 were also constructed for the prediction of subcellular
	localization of human proteins and overall accuracy of 79.7\%, 77.5\%
	and 77.1\% was accomplished respectively. {F}urthermore, another {SVM}
	module hybrid2 was developed using traditional dipeptide (i+1) and
	higher order dipeptide (i+2, i+3, and i+4) compositions, which gave
	an overall accuracy of 81.3\%. {W}e also developed {SVM} module hybrid3
	based on amino acid composition, traditional and higher order dipeptide
	compositions and {PSI}-{BLAST} output and achieved an overall accuracy
	of 84.4\%. {A} web server {HSLP}red (http://www.imtech.res.in/raghava/hslpred/
	or http://bioinformatics.uams.edu/raghava/hslpred/) has been designed
	to predict subcellular localization of human proteins using the above
	approaches.},
  doi = {10.1074/jbc.M411789200},
  pdf = {../local/Garg2005SVM-based.pdf},
  file = {Garg2005SVM-based.pdf:local/Garg2005SVM-based.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1074/jbc.M411789200}
}

@article{Garnis2006High,
  author = {Garnis, C. and Lockwood, W. W. and Vucic, E. and Ge, Y. and
	Girard, L. and Minna, J. D. and Gazdar, A. F. and Lam, S. and
	MacAulay, C. and Lam, W. L.},
  title = {High resolution analysis of non-small cell lung cancer cell lines
	by whole genome tiling path array {CGH}.},
  journal = {Int. J. Cancer},
  year = {2006},
  volume = {118},
  number = {6},
  pages = {1556--1564},
  doi = {10.1002/ijc.21491},
  url = {http://dx.doi.org/10.1002/ijc.21491},
  pmid = {16187286},
  abstract = {Chromosomal regions harboring tumor suppressors and oncogenes are
	often deleted or amplified. Array comparative genomic hybridization
	detects segmental DNA copy number alterations in tumor DNA relative
	to a normal control. The recent development of a bacterial artificial
	chromosome array, which spans the human genome in a tiling path manner
	with >32,000 clones, has facilitated whole genome profiling at an
	unprecedented resolution. Using this technology, we comprehensively
	describe and compare the genomes of 28 commonly used non-small cell
	lung carcinoma (NSCLC) cell models, derived from 18 adenocarcinomas
	(AC), 9 squamous cell carcinomas and 1 large cell carcinoma. Analysis
	at such resolution not only provided a detailed genomic alteration
	template for each of these model cell lines, but revealed novel regions
	of frequent duplication and deletion. Significantly, a detailed analysis
	of chromosome 7 identified 6 distinct regions of alterations across
	this chromosome, implicating the presence of multiple novel oncogene
	loci on this chromosome. As well, a comparison between the squamous
	and AC cells revealed alterations common to both subtypes, such as
	the loss of 3p and gain of 5p, in addition to multiple hotspots more
	frequently associated with only 1 subtype. Interestingly, chromosome
	3q, which is known to be amplified in both subtypes, showed 2 distinct
	regions of alteration, 1 frequently altered in squamous and 1 more
	frequently altered in AC. In summary, our data demonstrate the unique
	information generated by high resolution analysis of NSCLC genomes
	and uncover the presence of genetic alterations prevalent in the
	different NSCLC subtypes.},
  institution = {British Columbia Cancer Research Centre, Vancouver, BC, Canada. cgarnis@bccrc.ca},
  keywords = {Carcinoma, Non-Small-Cell Lung, genetics/pathology; Cell Line, Tumor;
	Chromosomes, Artificial, Bacterial, genetics; Gene Amplification;
	Gene Dosage; Gene Expression Profiling; Genome, Human, genetics;
	Humans; Loss of Heterozygosity; Lung Neoplasms, genetics/pathology;
	Microarray Analysis, methods; Nucleic Acid Hybridization, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  timestamp = {2010.01.08}
}

@article{Garrett2003Comparison,
  author = {Garrett, D. and Peterson, D. A. and Anderson, C. and Thaut, M.},
  title = {Comparison of linear, nonlinear, and feature selection methods for
	{EEG} signal classification.},
  journal = {{IEEE} {T}rans {N}eural {S}yst {R}ehabil {E}ng},
  year = {2003},
  volume = {11},
  pages = {141--144},
  number = {2},
  month = {Jun},
  abstract = {The reliable operation of brain-computer interfaces ({BCI}s) based
	on spontaneous electroencephalogram ({EEG}) signals requires accurate
	classification of multichannel {EEG}. {T}he design of {EEG} representations
	and classifiers for {BCI} are open research questions whose difficulty
	stems from the need to extract complex spatial and temporal patterns
	from noisy multidimensional time series obtained from {EEG} measurements.
	{T}he high-dimensional and noisy nature of {EEG} may limit the advantage
	of nonlinear classification methods over linear ones. {T}his paper
	reports the results of a linear (linear discriminant analysis) and
	two nonlinear classifiers (neural networks and support vector machines)
	applied to the classification of spontaneous {EEG} during five mental
	tasks, showing that nonlinear classifiers produce only slightly better
	classification results. {A}n approach to feature selection based
	on genetic algorithms is also presented with preliminary results
	of application to {EEG} during finger movement.},
  keywords = {80 and over, Adnexal Diseases, Adult, Aged, Algorithms, Artificial
	Intelligence, Automated, Bayes Theorem, Biological, Brain, Brain
	Mapping, Breast Neoplasms, Case-Control Studies, Chromatography,
	Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted,
	DNA, Diagnosis, Differential, Discriminant Analysis, Electroencephalography,
	Evoked Potentials, Feasibility Studies, Female, Fingers, Gene Expression
	Profiling, Gene Expression Regulation, Genetic, Genetic Markers,
	Genetic Predisposition to Disease, Genetic Screening, Habituation
	(Psychophysiology), High Pressure Liquid, Humans, Linear Models,
	Logistic Models, Male, Middle Aged, Migraine, Models, Movement, Neural
	Networks (Computer), Neurological, Non-P.H.S., Non-U.S. Gov't, Nonlinear
	Dynamics, Nucleosides, Ovarian Neoplasms, Pattern Recognition, Photic
	Stimulation, Predictive Value of Tests, ROC Curve, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Signal
	Processing, Software, Statistical, Thinking, Tumor Markers, U.S.
	Gov't, User-Computer Interface, Visual},
  pmid = {12899257}
}

@article{Gasch2001Genomic,
  author = {Gasch, A. P. and Huang, M. and Metzner, S. and Botstein, D. and Elledge,
	S. J. and Brown, P. O.},
  title = {Genomic expression responses to {DNA}-damaging agents and the regulatory
	role of the yeast {ATR} homolog {M}ec1p},
  journal = {Mol. {B}iol. {C}ell},
  year = {2001},
  volume = {12},
  pages = {2987--3003},
  number = {10},
  pdf = {../local/gasc01.pdf},
  file = {gasc01.pdf:local/gasc01.pdf:PDF},
  subject = {microarray},
  url = {http://www.molbiolcell.org/cgi/content/full/12/10/2987}
}

@article{Gasch2000Genomic,
  author = {Gasch, A. P. and Spellman, P. T. and Kao, C. M. and Carmel-Harel,
	O. and Eisen, M. B. and Storz, G. and Botstein, D. and Brown, P.
	O.},
  title = {Genomic {E}xpression {P}rograms in the {R}esponse of {Y}east {C}ells
	to {E}nvironmental {C}hanges},
  journal = {Mol. {B}iol. {C}ell},
  year = {2000},
  volume = {11},
  pages = {4241--4257},
  number = {12},
  month = {Dec},
  pdf = {../local/gasc00.pdf},
  file = {gasc00.pdf:local/gasc00.pdf:PDF},
  subject = {microarray},
  url = {http://www.molbiolcell.org/cgi/reprint/11/12/4241.pdf}
}

@article{Gasteigner1996Chemical,
  author = {Gasteiger, J. and Sadowski, J. and Schuur, J. and Selzer, P. and
	Steinhauer, L. and Steinhauer, V.},
  title = {Chemical Information in {3D} Space},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {1996},
  volume = {36},
  pages = {1030--1037},
  number = {5},
  doi = {10.1021/ci960343+},
  eprint = {http://pubs.acs.org/doi/pdf/10.1021/ci960343%2B},
  url = {http://pubs.acs.org/doi/abs/10.1021/ci960343%2B}
}

@article{GatViks2004,
  author = {Gat-Viks, I. and Tanay, A. and Shamir, R.},
  title = {Modeling and analysis of heterogeneous regulation in biological networks},
  journal = {J {C}omput {B}iol},
  year = {2004},
  volume = {11},
  pages = {1034--1049},
  number = {6},
  abstract = {In this study, we propose a novel model for the representation of
	biological networks and provide algorithms for learning model parameters
	from experimental data. {O}ur approach is to build an initial model
	based on extant biological knowledge and refine it to increase the
	consistency between model predictions and experimental data. {O}ur
	model encompasses networks which contain heterogeneous biological
	entities (m{RNA}, proteins, metabolites) and aims to capture diverse
	regulatory circuitry on several levels (metabolism, transcription,
	translation, post-translation and feedback loops, among them). {A}lgorithmically,
	the study raises two basic questions: how to use the model for predictions
	and inference of hidden variables states, and how to extend and rectify
	model components. {W}e show that these problems are hard in the biologically
	relevant case where the network contains cycles. {W}e provide a prediction
	methodology in the presence of cycles and a polynomial time, constant
	factor approximation for learning the regulation of a single entity.
	{A} key feature of our approach is the ability to utilize both high-throughput
	experimental data, which measure many model entities in a single
	experiment, as well as specific experimental measurements of few
	entities or even a single one. {I}n particular, we use together gene
	expression, growth phenotypes, and proteomics data. {W}e tested our
	strategy on the lysine biosynthesis pathway in yeast. {W}e constructed
	a model of more than 150 variables based on an extensive literature
	survey and evaluated it with diverse experimental data. {W}e used
	our learning algorithms to propose novel regulatory hypotheses in
	several cases where the literature-based model was inconsistent with
	the experiments. {W}e showed that our approach has better accuracy
	than extant methods of learning regulation.}
}

@article{Gati1979Further,
  author          = {Gati, G.},
  title           = {Further annotated bibliography on the isomorphism disease},
  journal         = {J. Graph Theor.},
  volume          = {3},
  pages           = {95--109},
  year            = {1979},
  issn            = {0364-9024},
  coden           = {JGTHDO},
  acknowledgement = {#ack-fg#}
}

@article{Gaudan2005Resolving,
  author   = {Gaudan, S. and Kirsch, H. and Rebholz-Schuhmann, D.},
  title    = {Resolving abbreviations to their senses in {M}edline.},
  journal  = {Bioinformatics},
  year     = {2005},
  month    = {Jul},
  doi      = {10.1093/bioinformatics/bti586},
  url      = {http://dx.doi.org/10.1093/bioinformatics/bti586},
  pii      = {bti586},
  keywords = {biosvm nlp},
  pdf      = {../local/Gaudan2005Resolving.pdf},
  file     = {Gaudan2005Resolving.pdf:local/Gaudan2005Resolving.pdf:PDF},
  abstract = {M{OTIVATION}: {B}iological literature contains many abbreviations
	with one particular sense in each document. {H}owever, most abbreviations
	do not have a unique sense across the literature. {F}urthermore,
	many documents do not contain the long-forms of the abbreviations.
	{R}esolving an abbreviation in a document consists of retrieving
	its sense in use. {A}bbreviation resolution improves accuracy of
	document retrieval engines and of information extraction systems.
	{RESULTS}: {W}e combine an automatic analysis of {M}edline abstracts
	and linguistic methods to build a dictionary of abbreviation/sense
	pairs. {T}he dictionary is used for the resolution of abbreviations
	occurring with their long-forms. {A}mbiguous global abbreviations
	are resolved using {S}upport {V}ector {M}achines that have been trained
	on the context of each instance of the abbreviation/sense pairs,
	previously extracted for the dictionary setup. {T}he system disambiguates
	abbreviations with a precision of 98.9\% for a recall of 98.2\% (98.5\%
	accuracy). {T}his performance is superior in comparison to previously
	reported research work. {AVAILABILITY}: {T}he abbreviation resolution
	module is available at http://www.ebi.ac.uk/{R}ebholz/software.html.}
}

@article{Gaudet2005MCPorteomics,
  author = {Gaudet, S. and Janes, K. A. and Albeck, J. G. and Pace, E. A. and
	Lauffenburger, D. A. and Sorger, P. K.},
  title = {A Compendium of Signals and Responses Triggered by Prodeath and Prosurvival
	Cytokines},
  journal = {Mol Cell Proteomics},
  year = {2005},
  volume = {4},
  pages = {1569--1590},
  number = {10},
  abstract = {Cell-signaling networks consist of proteins with a variety of functions
	(receptors, adaptor proteins, GTPases, kinases, proteases, and transcription
	factors) working together to control cell fate. Although much is
	known about the identities and biochemical activities of these signaling
	proteins, the ways in which they are combined into networks to process
	and transduce signals are poorly understood. Network-level understanding
	of signaling requires data on a wide variety of biochemical processes
	such as posttranslational modification, assembly of macromolecular
	complexes, enzymatic activity, and localization. No single method
	can gather such heterogeneous data in high throughput, and most studies
	of signal transduction therefore rely on series of small, discrete
	experiments. Inspired by the power of systematic datasets in genomics,
	we set out to build a systematic signaling dataset that would enable
	the construction of predictive models of cell-signaling networks.
	Here we describe the compilation and fusion of $\sim$10,000 signal and
	response measurements acquired from HT-29 cells treated with tumor
	necrosis factor-$\alpha$, a proapoptotic cytokine, in combination
	with epidermal growth factor or insulin, two prosurvival growth factors.
	Nineteen protein signals were measured over a 24-h period using kinase
	activity assays, quantitative immunoblotting, and antibody microarrays.
	Four different measurements of apoptotic response were also collected
	by flow cytometry for each time course. Partial least squares regression
	models that relate signaling data to apoptotic response data reveal
	which aspects of compendium construction and analysis were important
	for the reproducibility, internal consistency, and accuracy of the
	fused set of signaling measurements. We conclude that it is possible
	to build self-consistent compendia of cell-signaling data that can
	be mined computationally to yield important insights into the control
	of mammalian cell responses.},
  keywords = {csbcbook}
}

@article{Gavin2002Functionala,
  author = {Anne-Claude Gavin and Markus B{\"o}sche and Roland Krause and Paola
	Grandi and Martina Marzioch and Andreas Bauer and J{\"o}rg Schultz and
	Jens M Rick and Anne-Marie Michon and Cristina-Maria Cruciat and
	Marita Remor and Christian H{\"o}fert and Malgorzata Schelder and Miro
	Brajenovic and Heinz Ruffner and Alejandro Merino and Karin Klein
	and Manuela Hudak and David Dickson and Tatjana Rudi and Volker Gnau
	and Angela Bauch and Sonja Bastuck and Bettina Huhse and Christina
	Leutwein and Marie-Anne Heurtier and Richard R Copley and Angela
	Edelmann and Erich Querfurth and Vladimir Rybin and Gerard Drewes
	and Manfred Raida and Tewis Bouwmeester and Peer Bork and Bertrand
	Seraphin and Bernhard Kuster and Gitte Neubauer and Giulio Superti-Furga},
  title = {Functional organization of the yeast proteome by systematic analysis
	of protein complexes.},
  journal = {Nature},
  year = {2002},
  volume = {415},
  pages = {141--147},
  number = {6868},
  month = {Jan},
  abstract = {Most cellular processes are carried out by multiprotein complexes.
	{T}he identification and analysis of their components provides insight
	into how the ensemble of expressed proteins (proteome) is organized
	into functional units. {W}e used tandem-affinity purification ({TAP})
	and mass spectrometry in a large-scale approach to characterize multiprotein
	complexes in {S}accharomyces cerevisiae. {W}e processed 1,739 genes,
	including 1,143 human orthologues of relevance to human biology,
	and purified 589 protein assemblies. {B}ioinformatic analysis of
	these assemblies defined 232 distinct multiprotein complexes and
	proposed new cellular roles for 344 proteins, including 231 proteins
	with no previous functional annotation. {C}omparison of yeast and
	human complexes showed that conservation across species extends from
	single proteins to their molecular environment. {O}ur analysis provides
	an outline of the eukaryotic proteome as a network of protein complexes
	at a level of organization beyond binary interactions. {T}his higher-order
	map contains fundamental biological information and offers the context
	for a more reasoned and informed approach to drug discovery.},
  doi = {10.1038/415141a},
  pdf = {../local/gavi02.pdf},
  file = {gavi02.pdf:local/gavi02.pdf:PDF},
  keywords = {Affinity, Affinity Labels, Amino Acid Sequence, Animals, Cell Cycle
	Proteins, Cells, Chromatography, Cloning, Comparative Study, Cultured,
	DNA, DNA Damage, DNA Repair, Electrospray Ionization, Fungal, Gene
	Targeting, Genetic, Humans, Macromolecular Substances, Mass, Matrix-Assisted
	Laser Desorption-Ionization, Mitosis, Molecular, Molecular Sequence
	Data, Non-P.H.S., Non-U.S. Gov't, P.H.S., Phosphoric Monoester Hydrolases,
	Protein Binding, Protein Interaction Mapping, Protein Kinases, Proteome,
	Proteomics, Recombinant Fusion Proteins, Research Support, Ribonucleoproteins,
	Ribosomes, Saccharomyces cerevisiae, Saccharomyces cerevisiae Proteins,
	Sensitivity and Specificity, Sequence Alignment, Signal Transduction,
	Species Specificity, Spectrometry, Spectrum Analysis, Transcription,
	U.S. Gov't, 11805813},
  owner = {vert},
  pii = {415141a},
  url = {http://dx.doi.org/10.1038/415141a}
}

@article{Ge2003Reducing,
  author = {Xijin Ge and Shuichi Tsutsumi and Hiroyuki Aburatani and Shuichi
	Iwata},
  title = {Reducing false positives in molecular pattern recognition.},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2003},
  volume = {14},
  pages = {34--43},
  abstract = {In the search for new cancer subtypes by gene expression profiling,
	it is essential to avoid misclassifying samples of unknown subtypes
	as known ones. {I}n this paper, we evaluated the false positive error
	rates of several classification algorithms through a 'null test'
	by presenting classifiers a large collection of independent samples
	that do not belong to any of the tumor types in the training dataset.
	{T}he benchmark dataset is available at www2.genome.rcast.u-tokyo.ac.jp/pm/.
	{W}e found that k-nearest neighbor ({KNN}) and support vector machine
	({SVM}) have very high false positive error rates when fewer genes
	($<$100) are used in prediction. {T}he error rate can be partially
	reduced by including more genes. {O}n the other hand, prototype matching
	({PM}) method has a much lower false positive error rate. {S}uch
	robustness can be achieved without loss of sensitivity by introducing
	suitable measures of prediction confidence. {W}e also proposed a
	cluster-and-select technique to select genes for classification.
	{T}he nonparametric {K}ruskal-{W}allis {H} test is employed to select
	genes differentially expressed in multiple tumor types. {T}o reduce
	the redundancy, we then divided these genes into clusters with similar
	expression patterns and selected a given number of genes from each
	cluster. {T}he reliability of the new algorithm is tested on three
	public datasets.},
  keywords = {Amino Acid Sequence, Amino Acids, Animals, Automated, Base Sequence,
	Bayes Theorem, Biological, Carbohydrate Conformation, Carbohydrate
	Sequence, Cattle, Computational Biology, Computer Simulation, Crystallography,
	DNA, Databases, Factual, False Positive Reactions, Gene Expression
	Profiling, Genes, Genetic, Genetic Techniques, Genome, Histocompatibility
	Antigens Class I, Human, Humans, Introns, Least-Squares Analysis,
	MHC Class I, Major Histocompatibility Complex, Markov Chains, Messenger,
	Mice, Models, Monosaccharides, Neoplasms, Non-U.S. Gov't, Nonparametric,
	Pattern Recognition, Peptides, Phylogeny, Plants, Poly A, Polysaccharides,
	Predictive Value of Tests, Protein, Protein Structure, Proteins,
	RNA, Rats, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Secondary, Sequence Alignment, Software, Species Specificity,
	Statistics, Theoretical, X-Ray, 15706518}
}

@article{Gedela2011Integration,
  author      = {Gedela, Srinubabu},
  title       = {Integration, warehousing, and analysis strategies of Omics data.},
  journal     = {Methods Mol Biol},
  volume      = {719},
  pages       = {399--414},
  year        = {2011},
  doi         = {10.1007/978-1-61779-027-0\_18},
  url         = {http://dx.doi.org/10.1007/978-1-61779-027-0\_18},
  pmid        = {21370094},
  institution = {Stanford University School of Medicine, Stanford, CA, USA. gedela@stanford.edu},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {phupe},
  timestamp   = {2011.05.31},
  abstract    = {"-Omics" is a current suffix for numerous types of large-scale biological
	data generation procedures, which naturally demand the development
	of novel algorithms for data storage and analysis. With next generation
	genome sequencing burgeoning, it is pivotal to decipher a coding
	site on the genome, a gene's function, and information on transcripts
	next to the pure availability of sequence information. To explore
	a genome and downstream molecular processes, we need umpteen results
	at the various levels of cellular organization by utilizing different
	experimental designs, data analysis strategies and methodologies.
	Here comes the need for controlled vocabularies and data integration
	to annotate, store, and update the flow of experimental data. This
	chapter explores key methodologies to merge Omics data by semantic
	data carriers, discusses controlled vocabularies as eXtensible Markup
	Languages (XML), and provides practical guidance, databases, and
	software links supporting the integration of Omics data.}
}

@article{Gehlenborg2010Visualization,
  author = {Nils Gehlenborg and Se{\'a}n I O'Donoghue and Nitin S Baliga and Alexander
	Goesmann and Matthew A Hibbs and Hiroaki Kitano and Oliver Kohlbacher
	and Heiko Neuweger and Reinhard Schneider and Dan Tenenbaum and Anne-Claude
	Gavin},
  title = {Visualization of omics data for systems biology.},
  journal = {Nat Methods},
  year = {2010},
  volume = {7},
  pages = {S56--S68},
  number = {3 Suppl},
  month = {Mar},
  abstract = {High-throughput studies of biological systems are rapidly accumulating
	a wealth of 'omics'-scale data. Visualization is a key aspect of
	both the analysis and understanding of these data, and users now
	have many visualization methods and tools to choose from. The challenge
	is to create clear, meaningful and integrated visualizations that
	give biological insight, without being overwhelmed by the intrinsic
	complexity of the data. In this review, we discuss how visualization
	tools are being used to help interpret protein interaction, gene
	expression and metabolic profile data, and we highlight emerging
	new directions.},
  doi = {10.1038/nmeth.1436},
  institution = {European Bioinformatics Institute, Cambridge, UK.},
  keywords = {Genomics; Image Processing, Computer-Assisted; Mass Spectrometry;
	Metabolomics; Nuclear Magnetic Resonance, Biomolecular; Protein Binding;
	Proteomics; Systems Biology},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nmeth.1436},
  pmid = {20195258},
  timestamp = {2010.07.27},
  url = {http://dx.doi.org/10.1038/nmeth.1436}
}

@article{Genuer2010Variable,
  author = {Genuer, R. and Poggi, J. M. and Tuleau-Malot, C.},
  title = {Variable selection using random forests},
  journal = {Pattern Recognition Letters},
  year = {2010},
  volume = {31},
  pages = {2225--2236},
  number = {14},
  publisher = {Elsevier}
}

@article{Geppert2008Support-vector-machine-based,
  author = {Geppert, H. and Horv{\'a}th, T. and G{\"a}rtner, T. and Wrobel, S.
	and Bajorath, J.},
  title = {Support-vector-machine-based ranking significantly improves the effectiveness
	of similarity searching using {2D} fingerprints and multiple reference
	compounds.},
  journal = {J Chem Inf Model},
  year = {2008},
  volume = {48},
  pages = {742--746},
  number = {4},
  month = {Apr},
  abstract = {Similarity searching using molecular fingerprints is computationally
	efficient and a surprisingly effective virtual screening tool. In
	this study, we have compared ranking methods for similarity searching
	using multiple active reference molecules. Different 2D fingerprints
	were used as search tools and also as descriptors for a support vector
	machine (SVM) algorithm. In systematic database search calculations,
	a SVM-based ranking scheme consistently outperformed nearest neighbor
	and centroid approaches, regardless of the fingerprints that were
	tested, even if only very small training sets were used for SVM learning.
	The superiority of SVM-based ranking over conventional fingerprint
	methods is ascribed to the fact that SVM makes use of information
	about database molecules, in addition to known active compounds,
	during the learning phase.},
  doi = {10.1021/ci700461s},
  pdf = {../local/Geppert2008Support-vector-machine-based.pdf},
  file = {Geppert2008Support-vector-machine-based.pdf:Geppert2008Support-vector-machine-based.pdf:PDF},
  institution = {Fraunhofer IAIS, Schloss Birlinghoven, D-53754 Sankt Augustin, Germany.},
  keywords = {chemoinformatics, PUlearning},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {18318473},
  timestamp = {2010.04.02},
  url = {http://dx.doi.org/10.1021/ci700461s}
}

@inproceedings{Germann01Fast,
  author = {Germann, U. and Jahr, M. and Knight, K. and Marcu, D.},
  title = {Fast decoding and optimal decoding for machine translation},
  booktitle = {Proceedings of {ACL} 39},
  year = {2001},
  pages = {228--235}
}

@article{Gerstein2000current,
  author = {Gerstein, M. and Jansen, R.},
  title = {The current excitement in bioinformatics---analysis of whole-genome
	expression data: how does it relate to protein structure and function?},
  journal = {Curr. Opin. Struct. Biol.},
  year = {2000},
  volume = {10},
  pages = {574--584},
  number = {5},
  month = {Oct},
  abstract = {Whole-genome expression profiles provide a rich new data-trove for
	bioinformatics. Initial analyses of the profiles have included clustering
	and cross-referencing to 'external' information on protein structure
	and function. Expression profile clusters do relate to protein function,
	but the correlation is not perfect, with the discrepancies partially
	resulting from the difficulty in consistently defining function.
	Other attributes of proteins can also be related to expression---in
	particular, structure and localization---and sometimes show a clearer
	relationship than function.},
  pdf = {../local/Gerstein2000current.pdf},
  file = {Gerstein2000current.pdf:Gerstein2000current.pdf:PDF},
  institution = {Department of Molecular Biophysics and Biochemistry, 266 Whitney
	Avenue, Yale University, PO Box 208114, New Haven, CT 06520, USA.
	Mark.Gerstein@yale.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0959-440X(00)00134-2},
  pmid = {11042457},
  timestamp = {2011.10.04}
}

@article{Gestel2002Bayesian,
  author = {Van Gestel, T. and Suykens, J. A. K. and Lanckriet, G. and Lambrechts,
	A. and De Moor, B. and Vandewalle, J.},
  title = {Bayesian framework for least-squares support vector machine classifiers,
	{G}aussian processes, and kernel {F}isher discriminant analysis.},
  journal = {Neural {C}omput},
  year = {2002},
  volume = {14},
  pages = {1115--1147},
  number = {5},
  month = {May},
  abstract = {The {B}ayesian evidence framework has been successfully applied to
	the design of multilayer perceptrons ({MLP}s) in the work of {M}ac{K}ay.
	{N}evertheless, the training of {MLP}s suffers from drawbacks like
	the nonconvex optimization problem and the choice of the number of
	hidden units. {I}n support vector machines ({SVM}s) for classification,
	as introduced by {V}apnik, a nonlinear decision boundary is obtained
	by mapping the input vector first in a nonlinear way to a high-dimensional
	kernel-induced feature space in which a linear large margin classifier
	is constructed. {P}ractical expressions are formulated in the dual
	space in terms of the related kernel function, and the solution follows
	from a (convex) quadratic programming ({QP}) problem. {I}n least-squares
	{SVM}s ({LS}-{SVM}s), the {SVM} problem formulation is modified by
	introducing a least-squares cost function and equality instead of
	inequality constraints, and the solution follows from a linear system
	in the dual space. {I}mplicitly, the least-squares formulation corresponds
	to a regression formulation and is also related to kernel {F}isher
	discriminant analysis. {T}he least-squares regression formulation
	has advantages for deriving analytic expressions in a {B}ayesian
	evidence framework, in contrast to the classification formulations
	used, for example, in gaussian processes ({GP}s). {T}he {LS}-{SVM}
	formulation has clear primal-dual interpretations, and without the
	bias term, one explicitly constructs a model that yields the same
	expressions as have been obtained with {GP}s for regression. {I}n
	this article, the {B}ayesian evidence framework is combined with
	the {LS}-{SVM} classifier formulation. {S}tarting from the feature
	space formulation, analytic expressions are obtained in the dual
	space on the different levels of {B}ayesian inference, while posterior
	class probabilities are obtained by marginalizing over the model
	parameters. {E}mpirical results obtained on 10 public domain data
	sets show that the {LS}-{SVM} classifier designed within the {B}ayesian
	evidence framework consistently yields good generalization performances.},
  doi = {10.1162/089976602753633411},
  pdf = {../local/Gestel2002Bayesian.pdf},
  file = {Gestel2002Bayesian.pdf:Gestel2002Bayesian.pdf:PDF},
  url = {http://dx.doi.org/10.1162/089976602753633411}
}

@article{Gether2000Uncovering,
  author = {U. Gether},
  title = {Uncovering molecular mechanisms involved in activation of {G} protein-coupled
	receptors.},
  journal = {Endocr Rev},
  year = {2000},
  volume = {21},
  pages = {90--113},
  number = {1},
  month = {Feb},
  abstract = {G protein-coupled, seven-transmembrane segment receptors (GPCRs or
	7TM receptors), with more than 1000 different members, comprise the
	largest superfamily of proteins in the body. Since the cloning of
	the first receptors more than a decade ago, extensive experimental
	work has uncovered multiple aspects of their function and challenged
	many traditional paradigms. However, it is only recently that we
	are beginning to gain insight into some of the most fundamental questions
	in the molecular function of this class of receptors. How can, for
	example, so many chemically diverse hormones, neurotransmitters,
	and other signaling molecules activate receptors believed to share
	a similar overall tertiary structure? What is the nature of the physical
	changes linking agonist binding to receptor activation and subsequent
	transduction of the signal to the associated G protein on the cytoplasmic
	side of the membrane and to other putative signaling pathways? The
	goal of the present review is to specifically address these questions
	as well as to depict the current awareness about GPCR structure-function
	relationships in general.},
  keywords = {Animals; GTP-Binding Proteins; Humans; Ligands; Models, Biological;
	Molecular Conformation; Receptors, Cell Surface},
  owner = {laurent},
  pmid = {10696571},
  timestamp = {2007.09.22}
}

@article{Geurts2011Learning,
  author = {Geurts, Pierre},
  title = {Learning from positive and unlabeled examples by enforcing statistical
	significance.},
  journal = {Journal of Machine Learning Research - Proceedings Track},
  year = {2011},
  volume = {15},
  pages = {305--314},
  biburl = {http://www.bibsonomy.org/bibtex/2d8d9fb95ceea0cae30df5f6c1b46a04a/dblp},
  ee = {http://www.jmlr.org/proceedings/papers/v15/geurts11a/geurts11a.pdf},
  interhash = {9290ccc55163a155f7b7387fe01f7845},
  intrahash = {d8d9fb95ceea0cae30df5f6c1b46a04a},
  keywords = {dblp},
  owner = {fantinemordelet},
  timestamp = {2013.01.09},
  url = {http://dblp.uni-trier.de/db/journals/jmlr/jmlrp15.html#Geurts11}
}

@article{GevaZatorsky2006MSB,
  author = {Geva-Zatorsky, N. and Rosenfeld, N. and Itzkovitz, S. and Milo, R.
	and Sigal, A. and Dekel, E. and Yarnitzky, T. and Liron, Y. and Polak,
	P. and Lahav, G. and Alon, U.},
  title = {Oscillations and variability in the p53 system},
  journal = {Mol Syst Biol},
  year = {2006},
  volume = {2},
  pages = {2006.0033},
  abstract = {Understanding the dynamics and variability of protein circuitry requires
	accurate measurements in living cells as well as theoretical models.
	To address this, we employed one of the best-studied protein circuits
	in human cells, the negative feedback loop between the tumor suppressor
	p53 and the oncogene Mdm2. We measured the dynamics of fluorescently
	tagged p53 and Mdm2 over several days in individual living cells.
	We found that isogenic cells in the same environment behaved in highly
	variable ways following DNA-damaging gamma irradiation: some cells
	showed undamped oscillations for at least 3 days (more than 10 peaks).
	The amplitude of the oscillations was much more variable than the
	period. Sister cells continued to oscillate in a correlated way after
	cell division, but lost correlation after about 11 h on average.
	Other cells showed low-frequency fluctuations that did not resemble
	oscillations. We also analyzed different families of mathematical
	models of the system, including a novel checkpoint mechanism. The
	models point to the possible source of the variability in the oscillations:
	low-frequency noise in protein production rates, rather than noise
	in other parameters such as degradation rates. This study provides
	a view of the extensive variability of the behavior of a protein
	circuit in living human cells, both from cell to cell and in the
	same cell over time.},
  keywords = {csbcbook}
}

@article{Gevaert2006Predicting,
  author = {Gevaert, O. and De Smet, F. and Timmerman, D. and Moreau, Y. and
	De Moor, B.},
  title = {Predicting the prognosis of breast cancer by integrating clinical
	and microarray data with {Bayesian} networks},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {e184--e190},
  number = {14},
  publisher = {Oxford Univ Press}
}

@article{Ghosh2005Classification,
  author = {Debashis Ghosh and Arul M Chinnaiyan},
  title = {Classification and {S}election of {B}iomarkers in {G}enomic {D}ata
	{U}sing {LASSO}.},
  journal = {J {B}iomed {B}iotechnol},
  year = {2005},
  volume = {2005},
  pages = {147--154},
  number = {2},
  abstract = {High-throughput gene expression technologies such as microarrays have
	been utilized in a variety of scientific applications. {M}ost of
	the work has been done on assessing univariate associations between
	gene expression profiles with clinical outcome (variable selection)
	or on developing classification procedures with gene expression data
	(supervised learning). {W}e consider a hybrid variable selection/classification
	approach that is based on linear combinations of the gene expression
	profiles that maximize an accuracy measure summarized using the receiver
	operating characteristic curve. {U}nder a specific probability model,
	this leads to the consideration of linear discriminant functions.
	{W}e incorporate an automated variable selection approach using {LASSO}.
	{A}n equivalence between {LASSO} estimation with support vector machines
	allows for model fitting using standard software. {W}e apply the
	proposed method to simulated data as well as data from a recently
	published prostate cancer study.},
  doi = {10.1155/JBB.2005.147},
  pdf = {../local/Ghosh2005Classification.pdf},
  file = {Ghosh2005Classification.pdf:local/Ghosh2005Classification.pdf:PDF},
  pii = {S1110724304406020_THIS_PII_IS_INCORRECT_},
  pmid = {16046820},
  url = {http://dx.doi.org/10.1155/JBB.2005.147}
}

@article{Giallourakis2005Disease,
  author      = {Giallourakis, C. and Henson, C. and Reich, M. and Xie, X. and Mootha,
	V. K.},
  title       = {Disease gene discovery through integrative genomics.},
  journal     = {Annu. Rev. Genomics Hum. Genet.},
  volume      = {6},
  pages       = {381--406},
  year        = {2005},
  doi         = {10.1146/annurev.genom.6.080604.162234},
  url         = {http://dx.doi.org/10.1146/annurev.genom.6.080604.162234},
  pmid        = {16124867},
  institution = {Broad Institute of Harvard and MIT, Cambridge, Massachusetts 02139,
	USA.},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Giallourakis2005Disease.pdf},
  file        = {Giallourakis2005Disease.pdf:Giallourakis2005Disease.pdf:PDF},
  owner       = {jp},
  timestamp   = {2010.11.01},
  abstract    = {The availability of complete genome sequences and the wealth of large-scale
	biological data sets now provide an unprecedented opportunity to
	elucidate the genetic basis of rare and common human diseases. Here
	we review some of the emerging genomics technologies and data resources
	that can be used to infer gene function to prioritize candidate genes.
	We then describe some computational strategies for integrating these
	large-scale data sets to provide more faithful descriptions of gene
	function, and how such approaches have recently been applied to discover
	genes underlying Mendelian disorders. Finally, we discuss future
	prospects and challenges for using integrative genomics to systematically
	discover not only single genes but also entire gene networks that
	underlie and modify human disease.}
}

@article{Gibney2010Epigenetics,
  author      = {E. R. Gibney and C. M. Nolan},
  title       = {Epigenetics and gene expression.},
  journal     = {Heredity},
  year        = {2010},
  month       = {Jul},
  volume      = {105},
  number      = {1},
  pages       = {4--13},
  doi         = {10.1038/hdy.2010.54},
  url         = {http://dx.doi.org/10.1038/hdy.2010.54},
  pmid        = {20461105},
  pii         = {hdy201054},
  institution = {UCD Institute of Food and Health, Dublin, Ireland.},
  keywords    = {Animals; Cell Differentiation; Cell Lineage; DNA Modification Methylases,
	metabolism; Epigenesis, Genetic; Gene Expression; Heredity; Humans},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  timestamp   = {2011.06.04},
  abstract    = {Transcription, translation and subsequent protein modification represent
	the transfer of genetic information from the archival copy of DNA
	to the short-lived messenger RNA, usually with subsequent production
	of protein. Although all cells in an organism contain essentially
	the same DNA, cell types and functions differ because of qualitative
	and quantitative differences in their gene expression. Thus, control
	of gene expression is at the heart of differentiation and development.
	Epigenetic processes, including DNA methylation, histone modification
	and various RNA-mediated processes, are thought to influence gene
	expression chiefly at the level of transcription; however, other
	steps in the process (for example, translation) may also be regulated
	epigenetically. The following paper will outline the role epigenetics
	is believed to have in influencing gene expression.}
}

@article{Gillet2003Similarity,
  author = {V. Gillet and P. Willett and J. Bradshaw},
  title = {Similarity searching using reduced graphs},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2003},
  volume = {43},
  pages = {338--345},
  owner = {mahe},
  timestamp = {2006.09.13}
}

@article{Girirajan2009Sequencing,
  author      = {Santhosh Girirajan and Lin Chen and Tina Graves and Tomas Marques-Bonet
	and Mario Ventura and Catrina Fronick and Lucinda Fulton and Mariano
	Rocchi and Robert S Fulton and Richard K Wilson and Elaine R Mardis
	and Evan E Eichler},
  title       = {Sequencing human-gibbon breakpoints of synteny reveals mosaic new
	insertions at rearrangement sites},
  journal     = {Genome Res.},
  year        = {2009},
  month       = {Feb},
  volume      = {19},
  number      = {2},
  pages       = {178--190},
  doi         = {10.1101/gr.086041.108},
  url         = {http://dx.doi.org/10.1101/gr.086041.108},
  pmid        = {19029537},
  pii         = {gr.086041.108},
  institution = {Department of Genome Sciences, Howard Hughes Medical Institute, University
	of Washington School of Medicine, Seattle, Washington 98195, USA.},
  keywords    = {ngs},
  owner       = {jp},
  timestamp   = {2009.10.09},
  pdf         = {../local/Girirajan2009Sequencing.pdf},
  file        = {Girirajan2009Sequencing.pdf:Girirajan2009Sequencing.pdf:PDF},
  abstract    = {The gibbon genome exhibits extensive karyotypic diversity with an
	increased rate of chromosomal rearrangements during evolution. In
	an effort to understand the mechanistic origin and implications of
	these rearrangement events, we sequenced 24 synteny breakpoint regions
	in the white-cheeked gibbon (Nomascus leucogenys, NLE) in the form
	of high-quality BAC insert sequences (4.2 Mbp). While there is a
	significant deficit of breakpoints in genes, we identified seven
	human gene structures involved in signaling pathways (DEPDC4, GNG10),
	phospholipid metabolism (ENPP5, PLSCR2), beta-oxidation (ECH1), cellular
	structure and transport (HEATR4), and transcription (ZNF461), that
	have been disrupted in the NLE gibbon lineage. Notably, only three
	of these genes show the expected evolutionary signatures of pseudogenization.
	Sequence analysis of the breakpoints suggested both nonclassical
	nonhomologous end-joining (NHEJ) and replication-based mechanisms
	of rearrangement. A substantial number (11/24) of human-NLE gibbon
	breakpoints showed new insertions of gibbon-specific repeats and
	mosaic structures formed from disparate sequences including segmental
	duplications, LINE, SINE, and LTR elements. Analysis of these sites
	provides a model for a replication-dependent repair mechanism for
	double-strand breaks (DSBs) at rearrangement sites and insights into
	the structure and formation of primate segmental duplications at
	sites of genomic rearrangements during evolution.}
}

@article{Girosi1998Equivalence,
  author = {Girosi, F.},
  title = {An {E}quivalence {B}etween {S}parse {A}pproximation and {S}upport
	{V}ector {M}achines.},
  journal = {Neural {C}omput},
  year = {1998},
  volume = {10},
  pages = {1455--1480},
  number = {6},
  month = {Jul},
  abstract = {This article shows a relationship between two different approximation
	techniques: the support vector machines ({SVM}), proposed by {V}.
	{V}apnik (1995) and a sparse approximation scheme that resembles
	the basis pursuit denoising algorithm ({C}hen, 1995; {C}hen, {D}onoho,
	and {S}aunders, 1995). {SVM} is a technique that can be derived from
	the structural risk minimization principle ({V}apnik, 1982) and can
	be used to estimate the parameters of several different approximation
	schemes, including radial basis functions, algebraic and trigonometric
	polynomials, {B}-splines, and some forms of multilayer perceptrons.
	{B}asis pursuit denoising is a sparse approximation technique in
	which a function is reconstructed by using a small number of basis
	functions chosen from a large set (the dictionary). {W}e show that
	if the data are noiseless, the modified version of basis pursuit
	denoising proposed in this article is equivalent to {SVM} in the
	following sense: if applied to the same data set, the two techniques
	give the same solution, which is obtained by solving the same quadratic
	programming problem. {I}n the appendix, we present a derivation of
	the {SVM} technique in one framework of regularization theory, rather
	than statistical learning theory, establishing a connection between
	{SVM}, sparse approximation, and regularization theory.},
  keywords = {Algorithms, Automated, Biometry, Computers, DNA, Databases, Factual,
	Fungal, Fungal Proteins, GTP-Binding Proteins, Gene Expression, Genes,
	Learning, Markov Chains, Models, Neural Networks (Computer), Neurological,
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Hybridization, Open Reading
	Frames, P.H.S., Pattern Recognition, Protein, Protein Structure,
	Proteins, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical,
	Tertiary, U.S. Gov't, 9698353}
}

@article{Girosi1995Regularization,
  author  = {Girosi, F. and Jones, M. and Poggio, T.},
  title   = {Regularization {T}heory and {N}eural {N}etworks {A}rchitectures},
  journal = {Neural {C}omput.},
  year    = {1995},
  volume  = {7},
  number  = {2},
  pages   = {219--269},
  url     = {http://citeseer.nj.nec.com/girosi95regularization.html},
  subject = {ml},
  pdf     = {../local/giro95.pdf},
  file    = {giro95.pdf:local/giro95.pdf:PDF}
}

@article{Glaser2006Method,
  author = {Glaser, F. and Morris, R. J. and Najmanovich, R. J. and Laskowski,
	R. A. and Thornton, J. M.},
  title = {A method for localizing ligand binding pockets in protein structures.},
  journal = {Proteins},
  year = {2006},
  volume = {62},
  pages = {479--488},
  number = {2},
  month = {February},
  abstract = {The accurate identification of ligand binding sites in protein structures
	can be valuable in determining protein function. Once the binding
	site is known, it becomes easier to perform in silico and experimental
	procedures that may allow the ligand type and the protein function
	to be determined. For example, binding pocket shape analysis relies
	heavily on the correct localization of the ligand binding site. We
	have developed SURFNET-ConSurf, a modular, two-stage method for identifying
	the location and shape of potential ligand binding pockets in protein
	structures. In the first stage, the SURFNET program identifies clefts
	in the protein surface that are potential binding sites. In the second
	stage, these clefts are trimmed in size by cutting away regions distant
	from highly conserved residues, as defined by the ConSurf-HSSP database.
	The largest clefts that remain tend to be those where ligands bind.
	To test the approach, we analyzed a nonredundant set of 244 protein
	structures from the PDB and found that SURFNET-ConSurf identifies
	a ligand binding pocket in 75\% of them. The trimming procedure reduces
	the original cleft volumes by 30\% on average, while still encompassing
	an average 87\% of the ligand volume. From the analysis of the results
	we conclude that for those cases in which the ligands are found in
	large, highly conserved clefts, the combined SURFNET-ConSurf method
	gives pockets that are a better match to the ligand shape and location.
	We also show that this approach works better for enzymes than for
	nonenzyme proteins.},
  institution = {European Bioinformatics Institute, European Molecular Biology Laboratory,
	Wellcome Trust Genome Campus, Hinxton, Cambridge, United Kingdom.
	fabian@ebi.ac.uk},
  citeulike-article-id = {472870},
  doi = {10.1002/prot.20769},
  issn = {1097-0134},
  keywords = {ligand-volume, protein-ligand, surface},
  posted-at = {2006-01-20 20:31:25},
  priority = {2},
  url = {http://dx.doi.org/10.1002/prot.20769}
}

@article{Glenisson2004TXTGate:,
  author = {Glenisson, P. and Coessens, B. and Van Vooren, S. and Mathys, J.
	and Moreau, Y. and De Moor, B.},
  title = {{TXTGate}: profiling gene groups with text-based information.},
  journal = {Genome Biol},
  year = {2004},
  volume = {5},
  pages = {R43},
  number = {6},
  abstract = {We implemented a framework called TXTGate that combines literature
	indices of selected public biological resources in a flexible text-mining
	system designed towards the analysis of groups of genes. By means
	of tailored vocabularies, term- as well as gene-centric views are
	offered on selected textual fields and MEDLINE abstracts used in
	LocusLink and the Saccharomyces Genome Database. Subclustering and
	links to external resources allow for in-depth analysis of the resulting
	term profiles.},
  doi = {10.1186/gb-2004-5-6-r43},
  institution = {Departement Elektrotechniek (ESAT), Faculteit Toegepaste Wetenschappen,
	Katholieke Universiteit Leuven, Kasteelpark Arenberg 10, 3001 Heverlee
	(Leuven), Belgium.},
  keywords = {Animals; Cluster Analysis; Databases, Genetic; Disease Models, Animal;
	Gene Expression Profiling; Gene Expression Regulation, Fungal; Gene
	Expression Regulation, Neoplastic; Genes, Fungal; Genes, Neoplasm;
	Genome, Fungal; Genome, Human; Humans; Information Storage and Retrieval;
	MEDLINE; Mice; Saccharomyces; Salivary Gland Neoplasms; Vocabulary},
  owner = {fantine},
  pii = {gb-2004-5-6-r43},
  pmid = {15186494},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1186/gb-2004-5-6-r43}
}

@article{Glotsos2004Automated,
  author = {Dimitris Glotsos and Panagiota Spyridonos and Dionisis Cavouras and
	Panagiota Ravazoula and Petroula-Arampantoni Dadioti and George Nikiforidis},
  title = {Automated segmentation of routinely hematoxylin-eosin-stained microscopic
	images by combining support vector machine clustering and active
	contour models.},
  journal = {Anal {Q}uant {C}ytol {H}istol},
  year = {2004},
  volume = {26},
  pages = {331--340},
  number = {6},
  month = {Dec},
  abstract = {O{BJECTIVE}: {T}o develop a method for the automated segmentation
	of images of routinely hematoxylin-eosin ({H}-{E})-stained microscopic
	sections to guarantee correct results in computer-assisted microscopy.
	{STUDY} {DESIGN}: {C}linical material was composed 50 {H}-{E}-stained
	biopsies of astrocytomas and 50 {H}-{E}-stained biopsies of urinary
	bladder cancer. {T}he basic idea was to use a support vector machine
	clustering ({SVMC}) algorithm to provide gross segmentation of regions
	holding nuclei and subsequently to refine nuclear boundary detection
	with active contours. {T}he initialization coordinates of the active
	contour model were defined using a {SVMC} pixel-based classification
	algorithm that discriminated nuclear regions from the surrounding
	tissue. {S}tarting from the boundaries of these regions, the snake
	fired and propagated until converging to nuclear boundaries. {RESULTS}:
	{T}he method was validated for 2 different types of {H}-{E}-stained
	images. {R}esults were evaluated by 2 histopathologists. {O}n average,
	94\% of nuclei were correctly delineated. {CONCLUSION}: {T}he proposed
	algorithm could be of value in computer-based systems for automated
	interpretation of microscopic images.},
  keywords = {Adenosinetriphosphatase, Adolescent, Adult, Algorithms, Amino Acid
	Sequence, Amino Acids, Animals, Astrocytoma, Automated, Automation,
	Base Sequence, Bayes Theorem, Biological, Biopsy, Bladder Neoplasms,
	Breast Neoplasms, Carbohydrate Conformation, Carbohydrate Sequence,
	Cattle, Cell Cycle Proteins, Cell Nucleus, Computational Biology,
	Computer Simulation, Computer-Assisted, Crystallography, DNA, Databases,
	Diagnosis, Differential, Eosine Yellowish-(YS), Exoribonucleases,
	Factual, False Negative Reactions, False Positive Reactions, Female,
	Gene Expression, Gene Expression Profiling, Genes, Genetic, Genetic
	Techniques, Genetic Vectors, Genome, Hematoxylin, Histocompatibility
	Antigens Class I, Human, Humans, Image Interpretation, Image Processing,
	Introns, Least-Squares Analysis, MHC Class I, Major Histocompatibility
	Complex, Markov Chains, Messenger, Mice, Middle Aged, Models, Molecular
	Structure, Monosaccharides, Multigene Family, Mutation, Neoplasms,
	Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonparametric,
	Nucleotidyltransferases, Observer Variation, Oligonucleotide Array
	Sequence Analysis, P.H.S., Pattern Recognition, Peptides, Phenotype,
	Phylogeny, Plants, Poly A, Polysaccharides, Predictive Value of Tests,
	Protein, Protein Biosynthesis, Protein Kinase Inhibitors, Protein
	Structure, Proteins, RNA, RNA Helicases, RNA Splicing, Rats, Reproducibility
	of Results, Research Support, Retrospective Studies, Saccharomyces
	cerevisiae, Saccharomyces cerevisiae Proteins, Secondary, Sensitivity
	and Specificity, Sequence Alignment, Software, Species Specificity,
	Staining and Labeling, Statistics, Theoretical, Transcription, U.S.
	Gov't, Ultrasonography, X-Ray, 15678615}
}

@article{Glotsos2004Computer-based,
  author = {Dimitris Glotsos and Panagiota Spyridonos and Panagiotis Petalas
	and Dionisis Cavouras and Panagiota Ravazoula and Petroula-Arampatoni
	Dadioti and Ioanna Lekka and George Nikiforidis},
  title = {Computer-based malignancy grading of astrocytomas employing a support
	vector machine classifier, the {WHO} grading system and the regular
	hematoxylin-eosin diagnostic staining procedure.},
  journal = {Anal {Q}uant {C}ytol {H}istol},
  year = {2004},
  volume = {26},
  pages = {77--83},
  number = {2},
  month = {Apr},
  abstract = {O{BJECTIVE}: {T}o investigate and develop an automated technique for
	astrocytoma malignancy grading compatible with the clinical routine.
	{STUDY} {DESIGN}: {O}ne hundred forty biopsies of astrocytomas were
	collected from 2 hospitals. {T}he degree of tumor malignancy was
	defined as low or high according to the {W}orld {H}ealth {O}rganization
	grading system. {F}rom each biopsy, images were digitized and segmented
	to isolate nuclei from background tissue. {M}orphologic and textural
	nuclear features were quantified to encode tumor malignancy. {E}ach
	case was represented by a 40-dimensional feature vector. {A}n exhaustive
	search procedure in feature space was utilized to determine the best
	feature combination that resulted in the smallest classification
	error. {L}ow and high grade tumors were discriminated using support
	vector machines ({SVM}s). {T}o evaluate the system performance, all
	available data were split randomly into training and test sets. {RESULTS}:
	{T}he best vector combination consisted of 3 textural and 2 morphologic
	features. {L}ow and high grade cases were discriminated with an accuracy
	of 90.7\% and 88.9\%, respectively, using an {SVM} classifier with
	polynomial kernel of degree 2. {CONCLUSION}: {T}he proposed methodology
	was based on standards that are common in daily clinical practice
	and might be used in parallel with conventional grading as a second-opinion
	tool to reduce subjectivity in the classification of astrocytomas.},
  keywords = {Amino Acids, Antibodies, Artificial Intelligence, Astrocytoma, Biological,
	Biopsy, Brain, Brain Mapping, Brain Neoplasms, Calibration, Comparative
	Study, Computational Biology, Computer-Assisted, Cysteine, Cystine,
	Electrodes, Electroencephalography, Eosine Yellowish-(YS), Evoked
	Potentials, Female, Hematoxylin, Horseradish Peroxidase, Humans,
	Image Processing, Imagery (Psychotherapy), Imagination, Laterality,
	Male, Monoclonal, Movement, Neoplasms, Non-P.H.S., Non-U.S. Gov't,
	P.H.S., Perception, Principal Component Analysis, Protein, Protein
	Array Analysis, Proteins, Research Support, Sensitivity and Specificity,
	Sequence Analysis, Software, Tumor Markers, U.S. Gov't, User-Computer
	Interface, World Health Organization, 15131894}
}

@article{Glotsos2005Automated,
  author = {Dimitris Glotsos and Jussi Tohka and Panagiota Ravazoula and Dionisis
	Cavouras and George Nikiforidis},
  title = {Automated diagnosis of brain tumours astrocytomas using probabilistic
	neural network clustering and support vector machines.},
  journal = {Int {J} {N}eural {S}yst},
  year = {2005},
  volume = {15},
  pages = {1--11},
  number = {1--2},
  abstract = {A computer-aided diagnosis system was developed for assisting brain
	astrocytomas malignancy grading. {M}icroscopy images from 140 astrocytic
	biopsies were digitized and cell nuclei were automatically segmented
	using a {P}robabilistic {N}eural {N}etwork pixel-based clustering
	algorithm. {A} decision tree classification scheme was constructed
	to discriminate low, intermediate and high-grade tumours by analyzing
	nuclear features extracted from segmented nuclei with a {S}upport
	{V}ector {M}achine classifier. {N}uclei were segmented with an average
	accuracy of 86.5\%. {L}ow, intermediate, and high-grade tumours were
	identified with 95\%, 88.3\%, and 91\% accuracies respectively. {T}he
	proposed algorithm could be used as a second opinion tool for the
	histopathologists.},
  pii = {S0129065705000013}
}

@book{Godsil2000Algebraic,
  author    = {C. Godsil and G. Royle},
  title     = {Algebraic graph theory},
  publisher = {Springer-Verlag},
  year      = {2000}
}

@article{Goffeau1996Life,
  author = {A. Goffeau and B. G. Barrell and H. Bussey and R. W. Davis and B. Dujon
	and H. Feldmann and F. Galibert and J. D. Hoheisel and C. Jacq and
	M. Johnston and E. J. Louis and H. W. Mewes and Y. Murakami and P.
	Philippsen and H. Tettelin and S. G. Oliver},
  title = {Life with 6000 genes},
  journal = {Science},
  year = {1996},
  volume = {274},
  pages = {546, 563--567},
  number = {5287},
  month = {October},
  doi = {10.1126/science.274.5287.546},
  pdf = {../local/Goffeau1996Life.pdf},
  file = {Goffeau1996Life.pdf:local/Goffeau1996Life.pdf:PDF},
  subject = {bio},
  url = {http://www.sciencemag.org/cgi/content/abstract/274/5287/546}
}

@article{Goh2007human,
  author = {Goh, K.-I. and Cusick, M. E. and Valle, D. and Childs, B. and Vidal,
	M. and Barab{\'a}si, A.-L.},
  title = {The human disease network},
  journal = {Proc. Natl. Acad. Sci. U. S. A.},
  year = {2007},
  volume = {104},
  pages = {8685--8690},
  number = {21},
  month = {May},
  abstract = {A network of disorders and disease genes linked by known disorder-gene
	associations offers a platform to explore in a single graph-theoretic
	framework all known phenotype and disease gene associations, indicating
	the common genetic origin of many diseases. Genes associated with
	similar disorders show both higher likelihood of physical interactions
	between their products and higher expression profiling similarity
	for their transcripts, supporting the existence of distinct disease-specific
	functional modules. We find that essential human genes are likely
	to encode hub proteins and are expressed widely in most tissues.
	This suggests that disease genes also would play a central role in
	the human interactome. In contrast, we find that the vast majority
	of disease genes are nonessential and show no tendency to encode
	hub proteins, and their expression pattern indicates that they are
	localized in the functional periphery of the network. A selection-based
	model explains the observed difference between essential and disease
	genes and also suggests that diseases caused by somatic mutations
	should not be peripheral, a prediction we confirm for cancer genes.},
  doi = {10.1073/pnas.0701361104},
  pdf = {../local/Goh2007human.pdf},
  file = {Goh2007human.pdf:Goh2007human.pdf:PDF},
  institution = {Center for Complex Network Research and Department of Physics, University
	of Notre Dame, Notre Dame, IN 46556, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {0701361104},
  pmid = {17502601},
  timestamp = {2011.09.23},
  url = {http://dx.doi.org/10.1073/pnas.0701361104}
}

@article{Gold2006Fold,
  author = {Gold, N. D. and Jackson, R. M.},
  title = {Fold independent structural comparisons of protein-ligand binding
	sites for exploring functional relationships.},
  journal = {J. Mol. Biol.},
  year = {2006},
  volume = {355},
  pages = {1112--1124},
  number = {5},
  month = {Feb},
  abstract = {The rapid growth in protein structural data and the emergence of structural
	genomics projects have increased the need for automatic structure
	analysis and tools for function prediction. Small molecule recognition
	is critical to the function of many proteins; therefore, determination
	of ligand binding site similarity is important for understanding
	ligand interactions and may allow their functional classification.
	Here, we present a binding sites database (SitesBase) that given
	a known protein-ligand binding site allows rapid retrieval of other
	binding sites with similar structure independent of overall sequence
	or fold similarity. However, each match is also annotated with sequence
	similarity and fold information to aid interpretation of structure
	and functional similarity. Similarity in ligand binding sites can
	indicate common binding modes and recognition of similar molecules,
	allowing potential inference of function for an uncharacterised protein
	or providing additional evidence of common function where sequence
	or fold similarity is already known. Alternatively, the resource
	can provide valuable information for detailed studies of molecular
	recognition including structure-based ligand design and in understanding
	ligand cross-reactivity. Here, we show examples of atomic similarity
	between superfamily or more distant fold relatives as well as between
	seemingly unrelated proteins. Assignment of unclassified proteins
	to structural superfamiles is also undertaken and in most cases substantiates
	assignments made using sequence similarity. Correct assignment is
	also possible where sequence similarity fails to find significant
	matches, illustrating the potential use of binding site comparisons
	for newly determined proteins.},
  keywords = {geometric hashing, SitesBase, structural genomics, 3D structure comparison},
  owner = {vero},
  pmid = {16359705},
  timestamp = {2009.02.04}
}

@article{Gold2006SitesBase,
  author = {Gold, N. D. and Jackson, R. M.},
  title = {{SitesBase}: a database for structure-based protein-ligand binding
	site comparisons.},
  journal = {Nucleic Acids Res.},
  year = {2006},
  volume = {34},
  pages = {D231--D234},
  month = {Jan},
  abstract = {There are many components which govern the function of a protein within
	a cell. Here, we focus on the molecular recognition of small molecules
	and the prediction of common recognition by similarity between protein-ligand
	binding sites. SitesBase is an easily accessible database which is
	simple to use and holds information about structural similarities
	between known ligand binding sites found in the Protein Data Bank.
	These similarities are presented to the wider community enabling
	full analysis of molecular recognition and potentially protein structure-function
	relationships. SitesBase is accessible at http://www.bioinformatics.leeds.ac.uk/sb.},
  owner = {vero},
  pmid = {16381853},
  timestamp = {2009.02.04}
}

@article{Gold1996graduated,
  author    = {Gold, S. and Rangarajan, A.},
  title     = {A graduated assignment algorithm for graph matching},
  journal   = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year      = {1996},
  month     = {April},
  volume    = {18},
  number    = {4},
  pages     = {377--388},
  doi       = {10.1109/34.491619},
  url       = {http://dx.doi.org/10.1109/34.491619},
  owner     = {jp},
  timestamp = {2008.10.05},
  pdf       = {../local/Gold1996graduated.pdf},
  file      = {Gold1996graduated.pdf:Gold1996graduated.pdf:PDF},
  abstract  = {A graduated assignment algorithm for graph matching is presented which
	is fast and accurate even in the presence of high noise. By combining
	graduated nonconvexity, two-way (assignment) constraints, and sparsity,
	large improvements in accuracy and speed are achieved. Its low order
	computational complexity [O(lm), where l and m are the number of
	links in the two graphs] and robustness in the presence of noise
	offer advantages over traditional combinatorial approaches. The algorithm,
	not restricted to any special class of graph, is applied to subgraph
	isomorphism, weighted graph matching, and attributed relational graph
	matching. To illustrate the performance of the algorithm, attributed
	relational graphs derived from objects are matched. Then, results
	from twenty-five thousand experiments conducted on 100 node random
	graphs of varying types (graphs with only zero-one links, weighted
	graphs, and graphs with node attributes and multiple link types)
	are reported. No comparable results have been reported by any other
	graph matching algorithm before in the research literature. Twenty-five
	hundred control experiments are conducted using a relaxation labeling
	algorithm and large improvements in accuracy are demonstrated.}
}

@article{Goldbaum2002Comparing,
  author = {Michael H Goldbaum and Pamela A Sample and Kwokleung Chan and Julia
	Williams and Te-Won Lee and Eytan Blumenthal and Christopher A Girkin
	and Linda M Zangwill and Christopher Bowd and Terrence Sejnowski
	and Robert N Weinreb},
  title = {Comparing machine learning classifiers for diagnosing glaucoma from
	standard automated perimetry.},
  journal = {Invest {O}phthalmol {V}is {S}ci},
  year = {2002},
  volume = {43},
  pages = {162--169},
  number = {1},
  month = {Jan},
  abstract = {P{URPOSE}: {T}o determine which machine learning classifier learns
	best to interpret standard automated perimetry ({SAP}) and to compare
	the best of the machine classifiers with the global indices of {STATPAC}
	2 and with experts in glaucoma. {METHODS}: {M}ultilayer perceptrons
	({MLP}), support vector machines ({SVM}), mixture of {G}aussian ({M}o{G}),
	and mixture of generalized {G}aussian ({MGG}) classifiers were trained
	and tested by cross validation on the numerical plot of absolute
	sensitivity plus age of 189 normal eyes and 156 glaucomatous eyes,
	designated as such by the appearance of the optic nerve. {T}he authors
	compared performance of these classifiers with the global indices
	of {STATPAC}, using the area under the {ROC} curve. {T}wo human experts
	were judged against the machine classifiers and the global indices
	by plotting their sensitivity-specificity pairs. {RESULTS}: {M}o{G}
	had the greatest area under the {ROC} curve of the machine classifiers.
	{P}attern {SD} ({PSD}) and corrected {PSD} ({CPSD}) had the largest
	areas under the curve of the global indices. {M}o{G} had significantly
	greater {ROC} area than {PSD} and {CPSD}. {H}uman experts were not
	better at classifying visual fields than the machine classifiers
	or the global indices. {CONCLUSIONS}: {M}o{G}, using the entire visual
	field and age for input, interpreted {SAP} better than the global
	indices of {STATPAC}. {M}achine classifiers may augment the global
	indices of {STATPAC}.}
}

@techreport{Goldfarb2000What,
  author = {Goldfarb, L. and Golubitsky, O. and Korkin, D.},
  title = {What is a structural representation?},
  institution = {University of New Brunswick},
  year = {2000},
  number = {TR00-137},
  url = {http://www.cs.unb.ca/profs/goldfarb/struct.ps}
}

@article{Goldstein2009Common,
  author      = {Goldstein, D. B.},
  title       = {Common genetic variation and human traits.},
  journal     = {N. Engl. J. Med.},
  year        = {2009},
  month       = {Apr},
  volume      = {360},
  number      = {17},
  pages       = {1696--1698},
  doi         = {10.1056/NEJMp0806284},
  url         = {http://dx.doi.org/10.1056/NEJMp0806284},
  pmid        = {19369660},
  pii         = {NEJMp0806284},
  institution = {Center for Human Genome Variation, Institute for Genome Sciences
	and Policy, Duke University, Durham, NC, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2010.10.12},
  pdf         = {../local/Goldstein2009Common.pdf},
  file        = {Goldstein2009Common.pdf:Goldstein2009Common.pdf:PDF}
}

@article{Golland2005Detection,
  author = {Polina Golland and W. Eric L Grimson and Martha E Shenton and Ron
	Kikinis},
  title = {Detection and analysis of statistical differences in anatomical shape.},
  journal = {Med {I}mage {A}nal},
  year = {2005},
  volume = {9},
  pages = {69--86},
  number = {1},
  month = {Feb},
  abstract = {We present a computational framework for image-based analysis and
	interpretation of statistical differences in anatomical shape between
	populations. {A}pplications of such analysis include understanding
	developmental and anatomical aspects of disorders when comparing
	patients versus normal controls, studying morphological changes caused
	by aging, or even differences in normal anatomy, for example, differences
	between genders. {O}nce a quantitative description of organ shape
	is extracted from input images, the problem of identifying differences
	between the two groups can be reduced to one of the classical questions
	in machine learning of constructing a classifier function for assigning
	new examples to one of the two groups while making as few misclassifications
	as possible. {T}he resulting classifier must be interpreted in terms
	of shape differences between the two groups back in the image domain.
	{W}e demonstrate a novel approach to such interpretation that allows
	us to argue about the identified shape differences in anatomically
	meaningful terms of organ deformation. {G}iven a classifier function
	in the feature space, we derive a deformation that corresponds to
	the differences between the two classes while ignoring shape variability
	within each class. {B}ased on this approach, we present a system
	for statistical shape analysis using distance transforms for shape
	representation and the support vector machines learning algorithm
	for the optimal classifier estimation and demonstrate it on artificially
	generated data sets, as well as real medical studies.},
  doi = {10.1016/j.media.2004.07.003},
  keywords = {Algorithms, Amino Acid, Artificial Intelligence, Ascomycota, Automated,
	Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms, Comparative
	Study, Computer-Assisted, Crystallography, DNA, DNA Primers, Databases,
	Diagnostic Imaging, Gene Expression Profiling, Hordeum, Host-Parasite
	Relations, Humans, Image Interpretation, Informatics, Kinetics, Magnetic
	Resonance Spectroscopy, Models, Nanotechnology, Non-P.H.S., Non-U.S.
	Gov't, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern Recognition,
	Plant, Plants, Predictive Value of Tests, Protein, Research Support,
	Selection (Genetics), Sequence Alignment, Sequence Analysis, Sequence
	Homology, Skin, Software, Statistical, Theoretical, Thermodynamics,
	U.S. Gov't, Viral Proteins, X-Ray, 15581813},
  pii = {S1361-8415(04)00059-3},
  url = {http://dx.doi.org/10.1016/j.media.2004.07.003}
}

@book{Golub1996Matrix,
  author = {Golub, G. H. and Van Loan, C. F.},
  title = {Matrix Computations},
  edition = {Third},
  publisher = {Johns Hopkins University Press},
  address = {Baltimore, MD, USA},
  year = {1996},
  isbn = {0-8018-5414-8}
}

@article{Golub2010Counterpoint,
  author = {Golub, T.},
  title = {Counterpoint: Data first.},
  journal = {Nature},
  year = {2010},
  volume = {464},
  pages = {679},
  number = {7289},
  month = apr,
  doi = {10.1038/464679a},
  institution = {Cancer Program at the Broad Institute, Cambridge, Massachusetts 02142,
	USA. golub@broadinstitute.org},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {464679a},
  pmid = {20360719},
  timestamp = {2010.10.07},
  url = {http://dx.doi.org/10.1038/464679a}
}

@article{Golub1999Molecular,
  author = {Golub, T. R. and Slonim, D. K. and Tamayo, P. and Huard, C. and Gaasenbeek,
	M. and Mesirov, J. P. and Coller, H. and Loh, M. L. and Downing,
	J. R. and Caligiuri, M. A. and Bloomfield, C. D. and Lander, E. S.},
  title = {Molecular classification of cancer: class discovery and class prediction
	by gene expression monitoring},
  journal = {Science},
  year = {1999},
  volume = {286},
  pages = {531--537},
  number = {5439},
  abstract = {Although cancer classification has improved over the past 30 years,
	there has been no general approach for identifying new cancer classes
	(class discovery) or for assigning tumors to known classes (class
	prediction). Here, a generic approach to cancer classification based
	on gene expression monitoring by DNA microarrays is described and
	applied to human acute leukemias as a test case. A class discovery
	procedure automatically discovered the distinction between acute
	myeloid leukemia (AML) and acute lymphoblastic leukemia (ALL) without
	previous knowledge of these classes. An automatically derived class
	predictor was able to determine the class of new leukemia cases.
	The results demonstrate the feasibility of cancer classification
	based solely on gene expression monitoring and suggest a general
	strategy for discovering and predicting cancer classes for other
	types of cancer, independent of previous biological knowledge.},
  doi = {10.1126/science.286.5439.531},
  pdf = {../local/Golub1999Molecular.pdf},
  file = {Golub1999Molecular.pdf:Golub1999Molecular.pdf:PDF},
  keywords = {csbcbook, csbcbook-ch3, csbcbook-ch4},
  subject = {microarray},
  url = {http://dx.doi.org/10.1126/science.286.5439.531}
}

@article{Gomez2003Learning,
  author = {Gomez, S. M. and Noble, W. S. and Rzhetsky, A.},
  title = {Learning to predict protein-protein interactions from protein sequences},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1875--1881},
  number = {15},
  abstract = {In order to understand the molecular machinery of the cell, we need
	to know about the multitude of protein-protein interactions that
	allow the cell to function. {H}igh-throughput technologies provide
	some data about these interactions, but so far that data is fairly
	noisy. {T}herefore, computational techniques for predicting protein-protein
	interactions could be of significant value. {O}ne approach to predicting
	interactions in silico is to produce from first principles a detailed
	model of a candidate interaction. {W}e take an alternative approach,
	employing a relatively simple model that learns dynamically from
	a large collection of data. {I}n this work, we describe an attraction-repulsion
	model, in which the interaction between a pair of proteins is represented
	as the sum of attractive and repulsive forces associated with small,
	domain- or motif-sized features along the length of each protein.
	{T}he model is discriminative, learning simultaneously from known
	interactions and from pairs of proteins that are known (or suspected)
	not to interact. {T}he model is efficient to compute and scales well
	to very large collections of data. {I}n a cross-validated comparison
	using known yeast interactions, the attraction-repulsion method performs
	better than several competing techniques.},
  pdf = {../local/Gomez2003Learning.pdf},
  file = {Gomez2003Learning.pdf:local/Gomez2003Learning.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/15/1875}
}

@article{Gong2004Picking,
  author = {Gong, D. and Ferrell, J. E.},
  title = {Picking a winner: new mechanistic insights into the design of effective
	si{RNA}s.},
  journal = {Trends {B}iotechnol.},
  year = {2004},
  volume = {22},
  pages = {451--454},
  number = {9},
  month = sep,
  abstract = {Recent work has shown that the efficacy of a small interfering {RNA}
	(si{RNA}) for silencing gene expression is a function of how easy
	it is to unwind the si{RNA} from the 5'-antisense end. {B}ased on
	these insights, one group has designed an algorithm that substantially
	improves the odds of picking an effective si{RNA}, and two groups
	have shown that 'forked' or 'frayed' si{RNA}s, which should be easier
	to unwind from the 5'-antisense end, are more effective than conventional
	si{RNA}s. {T}hese strategies represent important steps towards the
	rational design of effective si{RNA}s.},
  doi = {10.1016/j.tibtech.2004.07.008},
  keywords = {sirna},
  pii = {S0167-7799(04)00201-X},
  url = {http://dx.doi.org/10.1016/j.tibtech.2004.07.008}
}

@article{Gonzalez2009Highlighting,
  author = {Gonz{\'a}lez, I. and D{\'e}jean, S. and Martin, P. G. P. and Gon{\c{c}}alves,
	O. and Besse, P. and Baccini, A.},
  title = {Highlighting relationships between heterogeneous biological data
	through graphical displays based on regularized canonical correlation
	analysis.},
  journal = {J Biol Syst},
  year = {2009},
  volume = {17},
  pages = {173--199},
  owner = {jp},
  timestamp = {2012.02.29}
}

@article{Good1993Rapid,
  author = {Good, A. C. and Richards, W. G.},
  title = {Rapid {E}valuation of {M}olecular {S}hape {S}imilarity {U}sing {G}aussian
	{F}unctions},
  journal = {J Chem Inf Comput Sci},
  year = {1993},
  volume = {33},
  pages = {112--116},
  owner = {mahe},
  timestamp = {2006.09.01}
}

@book{Gordon1999Classification,
  author    = {Gordon, A. D.},
  title     = {Classification},
  publisher = {Chapman \& Hall/CRC},
  year      = {1999},
  owner     = {jp},
  timestamp = {2011.12.29}
}

@article{Gordon2003Sequence,
  author = {Gordon, L. and Chervonenkis, A. Y. and Gammerman, A. J. and Shahmuradov,
	I. A. and Solovyev, V. V.},
  title = {Sequence alignment kernel for recognition of promoter regions},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1964--1971},
  number = {15},
  abstract = {In this paper we propose a new method for recognition of prokaryotic
	promoter regions with startpoints of transcription. {T}he method
	is based on {S}equence {A}lignment {K}ernel, a function reflecting
	the quantitative measure of match between two sequences. {T}his kernel
	function is further used in {D}ual {SVM}, which performs the recognition.
	{S}everal recognition methods have been trained and tested on positive
	data set, consisting of 669 {sigma}70-promoter regions with known
	transcription startpoints of {E}scherichia coli and two negative
	data sets of 709 examples each, taken from coding and non-coding
	regions of the same genome. {T}he results show that our method performs
	well and achieves 16.5\% average error rate on positive \& coding negative
	data and 18.6\% average error rate on positive \& non-coding negative
	data. {A}vailability:{T}he demo version of our method is accessible
	from our website http://mendel.cs.rhul.ac.uk/},
  pdf = {../local/Gordon2003Sequence.pdf},
  file = {Gordon2003Sequence.pdf:local/Gordon2003Sequence.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/15/1964}
}

@article{Goto1998LIGAND:,
  author = {S. Goto and T. Nishioka and M. Kanehisa},
  title = {{LIGAND}: chemical database for enzyme reactions},
  journal = {Bioinformatics},
  year = {1998},
  volume = {14},
  pages = {591--599},
  number = {7},
  pdf = {../local/goto98.pdf},
  file = {goto98.pdf:local/goto98.pdf:PDF},
  subject = {bionet},
  url = {http://bioinformatics.oupjournals.org/cgi/reprint/14/7/591}
}

@article{Goto2002LIGAND:,
  author = {S. Goto and Y. Okuno and M. Hattori and T. Nishioka and M. Kanehisa},
  title = {{LIGAND}: database of chemical compounds and reactions in biological
	pathways},
  journal = {Nucleic {A}cids {R}es.},
  year = {2002},
  volume = {30},
  pages = {402--404},
  number = {1},
  pdf = {../local/goto02.pdf},
  file = {goto02.pdf:local/goto02.pdf:PDF},
  subject = {bionet},
  url = {http://nar.oupjournals.org/cgi/content/full/30/1/402}
}

@article{Gotusso1957Fortran,
  author = {L. Gotusso and A. T. Santolini},
  title = {A {Fortran IV} quasi decision algorithm for the {P}-equivalence of two
	matrices},
  journal = {Calcolo},
  year = {1957},
  volume = {5},
  pages = {17--35},
  review-note = {NOTE(review): year 1957 looks suspect for a paper about Fortran IV
	-- verify year against the journal volume (TODO confirm)}
}

@article{Gower1971general,
  author    = {Gower, J. C.},
  title     = {A general coefficient of similarity and some of its properties},
  journal   = {Biometrics},
  volume    = {27},
  number    = {4},
  pages     = {857--871},
  year      = {1971},
  pdf       = {../local/Gower1971general.pdf},
  file      = {Gower1971general.pdf:Gower1971general.pdf:PDF},
  owner     = {jp},
  timestamp = {2013.01.31}
}

@article{Graumann2004Applicability,
  author = {Johannes Graumann and Leslie A Dunipace and Jae Hong Seol and W.
	Hayes McDonald and John R Yates and Barbara J Wold and Raymond J
	Deshaies},
  title = {Applicability of tandem affinity purification {M}ud{PIT} to pathway
	proteomics in yeast.},
  journal = {Mol {C}ell {P}roteomics},
  year = {2004},
  volume = {3},
  pages = {226--237},
  number = {3},
  month = mar,
  abstract = {A combined multidimensional chromatography-mass spectrometry approach
	known as "{M}ud{PIT}" enables rapid identification of proteins that
	interact with a tagged bait while bypassing some of the problems
	associated with analysis of polypeptides excised from {SDS}-polyacrylamide
	gels. {H}owever, the reproducibility, success rate, and applicability
	of {M}ud{PIT} to the rapid characterization of dozens of proteins
	have not been reported. {W}e show here that {M}ud{PIT} reproducibly
	identified bona fide partners for budding yeast {G}cn5p. {A}dditionally,
	we successfully applied {M}ud{PIT} to rapidly screen through a collection
	of tagged polypeptides to identify new protein interactions. {T}wenty-five
	proteins involved in transcription and progression through mitosis
	were modified with a new tandem affinity purification ({TAP}) tag.
	{TAP}-{M}ud{PIT} analysis of 22 yeast strains that expressed these
	tagged proteins uncovered known or likely interacting partners for
	21 of the baits, a figure that compares favorably with traditional
	approaches. {T}he proteins identified here comprised 102 previously
	known and 279 potential physical interactions. {E}ven for the intensively
	studied {S}wi2p/{S}nf2p, the catalytic subunit of the {S}wi/{S}nf
	chromatin remodeling complex, our analysis uncovered a new interacting
	protein, {R}tt102p. {R}eciprocal tagging and {TAP}-{M}ud{PIT} analysis
	of {R}tt102p revealed subunits of both the {S}wi/{S}nf and {RSC}
	complexes, identifying {R}tt102p as a common interactor with, and
	possible integral component of, these chromatin remodeling machines.
	{O}ur experience indicates it is feasible for an investigator working
	with a single ion trap instrument in a conventional molecular/cellular
	biology laboratory to carry out proteomic characterization of a pathway,
	organelle, or process (i.e. "pathway proteomics") by systematic application
	of {TAP}-{M}ud{PIT}.},
  doi = {10.1074/mcp.M300099-MCP200},
  pdf = {../local/Graumann2004Applicability.pdf},
  file = {Graumann2004Applicability.pdf:local/Graumann2004Applicability.pdf:PDF},
  keywords = {Affinity Labels, Comparative Study, Electrospray Ionization, Genetic,
	Mass, Mitosis, Non-P.H.S., Non-U.S. Gov't, P.H.S., Protein Interaction
	Mapping, Proteome, Proteomics, Research Support, Saccharomyces cerevisiae,
	Saccharomyces cerevisiae Proteins, Signal Transduction, Spectrometry,
	Transcription, U.S. Gov't},
  owner = {vert},
  pii = {M300099-MCP200},
  pmid = {14660704},
  url = {http://dx.doi.org/10.1074/mcp.M300099-MCP200}
}

@article{Green1978Conjoint,
  author               = {Green, Paul E. and Srinivasan, V.},
  title                = {Conjoint Analysis in Consumer Research: Issues and Outlook},
  journal              = {The Journal of Consumer Research},
  volume               = {5},
  number               = {2},
  pages                = {103--123},
  year                 = {1978},
  abstract             = {Since 1971 conjoint analysis has been applied to a wide variety of
	problems in consumer research. This paper discusses various issues
	involved in implementing conjoint analysis and describes some new
	technical developments and application areas for the methodology.},
  keywords             = {conjoint\_analysis},
  url                  = {http://www.jstor.org/stable/2489001},
  citeulike-article-id = {5239458},
  citeulike-linkout-0  = {http://www.jstor.org/stable/2489001},
  posted-at            = {2009-07-23 16:00:51},
  priority             = {0}
}

@article{Greenshtein2004Persistence,
  author = {Greenshtein, E. and Ritov, Y.},
  title = {Persistence in high-dimensional linear predictor selection and the
	virtue of overparametrization},
  journal = {Bernoulli},
  year = {2004},
  volume = {10},
  pages = {971--988},
  number = {6},
  abstract = {Let $Z^i=(Y^i,X_1^i,\dots,X_m^i)$, $i=1,\dots,n$, be independent and
	identically distributed random vectors, $Z^i \sim F, \;F \in {\cal
	F}$. It is desired to predict Y by $\sum \beta_j X_j$, where $(\beta_1,\dots,\beta_m)
	\in B_n \subseteq \mathbb{R}^m$, under a prediction loss. Suppose that $m=n^\alpha$,
	$\alpha>1$, that is, there are many more explanatory variables than
	observations. We consider sets $B_n$ restricted by the maximal number
	of non-zero coefficients of their members, or by their l1 radius.
	We study the following asymptotic question: how 'large' may the set
	$B_n$ be, so that it is still possible to select empirically a predictor
	whose risk under F is close to that of the best predictor in the
	set? Sharp bounds for orders of magnitudes are given under various
	assumptions on ${\cal F}$. Algorithmic complexity of the ensuing
	procedures is also studied. The main message of this paper and the
	implications of the orders derived are that under various sparsity
	assumptions on the optimal predictor there is 'asymptotically no
	harm' in introducing many more explanatory variables than observations.
	Furthermore, such practice can be beneficial in comparison with a
	procedure that screens in advance a small subset of explanatory variables.
	Another main result is that 'lasso' procedures, that is, optimization
	under l1 constraints, could be efficient in finding optimal sparse
	predictors in high dimensions.},
  doi = {10.3150/bj/1106314846},
  pdf = {../local/Greenshtein2004Persistence.pdf},
  file = {Greenshtein2004Persistence.pdf:local/Greenshtein2004Persistence.pdf:PDF},
  owner = {jp},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.3150/bj/1106314846}
}

@article{Gribskov1990Profile,
  author = {Gribskov, M. and L{\"u}thy, R. and Eisenberg, D.},
  title = {Profile {A}nalysis},
  journal = {Methods in {E}nzymology},
  year = {1990},
  volume = {183},
  pages = {146--159}
}

@article{Gribskov1996Use,
  author  = {Gribskov, M. and Robinson, N. L.},
  title   = {Use of receiver operating characteristic ({ROC}) analysis to evaluate
	sequence matching},
  journal = {Comput. {C}hem.},
  volume  = {20},
  number  = {1},
  pages   = {25--33},
  year    = {1996}
}

@article{Grimson2007MicroRNA,
  author = {Andrew Grimson and Kyle Kai-How Farh and Wendy K Johnston and Philip
	Garrett-Engele and Lee P Lim and David P Bartel},
  title = {{MicroRNA} targeting specificity in mammals: determinants beyond seed
	pairing.},
  journal = {Mol Cell},
  year = {2007},
  volume = {27},
  pages = {91--105},
  number = {1},
  month = jul,
  abstract = {Mammalian microRNAs (miRNAs) pair to 3'UTRs of mRNAs to direct their
	posttranscriptional repression. Important for target recognition
	are approximately 7 nt sites that match the seed region of the miRNA.
	However, these seed matches are not always sufficient for repression,
	indicating that other characteristics help specify targeting. By
	combining computational and experimental approaches, we uncovered
	five general features of site context that boost site efficacy: AU-rich
	nucleotide composition near the site, proximity to sites for coexpressed
	miRNAs (which leads to cooperative action), proximity to residues
	pairing to miRNA nucleotides 13-16, positioning within the 3'UTR
	at least 15 nt from the stop codon, and positioning away from the
	center of long UTRs. A model combining these context determinants
	quantitatively predicts site performance both for exogenously added
	miRNAs and for endogenous miRNA-message interactions. Because it
	predicts site efficacy without recourse to evolutionary conservation,
	the model also identifies effective nonconserved sites and siRNA
	off-targets.},
  doi = {10.1016/j.molcel.2007.06.017},
  pdf = {../local/Grimson2007MicroRNA.pdf},
  file = {Grimson2007MicroRNA.pdf:Grimson2007MicroRNA.pdf:PDF},
  institution = {Howard Hughes Medical Institute, Massachusetts Institute of Technology,
	Cambridge, MA 02139, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S1097-2765(07)00407-8},
  pmid = {17612493},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1016/j.molcel.2007.06.017}
}

@article{Groebe1988Characterization,
  author = {Groebe, D. R. and Uhlenbeck, O. C.},
  title = {Characterization of {RNA} hairpin loop stability.},
  journal = {Nucleic {A}cids {R}es.},
  year = {1988},
  volume = {16},
  pages = {11725--11735},
  number = {24},
  month = dec,
  abstract = {Fifteen {RNA} hairpins that share the same stem sequence and have
	homopolymer loops of {A}, {C} and {U} residues which vary in length
	from three to nine nucleotides were synthesized and their thermal
	stabilities determined. {T}m varies as a function of loop size but
	is almost independent of loop composition. {L}oops of four or five
	nucleotides are found to be the most stable loop size. {T}his is
	consistent with the observation that four-membered loops are the
	most prevalent loop size in 16{S}-like {RNA}s. {T}he contribution
	of each loop to hairpin stability was calculated by subtracting the
	known contribution of the helical stem. {T}hese data should be useful
	for predicting the stability of other hairpins.}
}

@article{Gross2000Identification,
  author = {Gross, C. and Kelleher, M. and Iyer, V. R. and Brown, P. O. and Winge, D. R.},
  title = {Identification of the copper regulon in {Saccharomyces} cerevisiae
	by {DNA} microarrays},
  journal = {J. {B}iol. {C}hem.},
  year = {2000},
  volume = {275},
  pages = {32310--32316},
  number = {41},
  pdf = {../local/gros00.pdf},
  file = {gros00.pdf:local/gros00.pdf:PDF},
  subject = {microarray},
  url = {http://www.jbc.org/cgi/content/full/275/41/32310}
}

@inproceedings{Grundy1998Family-based,
  author    = {Grundy, W. N.},
  title     = {Family-based {H}omology {D}etection via {P}airwise {S}equence {C}omparison},
  booktitle = {Proceedings of the {S}econd {A}nnual {I}nternational {C}onference
	on {C}omputational {M}olecular {B}iology, {M}arch 22-25},
  pages     = {94--100},
  year      = {1998},
  subject   = {biocasp},
  url       = {http://www.cs.columbia.edu/~bgrundy/papers/compare.html},
  pdf       = {../local/grun98.pdf},
  file      = {grun98.pdf:local/grun98.pdf:PDF}
}

@article{Guba2006Chemogenomics,
  author = {Guba, W.},
  title = {Chemogenomics strategies for {G}-protein coupled receptor hit finding.},
  journal = {Ernst Schering Res Found Workshop},
  year = {2006},
  volume = {58},
  pages = {21--29},
  abstract = {Targeting protein superfamilies via chemogenomics is based on a similarity
	clustering of gene sequences and molecular structures of ligands.
	Both target and ligand clusters are linked by generating binding
	affinity profiles of chemotypes vs a target panel. The application
	of this multidimensional similarity paradigm will be described in
	the context of Lead Generation to identify novel chemical hit classes
	for G-protein coupled receptors.},
  doi = {10.1007/3-540-37635-6_2},
  keywords = {chemogenomics},
  owner = {laurent},
  pmid = {16708996},
  timestamp = {2007.07.30}
}

@article{Guedj2011refined,
  author = {Guedj, M. and Marisa, L. and {de Reynies}, A. and Orsetti, B. and
	Schiappa, R. and Bibeau, F. and Macgrogan, G. and Lerebours, F. and
	Finetti, P. and Longy, M. and Bertheau, P. and Bertrand, F. and Bonnet,
	F. and Martin, A. L. and Feugeas, J. P. and Bi{\`e}che, I. and Lehmann-Che,
	J. and Lidereau, R. and Birnbaum, D. and Bertucci, F. and {de Th{\'e}},
	H. and Theillet, C.},
  title = {A refined molecular taxonomy of breast cancer.},
  journal = {Oncogene},
  year = {2011},
  month = jul,
  abstract = {The current histoclinical breast cancer classification is simple but
	imprecise. Several molecular classifications of breast cancers based
	on expression profiling have been proposed as alternatives. However,
	their reliability and clinical utility have been repeatedly questioned,
	notably because most of them were derived from relatively small initial
	patient populations. We analyzed the transcriptomes of 537 breast
	tumors using three unsupervised classification methods. A core subset
	of 355 tumors was assigned to six clusters by all three methods.
	These six subgroups overlapped with previously defined molecular
	classes of breast cancer, but also showed important differences,
	notably the absence of an ERBB2 subgroup and the division of the
	large luminal ER+ group into four subgroups, two of them being highly
	proliferative. Of the six subgroups, four were ER+/PR+/AR+, one was
	ER-/PR-/AR+ and one was triple negative (AR-/ER-/PR-). ERBB2-amplified
	tumors were split between the ER-/PR-/AR+ subgroup and the highly
	proliferative ER+ LumC subgroup. Importantly, each of these six molecular
	subgroups showed specific copy-number alterations. Gene expression
	changes were correlated to specific signaling pathways. Each of these
	six subgroups showed very significant differences in tumor grade,
	metastatic sites, relapse-free survival or response to chemotherapy.
	All these findings were validated on large external datasets including
	more than 3000 tumors. Our data thus indicate that these six molecular
	subgroups represent well-defined clinico-biological entities of breast
	cancer. Their identification should facilitate the detection of novel
	prognostic factors or therapeutical targets in breast cancer. Oncogene
	advance online publication, 25 July 2011; doi:10.1038/onc.2011.301.},
  doi = {10.1038/onc.2011.301},
  pdf = {../local/Guedj2011refined.pdf},
  file = {Guedj2011refined.pdf:Guedj2011refined.pdf:PDF},
  institution = {Ligue Nationale Contre le Cancer, Cartes d'Identit{\'e} des Tumeurs program,
	Paris, France.},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {jp},
  pii = {onc2011301},
  pmid = {21785460},
  timestamp = {2011.12.01},
  url = {http://dx.doi.org/10.1038/onc.2011.301}
}

@article{Guelzim2002Topological,
  author = {Guelzim, N. and Bottani, S. and Bourgine, P. and K{\'e}p{\`e}s, F.},
  title = {Topological and causal structure of the yeast transcriptional regulatory
	network},
  journal = {Nat. {G}enet.},
  year = {2002},
  volume = {31},
  pages = {60--63},
  number = {1},
  pdf = {../local/guel02.pdf},
  file = {guel02.pdf:local/guel02.pdf:PDF},
  subject = {bionet},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/ng/journal/v31/n1/full/ng873.html}
}

@article{Guermeur2002Combining,
  author = {Guermeur, Y.},
  title = {Combining {D}iscriminant {M}odels with {N}ew {M}ulti-{C}lass {SVM}s},
  journal = {Pattern {A}nal. {A}ppl.},
  year = {2002},
  volume = {5},
  pages = {168--179},
  number = {2},
  abstract = {The idea of performing model combination, instead of model selection,
	has a long theoretical background in statistics. {H}owever, making
	use of theoretical results is ordinarily subject to the satisfaction
	of strong hypotheses (weak error correlation, availability of large
	training sets, possibility to rerun the training procedure an arbitrary
	number of times, etc.). {I}n contrast, the practitioner is frequently
	faced with the problem of combining a given set of pre-trained classifiers,
	with highly correlated errors, using only a small training sample.
	{O}verfitting is then the main risk, which cannot be overcome but
	with a strict complexity control of the combiner selected. {T}his
	suggests that {SVM}s should be well suited for these difficult situations.
	{I}nvestigating this idea, we introduce a family of multi-class {SVM}s
	and assess them as ensemble methods on a real-world problem. {T}his
	task, protein secondary structure prediction, is an open problem
	in biocomputing for which model combination appears to be an issue
	of central importance. {E}xperimental evidence highlights the gain
	in quality resulting from combining some of the most widely used
	prediction methods with our {SVM}s rather than with the ensemble
	methods traditionally used in the field. {T}he gain increases when
	the outputs of the combiners are post-processed with a {DP} algorithm.},
  doi = {10.1007/s100440200015},
  pdf = {../local/Guermeur2002Combining.pdf},
  file = {Guermeur2002Combining.pdf:local/Guermeur2002Combining.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1007/s100440200015}
}

@incollection{Guermeur2004kernel,
  author = {Guermeur, Y. and Lifschitz, A. and Vert, R.},
  title = {A kernel for protein secondary structure prediction},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  pages = {193--206},
  keywords = {biosvm},
  owner = {vert}
}

@article{Guermeur2004Combining,
  author = {Guermeur, Y. and Pollastri, G. and Elisseeff, A. and Zelus, D. and
	Paugam-Moisy, H. and Baldi, P.},
  title = {Combining protein secondary structure prediction models with ensemble
	methods of optimal complexity},
  journal = {Neurocomputing},
  year = {2004},
  volume = {56},
  pages = {305--327},
  abstract = {Many sophisticated methods are currently available to perform protein
	secondary structure prediction. {S}ince they are frequently based
	on different principles, and different knowledge sources, significant
	benefits can be expected from combining them. {H}owever, the choice
	of an appropriate combiner appears to be an issue in its own right.
	{T}he first difficulty to overcome when combining prediction methods
	is overfitting. {T}his is the reason why we investigate the implementation
	of {S}upport {V}ector {M}achines to perform the task. {A} family
	of multi-class {SVM}s is introduced. {T}wo of these machines are
	used to combine some of the current best protein secondary structure
	prediction methods. {T}heir performance is consistently superior
	to the performance of the ensemble methods traditionally used in
	the field. {T}hey also outperform the decomposition approaches based
	on bi-class {SVM}s. {F}urthermore, initial experimental evidence
	suggests that their outputs could be processed by the biologist to
	perform higher-level treatments.},
  doi = {10.1016/j.neucom.2003.10.004},
  pdf = {../local/Guermeur2004Combining.pdf},
  file = {Guermeur2004Combining.pdf:local/Guermeur2004Combining.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.neucom.2003.10.004}
}

@article{Guiot2007Morphological,
  author = {Caterina Guiot and Pier P Delsanto and Thomas S Deisboeck},
  title = {Morphological instability and cancer invasion: a `splashing water
	drop' analogy.},
  journal = {Theor. Biol. Med. Model.},
  year = {2007},
  volume = {4},
  pages = {4},
  abstract = {BACKGROUND: Tissue invasion, one of the hallmarks of cancer, is a
	major clinical problem. Recent studies suggest that the process of
	invasion is driven at least in part by a set of physical forces that
	may be susceptible to mathematical modelling which could have practical
	clinical value. MODEL AND CONCLUSION: We present an analogy between
	two unrelated instabilities. One is caused by the impact of a drop
	of water on a solid surface while the other concerns a tumor that
	develops invasive cellular branches into the surrounding host tissue.
	In spite of the apparent abstractness of the idea, it yields a very
	practical result, i.e. an index that predicts tumor invasion based
	on a few measurable parameters. We discuss its application in the
	context of experimental data and suggest potential clinical implications.},
  doi = {10.1186/1742-4682-4-4},
  institution = {Dip. Neuroscience and CNISM, Universit{\`a} di Torino, Italy. caterina.guiot@unito.it},
  keywords = {Animals; Biomechanics; Cell Adhesion; Humans; Mathematics; Models,
	Biological; Neoplasm Invasiveness; Neoplasms, pathology; Surface
	Tension},
  language = {eng},
  medline-pst = {epublish},
  owner = {philippe},
  pii = {1742-4682-4-4},
  pmid = {17254360},
  timestamp = {2011.07.15},
  url = {http://dx.doi.org/10.1186/1742-4682-4-4}
}

@article{Gulukota1997Two,
  author = {K. Gulukota and J. Sidney and A. Sette and C. DeLisi},
  title = {Two complementary methods for predicting peptides binding major histocompatibility
	complex molecules.},
  journal = {J. Mol. Biol.},
  year = {1997},
  volume = {267},
  pages = {1258--1267},
  number = {5},
  month = apr,
  abstract = {Peptides that bind to major histocompatibility complex products (MHC)
	are known to exhibit certain sequence motifs which, though common,
	are neither necessary nor sufficient for binding: MHCs bind certain
	peptides that do not have the characteristic motifs and only about
	30\% of the peptides having the required motif, bind. In order to
	develop and test more accurate methods we measured the binding affinity
	of 463 nonamer peptides to HLA-A2.1. We describe two methods for
	predicting whether a given peptide will bind to an MHC and apply
	them to these peptides. One method is based on simulating a neural
	network and another, called the polynomial method, is based on statistical
	parameter estimation assuming independent binding of the side-chains
	of residues. We compare these methods with each other and with standard
	motif-based methods. The two methods are complementary, and both
	are superior to sequence motifs. The neural net is superior to simple
	motif searches in eliminating false positives. Its behavior can be
	coarsely tuned to the strength of binding desired and it is extendable
	in a straightforward fashion to other alleles. The polynomial method,
	on the other hand, has high sensitivity and is a superior method
	for eliminating false negatives. We discuss the validity of the independent
	binding assumption in such predictions.},
  doi = {10.1006/jmbi.1997.0937},
  keywords = {Artificial Intelligence; Computing Methodologies; HLA-A2 Antigen;
	Neural Networks (Computer); Oligopeptides; Protein Binding; Reproducibility
	of Results},
  owner = {laurent},
  pii = {S0022-2836(97)90937-2},
  pmid = {9150410},
  timestamp = {2007.01.27},
  url = {http://dx.doi.org/10.1006/jmbi.1997.0937}
}

@article{Gunderson2004Decoding,
  author = {Kevin L Gunderson and Semyon Kruglyak and Michael S Graige and Francisco
	Garcia and Bahram G Kermani and Chanfeng Zhao and Diping Che and
	Todd Dickinson and Eliza Wickham and Jim Bierle and Dennis Doucet
	and Monika Milewski and Robert Yang and Chris Siegmund and Juergen
	Haas and Lixin Zhou and Arnold Oliphant and Jian-Bing Fan and Steven
	Barnard and Mark S Chee},
  title = {Decoding randomly ordered DNA arrays.},
  journal = {Genome Res},
  year = {2004},
  volume = {14},
  pages = {870--877},
  number = {5},
  month = {May},
  abstract = {We have developed a simple and efficient algorithm to identify each
	member of a large collection of DNA-linked objects through the use
	of hybridization, and have applied it to the manufacture of randomly
	assembled arrays of beads in wells. Once the algorithm has been used
	to determine the identity of each bead, the microarray can be used
	in a wide variety of applications, including single nucleotide polymorphism
	genotyping and gene expression profiling. The algorithm requires
	only a few labels and several sequential hybridizations to identify
	thousands of different DNA sequences with great accuracy. We have
	decoded tens of thousands of arrays, each with 1520 sequences represented
	at approximately 30-fold redundancy by up to approximately 50,000
	beads, with a median error rate of $<1 \times 10^{-4}$ per bead. The approach
	makes use of error checking codes and provides, for the first time,
	a direct functional quality control of every element of each array
	that is manufactured. The algorithm can be applied to any spatially
	fixed collection of objects or molecules that are associated with
	specific DNA sequences.},
  doi = {10.1101/gr.2255804},
  institution = {Illumina, Inc., San Diego, California 92121, USA.},
  keywords = {Algorithms; Computational Biology, methods; Oligonucleotide Array
	Sequence Analysis, methods/trends; Random Allocation; Research Design;
	Sequence Analysis, DNA, methods; Silicon Dioxide, chemistry},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {2255804},
  pmid = {15078854},
  timestamp = {2010.08.04},
  url = {http://dx.doi.org/10.1101/gr.2255804}
}

@article{Guo2005Learning,
  author = {Guodong Guo and Charles R Dyer},
  title = {Learning from examples in the small sample case: face expression
	recognition.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2005},
  volume = {35},
  pages = {477--488},
  number = {3},
  month = {Jun},
  abstract = {Example-based learning for computer vision can be difficult when a
	large number of examples to represent each pattern or object class
	is not available. {I}n such situations, learning from a small number
	of samples is of practical value. {T}o study this issue, the task
	of face expression recognition with a small number of training images
	of each expression is considered. {A} new technique based on linear
	programming for both feature selection and classifier training is
	introduced. {A} pairwise framework for feature selection, instead
	of using all classes simultaneously, is presented. {E}xperimental
	results compare the method with three others: a simplified {B}ayes
	classifier, support vector machine, and {A}da{B}oost. {F}inally,
	each algorithm is analyzed and a new categorization of these algorithms
	is given, especially for learning from examples in the small sample
	case.}
}

@article{Guo2005Feature,
  author = {Hong Guo and Lindsay B Jack and Asoke K Nandi},
  title = {Feature generation using genetic programming with application to
	fault classification.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2005},
  volume = {35},
  pages = {89--99},
  number = {1},
  month = {Feb},
  abstract = {One of the major challenges in pattern recognition problems is the
	feature extraction process which derives new features from existing
	features, or directly from raw data in order to reduce the cost of
	computation during the classification process, while improving classifier
	efficiency. {M}ost current feature extraction techniques transform
	the original pattern vector into a new vector with increased discrimination
	capability but lower dimensionality. {T}his is conducted within a
	predefined feature space, and thus, has limited searching power.
	{G}enetic programming ({GP}) can generate new features from the original
	dataset without prior knowledge of the probabilistic distribution.
	{I}n this paper, a {GP}-based approach is developed for feature extraction
	from raw vibration data recorded from a rotating machine with six
	different conditions. {T}he created features are then used as the
	inputs to a neural classifier for the identification of six bearing
	conditions. {E}xperimental results demonstrate the ability of {GP}
	to discover automatically the different bearing conditions using
	features expressed in the form of nonlinear functions. {F}urthermore,
	four sets of results--using {GP} extracted features with artificial
	neural networks ({ANN}) and support vector machines ({SVM}), as well
	as traditional features with {ANN} and {SVM}--have been obtained.
	{T}his {GP}-based approach is used for bearing fault classification
	for the first time and exhibits superior searching power over other
	techniques. {A}dditionally, it significantly reduces the time for
	computation compared with genetic algorithm ({GA}), therefore, makes
	a more practical realization of the solution.}
}

@article{Guo2004novel,
  author = {Guo, J. and Chen, H. and Sun, Z. and Lin, Y.},
  title = {A novel method for protein secondary structure prediction using dual-layer
	{SVM} and profiles},
  journal = {Proteins},
  year = {2004},
  volume = {54},
  pages = {738--743},
  number = {4},
  abstract = {A high-performance method was developed for protein secondary structure
	prediction based on the dual-layer support vector machine ({SVM})
	and position-specific scoring matrices ({PSSM}s). {SVM} is a new
	machine learning technology that has been successfully applied in
	solving problems in the field of bioinformatics. {T}he {SVM}'s performance
	is usually better than that of traditional machine learning approaches.
	{T}he performance was further improved by combining {PSSM} profiles
	with the {SVM} analysis. {T}he {PSSM}s were generated from {PSI}-{BLAST}
	profiles, which contain important evolution information. {T}he final
	prediction results were generated from the second {SVM} layer output.
	{O}n the {CB}513 data set, the three-state overall per-residue accuracy,
	{Q}3, reached 75.2\%, while segment overlap ({SOV}) accuracy increased
	to 80.0\%. {O}n the {CB}396 data set, the {Q}3 of our method reached
	74.0\% and the {SOV} reached 78.1\%. {A} web server utilizing the method
	has been constructed and is available at http://www.bioinfo.tsinghua.edu.cn/pmsvm.},
  doi = {10.1002/prot.10634},
  pdf = {../local/Guo2004novel.pdf},
  file = {Guo2004novel.pdf:local/Guo2004novel.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/prot.10634}
}

@article{Guo2005novel,
  author = {Ting Guo and Yanxin Shi and Zhirong Sun},
  title = {A novel statistical ligand-binding site predictor: application to
	{ATP}-binding sites.},
  journal = {Protein {E}ng {D}es {S}el},
  year = {2005},
  volume = {18},
  pages = {65--70},
  number = {2},
  month = {Feb},
  abstract = {Structural genomics initiatives are leading to rapid growth in newly
	determined protein 3{D} structures, the functional characterization
	of which may still be inadequate. {A}s an attempt to provide insights
	into the possible roles of the emerging proteins whose structures
	are available and/or to complement biochemical research, a variety
	of computational methods have been developed for the screening and
	prediction of ligand-binding sites in raw structural data, including
	statistical pattern classification techniques. {I}n this paper, we
	report a novel statistical descriptor (the {O}riented {S}hell {M}odel)
	for protein ligand-binding sites, which utilizes the distance and
	angular position distribution of various structural and physicochemical
	features present in immediate proximity to the center of a binding
	site. {U}sing the support vector machine ({SVM}) as the classifier,
	our model identified 69\% of the {ATP}-binding sites in whole-protein
	scanning tests and in eukaryotic proteins the accuracy is particularly
	high. {W}e propose that this feature extraction and machine learning
	procedure can screen out ligand-binding-capable protein candidates
	and can yield valuable biochemical information for individual proteins.},
  doi = {10.1093/protein/gzi006},
  pdf = {../local/Guo2005novel.pdf},
  file = {Guo2005novel.pdf:local/Guo2005novel.pdf:PDF},
  keywords = {biosvm},
  pii = {gzi006},
  url = {http://dx.doi.org/10.1093/protein/gzi006}
}

@article{Guo2007Edge-based,
  author = {Guo, Z. and Wang, L. and Li, Y. and Gong, X. and Yao, C. and Ma,
	W. and Wang, D. and Li, Y. and Zhu, J. and Zhang, M. and Yang, D.
	and Rao, S. and Wan, J.},
  title = {Edge-based scoring and searching method for identifying condition-responsive
	protein-protein interaction sub-network},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {2121--2128},
  number = {16},
  month = {Aug},
  abstract = {Current high-throughput protein-protein interaction (PPI) data do
	not provide information about the condition(s) under which the interactions
	occur. Thus, the identification of condition-responsive PPI sub-networks
	is of great importance for investigating how a living cell adapts
	to changing environments. In this article, we propose a novel edge-based
	scoring and searching approach to extract a PPI sub-network responsive
	to conditions related to some investigated gene expression profiles.
	Using this approach, what we constructed is a sub-network connected
	by the selected edges (interactions), instead of only a set of vertices
	(proteins) as in previous works. Furthermore, we suggest a systematic
	approach to evaluate the biological relevance of the identified responsive
	sub-network by its ability of capturing condition-relevant functional
	modules. We apply the proposed method to analyze a human prostate
	cancer dataset and a yeast cell cycle dataset. The results demonstrate
	that the edge-based method is able to efficiently capture relevant
	protein interaction behaviors under the investigated conditions. Supplementary
	data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btm294},
  pdf = {../local/Guo2007Edge-based.pdf},
  file = {Guo2007Edge-based.pdf:Guo2007Edge-based.pdf:PDF},
  institution = {Department of Bioinformatics, Bio-pharmaceutical Key Laboratory of
	Heilongjiang Province-Incubator of State Key Laboratory, Harbin Medical
	University, Harbin 150086, China. guoz@ems.hrbmu.edu.cn},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btm294},
  pmid = {17545181},
  timestamp = {2011.10.03},
  url = {http://dx.doi.org/10.1093/bioinformatics/btm294}
}

@article{Guo2005Towards,
  author = {Guo, Z. and Zhang, T. and Li, X. and Wang, Q. and Xu, J. and Yu,
	H. and Zhu, J. and Wang, H. and Wang, C. and Topol, E. J. and Wang,
	Q. and Rao, S.},
  title = {Towards precise classification of cancers based on robust gene functional
	expression profiles.},
  journal = {BMC Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {58},
  abstract = {Development of robust and efficient methods for analyzing and interpreting
	high dimension gene expression profiles continues to be a focus in
	computational biology. The accumulated experiment evidence supports
	the assumption that genes express and perform their functions in
	modular fashions in cells. Therefore, there is an open space for
	development of the timely and relevant computational algorithms that
	use robust functional expression profiles towards precise classification
	of complex human diseases at the modular level. Inspired by the insight
	that genes act as a module to carry out a highly integrated cellular
	function, we thus define a low dimension functional expression profile
	for data reduction. After annotating each individual gene to functional
	categories defined in a proper gene function classification system
	such as Gene Ontology applied in this study, we identify those functional
	categories enriched with differentially expressed genes. For each
	functional category or functional module, we compute a summary measure
	(s) for the raw expression values of the annotated genes to capture
	the overall activity level of the module. In this way, we can treat
	the gene expressions within a functional module as an integrative
	data point to replace the multiple values of individual genes. We
	compare the classification performance of decision trees based on
	functional expression profiles with the conventional gene expression
	profiles using four publicly available datasets, which indicates
	that precise classification of tumour types and improved interpretation
	can be achieved with the reduced functional expression profiles. This
	modular approach is demonstrated to be a powerful alternative approach
	to analyzing high dimension microarray data and is robust to high
	measurement noise and intrinsic biological variance inherent in microarray
	data. Furthermore, efficient integration with current biological
	knowledge has facilitated the interpretation of the underlying molecular
	mechanisms for complex human diseases at the modular level.},
  doi = {10.1186/1471-2105-6-58},
  pdf = {../local/Guo2005Towards.pdf},
  file = {Guo2005Towards.pdf:Guo2005Towards.pdf:PDF},
  institution = {Department of Computer Science, Harbin Institute of Technology, Harbin
	150001, China. guoz@ems.hrbmu.edu.cn},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-6-58},
  pmid = {15774002},
  timestamp = {2011.08.06},
  url = {http://dx.doi.org/10.1186/1471-2105-6-58}
}

@inproceedings{Guorong1996Bhattacharyya,
  author = {Guorong, X. and Peiqi, C. and Minhui, W.},
  title = {Bhattacharyya distance feature selection},
  booktitle = {Proceedings of the 13th International Conference on Pattern Recognition},
  year = {1996},
  volume = {2},
  pages = {195--199},
  organization = {IEEE}
}

@article{Gururaja2003Multiple,
  author = {Gururaja, T. and Li, W. and Noble, W. S. and Payan, D. G. and Anderson,
	D. C.},
  title = {Multiple functional categories of proteins identified in an in vitro
	cellular ubiquitin affinity extract using shotgun peptide sequencing},
  journal = {J {P}roteome {R}es},
  year = {2003},
  volume = {2},
  pages = {394--404},
  number = {4},
  abstract = {Using endogenous human cellular ubiquitin system enzymes and added
	his-tagged ubiquitin, {ATP}, and an {ATP}-regenerating system, we
	labelled cellular proteins with hexahistidine tagged ubiquitin in
	vitro. {L}abeling was dependent on {ATP} and the {ATP} recycling
	system, on the proteasome inhibitor {MG}132 and the ubiquitin protease
	inhibitor ubiquitin aldehyde, and was inhibited by iodoacetamide.
	{L}abeled proteins were affinity extracted in quadruplicate and tryptic
	peptides identified by 2{D} capillary {LC}/{MS}/{MS} combined with
	{SEQUEST} and {MEDUSA} analyses. {S}upport vector machine analysis
	of the mass spectrometry data allowed prediction of correct matches
	between mass spectrometry data and peptide sequences. {O}verall,
	144 proteins were identified by peptides predicted to be correctly
	sequenced, and 113 were identified by at least three peptides or
	one or two peptides with at least an 80\% chance of being correct.
	{I}dentified proteins included 22 proteasome subunits or associated
	proteins, 18 {E}1, {E}2 or {E}3 ubiquitin system enzymes or related
	proteins, and four ubiquitin domain proteins. {S}eventeen directly
	ubiquitinated proteins or proteins associated with the ubiquitin
	system were identified. {F}unctional clusters of other proteins included
	redox enzymes, proteins associated with endocytosis, cytoskeletal
	proteins, {DNA} damage or repair related proteins, calcium binding
	proteins, and splicing factor and related proteins, suggesting that
	in vitro ubiquitination is not random, and that these functions may
	be regulated by the ubiquitin system. {T}his map of cellular ubiquitinated
	proteins and their interacting proteins will be useful for further
	studies of ubiquitin system function.},
  pdf = {../local/Gururaja2003Multiple.pdf},
  file = {Gururaja2003Multiple.pdf:local/Gururaja2003Multiple.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Gusterson2009Do,
  author = {Gusterson, B.},
  title = {Do 'basal-like' breast cancers really exist?},
  journal = {Nat. Rev. Cancer},
  year = {2009},
  volume = {9},
  pages = {128--134},
  number = {2},
  month = {Feb},
  abstract = {It has been proposed that gene expression profiles will revolutionize
	the classification of breast cancer, eventually replacing histopathology
	with a more reproducible technology. These new approaches, combined
	with a better understanding of the cellular origins of breast cancer,
	should enable us to identify patient subgroups for more effective
	therapy. However, in such a rapidly advancing field it is essential
	that initial and thought-provoking results do not become established
	as 'facts' without question. This Opinion addresses some of the negatives
	and positives generated by the term 'basal-like' breast cancer, and
	questions its existence as an entity.},
  doi = {10.1038/nrc2571},
  pdf = {../local/Gusterson2009Do.pdf},
  file = {Gusterson2009Do.pdf:Gusterson2009Do.pdf:PDF},
  institution = {Dumbarton Road, Glasgow G11 6NT, Scotland, UK. B.A.Gusterson@clinmed.gla.ac.uk},
  keywords = {breastcancer},
  owner = {jp},
  pii = {nrc2571},
  pmid = {19132008},
  timestamp = {2009.02.03},
  url = {http://dx.doi.org/10.1038/nrc2571}
}

@article{Gusterson2005Basal,
  author      = {Gusterson, B. A. and Ross, D. T. and Heath, V. J. and Stein, T.},
  title       = {Basal cytokeratins and their relationship to the cellular origin
                 and functional classification of breast cancer},
  journal     = {Breast Cancer Res.},
  volume      = {7},
  number      = {4},
  pages       = {143--148},
  year        = {2005},
  doi         = {10.1186/bcr1041},
  url         = {http://dx.doi.org/10.1186/bcr1041},
  pii         = {bcr1041},
  pmid        = {15987465},
  institution = {Division of Cancer Sciences and Molecular Pathology, Western Infirmary,
                 University of Glasgow, Glasgow, UK. bag5f@clinmed.gla.ac.uk},
  abstract    = {Recent publications have classified breast cancers on the basis of
                 expression of cytokeratin-5 and -17 at the RNA and protein levels,
                 and demonstrated the importance of these markers in defining sporadic
                 tumours with bad prognosis and an association with BRCA1-related
                 breast cancers. These important observations using different technology
                 platforms produce a new functional classification of breast carcinoma.
                 However, it is important in developing hypotheses about the pathogenesis
                 of this tumour type to review the nomenclature that is being used
                 to emphasize potential confusion between terminology that defines
                 clinical subgroups and markers of cell lineage. This article reviews
                 the lineages in the normal breast in relation to what have become
                 known as the 'basal-like' carcinomas.},
  keywords    = {breastcancer},
  pdf         = {../local/Gusterson2005Basal.pdf},
  file        = {Gusterson2005Basal.pdf:Gusterson2005Basal.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.02.04}
}

@incollection{Gutin03Traveling,
  author = {G. Gutin},
  title = {Travelling Salesman and Related Problems},
  booktitle = {Handbook of Graph Theory},
  editor = {Gross, J. L. and Yellen, J.},
  publisher = {CRC Press},
  year = {2003}
}

@article{Gutin09Generalized,
  author = {Gutin, G. and Karapetyan, D.},
  title = {A memetic algorithm for the generalized traveling salesman problem},
  journal = {Natural Computing},
  year = {2009},
  abstract = {The generalized traveling salesman problem (GTSP) is an extension
	of the well-known traveling salesman problem. In GTSP, we are given
	a partition of cities into groups and we are required to find a minimum
	length tour that includes exactly one city from each group. The recent
	studies on this subject consider different variations of a memetic
	algorithm approach to the GTSP. The aim of this paper is to present
	a new memetic algorithm for GTSP with a powerful local search procedure.
	The experiments show that the proposed algorithm clearly outperforms
	all of the known heuristics with respect to both solution quality
	and running time. While the other memetic algorithms were designed
	only for the symmetric GTSP, our algorithm can solve both symmetric
	and asymmetric instances.},
  citeulike-article-id = {4004734},
  doi = {10.1007/s11047-009-9111-6},
  posted-at = {2009-02-04 01:33:10},
  url = {http://dx.doi.org/10.1007/s11047-009-9111-6}
}

@inproceedings{Gutin09Asymmetric,
  author = {Gutin, G. and Karapetyan, D. and Krasnogor, N.},
  title = {Memetic Algorithm for the Generalized Asymmetric Traveling Salesman
	Problem},
  booktitle = {NICSO 2007},
  year = {2008},
  pages = {199--210},
  publisher = {Springer Berlin}
}

@article{Guyon2003introduction,
  author    = {Guyon, I. and Elisseeff, A.},
  title     = {An introduction to variable and feature selection},
  journal   = {J. Mach. Learn. Res.},
  volume    = {3},
  pages     = {1157--1182},
  year      = {2003},
  url       = {http://jmlr.csail.mit.edu/papers/volume3/guyon03a/guyon03a.pdf},
  pdf       = {../local/Guyon2003introduction.pdf},
  file      = {Guyon2003introduction.pdf:Guyon2003introduction.pdf:PDF},
  owner     = {jp},
  timestamp = {2010.07.01}
}

@article{Guyon2002Gene,
  author = {Guyon, I. and Weston, J. and Barnhill, S. and Vapnik, V.},
  title = {Gene selection for cancer classification using support vector machines},
  journal = {Mach. Learn.},
  year = {2002},
  volume = {46},
  pages = {389--422},
  number = {1/3},
  month = {Jan},
  abstract = {{DNA} micro-arrays now permit scientists to screen thousands of genes
	simultaneously and determine whether those genes are active, hyperactive
	or silent in normal or cancerous tissue. {B}ecause these new micro-array
	devices generate bewildering amounts of raw data, new analytical
	methods must be developed to sort out whether cancer tissues have
	distinctive signatures of gene expression over normal tissues or
	other types of cancer tissues. {I}n this paper, we address the problem
	of selection of a small subset of genes from broad patterns of gene
	expression data, recorded on {DNA} micro-arrays. {U}sing available
	training examples from cancer and normal patients, we build a classifier
	suitable for genetic diagnosis, as well as drug discovery. {P}revious
	attempts to address this problem select genes with correlation techniques.
	{W}e propose a new method of gene selection utilizing {S}upport {V}ector
	{M}achine methods based on {R}ecursive {F}eature {E}limination ({RFE}).
	{W}e demonstrate experimentally that the genes selected by our techniques
	yield better classification performance and are biologically relevant
	to cancer. {I}n contrast with the baseline method, our method eliminates
	gene redundancy automatically and yields better and more compact
	gene subsets. {I}n patients with leukemia our method discovered 2
	genes that yield zero leave-one-out error, while 64 genes are necessary
	for the baseline method to get the best result (one leave-one-out
	error). {I}n the colon cancer database, using only 4 genes our method
	is 98\% accurate, while the baseline method is only 86\% accurate.},
  pdf = {../local/Guyon2002Gene.pdf},
  file = {Guyon2002Gene.pdf:local/Guyon2002Gene.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://homepages.nyu.edu/~jaw281/genesel.pdf}
}

@article{Gygi1999Quantitative,
  author      = {S. P. Gygi and B. Rist and S. A. Gerber and F. Turecek and M. H.
                 Gelb and R. Aebersold},
  title       = {Quantitative analysis of complex protein mixtures using isotope-coded
                 affinity tags.},
  journal     = {Nat Biotechnol},
  volume      = {17},
  number      = {10},
  pages       = {994--999},
  month       = {Oct},
  year        = {1999},
  doi         = {10.1038/13690},
  url         = {http://dx.doi.org/10.1038/13690},
  pmid        = {10504701},
  institution = {Department of Molecular Biotechnology, University of Washington,
                 Box 357730, Seattle WA 98195-7730, USA.},
  abstract    = {We describe an approach for the accurate quantification and concurrent
                 sequence identification of the individual proteins within complex
                 mixtures. The method is based on a class of new chemical reagents
                 termed isotope-coded affinity tags (ICATs) and tandem mass spectrometry.
                 Using this strategy, we compared protein expression in the yeast
                 Saccharomyces cerevisiae, using either ethanol or galactose as a
                 carbon source. The measured differences in protein expression correlated
                 with known yeast metabolic function under glucose-repressed conditions.
                 The method is redundant if multiple cysteinyl residues are present,
                 and the relative quantification is highly accurate because it is
                 based on stable isotope dilution techniques. The ICAT approach should
                 provide a widely applicable means to compare quantitatively global
                 protein expression in cells and tissues.},
  keywords    = {Affinity Labels; Amino Acid Sequence; Chromatography, Liquid; Isotope
                 Labeling; Mass Spectrometry; Proteins},
  owner       = {phupe},
  timestamp   = {2010.08.19}
}

@article{Gyorfi1999simple,
  author = {Gyorfi, L. and Lugosi, G. and Morvai, G.},
  title = {A simple randomized algorithm for sequential prediction of ergodic
	time series},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1999},
  volume = {45},
  pages = {2642--2650},
  number = {7},
  month = {Nov},
  abstract = {We present a simple randomized procedure for the prediction of a binary
	sequence. {T}he algorithm uses ideas from previous developments of
	the theory of the prediction of individual sequences. {W}e show that
	if the sequence is a realization of a stationary and ergodic random
	process then the average number of mistakes converges, almost surely,
	to that of the optimum, given by the {B}ayes predictor. {T}he desirable
	finite-sample properties of the predictor are illustrated by its
	performance for {M}arkov processes. {I}n such cases the predictor
	exhibits near-optimal behavior even without knowing the order of
	the {M}arkov process. {P}rediction with side information is also
	considered.},
  pdf = {../local/Gyorfi1999simple.pdf},
  file = {Gyorfi1999simple.pdf:local/Gyorfi1999simple.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Gyorfi1994There,
  author = {Gyorfi, L. and Pali, I. and Van der Meulen, E. C.},
  title = {There is no universal source code for an infinite source alphabet},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1994},
  volume = {40},
  pages = {267--271},
  number = {1},
  month = {Jan},
  abstract = {Shows that a discrete infinite distribution with finite entropy cannot
	be estimated consistently in information divergence. {A}s a corollary
	the authors show that there is no universal source code for an infinite
	source alphabet over the class of all discrete memoryless sources
	with finite entropy.},
  pdf = {../local/Gyorfi1994There.pdf},
  file = {Gyorfi1994There.pdf:local/Gyorfi1994There.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Gartner2003Survey,
  author = {G{\"a}rtner, T.},
  title = {A {S}urvey of {K}ernels for {S}tructured {D}ata},
  journal = {SIGKDD Explor. Newsl.},
  year = {2003},
  volume = {5},
  pages = {49--58},
  number = {1},
  doi = {10.1145/959242.959248},
  keywords = {kernel-theory},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@misc{Gartner2002Exponential,
  author       = {G{\"a}rtner, T.},
  title        = {Exponential and {G}eometric {K}ernels for {G}raphs},
  howpublished = {In NIPS {W}orkshop on {U}nreal {D}ata: {P}rinciples of {M}odeling
                  {N}onvectorial {D}ata},
  year         = {2002},
  owner        = {mahe},
  timestamp    = {2006.08.09}
}

@inproceedings{Gartner2002Multi-Instance,
  author = {G{\"a}rtner, T. and Flach, P. A. and Kowalczyk, A. and Smola, A. J.},
  title = {Multi-{I}nstance {K}ernels},
  booktitle = {Proceedings of the {N}ineteenth {I}nternational {C}onference on {M}achine
	{L}earning},
  year = {2002},
  editor = {C. Sammut and A. Hoffmann},
  pages = {179--186},
  publisher = {Morgan Kaufmann},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@inproceedings{Gartner2003graph,
  author = {G{\"a}rtner, T. and Flach, P. and Wrobel, S.},
  title = {On graph kernels: hardness results and efficient alternatives},
  booktitle = {Proceedings of the {S}ixteenth {A}nnual {C}onference on {C}omputational
	{L}earning {T}heory and the {S}eventh {A}nnual {W}orkshop on {K}ernel
	{M}achines},
  year = {2003},
  editor = {Sch{\"o}lkopf, B. and Warmuth, M.},
  volume = {2777},
  series = {Lecture Notes in Computer Science},
  pages = {129--143},
  address = {Heidelberg},
  publisher = {Springer},
  abstract = {As most `real-world' data is structured, research in kernel
	methods has begun investigating kernels for various kinds of structured
	data. {O}ne of the most widely used tools for modeling structured
	data are graphs. {A}n interesting and important challenge is thus
	to investigate kernels on instances that are represented by graphs.
	{S}o far, only very specific graphs such as trees and strings have
	been considered. {T}his paper investigates kernels on labeled directed
	graphs with general structure. {I}t is shown that computing a strictly
	positive definite graph kernel is at least as hard as solving the
	graph isomorphism problem. {I}t is also shown that computing an inner
	product in a feature space indexed by all possible graphs, where
	each feature counts the number of subgraphs isomorphic to that graph,
	is {NP}-hard. {O}n the other hand, inner products in an alternative
	feature space, based on walks in the graph, can be computed in polynomial
	time. {S}uch kernels are defined in this paper.},
  doi = {10.1007/b12006},
  owner = {vert},
  timestamp = {2006.01.19},
  url = {http://dx.doi.org/10.1007/b12006}
}

@inproceedings{Gartner2003grapha,
  author = {G{\"a}rtner, T. and Flach, P. and Wrobel, S.},
  title = {On graph kernels: hardness results and efficient alternatives},
  booktitle = {Proceedings of COLT / Kernel workshop},
  year = {2003},
  timestamp = {2007.04.12}
}

@article{Gartner2004Kernels,
  author = {G{\"a}rtner, T. and Lloyd, J. W. and Flach, P. A.},
  title = {Kernels and Distances for Structured Data},
  journal = {Mach. Learn.},
  year = {2004},
  volume = {57},
  pages = {205--232},
  number = {3},
  abstract = {This paper brings together two strands of machine learning of increasing
	importance: kernel methods and highly structured data. We propose
	a general method for constructing a kernel following the syntactic
	structure of the data, as defined by its type signature in a higher-order
	logic. Our main theoretical result is the positive definiteness of
	any kernel thus defined. We report encouraging experimental results
	on a range of real-world data sets. By converting our kernel to a
	distance pseudo-metric for 1-nearest neighbour, we were able to improve
	the best accuracy from the literature on the Diterpene data set by
	more than 10\%.},
  doi = {10.1023/B:MACH.0000039777.23772.30},
  keywords = {biosvm},
  timestamp = {2006.07.11},
  url = {http://dx.doi.org/10.1023/B:MACH.0000039777.23772.30}
}

@book{Guner2000Pharmacophore,
  author    = {G{\"u}ner, O. F.},
  title     = {Pharmacophore {P}erception, {D}evelopment, and {U}se in {D}rug {D}esign},
  series    = {IUL Biotechnology Series},
  volume    = {2},
  publisher = {International University Line},
  year      = {2000}
}

@article{Goektuerk2001statistical,
  author = {S. B. G{\"o}kt{\"u}rk and C. Tomasi and B. Acar and C. F. Beaulieu and
	D. S. Paik and R. B. Jeffrey and J. Yee and S. Napel},
  title = {A statistical 3-{D} pattern processing method for computer-aided
	detection of polyps in {CT} colonography.},
  journal = {I{EEE} {T}rans {M}ed {I}maging},
  year = {2001},
  volume = {20},
  pages = {1251--1260},
  number = {12},
  month = {Dec},
  abstract = {Adenomatous polyps in the colon are believed to be the precursor to
	colorectal carcinoma, the second leading cause of cancer deaths in
	{U}nited {S}tates. {I}n this paper, we propose a new method for computer-aided
	detection of polyps in computed tomography ({CT}) colonography (virtual
	colonoscopy), a technique in which polyps are imaged along the wall
	of the air-inflated, cleansed colon with {X}-ray {CT}. {I}nitial
	work with computer aided detection has shown high sensitivity, but
	at a cost of too many false positives. {W}e present a statistical
	approach that uses support vector machines to distinguish the differentiating
	characteristics of polyps and healthy tissue, and uses this information
	for the classification of the new cases. {O}ne of the main contributions
	of the paper is the new three-dimensional pattern processing approach,
	called random orthogonal shape sections method, which combines the
	information from many random images to generate reliable signatures
	of shape. {T}he input to the proposed system is a collection of volume
	data from candidate polyps obtained by a high-sensitivity, low-specificity
	system that we developed previously. {T}he results of our ten-fold
	cross-validation experiments show that, on the average, the system
	increases the specificity from 0.19 (0.35) to 0.69 (0.74) at a sensitivity
	level of 1.0 (0.95).}
}

@inproceedings{Gartner03graph,
  author = {T. G{\"a}rtner and K. Driessens and J. Ramon},
  title = {Exponential and geometric kernels for graphs},
  booktitle = {Mach. Learn.},
  year = {2002},
  pages = {146--163},
  publisher = {Springer}
}

@article{Goendoer2009Chromosome,
  author = {Anita G{\"o}nd{\"o}r and Rolf Ohlsson},
  title = {Chromosome crosstalk in three dimensions.},
  journal = {Nature},
  year = {2009},
  volume = {461},
  pages = {212--217},
  number = {7261},
  month = {Sep},
  abstract = {The genome forms extensive and dynamic physical interactions with
	itself in the form of chromosome loops and bridges, thus exploring
	the three-dimensional space of the nucleus. It is now possible to
	examine these interactions at the molecular level, and we have gained
	glimpses of their functional implications. Chromosomal interactions
	can contribute to the silencing and activation of genes within the
	three-dimensional context of the nuclear architecture. Technical
	advances in detecting these interactions contribute to our understanding
	of the functional organization of the genome, as well as its adaptive
	plasticity in response to environmental changes during development
	and disease.},
  doi = {10.1038/nature08453},
  institution = {Department of Microbiology, Tumor and Cell Biology, Nobels v{\"a}g 16,
	Box 280, Karolinska Institute, SE-171 77 Stockholm, Sweden. anita.gondor@ki.se},
  keywords = {Animals; Cell Nucleus, genetics/metabolism; Chromosome Positioning;
	Chromosomes, chemistry/genetics/metabolism; Gene Expression Regulation;
	Humans; Nucleic Acid Conformation; Transcription, Genetic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nature08453},
  pmid = {19741702},
  timestamp = {2010.08.11},
  url = {http://dx.doi.org/10.1038/nature08453}
}

@article{Goendoer2008High-resolution,
  author = {Anita G{\"o}nd{\"o}r and Carole Rougier and Rolf Ohlsson},
  title = {High-resolution circular chromosome conformation capture assay.},
  journal = {Nat Protoc},
  year = {2008},
  volume = {3},
  pages = {303--313},
  number = {2},
  abstract = {The pioneering chromosome conformation capture (3C) method provides
	the opportunity to study chromosomal folding in the nucleus. It is
	based on formaldehyde cross-linking of living cells followed by enzyme
	digestion, intramolecular ligation and quantitative (Q)-PCR analysis.
	However, 3C requires prior knowledge of the bait and interacting
	sequence (termed interactor) rendering it less useful for genome-wide
	studies. As several recent reports document, this limitation has
	been overcome by exploiting a circular intermediate in a variant
	of the 3C method, termed 4C (for circular 3C). The strategic positioning
	of primers within the bait enables the identification of unknown
	interacting sequences, which form part of the circular DNA. Here,
	we describe a protocol for our 4C method, which produces a high-resolution
	interaction map potentially suitable for the analysis of cis-regulatory
	elements and for comparison with chromatin marks obtained by chromatin
	immunoprecipitation (ChIP) on chip at the sites of interaction. Following
	optimization of enzyme digestions and amplification conditions, the
	protocol can be completed in 2-3 weeks.},
  doi = {10.1038/nprot.2007.540},
  institution = {Department of Development and Genetics, Uppsala University, Norbyv{\"a}gen
	18A, S-752 36 Uppsala, Sweden. anita.gondor@ebc.uu.se},
  keywords = {Chromatin; Chromosomes, Human, Pair 11; DNA; DNA Restriction Enzymes;
	DNA, Circular; Formaldehyde; Genetic Techniques; Humans; Nucleic
	Acid Conformation},
  owner = {phupe},
  pii = {nprot.2007.540},
  pmid = {18274532},
  timestamp = {2010.08.26},
  url = {http://dx.doi.org/10.1038/nprot.2007.540}
}

@article{Haasdonk2005Feature,
  author = {Bernard Haasdonk},
  title = {Feature space interpretation of {SVM}s with indefinite kernels.},
  journal = {I{EEE} {T}rans {P}attern {A}nal {M}ach {I}ntell},
  year = {2005},
  volume = {27},
  pages = {482--492},
  number = {4},
  month = {Apr},
  abstract = {Kernel methods are becoming increasingly popular for various kinds
	of machine learning tasks, the most famous being the support vector
	machine ({SVM}) for classification. {T}he {SVM} is well understood
	when using conditionally positive definite (cpd) kernel functions.
	{H}owever, in practice, non-cpd kernels arise and demand application
	in {SVM}s. {T}he procedure of "plugging" these indefinite kernels
	in {SVM}s often yields good empirical classification results. {H}owever,
	they are hard to interpret due to missing geometrical and theoretical
	understanding. {I}n this paper, we provide a step toward the comprehension
	of {SVM} classifiers in these situations. {W}e give a geometric interpretation
	of {SVM}s with indefinite kernel functions. {W}e show that such {SVM}s
	are optimal hyperplane classifiers not by margin maximization, but
	by minimization of distances between convex hulls in pseudo-{E}uclidean
	spaces. {B}y this, we obtain a sound framework and motivation for
	indefinite {SVM}s. {T}his interpretation is the basis for further
	theoretical analysis, e.g., investigating uniqueness, and for the
	derivation of practical guidelines like characterizing the suitability
	of indefinite {SVM}s.},
  doi = {10.1109/TPAMI.2005.78},
  pdf = {../local/Haasdonk2005Feature.pdf},
  file = {Haasdonk2005Feature.pdf:local/Haasdonk2005Feature.pdf:PDF},
  keywords = {Algorithms, Animals, Antibiotics, Antineoplastic, Artificial Intelligence,
	Automated, Automatic Data Processing, Butadienes, Chloroplasts, Cluster
	Analysis, Comparative Study, Computer Simulation, Computer-Assisted,
	Computing Methodologies, Database Management Systems, Databases,
	Diagnosis, Disinfectants, Dose-Response Relationship, Drug, Drug
	Toxicity, Electrodes, Electroencephalography, Ethylamines, Expert
	Systems, Factual, Feedback, Fungicides, Gene Expression Profiling,
	Genes, Genetic Markers, Humans, Image Enhancement, Image Interpretation,
	Implanted, Industrial, Information Storage and Retrieval, Kidney,
	Kidney Tubules, MEDLINE, Male, Mercuric Chloride, Microarray Analysis,
	Molecular Biology, Motor Cortex, Movement, Natural Language Processing,
	Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Numerical
	Analysis, Pattern Recognition, Plant Proteins, Predictive Value of
	Tests, Proteins, Proteome, Proximal, Puromycin Aminonucleoside, Rats,
	Reproducibility of Results, Research Support, Sensitivity and Specificity,
	Signal Processing, Sprague-Dawley, Subcellular Fractions, Terminology,
	Therapy, Time Factors, Toxicogenetics, U.S. Gov't, User-Computer
	Interface, 15794155},
  url = {http://dx.doi.org/10.1109/TPAMI.2005.78}
}

@book{Hadamard1923Lectures,
  author = {J. Hadamard},
  title = {Lectures on Cauchy's Problem: In Linear Partial Differential Equations},
  year = {1923},
  publisher = {Dover Publications}
}

@article{Hadamard1902Sur,
  author = {J. Hadamard},
  title = {Sur les probl\`emes aux D\'eriv\'ees partielles et leur signification
	physique},
  journal = {Princeton University Bulletin},
  year = {1902},
  volume = {13},
  pages = {49--52}
}

@article{Haferlach2005AML,
  author = {Torsten Haferlach and Alexander Kohlmann and Susanne Schnittger and
	Martin Dugas and Wolfgang Hiddemann and Wolfgang Kern and Claudia
	Schoch},
  title = {A{ML} {M}3 and {AML} {M}3 variant each have a distinct gene expression
	signature but also share patterns different from other genetically
	defined {AML} subtypes.},
  journal = {Genes {C}hromosomes {C}ancer},
  year = {2005},
  volume = {43},
  pages = {113--127},
  number = {2},
  month = {Jun},
  abstract = {Acute promyelocytic leukemia ({APL}) with t(15;17) appears in two
	phenotypes: {AML} {M}3, with abnormal promyelocytes showing heavy
	granulation and bundles of {A}uer rods, and {AML} {M}3 variant ({M}3v),
	with non- or hypogranular cytoplasm and a bilobed nucleus. {W}e investigated
	the global gene expression profiles of 35 {APL} patients (19 {AML}
	{M}3, 16 {AML} {M}3v) by using high-density {DNA}-oligonucleotide
	microarrays. {F}irst, an unsupervised approach clearly separated
	{APL} samples from other {AML}s characterized genetically as t(8;21)
	(n = 35), inv(16) (n = 35), or t(11q23)/{MLL} (n = 35) or as having
	a normal karyotype (n = 50). {S}econd, we found genes with functional
	relevance for blood coagulation that were differentially expressed
	between {APL} and other {AML}s. {F}urthermore, a supervised pairwise
	comparison between {M}3 and {M}3v revealed differential expression
	of genes that encode for biological functions and pathways such as
	granulation and maturation of hematologic cells, explaining morphologic
	and clinical differences. {D}iscrimination between {M}3 and {M}3v
	based on gene signatures showed a median classification accuracy
	of 90\% by use of 10-fold {CV} and support vector machines. {A}dditional
	molecular mutations such as {FLT}3-{LM}, which were significantly
	more frequent in {M}3v than in {M}3 ({P} < 0.0001), may partly contribute
	to the different phenotypes. {H}owever, linear regression analysis
	demonstrated that genes differentially expressed between {M}3 and
	{M}3v did not correlate with {FLT}3-{LM}.},
  doi = {10.1002/gcc.20175},
  pdf = {../local/Haferlach2005AML.pdf},
  file = {Haferlach2005AML.pdf:local/Haferlach2005AML.pdf:PDF},
  keywords = {biosvm microarray},
  url = {http://dx.doi.org/10.1002/gcc.20175}
}

@article{Haferlach2005global,
  author = {Torsten Haferlach and Alexander Kohlmann and Susanne Schnittger and
	Martin Dugas and Wolfgang Hiddemann and Wolfgang Kern and Claudia
	Schoch},
  title = {A global approach to the diagnosis of leukemia using gene expression
	profiling.},
  journal = {Blood},
  year = {2005},
  volume = {106},
  pages = {1189--1198},
  number = {4},
  month = {Aug},
  abstract = {Accurate diagnosis and classification of leukemias are the bases for
	the appropriate management of patients. {T}he diagnostic accuracy
	and efficiency of present methods may be improved by the use of microarrays
	for gene expression profiling. {W}e analyzed gene expression profiles
	in bone marrow and peripheral blood samples from 937 patients with
	all clinically relevant leukemia subtypes (n=892) and non-leukemic
	controls (n=45) by {U}133{A} and {B} {G}ene{C}hips ({A}ffymetrix).
	{F}or each subgroup differentially expressed genes were calculated.
	{C}lass prediction was performed using support vector machines. {P}rediction
	accuracies were estimated by 10-fold cross validation and assessed
	for robustness in a 100-fold resampling approach using randomly chosen
	test-sets consisting of 1/3 of the samples. {A}pplying the top 100
	genes of each subgroup an overall prediction accuracy of 95.1\% was
	achieved which was confirmed by resampling (median, 93.8\%; 95\%
	confidence interval, 91.4\%-95.8\%). {I}n particular, {AML} with
	t(15;17), t(8;21), or inv(16), {CLL}, and {P}ro-{B}-{ALL} with t(11q23)
	were classified with 100\% sensitivity and 100\% specificity. {A}ccordingly,
	cluster analysis completely separated all of the 13 subgroups analyzed.
	{G}ene expression profiling can predict all clinically relevant subentities
	of leukemia with high accuracy.},
  doi = {10.1182/blood-2004-12-4938},
  pdf = {../local/Haferlach2005global.pdf},
  file = {Haferlach2005global.pdf:local/Haferlach2005global.pdf:PDF},
  keywords = {biosvm microarray},
  pii = {2004-12-4938},
  url = {http://dx.doi.org/10.1182/blood-2004-12-4938}
}

@article{Haibe-Kains2008Comparison,
  author = {Haibe-Kains, B. and Desmedt, C. and Piette, F. and Buyse, M. and
	Cardoso, F. and Van't Veer, L. and Piccart, M. and Bontempi, G. and
	Sotiriou, C.},
  title = {Comparison of prognostic gene expression signatures for breast cancer},
  journal = {BMC Genomics},
  year = {2008},
  volume = {9},
  pages = {394},
  abstract = {BACKGROUND: During the last years, several groups have identified
	prognostic gene expression signatures with apparently similar performances.
	However, signatures were never compared on an independent population
	of untreated breast cancer patients, where risk assessment was computed
	using the original algorithms and microarray platforms. RESULTS:
	We compared three gene expression signatures, the 70-gene, the 76-gene
	and the Gene expression Grade Index (GGI) signatures, in terms of
	predicting distant metastasis free survival (DMFS) for the individual
	patient. To this end, we used the previously published TRANSBIG independent
	validation series of node-negative untreated primary breast cancer
	patients. We observed agreement in prediction for 135 of 198 patients
	(68\%) when considering the three signatures. When comparing the
	signatures two by two, the agreement in prediction was 71\% for the
	70- and 76-gene signatures, 76\% for the 76-gene signature and the
	GGI, and 88\% for the 70-gene signature and the GGI. The three signatures
	had similar capabilities of predicting DMFS and added significant
	prognostic information to that provided by the classical parameters.
	CONCLUSION: Despite the difference in development of these signatures
	and the limited overlap in gene identity, they showed similar prognostic
	performance, adding to the growing evidence that these prognostic
	signatures are of clinical relevance.},
  doi = {10.1186/1471-2164-9-394},
  pdf = {../local/Haibe-Kains2008Comparison.pdf},
  file = {Haibe-Kains2008Comparison.pdf:Haibe-Kains2008Comparison.pdf:PDF},
  institution = {Functional Genomics Unit, Jules Bordet Institute, Universit{\'e} Libre
	de Bruxelles, Brussels, Belgium. bhaibeka@ulb.ac.be},
  keywords = {breastcancer},
  owner = {jp},
  pii = {1471-2164-9-394},
  pmid = {18717985},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1186/1471-2164-9-394}
}

@article{Haibe-Kains2008comparative,
  author = {Haibe-Kains, B. and Desmedt, C. and Sotiriou, C. and Bontempi, G.},
  title = {A comparative study of survival models for breast cancer prognostication
	based on microarray data: does a single gene beat them all?},
  journal = {Bioinformatics},
  publisher = {Oxford Univ Press},
  year = {2008},
  volume = {24},
  number = {19},
  pages = {2200--2208},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btn374},
  url = {http://dx.doi.org/10.1093/bioinformatics/btn374},
  pdf = {../local/Haibe-Kains2008comparative.pdf},
  file = {Haibe-Kains2008comparative.pdf:Haibe-Kains2008comparative.pdf:PDF},
  owner = {jp},
  timestamp = {2011.01.14}
}

@manual{Haibe-Kains2011genefu,
  author = {Haibe-Kains, B. and Schroeder, M. and Bontempi, G. and Sotiriou,
	C. and Quackenbush, J.},
  title = {genefu: Relevant Functions for Gene Expression Analysis, Especially
	in Breast Cancer.},
  year = {2011},
  note = {R package version 1.4.0},
  url = {http://compbio.dfci.harvard.edu},
  owner = {jp},
  timestamp = {2012.02.27}
}

@article{Haigh2005Small,
  author = {J. A. Haigh and B. T. Pickup and J. A. Grant and A. Nicholls},
  title = {{S}mall molecule shape-fingerprints.},
  journal = {J. Chem. Inf. Model.},
  year = {2005},
  volume = {45},
  pages = {673--684},
  number = {3},
  abstract = {The optimal overlap between two molecular structures is a useful measure
	of shape similarity. However, it usually requires significant computation.
	This work describes the design of shape-fingerprints: binary bit
	strings that encode molecular shape. Standard measures of similarity
	between two shape-fingerprints are shown to be an excellent surrogate
	for similarity based on volume overlap but several orders of magnitude
	faster to compute. Consequently, shape-fingerprints can be used for
	clustering of large data sets, evaluating the diversity of compound
	libraries, as descriptors in SAR and as a prescreen for exact shape
	comparison against large virtual databases. Our results show that
	a small set of shapes can be used to build these fingerprints and
	that this set can be applied universally.},
  doi = {10.1021/ci049651v},
  keywords = {15921457},
  owner = {mahe},
  pmid = {15921457},
  timestamp = {2006.08.22},
  url = {http://dx.doi.org/10.1021/ci049651v}
}

@article{Hakenberg2005Systematic,
  author = {J{\"o}rg Hakenberg and Steffen Bickel and Conrad Plake and Ulf Brefeld
	and Hagen Zahn and Lukas Faulstich and Ulf Leser and Tobias Scheffer},
  title = {Systematic feature evaluation for gene name recognition.},
  journal = {B{MC} {B}ioinformatics},
  year = {2005},
  volume = {6 Suppl 1},
  pages = {S9},
  abstract = {In task 1{A} of the {B}io{C}re{A}t{I}v{E} evaluation, systems had
	to be devised that recognize words and phrases forming gene or protein
	names in natural language sentences. {W}e approach this problem by
	building a word classification system based on a sliding window approach
	with a {S}upport {V}ector {M}achine, combined with a pattern-based
	post-processing for the recognition of phrases. {T}he performance
	of such a system crucially depends on the type of features chosen
	for consideration by the classification method, such as pre- or postfixes,
	character n-grams, patterns of capitalization, or classification
	of preceding or following words. {W}e present a systematic approach
	to evaluate the performance of different feature sets based on recursive
	feature elimination, {RFE}. {B}ased on a systematic reduction of
	the number of features used by the system, we can quantify the impact
	of different feature sets on the results of the word classification
	problem. {T}his helps us to identify descriptive features, to learn
	about the structure of the problem, and to design systems that are
	faster and easier to understand. {W}e observe that the {SVM} is robust
	to redundant features. {RFE} improves the performance by 0.7\%, compared
	to using the complete set of attributes. {M}oreover, a performance
	that is only 2.3\% below this maximum can be obtained using fewer
	than 5\% of the features.},
  doi = {10.1186/1471-2105-6-S1-S9},
  pdf = {../local/Hakenberg2005Systematic.pdf},
  file = {Hakenberg2005Systematic.pdf:local/Hakenberg2005Systematic.pdf:PDF},
  keywords = {biosvm},
  pii = {1471-2105-6-S1-S9},
  url = {http://dx.doi.org/10.1186/1471-2105-6-S1-S9}
}

@article{Hakenberg2004Finding,
  author = {Hakenberg, J. and Schmeier, S. and Kowald, A. and Klipp, E. and Leser,
	U.},
  title = {Finding kinetic parameters using text mining.},
  journal = {O{MICS}},
  year = {2004},
  volume = {8},
  pages = {131--152},
  number = {2},
  abstract = {The mathematical modeling and description of complex biological processes
	has become more and more important over the last years. {S}ystems
	biology aims at the computational simulation of complex systems,
	up to whole cell simulations. {A}n essential part focuses on solving
	a large number of parameterized differential equations. {H}owever,
	measuring those parameters is an expensive task, and finding them
	in the literature is very laborious. {W}e developed a text mining
	system that supports researchers in their search for experimentally
	obtained parameters for kinetic models. {O}ur system classifies full
	text documents regarding the question whether or not they contain
	appropriate data using a support vector machine. {W}e evaluated our
	approach on a manually tagged corpus of 800 documents and found that
	it outperforms keyword searches in abstracts by a factor of five
	in terms of precision.},
  doi = {10.1089/1536231041388366},
  pdf = {../local/Hakenberg2004Finding.pdf},
  file = {Hakenberg2004Finding.pdf:local/Hakenberg2004Finding.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.liebertonline.com/doi/abs/10.1089%2F1536231041388366}
}

@article{Haley2004Kinetic,
  author = {Haley, B. and Zamore, P. D.},
  title = {{K}inetic analysis of the {RNA}i enzyme complex.},
  journal = {Nat. Struct. Mol. Biol.},
  year = {2004},
  month = {Jul},
  volume = {11},
  number = {7},
  pages = {599--606},
  doi = {10.1038/nsmb780},
  url = {http://dx.doi.org/10.1038/nsmb780},
  pii = {nsmb780},
  pmid = {15170178},
  pdf = {../local/Haley2004Kinetic.pdf},
  file = {Haley2004Kinetic.pdf:Haley2004Kinetic.pdf:PDF},
  keywords = {sirna},
  owner = {vert},
  timestamp = {2006.04.27},
  abstract = {The siRNA-directed ribonucleoprotein complex, RISC, catalyzes target
	RNA cleavage in the RNA interference pathway. Here, we show that
	siRNA-programmed RISC is a classical Michaelis-Menten enzyme in the
	presence of ATP. In the absence of ATP, the rate of multiple rounds
	of catalysis is limited by release of the cleaved products from the
	enzyme. Kinetic analysis suggests that different regions of the siRNA
	play distinct roles in the cycle of target recognition, cleavage,
	and product release. Bases near the siRNA 5' end disproportionately
	contribute to target RNA-binding energy, whereas base pairs formed
	by the central and 3' regions of the siRNA provide a helical geometry
	required for catalysis. Finally, the position of the scissile phosphate
	on the target RNA seems to be determined during RISC assembly, before
	the siRNA encounters its RNA target.}
}

@article{Hall2004Unravelling,
  author = {Hall, J.},
  title = {Unravelling the general properties of si{RNA}s: strength in numbers
	and lessons from the past.},
  journal = {Nat. {R}ev. {G}enet.},
  year = {2004},
  volume = {5},
  pages = {552--557},
  number = {7},
  month = {Jul},
  doi = {10.1038/nrg1382},
  pdf = {../local/Hall2004Unravelling.pdf},
  file = {Hall2004Unravelling.pdf:local/Hall2004Unravelling.pdf:PDF},
  keywords = {sirna},
  pii = {nrg1382},
  url = {http://dx.doi.org/10.1038/nrg1382}
}

@article{Hall2005comprehensive,
  author = {Neil Hall and Marianna Karras and J. Dale Raine and Jane M. Carlton
	and Taco W. A. Kooij and Matthew Berriman and Laurence Florens and
	Christoph S. Janssen and Arnab Pain and Georges K. Christophides and
	Keith James and Kim Rutherford and Barbara Harris and David Harris
	and Carol Churcher and Michael A. Quail and Doug Ormond and Jon Doggett
	and Holly E. Trueman and Jacqui Mendoza and Shelby L. Bidwell and Marie-Adele
	Rajandream and Daniel J. Carucci and John R. Yates and Fotis C. Kafatos
	and Chris J. Janse and Bart Barrell and C. Michael R. Turner and Andrew
	P. Waters and Robert E. Sinden},
  title = {{A} comprehensive survey of the {P}lasmodium life cycle by genomic,
	transcriptomic, and proteomic analyses.},
  journal = {Science},
  year = {2005},
  volume = {307},
  pages = {82--86},
  number = {5706},
  month = {Jan},
  abstract = {Plasmodium berghei and Plasmodium chabaudi are widely used model malaria
	species. Comparison of their genomes, integrated with proteomic and
	microarray data, with the genomes of Plasmodium falciparum and Plasmodium
	yoelii revealed a conserved core of 4500 Plasmodium genes in the
	central regions of the 14 chromosomes and highlighted genes evolving
	rapidly because of stage-specific selective pressures. Four strategies
	for gene expression are apparent during the parasites' life cycle:
	(i) housekeeping; (ii) host-related; (iii) strategy-specific related
	to invasion, asexual replication, and sexual development; and (iv)
	stage-specific. We observed posttranscriptional gene silencing through
	translational repression of messenger RNA during sexual development,
	and a 47-base 3' untranslated region motif is implicated in this
	process.},
  doi = {10.1126/science.1103717},
  pdf = {../local/Hall2005comprehensive.pdf},
  file = {Hall2005comprehensive.pdf:local/Hall2005comprehensive.pdf:PDF},
  keywords = {plasmodium},
  pii = {307/5706/82},
  pmid = {15637271},
  timestamp = {2006.04.13},
  url = {http://dx.doi.org/10.1126/science.1103717}
}

@article{Halperin2002Principles,
  author = {I. Halperin and B. Ma and H. Wolfson and R. Nussinov},
  title = {Principles of docking: {A}n overview of search algorithms and a guide
	to scoring functions.},
  journal = {Proteins},
  year = {2002},
  month = {Jun},
  volume = {47},
  number = {4},
  pages = {409--443},
  doi = {10.1002/prot.10115},
  url = {http://dx.doi.org/10.1002/prot.10115},
  pmid = {12001221},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.02.03},
  abstract = {The docking field has come of age. {T}he time is ripe to present the
	principles of docking, reviewing the current state of the field.
	{T}wo reasons are largely responsible for the maturity of the computational
	docking area. {F}irst, the early optimism that the very presence
	of the "correct" native conformation within the list of predicted
	docked conformations signals a near solution to the docking problem,
	has been replaced by the stark realization of the extreme difficulty
	of the next scoring/ranking step. {S}econd, in the last couple of
	years more realistic approaches to handling molecular flexibility
	in docking schemes have emerged. {A}s in folding, these derive from
	concepts abstracted from statistical mechanics, namely, populations.
	{D}ocking and folding are interrelated. {F}rom the purely physical
	standpoint, binding and folding are analogous processes, with similar
	underlying principles. {C}omputationally, the tools developed for
	docking will be tremendously useful for folding. {F}or large, multidomain
	proteins, domain docking is probably the only rational way, mimicking
	the hierarchical nature of protein folding. {T}he complexity of the
	problem is huge. {H}ere we divide the computational docking problem
	into its two separate components. {A}s in folding, solving the docking
	problem involves efficient search (and matching) algorithms, which
	cover the relevant conformational space, and selective scoring functions,
	which are both efficient and effectively discriminate between native
	and non-native solutions. {I}t is universally recognized that docking
	of drugs is immensely important. {H}owever, protein-protein docking
	is equally so, relating to recognition, cellular pathways, and macromolecular
	assemblies. {P}roteins function when they are bound to other molecules.
	{C}onsequently, we present the review from both the computational
	and the biological points of view. {A}lthough large, it covers only
	partially the extensive body of literature, relating to small (drug)
	and to large protein-protein molecule docking, to rigid and to flexible.
	{U}nfortunately, when reviewing these, a major difficulty in assessing
	the results is the non-uniformity in the formats in which they are
	presented in the literature. {C}onsequently, we further propose a
	way to rectify it here.}
}

@article{Hammond20043D,
  author = {Peter Hammond and Tim J. Hutton and Judith E. Allanson and Linda E.
	Campbell and Raoul C. M. Hennekam and Sean Holden and Michael A. Patton
	and Adam Shaw and I. Karen Temple and Matthew Trotter and Kieran
	C. Murphy and Robin M. Winter},
  title = {3{D} analysis of facial morphology.},
  journal = {Am {J} {M}ed {G}enet {A}},
  year = {2004},
  volume = {126},
  pages = {339--348},
  number = {4},
  month = {May},
  abstract = {Dense surface models can be used to analyze 3{D} facial morphology
	by establishing a correspondence of thousands of points across each
	3{D} face image. {T}he models provide dramatic visualizations of
	3{D} face-shape variation with potential for training physicians
	to recognize the key components of particular syndromes. {W}e demonstrate
	their use to visualize and recognize shape differences in a collection
	of 3{D} face images that includes 280 controls (2 weeks to 56 years
	of age), 90 individuals with {N}oonan syndrome ({NS}) (7 months to
	56 years), and 60 individuals with velo-cardio-facial syndrome ({VCFS};
	3 to 17 years of age). {T}en-fold cross-validation testing of discrimination
	between the three groups was carried out on unseen test examples
	using five pattern recognition algorithms (nearest mean, {C}5.0 decision
	trees, neural networks, logistic regression, and support vector machines).
	{F}or discriminating between individuals with {NS} and controls,
	the best average sensitivity and specificity levels were 92 and 93\%
	for children, 83 and 94\% for adults, and 88 and 94\% for the children
	and adults combined. {F}or individuals with {VCFS} and controls,
	the best results were 83 and 92\%. {I}n a comparison of individuals
	with {NS} and individuals with {VCFS}, a correct identification rate
	of 95\% was achieved for both syndromes. {T}his article contains
	supplementary material, which may be viewed at the {A}merican {J}ournal
	of {M}edical {G}enetics website at http://www.interscience.wiley.com/jpages/0148-7299/suppmat/index.html.},
  doi = {10.1002/ajmg.a.20665},
  pdf = {../local/Hammond20043D.pdf},
  file = {Hammond20043D.pdf:local/Hammond20043D.pdf:PDF},
  url = {http://dx.doi.org/10.1002/ajmg.a.20665}
}

@article{Han2004Evidence,
  author = {Han, J.-D. J. and Bertin, N. and Hao, T. and Goldberg, D. S. and
	Berriz, G. F. and Zhang, L. V. and Dupuy, D. and Walhout, A. J. M.
	and Cusick, M. E. and Roth, F. P. and Vidal, M.},
  title = {Evidence for dynamically organized modularity in the yeast protein-protein
	interaction network},
  journal = {Nature},
  year = {2004},
  month = {Jul},
  volume = {430},
  number = {6995},
  pages = {88--93},
  doi = {10.1038/nature02555},
  url = {http://dx.doi.org/10.1038/nature02555},
  pii = {nature02555},
  pmid = {15190252},
  pdf = {../local/Han2004Evidence.pdf},
  file = {Han2004Evidence.pdf:Han2004Evidence.pdf:PDF},
  institution = {Center for Cancer Systems Biology and Department of Cancer Biology,
	Dana-Farber Cancer Institute, and Department of Genetics, Harvard
	Medical School, Boston, Massachusetts 02115, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  timestamp = {2011.09.27},
  abstract = {In apparently scale-free protein-protein interaction networks, or
	'interactome' networks, most proteins interact with few partners,
	whereas a small but significant proportion of proteins, the 'hubs',
	interact with many partners. Both biological and non-biological scale-free
	networks are particularly resistant to random node removal but are
	extremely sensitive to the targeted removal of hubs. A link between
	the potential scale-free topology of interactome networks and genetic
	robustness seems to exist, because knockouts of yeast genes encoding
	hubs are approximately threefold more likely to confer lethality
	than those of non-hubs. Here we investigate how hubs might contribute
	to robustness and other cellular properties for protein-protein interactions
	dynamically regulated both in time and in space. We uncovered two
	types of hub: 'party' hubs, which interact with most of their partners
	simultaneously, and 'date' hubs, which bind their different partners
	at different times or locations. Both in silico studies of network
	connectivity and genetic interactions described in vivo support a
	model of organized modularity in which date hubs organize the proteome,
	connecting biological processes--or modules--to each other, whereas
	party hubs function inside modules.}
}

@article{Han2004Predicting,
  author = {Han, L. Y. and Cai, C. Z. and Ji, Z. L. and Cao, Z. W. and Cui, J. and
	Chen, Y. Z.},
  title = {Predicting functional family of novel enzymes irrespective of sequence
	similarity: a statistical learning approach},
  journal = {Nucl. Acids Res.},
  year = {2004},
  volume = {32},
  pages = {6437--6444},
  number = {21},
  abstract = {The function of a protein that has no sequence homolog of known function
	is difficult to assign on the basis of sequence similarity. {T}he
	same problem may arise for homologous proteins of different functions
	if one is newly discovered and the other is the only known protein
	of similar sequence. {I}t is desirable to explore methods that are
	not based on sequence similarity. {O}ne approach is to assign functional
	family of a protein to provide useful hint about its function. {S}everal
	groups have employed a statistical learning method, support vector
	machines ({SVM}s), for predicting protein functional family directly
	from sequence irrespective of sequence similarity. {T}hese studies
	showed that {SVM} prediction accuracy is at a level useful for functional
	family assignment. {B}ut its capability for assignment of distantly
	related proteins and homologous proteins of different functions has
	not been critically and adequately assessed. {H}ere {SVM} is tested
	for functional family assignment of two groups of enzymes. {O}ne
	consists of 50 enzymes that have no homolog of known function from
	{PSI}-{BLAST} search of protein databases. {T}he other contains eight
	pairs of homologous enzymes of different families. {SVM} correctly
	assigns 72\% of the enzymes in the first group and 62\% of the enzyme
	pairs in the second group, suggesting that it is potentially useful
	for facilitating functional study of novel proteins. {A} web version
	of our software, {SVMP}rot, is accessible at http://jing.cz3.nus.edu.sg/cgi-bin/svmprot.cgi.},
  doi = {10.1093/nar/gkh984},
  pdf = {../local/Han2004Predicting.pdf},
  file = {Han2004Predicting.pdf:local/Han2004Predicting.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/nar/gkh984}
}

@article{Han2005Prediction,
  author = {Han, L. Y. and Cai, C. Z. and Ji, Z. L. and Chen, Y. Z.},
  title = {Prediction of functional class of novel viral proteins by a statistical
	learning method irrespective of sequence similarity},
  journal = {Virology},
  year = {2005},
  volume = {331},
  pages = {136--143},
  number = {1},
  abstract = {The function of a substantial percentage of the putative protein-coding
	open reading frames ({ORF}s) in viral genomes is unknown. {A}s their
	sequence is not similar to that of proteins of known function, the
	function of these {ORF}s cannot be assigned on the basis of sequence
	similarity. {M}ethods complement or in combination with sequence
	similarity-based approaches are being explored. {T}he web-based software
	{SVMP}rot () to some extent assigns protein functional family irrespective
	of sequence similarity and has been found to be useful for studying
	distantly related proteins [{C}ai, {C}.{Z}., {H}an, {L}.{Y}., {J}i,
	{Z}.{L}., {C}hen, {X}., {C}hen, {Y}.{Z}., 2003. {SVM}-{P}rot: web-based
	support vector machine software for functional classification of
	a protein from its primary sequence. {N}ucleic {A}cids {R}es. 31(13):
	3692-3697]. {H}ere 25 novel viral proteins are selected to test the
	capability of {SVMP}rot for functional family assignment of viral
	proteins whose function cannot be confidently predicted on by sequence
	similarity methods at present. {T}hese proteins are without a sequence
	homolog in the {S}wissprot database, with its precise function provided
	in the literature, and not included in the training sets of {SVMP}rot.
	{T}he predicted functional classes of 72\% of these proteins match
	the literature-described function, which is compared to the overall
	accuracy of 87\% for {SVMP}rot functional class assignment of 34582
	proteins. {T}his suggests that {SVMP}rot to some extent is capable
	of functional class assignment irrespective of sequence similarity
	and it is potentially useful for facilitating functional study of
	novel viral proteins.},
  doi = {10.1016/j.virol.2004.10.020},
  pdf = {../local/Han2005Prediction.pdf},
  file = {Han2005Prediction.pdf:local/Han2005Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.virol.2004.10.020}
}

@article{Han2004Prediction,
  author = {Han, L. Y. and Cai, C. Z. and Lo, S. L. and Chung, M. C. and Chen, Y. Z.},
  title = {Prediction of {RNA}-binding proteins from primary sequence by a support
	vector machine approach},
  journal = {{RNA}},
  year = {2004},
  volume = {10},
  pages = {355--368},
  number = {3},
  abstract = {Elucidation of the interaction of proteins with different molecules
	is of significance in the understanding of cellular processes. {C}omputational
	methods have been developed for the prediction of protein-protein
	interactions. {B}ut insufficient attention has been paid to the prediction
	of protein-{RNA} interactions, which play central roles in regulating
	gene expression and certain {RNA}-mediated enzymatic processes. {T}his
	work explored the use of a machine learning method, support vector
	machines ({SVM}), for the prediction of {RNA}-binding proteins directly
	from their primary sequence. {B}ased on the knowledge of known {RNA}-binding
	and non-{RNA}-binding proteins, an {SVM} system was trained to recognize
	{RNA}-binding proteins. {A} total of 4011 {RNA}-binding and 9781
	non-{RNA}-binding proteins was used to train and test the {SVM} classification
	system, and an independent set of 447 {RNA}-binding and 4881 non-{RNA}-binding
	proteins was used to evaluate the classification accuracy. {T}esting
	results using this independent evaluation set show a prediction accuracy
	of 94.1\%, 79.3\%, and 94.1\% for r{RNA}-, m{RNA}-, and t{RNA}-binding
	proteins, and 98.7\%, 96.5\%, and 99.9\% for non-r{RNA}-, non-m{RNA}-,
	and non-t{RNA}-binding proteins, respectively. {T}he {SVM} classification
	system was further tested on a small class of sn{RNA}-binding proteins
	with only 60 available sequences. {T}he prediction accuracy is 40.0\%
	and 99.9\% for sn{RNA}-binding and non-sn{RNA}-binding proteins, indicating
	a need for a sufficient number of proteins to train {SVM}. {T}he
	{SVM} classification systems trained in this work were added to our
	{W}eb-based protein functional classification software {SVMP}rot,
	at http://jing.cz3.nus.edu.sg/cgi-bin/svmprot.cgi. {O}ur study suggests
	the potential of {SVM} as a useful tool for facilitating the prediction
	of protein-{RNA} interactions.},
  pdf = {../local/Han2004Prediction.pdf},
  file = {Han2004Prediction.pdf:local/Han2004Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.rnajournal.org/cgi/content/abstract/10/3/355}
}

@article{Han2008Apoptosis,
  author = {Han, L. and Zhao, Y. and Jia, X.},
  title = {Mathematical modeling identified {c-FLIP} as an apoptotic switch in
	death receptor induced apoptosis},
  journal = {Apoptosis},
  year = {2008},
  volume = {13},
  pages = {1198--1204},
  number = {10},
  abstract = {Apoptosis is an essential process to get rid of injured or unwanted
	cells. In this study, we proposed a mathematical modeling for death
	receptor mediated apoptosis to investigate the role of c-FLIP in
	controlling the balance between apoptosis and survival. In order
	to get insight into how NF-kappa B mediated pro-survival pathway
	affects the outcome of our modeling, we implemented reduced models
	without taking such regulation into consideration. Our simulation
	revealed that c-FLIP could act as a pivotal death or life switch
	and this switch-like behavior is bistable, irreversible, and robust.
	We introduce a new term, probability apoptosis, to delineate the
	likelihood in occurrence of apoptosis events. This simulation system
	is plausible and may offer several valuable clinical indications
	for the abnormal apoptosis related disease, such as cancer.},
  keywords = {csbcbook}
}

@article{Han2005Fold,
  author = {Sangjo Han and Byung-Chul Lee and Seung Taek Yu and Chan-Seok Jeong
	and Soyoung Lee and Dongsup Kim},
  title = {Fold recognition by combining profile-profile alignment and support
	vector machine},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2667--2673},
  number = {11},
  month = jun,
  abstract = {M{OTIVATION}: {C}urrently, the most accurate fold-recognition method
	is to perform profile-profile alignments and estimate the statistical
	significances of those alignments by calculating {Z}-score or {E}-value.
	{A}lthough this scheme is reliable in recognizing relatively close
	homologs related at the family level, it has difficulty in finding
	the remote homologs that are related at the superfamily or fold level.
	{RESULTS}: {I}n this paper, we present an alternative method to estimate
	the significance of the alignments. {T}he alignment between a query
	protein and a template of length n in the fold library is transformed
	into a feature vector of length n + 1, which is then evaluated by
	support vector machine ({SVM}). {T}he output from {SVM} is converted
	to a posterior probability that a query sequence is related to a
	template, given {SVM} output. {R}esults show that a new method shows
	significantly better performance than {PSI}-{BLAST} and profile-profile
	alignment with {Z}-score scheme. {W}hile {PSI}-{BLAST} and {Z}-score
	scheme detect 16 and 20\% of superfamily-related proteins, respectively,
	at 90\% specificity, a new method detects 46\% of these proteins,
	resulting in more than 2-fold increase in sensitivity. {M}ore significantly,
	at the fold level, a new method can detect 14\% of remotely related
	proteins at 90\% specificity, a remarkable result considering the
	fact that the other methods can detect almost none at the same level
	of specificity.},
  doi = {10.1093/bioinformatics/bti384},
  pdf = {../local/Han2005Fold.pdf},
  file = {Han2005Fold.pdf:local/Han2005Fold.pdf:PDF},
  keywords = {biosvm},
  pii = {bti384},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti384}
}

@inproceedings{Han2010variance,
  author = {Han, Y. and Yu, L.},
  title = {A variance reduction framework for stable feature selection},
  booktitle = {2010 IEEE 10th International Conference on Data Mining (ICDM)},
  year = {2010},
  pages = {206--215},
  organization = {IEEE}
}

@article{Hanahan2011Hallmarks,
  author = {Hanahan, D. and Weinberg, R. A.},
  title = {Hallmarks of cancer: the next generation},
  journal = {Cell},
  year = {2011},
  volume = {144},
  pages = {646--674},
  number = {5},
  month = mar,
  abstract = {The hallmarks of cancer comprise six biological capabilities acquired
	during the multistep development of human tumors. The hallmarks constitute
	an organizing principle for rationalizing the complexities of neoplastic
	disease. They include sustaining proliferative signaling, evading
	growth suppressors, resisting cell death, enabling replicative immortality,
	inducing angiogenesis, and activating invasion and metastasis. Underlying
	these hallmarks are genome instability, which generates the genetic
	diversity that expedites their acquisition, and inflammation, which
	fosters multiple hallmark functions. Conceptual progress in the last
	decade has added two emerging hallmarks of potential generality to
	this list---reprogramming of energy metabolism and evading immune destruction.
	In addition to cancer cells, tumors exhibit another dimension of
	complexity: they contain a repertoire of recruited, ostensibly normal
	cells that contribute to the acquisition of hallmark traits by creating
	the "tumor microenvironment." Recognition of the widespread applicability
	of these concepts will increasingly affect the development of new
	means to treat human cancer.},
  doi = {10.1016/j.cell.2011.02.013},
  pdf = {../local/Hanahan2011Hallmarks.pdf},
  file = {Hanahan2011Hallmarks.pdf:Hanahan2011Hallmarks.pdf:PDF},
  institution = {Biophysics, UCSF, San Francisco, CA 94158, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pii = {S0092-8674(11)00127-9},
  pmid = {21376230},
  timestamp = {2011.04.07},
  url = {http://dx.doi.org/10.1016/j.cell.2011.02.013}
}

@article{Hanahan2000hallmarks,
  author   = {Hanahan, D. and Weinberg, R. A.},
  title    = {The hallmarks of cancer},
  journal  = {Cell},
  year     = {2000},
  volume   = {100},
  pages    = {57--70},
  doi      = {10.1016/S0092-8674(00)81683-9},
  url      = {http://dx.doi.org/10.1016/S0092-8674(00)81683-9},
  keywords = {csbcbook, csbcbook-mustread},
  owner    = {jp},
  pdf      = {../local/Hanahan2000hallmarks.pdf},
  file     = {Hanahan2000hallmarks.pdf:local/Hanahan2000hallmarks.pdf:PDF}
}

@article{Hanash2004Integrated,
  author = {Hanash, S.},
  title = {Integrated global profiling of cancer},
  journal = {Nat. Rev. Cancer},
  year = {2004},
  volume = {4},
  pages = {638--644},
  number = {8},
  abstract = {Tumours are complex biological systems. {N}o single type of molecular
	approach fully elucidates tumour behaviour, necessitating analysis
	at multiple levels encompassing genomics and proteomics. {I}ntegrated
	data sets are required to fully determine the contributions of genome
	alterations, host factors and environmental exposures to tumour growth
	and progression, as well as the consequences of interactions between
	malignant or premalignant cells and their microenvironment. {T}he
	sheer amount and heterogeneous nature of data that need to be collected
	and integrated are daunting, but effort has already begun to address
	these obstacles.},
  doi = {10.1038/nrc1414},
  pdf = {../local/Hanash2004Integrated.pdf},
  file = {Hanash2004Integrated.pdf:local/Hanash2004Integrated.pdf:PDF},
  url = {http://dx.doi.org/10.1038/nrc1414}
}

@article{Hanisch2002Co-clustering,
  author  = {Hanisch, D. and Zien, A. and Zimmer, R. and Lengauer, T.},
  title   = {Co-clustering of biological networks and gene expression data},
  journal = {Bioinformatics},
  year    = {2002},
  url     = {http://cartan.gmd.de/~hanisch/paper/CoClustering.pdf},
  annote  = {To appear},
  subject = {microarraybionet}
}

@article{Hann1999Chemoinformatics,
  author = {M. Hann and R. Green},
  title = {Chemoinformatics---a new name for an old problem?},
  journal = {Curr. Opin. Chem. Biol.},
  year = {1999},
  volume = {3},
  pages = {379--383},
  number = {4},
  month = aug,
  abstract = {Library chemistry and high-throughput screening require greater use
	of chemoinformatics to increase their effectiveness. Recent advances
	in chemoinformatics include new molecular descriptors and pharmacophore
	techniques, statistical tools and their applications. Visualisation
	methods and hardware development are also opening new opportunities.
	The advent of a chemically aware web language and cross-platform
	working is ensuring that chemoinformatics methods are becoming available
	to all chemists in a more appropriate manner. Much time will continue
	to be wasted with incompatible file types without internationally
	agreed standards.},
  doi = {10.1016/S1367-5931(99)80057-X},
  keywords = {Chemistry, Computers, Drug Design, Information Science, Software,
	10419846},
  owner = {mahe},
  pii = {S1367-5931(99)80057-X},
  pmid = {10419846},
  timestamp = {2006.09.05},
  url = {http://dx.doi.org/10.1016/S1367-5931(99)80057-X}
}

@article{Hannon2004Unlocking,
  author = {Hannon, G. J. and Rossi, J. J.},
  title = {Unlocking the potential of the human genome with {RNA} interference},
  journal = {Nature},
  year = {2004},
  volume = {431},
  pages = {371--378},
  number = {7006},
  month = sep,
  abstract = {The discovery of {RNA} interference ({RNA}i) may well be one of the
	transforming events in biology in the past decade. {RNA}i can result
	in gene silencing or even in the expulsion of sequences from the
	genome. {H}arnessed as an experimental tool, {RNA}i has revolutionized
	approaches to decoding gene function. {I}t also has the potential
	to be exploited therapeutically, and clinical trials to test this
	possibility are already being planned.},
  doi = {10.1038/nature02870},
  pdf = {../local/Hannon2004Unlocking.pdf},
  file = {Hannon2004Unlocking.pdf:local/Hannon2004Unlocking.pdf:PDF},
  keywords = {sirna},
  pii = {nature02870},
  url = {http://dx.doi.org/10.1038/nature02870}
}

@article{Hansch1964method,
  author = {C. Hansch and T. Fujita},
  title = {A method for the correlation of biological activity and chemical
	structure},
  journal = {J. Am. Chem. Soc.},
  year = {1964},
  volume = {86},
  pages = {1616--1626},
  owner = {mahe},
  timestamp = {2006.09.06}
}

@article{Hansch1968Linear,
  author = {C. Hansch and J. E. Quinlan and G. L. Lawrence},
  title = {Linear free-energy relationship between partition coefficients and
	the aqueous solubility of organic liquids},
  journal = {J. Org. Chem.},
  year = {1968},
  volume = {33},
  pages = {347--350},
  owner = {mahe},
  timestamp = {2006.09.06}
}

@article{Harborth2003Sequence,
  author = {Harborth, J. and Elbashir, S. M. and Vandenburgh, K. and Manninga,
	H. and Scaringe, S. A. and Weber, K. and Tuschl, T.},
  title = {Sequence, chemical, and structural variation of small interfering
	{RNA}s and short hairpin {RNA}s and the effect on mammalian gene
	silencing},
  journal = {Antisense Nucleic Acid Drug Dev.},
  year = {2003},
  volume = {13},
  pages = {83--105},
  number = {2},
  month = apr,
  abstract = {Small interfering {RNA}s (si{RNA}s) induce sequence-specific gene
	silencing in mammalian cells and guide m{RNA} degradation in the
	process of {RNA} interference ({RNA}i). {B}y targeting endogenous
	lamin {A}/{C} m{RNA} in human {H}e{L}a or mouse {SW}3{T}3 cells,
	we investigated the positional variation of si{RNA}-mediated gene
	silencing. {W}e find cell-type-dependent global effects and cell-type-independent
	positional effects. {H}e{L}a cells were about 2-fold more responsive
	to si{RNA}s than {SW}3{T}3 cells but displayed a very similar pattern
	of positional variation of lamin {A}/{C} silencing. {I}n {H}e{L}a
	cells, 26 of 44 tested standard 21-nucleotide (nt) si{RNA} duplexes
	reduced the protein expression by at least 90\%, and only 2 duplexes
	reduced the lamin {A}/{C} proteins to <50\%. {F}luorescent chromophores
	did not perturb gene silencing when conjugated to the 5'-end or 3'-end
	of the sense si{RNA} strand and the 5'-end of the antisense si{RNA}
	strand, but conjugation to the 3'-end of the antisense si{RNA} abolished
	gene silencing. {RN}ase-protecting phosphorothioate and 2'-fluoropyrimidine
	{RNA} backbone modifications of si{RNA}s did not significantly affect
	silencing efficiency, although cytotoxic effects were observed when
	every second phosphate of an si{RNA} duplex was replaced by phosphorothioate.
	{S}ynthetic {RNA} hairpin loops were subsequently evaluated for lamin
	{A}/{C} silencing as a function of stem length and loop composition.
	{A}s long as the 5'-end of the guide strand coincided with the 5'-end
	of the hairpin {RNA}, 19-29 base pair (bp) hairpins effectively silenced
	lamin {A}/{C}, but when the hairpin started with the 5'-end of the
	sense strand, only 21-29 bp hairpins were highly active.},
  doi = {10.1089/108729003321629638},
  keywords = {Adaptor Protein Complex alpha Subunits, Animal, Animals, Antisense,
	Apolipoproteins B, Base Sequence, Biological Transport, Blotting,
	Catalytic, Cell Line, Cell Membrane, Cell Survival, Chemical, Cholesterol,
	Clathrin, Clathrin Heavy Chains, Disease Models, Endocytosis, Epidermal
	Growth Factor, Fluorescence, Gene Expression Profiling, Gene Silencing,
	Gene Therapy, Hela Cells, Humans, Injections, Intravenous, Jejunum,
	Kinetics, Lamin Type A, Liver, Messenger, Metabolic Syndrome X, Mice,
	Microscopy, Models, Molecular Sequence Data, NIH 3T3 Cells, Non-U.S.
	Gov't, Nucleic Acid, Oligonucleotides, Open Reading Frames, Post-Transcriptional,
	Protein Isoforms, Pyrimidines, RNA, RNA Interference, RNA Processing,
	RNA Stability, Research Support, Reverse Transcriptase Polymerase
	Chain Reaction, Sensitivity and Specificity, Sequence Homology, Small
	Interfering, Subcellular Fractions, Swiss 3T3 Cells, Thionucleotides,
	Time Factors, Transfection, Transferrin, Transgenic, Tumor, Western,
	12804036},
  url = {http://dx.doi.org/10.1089/108729003321629638}
}

@phdthesis{Harchaoui2008Methodes,
  author = {Harchaoui, Z.},
  title = {M{\'e}thodes {\`a} noyaux pour la d{\'e}tection},
  school = {Telecom ParisTech},
  year = {2008},
  owner = {jp},
  timestamp = {2009.05.01}
}

@inproceedings{Harchaoui2007Image,
  author    = {Harchaoui, Z. and Bach, F.},
  title     = {Image Classification with Segmentation Graph Kernels},
  booktitle = {2007 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR 2007)},
  publisher = {IEEE Computer Society},
  year      = {2007},
  pages     = {1--8},
  doi       = {10.1109/CVPR.2007.383049},
  url       = {http://dx.doi.org/10.1109/CVPR.2007.383049},
  abstract  = {We propose a family of kernels between images, defined as kernels
	between their respective segmentation graphs. The kernels are based
	on soft matching of subtree-patterns of the respective graphs, leveraging
	the natural structure of images while remaining robust to the associated
	segmentation process uncertainty. Indeed, output from morphological
	segmentation is often represented by a labelled graph, each vertex
	corresponding to a segmented region, with edges joining neighboring
	regions. However, such image representations have mostly remained
	underused for learning tasks, partly because of the observed instability
	of the segmentation process and the inherent hardness of inexact
	graph matching with uncertain graphs. Our kernels count common virtual
	substructures amongst images, which enables to perform efficient
	supervised classification of natural images with a support vector
	machine. Moreover, the kernel machinery allows us to take advantage
	of recent advances in kernel-based learning: (i) semi-supervised
	learning reduces the required number of labelled images, while (ii)
	multiple kernel learning algorithms efficiently select the most relevant
	similarity measures between images within our family.},
  keywords  = {image},
  timestamp = {2008.07.29},
  pdf       = {../local/Harchaoui2007Image.pdf},
  file      = {Harchaoui2007Image.pdf:local/Harchaoui2007Image.pdf:PDF}
}

@incollection{Harchaoui2008Catching,
  author = {Harchaoui, Z. and Levy-Leduc, C.},
  title = {Catching Change-points with Lasso},
  booktitle = {Adv. Neural. Inform. Process Syst.},
  publisher = {MIT Press},
  year = {2008},
  editor = {J. C. Platt and D. Koller and Y. Singer and S. Roweis},
  volume = {20},
  pages = {617--624},
  address = {Cambridge, MA}
}

@article{Harchaoui2010Multiple,
  author    = {Harchaoui, Z. and Levy-Leduc, C.},
  title     = {Multiple Change-Point Estimation With a Total Variation Penalty},
  journal   = {J. Am. Stat. Assoc.},
  year      = {2010},
  volume    = {105},
  number    = {492},
  pages     = {1480--1493},
  doi       = {10.1198/jasa.2010.tm09181},
  url       = {http://dx.doi.org/10.1198/jasa.2010.tm09181},
  owner     = {jp},
  timestamp = {2012.10.03},
  pdf       = {../local/Harchaoui2010Multiple.pdf},
  file      = {Harchaoui2010Multiple.pdf:Harchaoui2010Multiple.pdf:PDF}
}

@inproceedings{Harchaoui2009regularized,
  author    = {Harchaoui, Z. and Vallet, F. and Lung-Yut-Fong, A. and Cappe, O.},
  title     = {A regularized kernel-based approach to unsupervised audio segmentation},
  booktitle = {ICASSP '09: Proceedings of the 2009 IEEE International Conference on Acoustics, Speech and Signal Processing},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  year      = {2009},
  pages     = {1665--1668},
  doi       = {10.1109/ICASSP.2009.4959921},
  url       = {http://dx.doi.org/10.1109/ICASSP.2009.4959921},
  keywords  = {segmentation},
  owner     = {jp},
  timestamp = {2010.06.04},
  pdf       = {../local/Harchaoui2009regularized.pdf},
  file      = {Harchaoui2009regularized.pdf:Harchaoui2009regularized.pdf:PDF}
}

@article{Harismendy2009Evaluation,
  author = {Harismendy, O. and Ng, P. C. and Strausberg, R. L. and Wang, X. and
	Stockwell, T. B. and Beeson, K. Y. and Schork, N. J. and Murray,
	S. S. and Topol, E. J. and Levy, S. and Frazer, K. A.},
  title = {Evaluation of next generation sequencing platforms for population
	targeted sequencing studies},
  journal = {Genome Biol.},
  year = {2009},
  volume = {10},
  pages = {R32},
  number = {3},
  abstract = {Next generation sequencing (NGS) platforms are currently being utilized
	for targeted sequencing of candidate genes or genomic intervals to
	perform sequence-based association studies. To evaluate these platforms
	for this application, we analyzed human sequence generated by the
	Roche 454, Illumina GA, and the ABI SOLiD technologies for the same
	260 kb in four individuals. Local sequence characteristics contribute
	to systematic variability in sequence coverage (>100-fold difference
	in per-base coverage), resulting in patterns for each NGS technology
	that are highly correlated between samples. A comparison of the base
	calls to 88 kb of overlapping ABI 3730xL Sanger sequence generated
	for the same samples showed that the NGS platforms all have high
	sensitivity, identifying >95\% of variant sites. At high coverage,
	depth base calling errors are systematic, resulting from local sequence
	contexts; as the coverage is lowered additional 'random sampling'
	errors in base calling occur. Our study provides important insights
	into systematic biases and data variability that need to be considered
	when utilizing NGS platforms for population targeted sequencing studies.},
  doi = {10.1186/gb-2009-10-3-r32},
  pdf = {../local/Harismendy2009Evaluation.pdf},
  file = {Harismendy2009Evaluation.pdf:Harismendy2009Evaluation.pdf:PDF},
  institution = {Scripps Genomic Medicine, Scripps Translational Science Institute,
	The Scripps Research Institute, La Jolla, CA 92037, USA. oharis@scripps.edu},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gb-2009-10-3-r32},
  pmid = {19327155},
  timestamp = {2011.10.28},
  url = {http://dx.doi.org/10.1186/gb-2009-10-3-r32}
}

@article{Harris2008Single-molecule,
  author = {Timothy D Harris and Phillip R Buzby and Hazen Babcock and Eric Beer
	and Jayson Bowers and Ido Braslavsky and Marie Causey and Jennifer
	Colonell and James Dimeo and J. William Efcavitch and Eldar Giladi
	and Jaime Gill and John Healy and Mirna Jarosz and Dan Lapen and
	Keith Moulton and Stephen R Quake and Kathleen Steinmann and Edward
	Thayer and Anastasia Tyurina and Rebecca Ward and Howard Weiss and
	Zheng Xie},
  title = {Single-molecule {DNA} sequencing of a viral genome},
  journal = {Science},
  year = {2008},
  volume = {320},
  pages = {106--109},
  number = {5872},
  month = apr,
  abstract = {The full promise of human genomics will be realized only when the
	genomes of thousands of individuals can be sequenced for comparative
	analysis. A reference sequence enables the use of short read length.
	We report an amplification-free method for determining the nucleotide
	sequence of more than 280,000 individual DNA molecules simultaneously.
	A DNA polymerase adds labeled nucleotides to surface-immobilized
	primer-template duplexes in stepwise fashion, and the asynchronous
	growth of individual DNA molecules was monitored by fluorescence
	imaging. Read lengths of >25 bases and equivalent phred software
	program quality scores approaching 30 were achieved. We used this
	method to sequence the M13 virus to an average depth of >150x and
	with 100\% coverage; thus, we resequenced the M13 genome with high-sensitivity
	mutation detection. This demonstrates a strategy for high-throughput
	low-cost resequencing.},
  doi = {10.1126/science.1150427},
  institution = {Helicos BioSciences Corporation, One Kendall Square, Cambridge, MA
	02139, USA. tharris@helicosbio.com},
  keywords = {Algorithms; Bacteriophage M13; Computational Biology; DNA Primers;
	DNA, Viral; Genome, Viral; Mutation; Sequence Alignment; Sequence
	Analysis, DNA; Software; Templates, Genetic},
  owner = {phupe},
  pii = {320/5872/106},
  pmid = {18388294},
  timestamp = {2010.08.24},
  url = {http://dx.doi.org/10.1126/science.1150427}
}

@inproceedings{Hartemink2002Using,
  author = {A. J. Hartemink and D. K. Gifford and T. S. Jaakkola and R. A. Young},
  title = {Using graphical models and genomic expression data to statistically
	validate models of genetic regulatory networks},
  booktitle = {Proceedings of the Pacific Symposium on Biocomputing 2002},
  year = {2002},
  editor = {Russ B. Altman and A. Keith Dunker and Lawrence Hunter and Kevin
	Lauderdale and Teri E. Klein},
  pages = {422--433},
  publisher = {World Scientific},
  pdf = {../local/Hartemink2002Using.pdf},
  file = {Hartemink2002Using.pdf:local/Hartemink2002Using.pdf:PDF},
  url = {http://helix-web.stanford.edu/psb01/abstracts/p422.html}
}

@book{Hartigan1975Clustering,
  title = {Clustering Algorithms},
  publisher = {Wiley},
  year = {1975},
  author = {Hartigan, J.},
  address = {New York},
  owner = {jp},
  timestamp = {2011.12.29}
}

@article{Hartigan1987Estimation,
  author = {Hartigan, J. A.},
  title = {Estimation of a convex density contour in two dimensions},
  journal = {J. Am. Stat. Assoc.},
  year = {1987},
  volume = {82},
  pages = {267--270},
  number = {397},
  pdf = {../local/Hartigan1987Estimation.pdf},
  file = {Hartigan1987Estimation.pdf:local/Hartigan1987Estimation.pdf:PDF},
  url = {http://links.jstor.org/sici?sici=0162-1459%28198703%2982%3A397%3C267%3AEOACDC%3E2.0.CO%3B2-G}
}

@article{Hartigan1979A,
  author    = {Hartigan, J. A. and Wong, M. A.},
  title     = {A {K}-Means Clustering Algorithm},
  journal   = {Applied Statistics},
  year      = {1979},
  volume    = {28},
  pages     = {100--108},
  key       = {Hartigan/Wong:79},
  entrydate = {20030618}
}

@article{Hartwell1999a,
  author = {L. H. Hartwell and J. J. Hopfield and S. Leibler and A. W. Murray},
  title = {From molecular to modular cell biology},
  journal = {Nature},
  year = {1999},
  volume = {402},
  pages = {C47--C52},
  number = {6761 Suppl},
  month = dec,
  abstract = {Cellular functions, such as signal transmission, are carried out by
	'modules' made up of many species of interacting molecules. Understanding
	how modules work has depended on combining phenomenological analysis
	with molecular studies. General principles that govern the structure
	and behaviour of modules may be discovered with help from synthetic
	sciences such as engineering and computer science, from stronger
	interactions between experiment and theory in cell biology, and from
	an appreciation of evolutionary constraints.},
  doi = {10.1038/35011540},
  institution = {Fred Hutchinson Cancer Center, Seattle, Washington 98109, USA.},
  keywords = {Action Potentials; Biological Evolution; Forecasting; Models, Biological;
	Molecular Biology, trends},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pmid = {10591225},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/35011540}
}

@book{Hastie1999Generalized,
  author    = {Hastie, T. and Tibshirani, R.},
  title     = {Generalized Additive Models},
  publisher = {Chapman and Hall},
  address   = {London, UK},
  year      = {1999}
}

@book{Hastie2001elements,
  title = {The Elements of Statistical Learning: Data Mining, Inference, and
	Prediction},
  publisher = {Springer},
  year = {2001},
  author = {Hastie, T. and Tibshirani, R. and Friedman, J.}
}

@misc{Haury2010Increasing,
  author = {Haury, A.-C. and Jacob, L. and Vert, J.-P.},
  title = {Increasing stability and interpretability of gene expression signatures},
  howpublished = {arXiv preprint arXiv:1001.3109},
  year = {2010},
  eprint = {1001.3109},
  archiveprefix = {arXiv},
  url = {http://arxiv.org/abs/1001.3109}
}

@article{Haury2012TIGRESS,
  author = {Haury, A.-C. and Mordelet, F. and Vera-Licona, P. and Vert, J.-P.},
  title = {{TIGRESS}: trustful inference of gene regulation using stability selection},
  journal = {arXiv preprint arXiv:1205.1181},
  year = {2012},
  archiveprefix = {arXiv},
  eprint = {1205.1181},
  url = {http://arxiv.org/abs/1205.1181}
}

@article{Haury2011influence,
  author      = {Haury, A.-C. and Gestraud, P. and Vert, J.-P.},
  title       = {The influence of feature selection methods on accuracy, stability
	and interpretability of molecular signatures.},
  journal     = {PLoS One},
  volume      = {6},
  number      = {12},
  pages       = {e28210},
  year        = {2011},
  abstract    = {Biomarker discovery from high-dimensional data is a crucial problem
	with enormous applications in biology and medicine. It is also extremely
	challenging from a statistical viewpoint, but surprisingly few studies
	have investigated the relative strengths and weaknesses of the plethora
	of existing feature selection methods. In this study we compare 32
	feature selection methods on 4 public gene expression datasets for
	breast cancer prognosis, in terms of predictive performance, stability
	and functional interpretability of the signatures they produce. We
	observe that the feature selection method has a significant influence
	on the accuracy, stability and interpretability of signatures. Surprisingly,
	complex wrapper and embedded methods generally do not outperform
	simple univariate feature selection methods, and ensemble feature
	selection has generally no positive effect. Overall a simple Student's
	t-test seems to provide the best results.},
  doi         = {10.1371/journal.pone.0028210},
  pdf         = {../local/Haury2011influence.pdf},
  file        = {Haury2011influence.pdf:Haury2011influence.pdf:PDF},
  institution = {Mines ParisTech, Centre for Computational Biology, Fontainebleau,
	France. anne-claire.haury@mines-paristech.fr},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {PONE-D-11-13151},
  pmid        = {22205940},
  timestamp   = {2012.03.04},
  url         = {http://dx.doi.org/10.1371/journal.pone.0028210}
}

@inproceedings{Haury2010stability,
  author = {Haury, A.-C. and Vert, J.-P.},
  title = {On the stability and interpretability of prognosis signatures in
	breast cancer},
  booktitle = {Proceedings of the Fourth International Workshop on Machine Learning
	in Systems Biology (MLSB10)},
  year = {2010},
  note = {To appear.},
  owner = {jp},
  timestamp = {2010.10.12}
}

@techreport{Haussler1999Convolution,
  author      = {Haussler, D.},
  title       = {Convolution {K}ernels on {D}iscrete {S}tructures},
  institution = {UC Santa Cruz},
  number      = {UCSC-CRL-99-10},
  year        = {1999},
  abstract    = {We introduce a new method of constructing kernels on sets whose elements
	are discrete structures like strings, trees and graphs. {T}he method
	can be applied iteratively to build a kernel on a infinite set from
	kernels involving generators of the set. {T}he family of kernels
	generated generalizes the family of radial basis kernels. {I}t can
	also be used to define kernels in the form of joint {G}ibbs probability
	distributions. {K}ernels can be built from hidden {M}arkov random
	fields, generalized regular expressions, pair-{HMM}s, or {ANOVA}
	decompositions. {U}ses of the method lead to open problems involving
	the theory of infinitely divisible positive definite functions. {F}undamentals
	of this theory and the theory of reproducing kernel {H}ilbert spaces
	are reviewed and applied in establishing the validity of the method.},
  pdf         = {../local/Haussler1999Convolution.pdf},
  file        = {Haussler1999Convolution.pdf:local/Haussler1999Convolution.pdf:PDF},
  keywords    = {biosvm},
  subject     = {kernel}
}

@article{Haussler1997general,
  author = {Haussler, D.},
  title = {A general minimax result for relative entropy},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1997},
  volume = {43},
  pages = {1276--1280},
  number = {4},
  month = {Jul},
  abstract = {Suppose nature picks a probability measure $P_\theta$ on a complete
	separable metric space $X$ at random from a measurable set
	$P_\Theta = \{P_\theta : \theta \in \Theta\}$.
	{T}hen, without knowing $\theta$, a statistician picks a measure
	$Q$ on $S$. {F}inally, the statistician suffers a loss $D(P_\theta \| Q)$,
	the relative entropy between $P_\theta$ and $Q$. {W}e show that
	the minimax and maximin values of this game are always equal, and
	there is always a minimax strategy in the closure of the set of all
	{B}ayes strategies. {T}his generalizes previous results of {G}allager (1979),
	and {D}avisson and {L}eon-{G}arcia (1980).},
  pdf = {../local/Haussler1997general.pdf},
  file = {Haussler1997general.pdf:local/Haussler1997general.pdf:PDF},
  owner = {vert}
}

@article{Haverkamp2000potential,
  author    = {Haverkamp, W. and Breithardt, G. and Camm, A. J. and Janse, M. J.
	and Rosen, M. R. and Antzelevitch, C. and Escande, D. and Franz,
	M. and Malik, M. and Moss, A. and Shah, R.},
  title     = {{T}he potential for {QT} prolongation and proarrhythmia by non-antiarrhythmic
	drugs: clinical and regulatory implications. {R}eport on a policy
	conference of the {E}uropean {S}ociety of {C}ardiology.},
  journal   = {Eur. Heart J.},
  volume    = {21},
  number    = {15},
  pages     = {1216--1231},
  month     = {Aug},
  year      = {2000},
  doi       = {10.1053/euhj.2000.2249},
  keywords  = {herg},
  pii       = {S0195668X00922498},
  pmid      = {10924311},
  timestamp = {2006.10.05},
  url       = {http://dx.doi.org/10.1053/euhj.2000.2249}
}

@article{Hawkins1997Analysis,
  author = {Hawkins, D. M. and Young, S. S. and Rusinko, A.},
  title = {Analysis of a large structure-activity data set using recursive partitioning},
  journal = {Quantitative Structure-Activity Relationships},
  year = {1997},
  volume = {16},
  pages = {296--302},
  owner = {mahe},
  timestamp = {2006.09.06}
}

@article{He2000Alternating,
  author = {He, B. S. and Yang, H. and Wang, S. L.},
  title = {Alternating direction method with self-adaptive penalty parameters
	for monotone variational inequalities},
  journal = {Journal of Optimization Theory and Applications},
  year = {2000},
  volume = {106},
  pages = {337--356},
  number = {2},
  publisher = {Springer}
}

@article{He2006Why,
  author      = {He, X. and Zhang, J.},
  title       = {Why do hubs tend to be essential in protein networks?},
  journal     = {PLoS Genet},
  volume      = {2},
  number      = {6},
  pages       = {e88},
  month       = {Jun},
  year        = {2006},
  abstract    = {The protein-protein interaction (PPI) network has a small number of
	highly connected protein nodes (known as hubs) and many poorly connected
	nodes. Genome-wide studies show that deletion of a hub protein is
	more likely to be lethal than deletion of a non-hub protein, a phenomenon
	known as the centrality-lethality rule. This rule is widely believed
	to reflect the special importance of hubs in organizing the network,
	which in turn suggests the biological significance of network architectures,
	a key notion of systems biology. Despite the popularity of this explanation,
	the underlying cause of the centrality-lethality rule has never been
	critically examined. We here propose the concept of essential PPIs,
	which are PPIs that are indispensable for the survival or reproduction
	of an organism. Our network analysis suggests that the centrality-lethality
	rule is unrelated to the network architecture, but is explained by
	the simple fact that hubs have large numbers of PPIs, therefore high
	probabilities of engaging in essential PPIs. We estimate that approximately
	3\% of PPIs are essential in the yeast, accounting for approximately
	43\% of essential genes. As expected, essential PPIs are evolutionarily
	more conserved than nonessential PPIs. Considering the role of essential
	PPIs in determining gene essentiality, we find the yeast PPI network
	functionally more robust than random networks, yet far less robust
	than the potential optimum. These and other findings provide new
	perspectives on the biological relevance of network structure and
	robustness.},
  doi         = {10.1371/journal.pgen.0020088},
  institution = {Department of Ecology and Evolutionary Biology, University of Michigan,
	Ann Arbor, Michigan, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {Andrei Zinovyev},
  pmid        = {16751849},
  timestamp   = {2011.04.07},
  url         = {http://dx.doi.org/10.1371/journal.pgen.0020088}
}

@article{He2010Stable,
  author = {He, Z. and Yu, W.},
  title = {Stable feature selection for biomarker discovery},
  journal = {arXiv preprint arXiv:1001.0887},
  year = {2010},
  archiveprefix = {arXiv},
  eprint = {1001.0887},
  url = {http://arxiv.org/abs/1001.0887}
}

@incollection{Heckerman1999tutorial,
  author = {Heckerman, D.},
  title = {A tutorial on learning with {B}ayesian networks},
  booktitle = {Learning in Graphical Models},
  publisher = {MIT Press},
  year = {1999},
  editor = {Jordan, M.},
  pages = {301--354},
  address = {Cambridge, MA, USA},
  pdf = {../local/Heckerman1999tutorial.pdf},
  file = {Heckerman1999tutorial.pdf:local/Heckerman1999tutorial.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  timestamp = {2006.01.18}
}

@article{Heckerman2000Dependency,
  author  = {Heckerman, D. and Chickering, D. M. and Meek, C. and Rounthwaite,
	R. and Kadie, C.},
  title   = {Dependency Networks for Inference, Collaborative Filtering, and Data
	Visualization},
  journal = {J. Mach. Learn. Res.},
  volume  = {1},
  pages   = {49--75},
  year    = {2000}
}

@article{Heckerman2007Leveraging,
  author = {Heckerman, D. and Kadie, C. and Listgarten, J.},
  title = {Leveraging information across {HLA} alleles/supertypes improves epitope
	prediction.},
  journal = {J. Comput. Biol.},
  year = {2007},
  volume = {14},
  pages = {736--746},
  number = {6},
  abstract = {We present a model for predicting HLA class I restricted CTL epitopes.
	In contrast to almost all other work in this area, we train a single
	model on epitopes from all HLA alleles and supertypes, yet retain
	the ability to make epitope predictions for specific HLA alleles.
	We are therefore able to leverage data across all HLA alleles and/or
	their supertypes, automatically learning what information should
	be shared and also how to combine allele-specific, supertype-specific,
	and global information in a principled way. We show that this leveraging
	can improve prediction of epitopes having HLA alleles with known
	supertypes, and dramatically increases our ability to predict epitopes
	having alleles which do not fall into any of the known supertypes.
	Our model, which is based on logistic regression, is simple to implement
	and understand, is solved by finding a single global maximum, and
	is more accurate (to our knowledge) than any other model.},
  doi = {10.1089/cmb.2007.R013},
  owner = {vert},
  pmid = {17691891},
  timestamp = {2008.10.29},
  url = {http://dx.doi.org/10.1089/cmb.2007.R013}
}

@book{Hedges1985Statistical,
  title = {Statistical Methods for Meta-Analysis},
  publisher = {Academic Press},
  year = {1985},
  author = {Hedges, L. V. and Olkin, I.},
  owner = {jp},
  timestamp = {2011.09.21}
}

@article{Heiner2004Biosystems,
  author = {Heiner, M. and Koch, I. and Will, J.},
  title = {Model validation of biological pathways using {Petri} nets---demonstrated
	for apoptosis},
  journal = {Biosystems},
  year = {2004},
  volume = {75},
  pages = {15--28},
  number = {1--3},
  abstract = {This paper demonstrates the first steps of a new integrating methodology
	to develop and analyse models of biological pathways in a systematic
	manner using well established Petri net technologies. The whole approach
	comprises step-wise modelling, animation, model validation as well
	as qualitative and quantitative analysis for behaviour prediction.
	In this paper, the first phase is addressed how to develop and validate
	a qualitative model, which might be extended afterwards to a quantitative
	model. The example used in this paper is devoted to apoptosis, the
	genetically programmed cell death. Apoptosis is an essential part
	of normal physiology for most metazoan species. Disturbances in the
	apoptotic process could lead to several diseases. The signal transduction
	pathway of apoptosis includes highly complex mechanisms to control
	and execute programmed cell death. This paper explains how to model
	and validate this pathway using qualitative Petri nets. The results
	provide a mathematically unique and valid model enabling the confirmation
	of known properties as well as new insights in this pathway.},
  keywords = {csbcbook}
}

@inproceedings{Helden2001Application,
  author    = {van Helden, J. and Gilbert, D. and Wernisch, L. and Schroeder, M.
	and Wodak, S. J.},
  title     = {Application of Regulatory Sequence Analysis and Metabolic Network
	Analysis to the Interpretation of Gene Expression Data},
  booktitle = {JOBIM '00: Selected papers from the First International Conference
	on Computational Biology, Biology, Informatics, and Mathematics},
  pages     = {147--164},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  year      = {2001},
  timestamp = {2006.11.21}
}

@article{Helma2004Data,
  author = {Helma, C. and Cramer, T. and Kramer, S. and De Raedt, L.},
  title = {Data mining and machine learning techniques for the identification
	of mutagenicity inducing substructures and structure activity relationships
	of noncongeneric compounds},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2004},
  volume = {44},
  pages = {1402--1411},
  number = {4},
  abstract = {This paper explores the utility of data mining and machine learning
	algorithms for the induction of mutagenicity structure-activity relationships
	({SAR}s) from noncongeneric data sets. {W}e compare (i) a newly developed
	algorithm ({MOLFEA}) for the generation of descriptors (molecular
	fragments) for noncongeneric compounds with traditional {SAR} approaches
	(molecular properties) and (ii) different machine learning algorithms
	for the induction of {SAR}s from these descriptors. {I}n addition
	we investigate the optimal parameter settings for these programs
	and give an exemplary interpretation of the derived models. {T}he
	predictive accuracies of models using {MOLFEA} derived descriptors
	is approximately 10-15\%age points higher than those using molecular
	properties alone. {U}sing both types of descriptors together does
	not improve the derived models. {F}rom the applied machine learning
	techniques the rule learner {PART} and support vector machines gave
	the best results, although the differences between the learning algorithms
	are only marginal. {W}e were able to achieve predictive accuracies
	up to 78\% for 10-fold cross-validation. {T}he resulting models are
	relatively easy to interpret and usable for predictive as well as
	for explanatory purposes.},
  doi = {10.1021/ci034254q},
  pdf = {../local/Helma2004Data.pdf},
  file = {Helma2004Data.pdf:local/Helma2004Data.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci034254q}
}

@article{Helmbold1997Predicting,
  author  = {Helmbold, D. P. and Schapire, R. E.},
  title   = {Predicting {N}early {A}s {W}ell {A}s the {B}est {P}runing of a {D}ecision
	{T}ree},
  journal = {Machine {L}earning},
  volume  = {27},
  number  = {1},
  pages   = {51--68},
  year    = {1997},
  pdf     = {../local/helm97.pdf},
  file    = {helm97.pdf:local/helm97.pdf:PDF},
  subject = {ml},
  url     = {http://www.research.att.com/~schapire/papers/HelmboldSc95.ps.Z}
}

@incollection{Hendrix2005Phosphodiesterase,
  author = {Martin Hendrix and Christopher Kallus},
  title = {Phosphodiesterase Inhibitors: A Chemogenomic View},
  booktitle = {Chemogenomics in Drug Discovery},
  publisher = {Wiley-VCH},
  year = {2005},
  chapter = {9},
  pages = {243--288}
}

@article{Henikoff1992Amino,
  author    = {Henikoff, S. and Henikoff, J. G.},
  title     = {Amino acid substitution matrices from protein blocks.},
  journal   = {Proc. Natl. Acad. Sci. USA},
  volume    = {89},
  number    = {22},
  pages     = {10915--10919},
  month     = {Nov},
  year      = {1992},
  abstract  = {Methods for alignment of protein sequences typically measure similarity
	by using a substitution matrix with scores for all possible exchanges
	of one amino acid with another. The most widely used matrices are
	based on the Dayhoff model of evolutionary rates. Using a different
	approach, we have derived substitution matrices from about 2000 blocks
	of aligned sequence segments characterizing more than 500 groups
	of related proteins. This led to marked improvements in alignments
	and in searches using queries from each of the groups.},
  keywords  = {Algorithms; Amino Acid Sequence; Animals; Caenorhabditis elegans;
	Drosophila; Lod Score; Mathematics; Molecular Sequence Data; Probability;
	Proteins; Sequence Homology, Amino Acid; Software},
  owner     = {laurent},
  pmid      = {1438297},
  timestamp = {2008.01.15}
}

@article{Hershkovits1997On,
  author = {Hershkovits, Y. and Ziv, J.},
  title = {On fixed-database universal data compression with limited memory},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1997},
  volume = {43},
  pages = {1966--1976},
  number = {6},
  month = {Nov},
  abstract = {The amount of fixed side information required for lossless data compression
	is discussed. {N}onasymptotic coding and converse theorems are derived
	for data-compression algorithms with fixed statistical side information
	(``training sequence'') that is not large enough so as to yield the
	ultimate compression, namely, the entropy of the source.},
  pdf = {../local/Hershkovits1997On.pdf},
  file = {Hershkovits1997On.pdf:local/Hershkovits1997On.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Hert2004Comparison,
  author = {Hert, J. and Willett, P. and Wilton, D. J. and Acklin, P. and Azzaoui,
	K. and Jacoby, E. and Schuffenhauer, A.},
  title = {Comparison of fingerprint-based methods for virtual screening using
	multiple bioactive reference structures.},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2004},
  volume = {44},
  pages = {1177--1185},
  number = {3},
  abstract = {Fingerprint-based similarity searching is widely used for virtual
	screening when only a single bioactive reference structure is available.
	This paper reviews three distinct ways of carrying out such searches
	when multiple bioactive reference structures are available: merging
	the individual fingerprints into a single combined fingerprint; applying
	data fusion to the similarity rankings resulting from individual
	similarity searches; and approximations to substructural analysis.
	Extended searches on the MDL Drug Data Report database suggest that
	fusing similarity scores is the most effective general approach,
	with the best individual results coming from the binary kernel discrimination
	technique.},
  comment = {Slides available at http://cisrg.shef.ac.uk/shef2004/talks/PWillett.pdf},
  doi = {10.1021/ci034231b},
  institution = {Krebs Institute for Biomolecular Research and Department of Information
	Studies, University of Sheffield, Western Bank, Sheffield S10 2TN,
	UK.},
  keywords = {chemoinformatics, PUlearning},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {15154787},
  timestamp = {2010.04.01},
  url = {http://dx.doi.org/10.1021/ci034231b}
}

@article{Hertz2007Identifying,
  author = {Tomer Hertz and Chen Yanover},
  title = {Identifying {HLA} supertypes by learning distance functions.},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {e148--e155},
  number = {2},
  month = {Jan},
  abstract = {MOTIVATION: The development of epitope-based vaccines crucially relies
	on the ability to classify Human Leukocyte Antigen (HLA) molecules
	into sets that have similar peptide binding specificities, termed
	supertypes. In their seminal work, Sette and Sidney defined nine
	HLA class I supertypes and claimed that these provide an almost perfect
	coverage of the entire repertoire of HLA class I molecules. HLA alleles
	are highly polymorphic and polygenic and therefore experimentally
	classifying each of these molecules to supertypes is at present an
	impossible task. Recently, a number of computational methods have
	been proposed for this task. These methods are based on defining
	protein similarity measures, derived from analysis of binding peptides
	or from analysis of the proteins themselves. RESULTS: In this paper
	we define both peptide derived and protein derived similarity measures,
	which are based on learning distance functions. The peptide derived
	measure is defined using a peptide-peptide distance function, which
	is learned using information about known binding and non-binding
	peptides. The protein derived similarity measure is defined using
	a protein-protein distance function, which is learned using information
	about alleles previously classified to supertypes by Sette and Sidney
	(1999). We compare the classification obtained by these two complimentary
	methods to previously suggested classification methods. In general,
	our results are in excellent agreement with the classifications suggested
	by Sette and Sidney (1999) and with those reported by Buus et al.
	(2004). The main important advantage of our proposed distance-based
	approach is that it makes use of two different and important immunological
	sources of information-HLA alleles and peptides that are known to
	bind or not bind to these alleles. Since each of our distance measures
	is trained using a different source of information, their combination
	can provide a more confident classification of alleles to supertypes.},
  doi = {10.1093/Bioinformatics/btl324},
  owner = {laurent},
  pii = {23/2/e148},
  pmid = {17237084},
  timestamp = {2007.01.27},
  url = {http://dx.doi.org/10.1093/Bioinformatics/btl324}
}

@article{Hertz2006PepDist,
  author    = {Hertz, T. and Yanover, C.},
  title     = {{P}ep{D}ist: a new framework for protein-peptide binding prediction
	based on learning peptide distance functions.},
  journal   = {BMC Bioinformatics},
  volume    = {7 Suppl 1},
  pages     = {S3},
  year      = {2006},
  abstract  = {BACKGROUND: Many different aspects of cellular signalling, trafficking
	and targeting mechanisms are mediated by interactions between proteins
	and peptides. Representative examples are MHC-peptide complexes in
	the immune system. Developing computational methods for protein-peptide
	binding prediction is therefore an important task with applications
	to vaccine and drug design. METHODS: Previous learning approaches
	address the binding prediction problem using traditional margin based
	binary classifiers. In this paper we propose PepDist: a novel approach
	for predicting binding affinity. Our approach is based on learning
	peptide-peptide distance functions. Moreover, we suggest to learn
	a single peptide-peptide distance function over an entire family
	of proteins (e.g. MHC class I). This distance function can be used
	to compute the affinity of a novel peptide to any of the proteins
	in the given family. In order to learn these peptide-peptide distance
	functions, we formalize the problem as a semi-supervised learning
	problem with partial information in the form of equivalence constraints.
	Specifically, we propose to use DistBoost, which is a semi-supervised
	distance learning algorithm. RESULTS: We compare our method to various
	state-of-the-art binding prediction algorithms on MHC class I and
	MHC class II datasets. In almost all cases, our method outperforms
	all of its competitors. One of the major advantages of our novel
	approach is that it can also learn an affinity function over proteins
	for which only small amounts of labeled peptides exist. In these
	cases, our method's performance gain, when compared to other computational
	methods, is even more pronounced. We have recently uploaded the PepDist
	webserver which provides binding prediction of peptides to 35 different
	MHC class I alleles. The webserver which can be found at http://www.pepdist.cs.huji.ac.il
	is powered by a prediction engine which was trained using the framework
	presented in this paper. CONCLUSION: The results obtained suggest
	that learning a single distance function over an entire family of
	proteins achieves higher prediction accuracy than learning a set
	of binary classifiers for each of the proteins separately. We also
	show the importance of obtaining information on experimentally determined
	non-binders. Learning with real non-binders generalizes better than
	learning with randomly generated peptides that are assumed to be
	non-binders. This suggests that information about non-binding peptides
	should also be published and made publicly available.},
  doi       = {10.1186/1471-2105-7-S1-S3},
  keywords  = {immunoinformatics},
  pii       = {1471-2105-7-S1-S3},
  pmid      = {16723006},
  timestamp = {2007.01.25},
  url       = {http://dx.doi.org/10.1186/1471-2105-7-S1-S3}
}

@inproceedings{Heskes2000Empirical,
  author = {Tom Heskes},
  title = {Empirical {Bayes} for Learning to Learn},
  booktitle = {ICML '00: Proceedings of the Seventeenth International Conference
	on Machine Learning},
  year = {2000},
  pages = {367--374},
  address = {San Francisco, CA, USA},
  publisher = {Morgan Kaufmann Publishers Inc.},
  isbn = {1-55860-707-2}
}

@article{Hess2006Pharmacogenomic,
  author = {Hess, K. R. and Anderson, K. and Symmans, W. F. and Valero, V. and
	Ibrahim, N. and Mejia, J. A. and Booser, D. and Theriault, R. L.
	and Buzdar, A. U. and Dempsey, P. J. and Rouzier, R. and Sneige,
	N. and Ross, J. S. and Vidaurre, T. and G\'omez, H. L. and Hortobagyi,
	G. N. and Pusztai, L.},
  title = {Pharmacogenomic predictor of sensitivity to preoperative chemotherapy
	with paclitaxel and fluorouracil, doxorubicin, and cyclophosphamide
	in breast cancer.},
  journal = {J Clin Oncol},
  year = {2006},
  volume = {24},
  pages = {4236--4244},
  number = {26},
  month = {Sep},
  abstract = {We developed a multigene predictor of pathologic complete response
	(pCR) to preoperative weekly paclitaxel and fluorouracil-doxorubicin-cyclophosphamide
	(T/FAC) chemotherapy and assessed its predictive accuracy on independent
	cases. One hundred thirty-three patients with stage I-III breast cancer
	were included. Pretreatment gene expression profiling was performed
	with oligonucleotide microarrays on fine-needle aspiration specimens.
	We developed predictors of pCR from 82 cases and assessed accuracy
	on 51 independent cases. Overall pCR rate was 26\% in both cohorts.
	In the training set, 56 probes were identified as differentially
	expressed between pCR versus residual disease, at a false discovery
	rate of 1\%. We examined the performance of 780 distinct classifiers
	(set of genes + prediction algorithm) in full cross-validation. Many
	predictors performed equally well. A nominally best 30-probe set
	Diagonal Linear Discriminant Analysis classifier was selected for
	independent validation. It showed significantly higher sensitivity
	(92\% v 61\%) than a clinical predictor including age, grade, and
	estrogen receptor status. The negative predictive value (96\% v 86\%)
	and area under the curve (0.877 v 0.811) were nominally better but
	not statistically significant. The combination of genomic and clinical
	information yielded a predictor not significantly different from
	the genomic predictor alone. In 31 samples, RNA was hybridized in
	replicate with resulting predictions that were 97\% concordant. A
	30-probe set pharmacogenomic predictor predicted pCR to T/FAC chemotherapy
	with high sensitivity and negative predictive value. This test correctly
	identified all but one of the patients who achieved pCR (12 of 13
	patients) and all but one of those who were predicted to have residual
	disease had residual cancer (27 of 28 patients).},
  doi = {10.1200/JCO.2006.05.6861},
  pdf = {../local/Hess2006Pharmacogenomic.pdf},
  file = {Hess2006Pharmacogenomic.pdf:Hess2006Pharmacogenomic.pdf:PDF},
  institution = {Department of Biostatistics and Applied Mathematics, The University
	of Texas M.D. Anderson Cancer Center, Houston, TX 77230-1439, USA.},
  keywords = {breastcancer},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {JCO.2006.05.6861},
  pmid = {16896004},
  timestamp = {2011.11.18},
  url = {http://dx.doi.org/10.1200/JCO.2006.05.6861}
}

@article{Hill2006G-protein-coupled,
  author    = {Hill, S. J.},
  title     = {{G}-protein-coupled receptors: past, present and future.},
  journal   = {Br. J. Pharmacol.},
  volume    = {147 Suppl 1},
  pages     = {S27--S37},
  month     = {Jan},
  year      = {2006},
  abstract  = {The G-protein-coupled receptor (GPCR) family represents the largest
	and most versatile group of cell surface receptors. Drugs active
	at these receptors have therapeutic actions across a wide range of
	human diseases ranging from allergic rhinitis to pain, hypertension
	and schizophrenia. This review provides a brief historical overview
	of the properties and signalling characteristics of this important
	family of receptors.},
  doi       = {10.1038/sj.bjp.0706455},
  keywords  = {chemogenomics},
  owner     = {laurent},
  pii       = {0706455},
  pmid      = {16402114},
  timestamp = {2008.01.16},
  url       = {http://dx.doi.org/10.1038/sj.bjp.0706455}
}

@article{Hindsgaul1999Carbohydrate,
  author = {Hindsgaul, O.},
  title = {Carbohydrate chemistry. {S}ugars out in the open.},
  journal = {Nature},
  year = {1999},
  volume = {399},
  pages = {644--645},
  number = {6737},
  month = {Jun},
  doi = {10.1038/21335},
  pdf = {../local/Hindsgaul1999Carbohydrate.pdf},
  file = {Hindsgaul1999Carbohydrate.pdf:local/Hindsgaul1999Carbohydrate.pdf:PDF},
  keywords = {glycans},
  url = {http://dx.doi.org/10.1038/21335}
}

@article{Hinton2006Unsupervised,
  author      = {Hinton, Geoffrey and Osindero, Simon and Welling, Max and Teh, Yee-Whye},
  title       = {Unsupervised discovery of nonlinear structure using contrastive backpropagation.},
  journal     = {Cogn Sci},
  volume      = {30},
  number      = {4},
  pages       = {725--731},
  month       = {Jul},
  year        = {2006},
  abstract    = {We describe a way of modeling high-dimensional data vectors by using
	an unsupervised, nonlinear, multilayer neural network in which the
	activity of each neuron-like unit makes an additive contribution
	to a global energy score that indicates how surprised the network
	is by the data vector. The connection weights that determine how
	the activity of each unit depends on the activities in earlier layers
	are learned by minimizing the energy assigned to data vectors that
	are actually observed and maximizing the energy assigned to "confabulations"
	that are generated by perturbing an observed data vector in a direction
	that decreases its energy under the current model.},
  doi         = {10.1207/s15516709cog0000_76},
  institution = {Department of Computer Science, University of Toronto.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pmid        = {21702832},
  timestamp   = {2013.03.29},
  url         = {http://dx.doi.org/10.1207/s15516709cog0000_76}
}

@article{Hinton2006fast,
  author      = {Hinton, G. E. and Osindero, S. and Teh, Y.-W.},
  title       = {A fast learning algorithm for deep belief nets.},
  journal     = {Neural Comput},
  volume      = {18},
  number      = {7},
  pages       = {1527--1554},
  month       = {Jul},
  year        = {2006},
  abstract    = {We show how to use "complementary priors" to eliminate the explaining-away
	effects that make inference difficult in densely connected belief
	nets that have many hidden layers. Using complementary priors, we
	derive a fast, greedy algorithm that can learn deep, directed belief
	networks one layer at a time, provided the top two layers form an
	undirected associative memory. The fast, greedy algorithm is used
	to initialize a slower learning procedure that fine-tunes the weights
	using a contrastive version of the wake-sleep algorithm. After fine-tuning,
	a network with three hidden layers forms a very good generative model
	of the joint distribution of handwritten digit images and their labels.
	This generative model gives better digit classification than the
	best discriminative learning algorithms. The low-dimensional manifolds
	on which the digits lie are modeled by long ravines in the free-energy
	landscape of the top-level associative memory, and it is easy to
	explore these ravines by using the directed connections to display
	what the associative memory has in mind.},
  doi         = {10.1162/neco.2006.18.7.1527},
  institution = {Department of Computer Science, University of Toronto, Canada. hinton@cs.toronto.edu},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pmid        = {16764513},
  timestamp   = {2013.03.29},
  url         = {http://dx.doi.org/10.1162/neco.2006.18.7.1527}
}

@article{Hizukuri2004Extraction,
  author = {Yoshiyuki Hizukuri and Yoshihiro Yamanishi and Kosuke Hashimoto and
	Minoru Kanehisa},
  title = {Extraction of species-specific glycan substructures.},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2004},
  volume = {15},
  pages = {69--81},
  number = {1},
  abstract = {Glycans, which are carbohydrate sugar chains attached to some lipids
	or proteins, have a huge variety of structures and play a key role
	in cell communication, protein interaction and immunity. {T}he availability
	of a number of glycan structures stored in the {KEGG}/{GLYCAN} database
	makes it possible for us to conduct a large-scale comparative research
	of glycans. {I}n this paper, we present a novel approach to compare
	glycan structures and extract characteristic glycan substructures
	of certain organisms. {I}n the algorithm we developed a new similarity
	measure of glycan structures taking into account of several biological
	aspects of glycan synthesis and glycosyltransferases, and we confirmed
	the validity of our similarity measure by conducting experiments
	on its ability to classify glycans between organisms in the framework
	of a support vector machine. {F}inally, our method successfully extracted
	a set of candidates of substructrues which are characteristic to
	human, rat, mouse, bovine, pig, chicken, yeast, wheat and sycamore,
	respectively. {W}e confirmed that the characteristic substructures
	extracted by our method correspond to the substructures which are
	known as the species-specific sugar chain of gamma-glutamyltranspeptidases
	in the kidney.},
  pdf = {../local/Hizukuri2004Extraction.pdf},
  file = {Hizukuri2004Extraction.pdf:local/Hizukuri2004Extraction.pdf:PDF},
  keywords = {Amino Acid Sequence, Animals, Carbohydrate Conformation, Carbohydrate
	Sequence, Cattle, Computer Simulation, Databases, Genes, Histocompatibility
	Antigens Class I, Humans, Least-Squares Analysis, MHC Class I, Major
	Histocompatibility Complex, Mice, Monosaccharides, Non-U.S. Gov't,
	Peptides, Phylogeny, Plants, Polysaccharides, Protein, Rats, Research
	Support, Saccharomyces cerevisiae, Species Specificity},
  pmid = {15712111},
  url = {http://www.jsbi.org/journal/IBSB04/IBSB04F018.html}
}

@article{Hizukuri2005Extraction,
  author = {Hizukuri, Y. and Yamanishi, Y. and Nakamura, O. and Yagi, F. and
	Goto, S. and Kanehisa, M.},
  title = {Extraction of leukemia specific glycan motifs in humans by computational
	glycomics.},
  journal = {Carbohydr. {R}es.},
  year = {2005},
  volume = {340},
  pages = {2270--2278},
  number = {14},
  month = {Oct},
  abstract = {There have been almost no standard methods for conducting computational
	analyses on glycan structures in comparison to {DNA} and proteins.
	{I}n this paper, we present a novel method for extracting functional
	motifs from glycan structures using the {KEGG}/{GLYCAN} database.
	{F}irst, we developed a new similarity measure for comparing glycan
	structures taking into account the characteristic mechanisms of glycan
	biosynthesis, and we tested its ability to classify glycans of different
	blood components in the framework of support vector machines ({SVM}s).
	{T}he results show that our method can successfully classify glycans
	from four types of human blood components: leukemic cells, erythrocyte,
	serum, and plasma. {N}ext, we extracted characteristic functional
	motifs of glycans considered to be specific to each blood component.
	{W}e predicted the substructure alpha-d-{N}eup5{A}c-(2-->3)-beta-d-{G}alp-(1-->4)-d-{G}lcp{NA}c
	as a leukemia specific glycan motif. {B}ased on the fact that the
	{A}grocybe cylindracea galectin ({ACG}) specifically binds to the
	same substructure, we conducted an experiment using cell agglutination
	assay and confirmed that this fungal lectin specifically recognized
	human leukemic cells.},
  doi = {10.1016/j.carres.2005.07.012},
  keywords = {glycans},
  pii = {S0008-6215(05)00355-1},
  url = {http://dx.doi.org/10.1016/j.carres.2005.07.012}
}

@article{Ho2002Systematic,
  author = {Yuen Ho and Albrecht Gruhler and Adrian Heilbut and Gary D Bader
	and Lynda Moore and Sally-Lin Adams and Anna Millar and Paul Taylor
	and Keiryn Bennett and Kelly Boutilier and Lingyun Yang and Cheryl
	Wolting and Ian Donaldson and S{\o}ren Schandorff and Juanita Shewnarane
	and Mai Vo and Joanne Taggart and Marilyn Goudreault and Brenda Muskat
	and Cris Alfarano and Danielle Dewar and Zhen Lin and Katerina Michalickova
	and Andrew R Willems and Holly Sassi and Peter A Nielsen and Karina
	J Rasmussen and Jens R Andersen and Lene E Johansen and Lykke H Hansen
	and Hans Jespersen and Alexandre Podtelejnikov and Eva Nielsen and
	Janne Crawford and Vibeke Poulsen and Birgitte D S{\o}rensen and Jesper
	Matthiesen and Ronald C Hendrickson and Frank Gleeson and Tony Pawson
	and Michael F Moran and Daniel Durocher and Matthias Mann and Christopher
	W V Hogue and Daniel Figeys and Mike Tyers},
  title = {Systematic identification of protein complexes in {S}accharomyces
	cerevisiae by mass spectrometry.},
  journal = {Nature},
  year = {2002},
  volume = {415},
  pages = {180--183},
  number = {6868},
  month = {Jan},
  abstract = {The recent abundance of genome sequence data has brought an urgent
	need for systematic proteomics to decipher the encoded protein networks
	that dictate cellular function. {T}o date, generation of large-scale
	protein-protein interaction maps has relied on the yeast two-hybrid
	system, which detects binary interactions through activation of reporter
	gene expression. {W}ith the advent of ultrasensitive mass spectrometric
	protein identification methods, it is feasible to identify directly
	protein complexes on a proteome-wide scale. {H}ere we report, using
	the budding yeast {S}accharomyces cerevisiae as a test case, an example
	of this approach, which we term high-throughput mass spectrometric
	protein complex identification ({HMS}-{PCI}). {B}eginning with 10\%
	of predicted yeast proteins as baits, we detected 3,617 associated
	proteins covering 25\% of the yeast proteome. {N}umerous protein
	complexes were identified, including many new interactions in various
	signalling pathways and in the {DNA} damage response. {C}omparison
	of the {HMS}-{PCI} data set with interactions reported in the literature
	revealed an average threefold higher success rate in detection of
	known complexes compared with large-scale two-hybrid studies. {G}iven
	the high degree of connectivity observed in this study, even partial
	{HMS}-{PCI} coverage of complex proteomes, including that of humans,
	should allow comprehensive identification of cellular networks.},
  doi = {10.1038/415180a},
  pdf = {../local/ho02.pdf},
  file = {ho02.pdf:local/ho02.pdf:PDF},
  keywords = {Affinity Labels, Amino Acid Sequence, Animals, Cell Cycle Proteins,
	Cloning, Comparative Study, DNA, DNA Damage, DNA Repair, Electrospray
	Ionization, Fungal, Genetic, Humans, Macromolecular Substances, Mass,
	Mitosis, Molecular, Molecular Sequence Data, Non-P.H.S., Non-U.S.
	Gov't, P.H.S., Phosphoric Monoester Hydrolases, Protein Binding,
	Protein Interaction Mapping, Protein Kinases, Proteome, Proteomics,
	Research Support, Ribonucleoproteins, Ribosomes, Saccharomyces cerevisiae,
	Saccharomyces cerevisiae Proteins, Sequence Alignment, Signal Transduction,
	Spectrometry, Spectrum Analysis, Transcription, U.S. Gov't},
  owner = {vert},
  pii = {415180a},
  pmid = {11805813},
  url = {http://dx.doi.org/10.1038/415180a}
}

@inproceedings{Hoang08Moses,
  author               = {Hoang, H. and Koehn, P.},
  title                = {Design of the {Moses} Decoder for Statistical Machine Translation},
  booktitle            = {ACL 2008 Software workshop},
  publisher            = {ACL},
  address              = {Columbus, Ohio},
  year                 = {2008},
  month                = {June},
  pages                = {58--65},
  url                  = {http://www.aclweb.org/anthology/W/W08/W08-0510},
  keywords             = {moses, smt},
  citeulike-article-id = {2905903},
  posted-at            = {2008-06-18 18:53:32},
  priority             = {0}
}

@incollection{Hochreiter2004Gene,
  author = {Hochreiter, S. and Obermayer, K.},
  title = {Gene selection for microarray data},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  pages = {319--355},
  pdf = {../local/heterogeneous.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/heterogeneous.pdf:PDF},
  file = {heterogeneous.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/heterogeneous.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@article{Hockstein2004Diagnosis,
  author = {Neil G Hockstein and Erica R Thaler and Drew Torigian and Wallace
	T Miller and Olivia Deffenderfer and C. William Hanson},
  title = {Diagnosis of pneumonia with an electronic nose: correlation of vapor
	signature with chest computed tomography scan findings.},
  journal = {Laryngoscope},
  year = {2004},
  volume = {114},
  pages = {1701--1705},
  number = {10},
  month = {Oct},
  abstract = {O{BJECTIVES}/{HYPOTHESIS}: {T}he electronic nose is a sensor of volatile
	molecules that is useful in the analysis of expired gases. {T}he
	device is well suited to testing the breath of patients receiving
	mechanical ventilation and is a potential diagnostic adjunct that
	can aid in the detection of patients with ventilator-associated pneumonia.
	{STUDY} {DESIGN}: {A} prospective study. {METHODS}: {W}e performed
	a prospective study of patients receiving mechanical ventilation
	in a surgical intensive care unit who underwent chest computed tomography
	({CT}) scanning. {A} single attending radiologist reviewed the chest
	{CT} scans, and imaging features were recorded on a standardized
	form. {W}ithin 48 hours of chest {CT} scan, five sets of exhaled
	gas were sampled from the expiratory limb of the ventilator circuit.
	{T}he gases were assayed with a commercially available electronic
	nose. {B}oth linear and nonlinear analyses were performed to identify
	correlations between imaging features and the assayed gas signatures.
	{RESULTS}: {T}wenty-five patients were identified, 13 of whom were
	diagnosed with pneumonia by {CT} scan. {S}upport vector machine analysis
	was performed in two separate analyses. {I}n the first analysis,
	in which a training set was identical to a prediction set, the accuracy
	of prediction results was greater than 91.6\%. {I}n the second analysis,
	in which the training set and the prediction set were different,
	the accuracy of prediction results was at least 80\%, with higher
	accuracy depending on the specific parameters and models being used.
	{CONCLUSION}: {T}he electronic nose is a new technology that continues
	to show promise as a potential diagnostic adjunct in the diagnosis
	of pneumonia and other infectious diseases.},
  pii = {00005537-200410000-00005}
}

@techreport{Hoefling2009path,
  author = {Hoefling, H.},
  title = {A path algorithm for the {Fused} {Lasso} {Signal} {Approximator}},
  institution = {arXiv},
  year = {2009},
  number = {0910.0526v1},
  month = {Oct.},
  archiveprefix = {arXiv},
  eprint = {0910.0526v1},
  pdf = {../local/Hoefling2009path.pdf},
  file = {Hoefling2009path.pdf:Hoefling2009path.pdf:PDF},
  keywords = {segmentation},
  owner = {jp},
  timestamp = {2010.05.31}
}

@article{Hoerl1962Application,
  author = {A. E. Hoerl},
  title = {Application of ridge regression analysis to regression problems},
  journal = {Chemical Engineering Progress},
  year = {1962},
  volume = {58},
  pages = {54--59}
}

@article{Hoerl1982Citation,
  author = {A. E. Hoerl and R. W. Kennard},
  title = {Citation Classic -- Ridge regression: biased estimation for nonorthogonal
	problems},
  journal = {CC/Eng. Tech. Appl. Sci.},
  year = {1982},
  volume = {35},
  pages = {18}
}

@article{Hoerl1970Ridge,
  author = {A. E. Hoerl and R. W. Kennard},
  title = {Ridge regression: biased estimation for nonorthogonal problems},
  journal = {Technometrics},
  year = {1970},
  volume = {12},
  pages = {55--67},
  number = {1}
}

@article{Hoffmann2010new,
  author = {Hoffmann, Brice and Zaslavskiy, Mikhail and Vert, Jean-Philippe and
	Stoven, Veronique},
  title = {A new protein binding pocket similarity measure based on comparison
	of clouds of atoms in {3D}: application to ligand prediction},
  journal = {BMC Bioinformatics},
  year = {2010},
  volume = {11},
  pages = {99},
  number = {1},
  abstract = {BACKGROUND: Predicting which molecules can bind to a given binding
	site of a protein with known 3D structure is important to decipher
	the protein function, and useful in drug design. A classical assumption
	in structural biology is that proteins with similar 3D structures
	have related molecular functions, and therefore may bind similar
	ligands. However, proteins that do not display any overall sequence
	or structure similarity may also bind similar ligands if they contain
	similar binding sites. Quantitatively assessing the similarity between
	binding sites may therefore be useful to propose new ligands for
	a given pocket, based on those known for similar pockets. RESULTS: We
	propose a new method to quantify the similarity between binding pockets,
	and explore its relevance for ligand prediction. We represent each
	pocket by a cloud of atoms, and assess the similarity between two
	pockets by aligning their atoms in the 3D space and comparing the
	resulting configurations with a convolution kernel. Pocket alignment
	and comparison is possible even when the corresponding proteins share
	no sequence or overall structure similarities. In order to predict
	ligands for a given target pocket, we compare it to an ensemble of
	pockets with known ligands to identify the most similar pockets.
	We discuss two criteria to evaluate the performance of a binding
	pocket similarity measure in the context of ligand prediction, namely,
	area under ROC curve (AUC scores) and classification based scores.
	We show that the latter is better suited to evaluate the methods
	with respect to ligand prediction, and demonstrate the relevance
	of our new binding site similarity compared to existing similarity
	measures. CONCLUSIONS: This study demonstrates the relevance of the
	proposed method to identify ligands binding to known binding pockets.
	We also provide a new benchmark for future work in this field. The
	new method and the benchmark are available at http://cbio.ensmp.fr/paris},
  doi = {10.1186/1471-2105-11-99},
  issn = {1471-2105},
  pmid = {20175916},
  url = {http://www.biomedcentral.com/1471-2105/11/99}
}

@article{Hofmann2005Concept-based,
  author = {Oliver Hofmann and Dietmar Schomburg},
  title = {Concept-based annotation of enzyme classes.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2059--2066},
  number = {9},
  month = {May},
  abstract = {M{OTIVATION}: {G}iven the explosive growth of biomedical data as well
	as the literature describing results and findings, it is getting
	increasingly difficult to keep up to date with new information. {K}eeping
	databases synchronized with current knowledge is a time-consuming
	and expensive task-one which can be alleviated by automatically gathering
	findings from the literature using linguistic approaches. {W}e describe
	a method to automatically annotate enzyme classes with disease-related
	information extracted from the biomedical literature for inclusion
	in such a database. {RESULTS}: {E}nzyme names for the 3901 enzyme
	classes in the {BRENDA} database, a repository for quantitative and
	qualitative enzyme information, were identified in more than 100,000
	abstracts retrieved from the {P}ub{M}ed literature database. {P}hrases
	in the abstracts were assigned to concepts from the {U}nified {M}edical
	{L}anguage {S}ystem ({UMLS}) utilizing the {M}eta{M}ap program, allowing
	for the identification of disease-related concepts by their semantic
	fields in the {UMLS} ontology. {A}ssignments between enzyme classes
	and diseases were created based on their co-occurrence within a single
	sentence. {F}alse positives could be removed by a variety of filters
	including minimum number of co-occurrences, removal of sentences
	containing a negation and the classification of sentences based on
	their semantic fields by a {S}upport {V}ector {M}achine. {V}erification
	of the assignments with a manually annotated set of 1500 sentences
	yielded favorable results of 92\% precision at 50\% recall, sufficient
	for inclusion in a high-quality database. {AVAILABILITY}: {S}ource
	code is available from the author upon request. {SUPPLEMENTARY} {INFORMATION}:
	ftp.uni-koeln.de/institute/biochemie/pub/brenda/info/disease{S}upp.pdf.},
  doi = {10.1093/bioinformatics/bti284},
  pdf = {../local/Hofmann2005Concept-based.pdf},
  file = {Hofmann2005Concept-based.pdf:local/Hofmann2005Concept-based.pdf:PDF},
  keywords = {biosvm},
  pii = {bti284},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti284}
}

@article{Hoheisel2006Microarray,
  author      = {Hoheisel, J. D.},
  title       = {Microarray technology: beyond transcript profiling and genotype analysis},
  journal     = {Nat Rev Genet},
  year        = {2006},
  month       = {Mar},
  volume      = {7},
  number      = {3},
  pages       = {200--210},
  doi         = {10.1038/nrg1809},
  url         = {http://dx.doi.org/10.1038/nrg1809},
  pii         = {nrg1809},
  pmid        = {16485019},
  pdf         = {../local/Hoheisel2006Microarray.pdf},
  file        = {Hoheisel2006Microarray.pdf:Hoheisel2006Microarray.pdf:PDF},
  keywords    = {csbcbook, csbcbook-ch2},
  institution = {Division of Functional Genome Analysis, Deutsches Krebsforschungszentrum,
	Im Neuenheimer Feld 580, 69120 Heidelberg, Germany. J.Hoheisel@dkfz.de},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2009.10.18},
  abstract    = {Understanding complex functional mechanisms requires the global and
	parallel analysis of different cellular processes. DNA microarrays
	have become synonymous with this kind of study and, in many cases,
	are the obvious platform to achieve this aim. They have already made
	important contributions, most notably to gene-expression studies,
	although the true potential of this technology is far greater. Whereas
	some assays, such as transcript profiling and genotyping, are becoming
	routine, others are still in the early phases of development, and
	new areas of application, such as genome-wide epigenetic analysis
	and on-chip synthesis, continue to emerge.}
}

@article{Hohlbein2010Surfing,
  author      = {Johannes Hohlbein and Kristofer Gryte and Mike Heilemann and Achillefs
	N Kapanidis},
  title       = {Surfing on a new wave of single-molecule fluorescence methods.},
  journal     = {Phys Biol},
  year        = {2010},
  volume      = {7},
  number      = {3},
  pages       = {031001},
  doi         = {10.1088/1478-3975/7/3/031001},
  url         = {http://dx.doi.org/10.1088/1478-3975/7/3/031001},
  pii         = {S1478-3975(10)53547-7},
  pmid        = {20686191},
  institution = {Department of Physics, Biological Physics Research Group, University
	of Oxford, Oxford, UK. j.hohlbein1@physics.ox.ac.uk},
  owner       = {phupe},
  timestamp   = {2010.08.20},
  abstract    = {Single-molecule fluorescence microscopy is currently one of the most
	popular methods in the single-molecule toolbox. In this review, we
	discuss recent advances in fluorescence instrumentation and assays:
	these methods are characterized by a substantial increase in complexity
	of the instrumentation or biological samples involved. Specifically,
	we describe new multi-laser and multi-colour fluorescence spectroscopy
	and imaging techniques, super-resolution microscopy imaging and the
	development of instruments that combine fluorescence detection with
	other single-molecule methods such as force spectroscopy. We also
	highlight two pivotal developments in basic and applied biosciences:
	the new information available from detection of single molecules
	in single biological cells and exciting developments in fluorescence-based
	single-molecule DNA sequencing.}
}

@article{Holen2002Positional,
  author    = {Holen, T. and Amarzguioui, M. and Wiiger, M. T. and Babaie, E. and
	Prydz, H.},
  title     = {{P}ositional effects of short interfering {RNA}s targeting the human
	coagulation trigger {T}issue {F}actor.},
  journal   = {Nucleic Acids Res.},
  year      = {2002},
  month     = {Apr},
  volume    = {30},
  number    = {8},
  pages     = {1757--1766},
  pmid      = {11937629},
  keywords  = {sirna},
  owner     = {vert},
  timestamp = {2006.03.28},
  abstract  = {Chemically synthesised 21-23 bp double-stranded short interfering
	RNAs (siRNA) can induce sequence-specific post-transcriptional gene
	silencing, in a process termed RNA interference (RNAi). In the present
	study, several siRNAs synthesised against different sites on the
	same target mRNA (human Tissue Factor) demonstrated striking differences
	in silencing efficiency. Only a few of the siRNAs resulted in a significant
	reduction in expression, suggesting that accessible siRNA target
	sites may be rare in some human mRNAs. Blocking of the 3'-OH with
	FITC did not reduce the effect on target mRNA. Mutations in the siRNAs
	relative to target mRNA sequence gradually reduced, but did not abolish
	mRNA depletion. Inactive siRNAs competed reversibly with active siRNAs
	in a sequence-independent manner. Several lines of evidence suggest
	the existence of a near equilibrium kinetic balance between mRNA
	production and siRNA-mediated mRNA depletion. The silencing effect
	was transient, with the level of mRNA recovering fully within 4-5
	days, suggesting absence of a propagative system for RNAi in humans.
	Finally, we observed 3' mRNA cleavage fragments resulting from the
	action of the most effective siRNAs. The depletion rate-dependent
	appearance of these fragments argues for the existence of a two-step
	mRNA degradation mechanism.}
}

@inproceedings{Holford2005Visual,
  author = {Holford, N.},
  title = {{VPC}, the visual predictive check -- superiority to standard diagnostic
	({R}orschach) plots},
  booktitle = {PAGE 14 (http://www.page-meeting.org/?abstract=738)},
  year = {2005},
  owner = {kb},
  timestamp = {2011.04.18},
  url = {http://www.page-meeting.org/?abstract=738}
}

@article{Holliday1997Using,
  author    = {J. D. Holliday and P. Willett},
  title     = {Using a genetic algorithm to identify common structural features
	in sets of ligands.},
  journal   = {J. {M}ol. {G}raph. {M}odel.},
  year      = {1997},
  month     = {Aug},
  volume    = {15},
  number    = {4},
  pages     = {221--232},
  pii       = {S1093326397000806},
  pmid      = {9524931},
  keywords  = {chemoinformatics},
  owner     = {mahe},
  timestamp = {2006.02.03},
  abstract  = {This article describes a program for pharmacophore mapping, called
	{MPHIL} ({M}apping {P}harmacophores in {L}igands). {G}iven as input
	a set of molecules that exhibit some common biological activity,
	{MPHIL} identifies the smallest 3{D} pattern of pharmacophore points
	that has at least m (a user-defined parameter) points in common with
	each of the input molecules. {T}he program thus differs from existing
	programs for pharmacophore mapping in that it does not require all
	of the molecules to share exactly the same pattern of points, although
	it will find such a common pattern if it does, indeed, exist. {MPHIL}
	uses a genetic algorithm ({GA}) approach in which an initial, and
	very rapid, {GA} is used to suggest possible combinations of points
	that are then processed by the second {GA} to yield the final 3{D}
	pattern.}
}

@article{Holm1979simple,
  author    = {Holm, S.},
  title     = {A simple sequentially rejective multiple test procedure},
  journal   = {Scandinavian Journal of Statistics},
  year      = {1979},
  volume    = {6},
  number    = {2},
  pages     = {65--70},
  timestamp = {2012.03.07},
  owner     = {jp}
}

@phdthesis{Holst1996Topological,
  author  = {van der Holst, H.},
  title   = {Topological and spectral graph characterizations},
  year    = {1996},
  school  = {Universiteit van Amsterdam},
  subject = {net}
}

@article{Homouz20133D,
  author = {Homouz, D. and Kudlicki, A. S.},
  title = {The {3D} Organization of the Yeast Genome Correlates with Co-Expression
	and Reflects Functional Relations between Genes},
  journal = {PLoS ONE},
  year = {2013},
  volume = {8},
  pages = {e54699},
  number = {1},
  month = {Jan},
  abstract = {The spatial organization of eukaryotic genomes is thought to play
	an important role in regulating gene expression. The recent advances
	in experimental methods including chromatin capture techniques, as
	well as the large amounts of accumulated gene expression data allow
	studying the relationship between spatial organization of the genome
	and co-expression of protein-coding genes. To analyse this genome-wide
	relationship at a single gene resolution, we combined the interchromosomal
	DNA contacts in the yeast genome measured by Duan et al. with a comprehensive
	collection of 1,496 gene expression datasets. We find significant
	enhancement of co-expression among genes with contact links. The
	co-expression is most prominent when two gene loci fall within 1,000
	base pairs from the observed contact. We also demonstrate an enrichment
	of inter-chromosomal links between functionally related genes, which
	suggests that the non random nature of the genome organization serves
	to facilitate coordinated transcription in groups of genes.},
  doi = {10.1371/journal.pone.0054699},
  pdf = {../local/Homouz20133D.pdf},
  file = {Homouz20133D.pdf:Homouz20133D.pdf:PDF},
  keywords = {hic, ngs},
  owner = {nelle},
  publisher = {Public Library of Science},
  timestamp = {2013.03.30},
  url = {http://dx.doi.org/10.1371/journal.pone.0054699}
}

@article{Honeyman1998Neural,
  author    = {Honeyman, M. C. and Brusic, V. and Stone, N. L. and Harrison, L.
	C.},
  title     = {{N}eural network-based prediction of candidate {T}-cell epitopes.},
  journal   = {Nat. Biotechnol.},
  year      = {1998},
  month     = {Oct},
  volume    = {16},
  number    = {10},
  pages     = {966--969},
  doi       = {10.1038/nbt1098-966},
  url       = {http://dx.doi.org/10.1038/nbt1098-966},
  pmid      = {9788355},
  keywords  = {immunoinformatics},
  timestamp = {2007.01.25},
  abstract  = {Activation of T cells requires recognition by T-cell receptors of
	specific peptides bound to major histocompatibility complex (MHC)
	molecules on the surface of either antigen-presenting or target cells.
	These peptides, T-cell epitopes, have potential therapeutic applications,
	such as for use as vaccines. Their identification, however, usually
	requires that multiple overlapping synthetic peptides encompassing
	a protein antigen be assayed, which in humans, is limited by volume
	of donor blood. T-cell epitopes are a subset of peptides that bind
	to MHC molecules. We use an artificial neural network (ANN) model
	trained to predict peptides that bind to the MHC class II molecule
	HLA-DR4(*0401). Binding prediction facilitates identification of
	T-cell epitopes in tyrosine phosphatase IA-2, an autoantigen in DR4-associated
	type1 diabetes. Synthetic peptides encompassing IA-2 were tested
	experimentally for DR4 binding and T-cell proliferation in humans
	at risk for diabetes. ANN-based binding prediction was sensitive
	and specific, and reduced the number of peptides required for T-cell
	assay by more than half, with only a minor loss of epitopes. This
	strategy could expedite identification of candidate T-cell epitopes
	in diverse diseases.}
}

@article{Hood2004Systems,
  author      = {Hood, L. and Heath, J. R. and Phelps, M. E. and Lin, B.},
  title       = {Systems biology and new technologies enable predictive and preventative
	medicine},
  journal     = {Science},
  year        = {2004},
  month       = {Oct},
  volume      = {306},
  number      = {5696},
  pages       = {640--643},
  doi         = {10.1126/science.1104635},
  url         = {http://dx.doi.org/10.1126/science.1104635},
  pii         = {306/5696/640},
  pmid        = {15499008},
  institution = {Institute for Systems Biology, Seattle, WA, USA. lhood@systemsbiology.org},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2011.01.21},
  abstract    = {Systems approaches to disease are grounded in the idea that disease-perturbed
	protein and gene regulatory networks differ from their normal counterparts;
	we have been pursuing the possibility that these differences may
	be reflected by multiparameter measurements of the blood. Such concepts
	are transforming current diagnostic and therapeutic approaches to
	medicine and, together with new technologies, will enable a predictive
	and preventive medicine that will lead to personalized medicine.}
}

@article{Hopkins2002druggable,
  author    = {Hopkins, A. L. and Groom, C. R.},
  title     = {The druggable genome},
  journal   = {Nat. Rev. Drug Discov.},
  year      = {2002},
  month     = {Sep},
  volume    = {1},
  number    = {9},
  pages     = {727--730},
  doi       = {10.1038/nrd892},
  url       = {http://dx.doi.org/10.1038/nrd892},
  pii       = {nrd892},
  pmid      = {12209152},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2007.09.22},
  abstract  = {An assessment of the number of molecular targets that represent an
	opportunity for therapeutic intervention is crucial to the development
	of post-genomic research strategies within the pharmaceutical industry.
	Now that we know the size of the human genome, it is interesting
	to consider just how many molecular targets this opportunity represents.
	We start from the position that we understand the properties that
	are required for a good drug, and therefore must be able to understand
	what makes a good drug target.}
}

@article{Hormozdiari2009Combinatorial,
  author      = {Fereydoun Hormozdiari and Can Alkan and Evan E Eichler and S. Cenk
	Sahinalp},
  title       = {Combinatorial algorithms for structural variation detection in high-throughput
	sequenced genomes.},
  journal     = {Genome Res.},
  year        = {2009},
  month       = {Jul},
  volume      = {19},
  number      = {7},
  pages       = {1270--1278},
  doi         = {10.1101/gr.088633.108},
  url         = {http://dx.doi.org/10.1101/gr.088633.108},
  pii         = {gr.088633.108},
  pmid        = {19447966},
  pdf         = {../local/Hormozdiari2009Combinatorial.pdf},
  file        = {Hormozdiari2009Combinatorial.pdf:Hormozdiari2009Combinatorial.pdf:PDF},
  keywords    = {ngs},
  institution = {School of Computing Science, Simon Fraser University, Burnaby, British
	Columbia, Canada V5A 1S6.},
  owner       = {jp},
  timestamp   = {2009.10.09},
  abstract    = {Recent studies show that along with single nucleotide polymorphisms
	and small indels, larger structural variants among human individuals
	are common. The Human Genome Structural Variation Project aims to
	identify and classify deletions, insertions, and inversions (>5 Kbp)
	in a small number of normal individuals with a fosmid-based paired-end
	sequencing approach using traditional sequencing technologies. The
	realization of new ultra-high-throughput sequencing platforms now
	makes it feasible to detect the full spectrum of genomic variation
	among many individual genomes, including cancer patients and others
	suffering from diseases of genomic origin. Unfortunately, existing
	algorithms for identifying structural variation (SV) among individuals
	have not been designed to handle the short read lengths and the errors
	implied by the "next-gen" sequencing (NGS) technologies. In this
	paper, we give combinatorial formulations for the SV detection between
	a reference genome sequence and a next-gen-based, paired-end, whole
	genome shotgun-sequenced individual. We describe efficient algorithms
	for each of the formulations we give, which all turn out to be fast
	and quite reliable; they are also applicable to all next-gen sequencing
	methods (Illumina, 454 Life Sciences [Roche], ABI SOLiD, etc.) and
	traditional capillary sequencing technology. We apply our algorithms
	to identify SV among individual genomes very recently sequenced by
	Illumina technology.}
}

@article{Horn2004Dynamic,
  author = {David Horn and Gideon Dror and Brigitte Quenet},
  title = {Dynamic proximity of spatio-temporal sequences.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2004},
  volume = {15},
  pages = {1002--1008},
  number = {5},
  month = sep,
  abstract = {Recurrent networks can generate spatio-temporal neural sequences of
	very large cycles, having an apparent random behavior. {N}onetheless
	a proximity measure between these sequences may be defined through
	comparison of the synaptic weight matrices that generate them. {F}ollowing
	the dynamic neural filter ({DNF}) formalism we demonstrate this concept
	by comparing teacher and student recurrent networks of binary neurons.
	{W}e show that large sequences, providing a training set well exceeding
	the {C}over limit, allow for good determination of the synaptic matrices.
	{A}lternatively, assuming the matrices to be known, very fast determination
	of the biases can be achieved. {T}hus, a spatio-temporal sequence
	may be regarded as spatio-temporal encoding of the bias vector. {W}e
	introduce a linear support vector machine ({SVM}) variant of the
	{DNF} in order to specify an optimal weight matrix. {T}his approach
	allows us to deal with noise. {S}patio-temporal sequences generated
	by different {DNF}s with the same number of neurons may be compared
	by calculating correlations of the synaptic matrices of the reconstructed
	{DNF}s. {O}ther types of spatio-temporal sequences need the introduction
	of hidden neurons, and/or the use of a kernel variant of the {SVM}
	approach. {T}he latter is being defined as a recurrent support vector
	network ({RSVN}).}
}

@article{Horn2003GPCRDB,
  author = {Horn, F. and Bettler, E. and Oliveira, L. and Campagne, F. and Cohen,
	F. E. and Vriend, G.},
  title = {{GPCRDB} information system for {G} protein-coupled receptors},
  journal = {Nucl. Acids Res.},
  year = {2003},
  volume = {31},
  pages = {294--297},
  number = {1},
  abstract = {The GPCRDB is a molecular class-specific information system that collects,
	combines, validates and disseminates heterogeneous data on G protein-coupled
	receptors (GPCRs). The database stores data on sequences, ligand
	binding constants and mutations. The system also provides computationally
	derived data such as sequence alignments, homology models, and a
	series of query and visualization tools. The GPCRDB is updated automatically
	once every 4-5 months and is freely accessible at http://www.gpcr.org/7tm/.},
  doi = {10.1093/nar/gkg103},
  eprint = {http://nar.oxfordjournals.org/cgi/reprint/31/1/294.pdf},
  keywords = {chemogenomics},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/31/1/294}
}

@article{Hornberg2006Cancer,
  author = {Hornberg, J. J. and Bruggeman, F. J. and Westerhoff, H. V. and Lankelma,
	J.},
  title = {Cancer: a Systems Biology disease.},
  journal = {Biosystems},
  year = {2006},
  volume = {83},
  pages = {81--90},
  number = {2--3},
  abstract = {Cancer research has focused on the identification of molecular differences
	between cancerous and healthy cells. The emerging picture is overwhelmingly
	complex. Molecules out of many parallel signal transduction pathways
	are involved. Their activities appear to be controlled by multiple
	factors. The action of regulatory circuits, cross-talk between pathways
	and the non-linear reaction kinetics of biochemical processes complicate
	the understanding and prediction of the outcome of intracellular
	signaling. In addition, interactions between tumor and other cell
	types give rise to a complex supra-cellular communication network.
	If cancer is such a complex system, how can one ever predict the
	effect of a mutation in a particular gene on a functionality of the
	entire system? And, how should one go about identifying drug targets?
	Here, we argue that one aspect is to recognize, where the essence
	resides, i.e. recognize cancer as a Systems Biology disease. Then,
	more cancer biologists could become systems biologists aiming to
	provide answers to some of the above systemic questions. To this
	aim, they should integrate the available knowledge stemming from
	quantitative experimental results through mathematical models. Models
	that have contributed to the understanding of complex biological
	systems are discussed. We show that the architecture of a signaling
	network is important for determining the site at which an oncologist
	should intervene. Finally, we discuss the possibility of applying
	network-based drug design to cancer treatment and how rationalized
	therapies, such as the application of kinase inhibitors, may benefit
	from Systems Biology.},
  doi = {10.1016/j.biosystems.2005.05.014},
  pdf = {../local/Hornberg2006Cancer.pdf},
  file = {Hornberg2006Cancer.pdf:Hornberg2006Cancer.pdf:PDF},
  institution = {Cell Biology, BioCentrum Amsterdam, Faculty of Earth and Life Sciences,
	Vrije Universiteit, De Boelelaan 1085, 1081 HV Amsterdam, The Netherlands.
	jorrit.hornberg@falw.vu.nl},
  keywords = {csbcbook, csbcbook-mustread},
  owner = {jp},
  pii = {S0303-2647(05)00117-6},
  pmid = {16426740},
  timestamp = {2009.10.11},
  url = {http://dx.doi.org/10.1016/j.biosystems.2005.05.014}
}

@article{Hornberger2012Clinical,
  author = {Hornberger, J. and Alvarado, M. D. and Rebecca, C. and Gutierrez,
	H. R. and Yu, T. M. and Gradishar, W. J.},
  title = {Clinical Validity/Utility, Change in Practice Patterns, and Economic
	Implications of Risk Stratifiers to Predict Outcomes for Early-Stage
	Breast Cancer: A Systematic Review},
  journal = {Journal of the National Cancer Institute},
  year = {2012},
  volume = {104},
  pages = {1068--1079},
  number = {14},
  publisher = {Oxford University Press}
}

@article{Horner2009Bioinformatics,
  author = {Horner, D. S. and Pavesi, G. and Castrignan{\`o}, T. and De Meo,
	P. D. and Liuni, S. and Sammeth, M. and Picardi, E. and Pesole, G.},
  title = {Bioinformatics approaches for genomics and post genomics applications
	of next-generation sequencing.},
  journal = {Brief Bioinform},
  year = {2009},
  month = oct,
  abstract = {Technical advances such as the development of molecular cloning, Sanger
	sequencing, PCR and oligonucleotide microarrays are key to our current
	capacity to sequence, annotate and study complete organismal genomes.
	Recent years have seen the development of a variety of so-called
	'next-generation' sequencing platforms, with several others anticipated
	to become available shortly. The previously unimaginable scale and
	economy of these methods, coupled with their enthusiastic uptake
	by the scientific community and the potential for further improvements
	in accuracy and read length, suggest that these technologies are
	destined to make a huge and ongoing impact upon genomic and post-genomic
	biology. However, like the analysis of microarray data and the assembly
	and annotation of complete genome sequences from conventional sequencing
	data, the management and analysis of next-generation sequencing data
	requires (and indeed has already driven) the development of informatics
	tools able to assemble, map, and interpret huge quantities of relatively
	or extremely short nucleotide sequence data. Here we provide a broad
	overview of bioinformatics approaches that have been introduced for
	several genomics and functional genomics applications of next-generation
	sequencing.},
  doi = {10.1093/bib/bbp046},
  pdf = {../local/Horner2009Bioinformatics.pdf},
  file = {Horner2009Bioinformatics.pdf:Horner2009Bioinformatics.pdf:PDF},
  keywords = {ngs},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {jp},
  pii = {bbp046},
  pmid = {19864250},
  timestamp = {2010.01.07},
  url = {http://dx.doi.org/10.1093/bib/bbp046}
}

@article{Horvath2003Neighborhooda,
  author    = {Horvath, D. and Jeandenans, C.},
  title     = {Neighborhood behavior of in silico structural spaces with respect
	to in vitro activity spaces--a benchmark for neighborhood behavior
	assessment of different in silico similarity metrics.},
  journal   = {J. Chem. Inf. Comput. Sci.},
  year      = 2003,
  volume    = 43,
  number    = 2,
  pages     = {691--698},
  doi       = {10.1021/ci025635r},
  pmid      = {12653539},
  url       = {http://dx.doi.org/10.1021/ci025635r},
  keywords  = {chemoinformatics},
  abstract  = {In a previous work, we have introduced Neighborhood Behavior (NB)
	criteria for calculated molecular similarity metrics, based on the
	analysis of in vitro activity spaces that simultaneously monitor
	the responses of a compound with respect to an entire panel of biologically
	relevant receptors and enzymes. Now, these novel NB criteria will
	be used as a benchmark for the comparison of different in silico
	molecular similarity metrics, addressing the following topics: (1)
	the relative performance of 2D vs 3D descriptors, (2) the importance
	of the similarity scoring function for a given descriptor set, and
	(3) binary or Fuzzy Pharmacophore Fingerprints-can they capture the
	similarity of the spatial distribution of pharmacophoric groups despite
	different molecular connectivity? It was found that fuzzy pharmacophore
	descriptors (FBPA) displayed an optimal NB and, unlike their binary
	counterparts, were successful in evidencing pharmacophore pattern
	similarity independently of topological similarity. Topological FBPA,
	identical to the former except for the use of topological instead
	of 3D atom pair distances, display a somehow weaker, but significant
	NB. Metrics based on "classical" global 2D and 3D molecular descriptors
	and a Dice scoring function also performed well. The choice of the
	similarity scoring function is therefore as important as the choice
	of the appropriate molecular descriptors.},
  pdf       = {../local/Horvath2003Neighborhooda.pdf},
  file      = {Horvath2003Neighborhooda.pdf:Horvath2003Neighborhooda.pdf:PDF},
  owner     = {vert},
  timestamp = {2006.11.24}
}

@inproceedings{Horvath2004Cyclic,
  author = {T. Horv{\'a}th and T. G{\"a}rtner and S. Wrobel},
  title = {Cyclic pattern kernels for predictive graph mining},
  booktitle = {Proceedings of the tenth ACM SIGKDD international conference on Knowledge
	discovery and data mining},
  year = {2004},
  pages = {158--167},
  address = {New York, NY, USA},
  publisher = {ACM Press},
  doi = {10.1145/1014052.1014072},
  keywords = {chemoinformatics kernel-theory},
  owner = {mahe},
  timestamp = {2006.08.02}
}

@article{Hotelling1936Relation,
  author = {H. Hotelling},
  title = {Relations between two sets of variates},
  journal = {Biometrika},
  year = {1936},
  volume = {28},
  pages = {321--377},
  number = {3/4},
  pdf = {../local/Hotelling1936Relation.pdf},
  file = {Hotelling1936Relation.pdf:local/Hotelling1936Relation.pdf:PDF},
  url = {http://links.jstor.org/sici?sici=0006-3444%28193612%2928%3A3%2F4%3C321%3ARBTSOV%3E2.0.CO%3B2-F}
}

@article{Hou2004Remote,
  author = {Hou, Y. and Hsu, W. and Lee, M. L. and Bystroff, C.},
  title = {Remote homolog detection using local sequence-structure correlations.},
  journal = {Proteins},
  year = {2004},
  volume = {57},
  pages = {518--530},
  number = {3},
  abstract = {Remote homology detection refers to the detection of structural homology
	in proteins when there is little or no sequence similarity. {I}n
	this article, we present a remote homolog detection method called
	{SVM}-{HMMSTR} that overcomes the reliance on detectable sequence
	similarity by transforming the sequences into strings of hidden {M}arkov
	states that represent local folding motif patterns. {T}hese state
	strings are transformed into fixed-dimension feature vectors for
	input to a support vector machine. {T}wo sets of features are defined:
	an order-independent feature set that captures the amino acid and
	local structure composition; and an order-dependent feature set that
	captures the sequential ordering of the local structures. {T}ests
	using the {S}tructural {C}lassification of {P}roteins ({SCOP}) 1.53
	data set show that the {SVM}-{HMMSTR} gives a significant improvement
	over several current methods.},
  doi = {10.1002/prot.20221},
  pdf = {../local/Hou2004Remote.pdf},
  file = {Hou2004Remote.pdf:local/Hou2004Remote.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Hou2003Efficient,
  author = {Hou, Y. and Hsu, W. and Lee, M. L. and Bystroff, C.},
  title = {Efficient remote homology detection using local structure},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {2294--2301},
  number = {17},
  abstract = {Motivation: {T}he function of an unknown biological sequence can often
	be accurately inferred if we are able to map this unknown sequence
	to its corresponding homologous family. {A}t present, discriminative
	methods such as {SVM}-{F}isher and {SVM}-pairwise, which combine
	support vector machine ({SVM}) and sequence similarity, are recognized
	as the most accurate methods, with {SVM}-pairwise being the most
	accurate. {H}owever, these methods typically encode sequence information
	into their feature vectors and ignore the structure information.
	{T}hey are also computationally inefficient. {B}ased on these observations,
	we present an alternative method for {SVM}-based protein classification.
	{O}ur proposed method, {SVM}-{I}-sites, utilizes structure similarity
	for remote homology detection. {R}esult: {W}e run experiments on
	the {S}tructural {C}lassification of {P}roteins 1.53 data set. {T}he
	results show that {SVM}-{I}-sites is more efficient than {SVM}-pairwise.
	{F}urther, we find that {SVM}-{I}-sites outperforms sequence-based
	methods such as {PSI}-{BLAST}, {SAM}, and {SVM}-{F}isher while achieving
	a comparable performance with {SVM}-pairwise. {A}vailability: {I}-sites
	server is accessible through the web at http://www.bioinfo.rpi.edu.
	{P}rograms are available upon request for academics. {L}icensing
	agreements are available for commercial interests. {T}he framework
	of encoding local structure into feature vector is available upon
	request.},
  pdf = {../local/Hou2003Efficient.pdf},
  file = {Hou2003Efficient.pdf:local/Hou2003Efficient.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/17/2294}
}

@article{Hoyer2004Non-negative,
  author    = {Hoyer, P. O.},
  title     = {Non-negative Matrix Factorization with sparseness constraints.},
  journal   = {J. Mach. Learn. Res.},
  year      = 2004,
  volume    = 5,
  pages     = {1457--1469},
  owner     = {jp},
  timestamp = {2012.02.28},
  pdf       = {../local/Hoyer2004Non-negative.pdf},
  file      = {Hoyer2004Non-negative.pdf:Hoyer2004Non-negative.pdf:PDF}
}

@article{Hsieh2004library,
  author = {Hsieh, A. C. and Bo, R. and Manola, J. and Vazquez, F. and Bare,
	O. and Khvorova, A. and Scaringe, S. and Sellers, W. R.},
  title = {A library of si{RNA} duplexes targeting the phosphoinositide 3-kinase
	pathway: determinants of gene silencing for use in cell-based screens.},
  journal = {Nucleic {A}cids {R}es.},
  year = {2004},
  volume = {32},
  pages = {893--901},
  number = {3},
  abstract = {Gene silencing through {RNA} interference ({RNA}i) has been established
	as a means of conducting reverse genetic studies. {I}n order to better
	understand the determinants of short interfering {RNA} (si{RNA})
	knockdown for use in high-throughput cell-based screens, 148 si{RNA}
	duplexes targeting 30 genes within the {PI}3{K} pathway were selected
	and synthesized. {T}he extent of {RNA} knockdown was measured for
	22 genes by quantitative real-time {PCR}. {A}nalysis of the parameters
	correlating with effective knockdown showed that (i) duplexes targeting
	the middle of the coding sequence silenced significantly poorer,
	(ii) silencing by duplexes targeting the 3'{UTR} was comparable with
	duplexes targeting the coding sequence, (iii) pooling of four or
	five duplexes per gene was remarkably efficient in knocking down
	gene expression and (iv) among duplexes that achieved a >70\% knockdown
	of the m{RNA} there were strong nucleotide preferences at specific
	positions, most notably positions 11 ({G} or {C}) and 19 ({T}) of
	the si{RNA} duplex. {F}inally, in a proof-of-principle pathway-wide
	cell-based genetic screen, conducted to detect negative genetic regulators
	of {A}kt {S}473 phosphorylation, both known negative regulators of
	this phosphorylation, {PTEN} and {PDK}1, were found. {T}hese data
	help to lay the foundation for genome-wide si{RNA} screens in mammalian
	cells.},
  doi = {10.1093/nar/gkh238},
  pdf = {../local/Hsieh2004library.pdf},
  file = {Hsieh2004library.pdf:local/Hsieh2004library.pdf:PDF},
  keywords = {sirna},
  pii = {32/3/893},
  url = {http://dx.doi.org/10.1093/nar/gkh238}
}

@article{Hu2004Developing,
  author = {Hu, C. and Li, X. and Liang, J.},
  title = {Developing optimal non-linear scoring function for protein design},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {3080--3098},
  number = {17},
  abstract = {Motivation. {P}rotein design aims to identify sequences compatible
	with a given protein fold but incompatible to any alternative folds.
	{T}o select the correct sequences and to guide the search process,
	a design scoring function is critically important. {S}uch a scoring
	function should be able to characterize the global fitness landscape
	of many proteins simultaneously. {R}esults: {T}o find optimal design
	scoring functions, we introduce two geometric views and propose a
	formulation using a mixture of non-linear {G}aussian kernel functions.
	{W}e aim to solve a simplified protein sequence design problem. {O}ur
	goal is to distinguish each native sequence for a major portion of
	representative protein structures from a large number of alternative
	decoy sequences, each a fragment from proteins of different folds.
	{O}ur scoring function discriminates perfectly a set of 440 native
	proteins from 14 million sequence decoys. {W}e show that no linear
	scoring function can succeed in this task. {I}n a blind test of unrelated
	proteins, our scoring function misclassifies only 13 native proteins
	out of 194. {T}his compares favorably with about three-four times
	more misclassifications when optimal linear functions reported in
	the literature are used. {W}e also discuss how to develop protein
	folding scoring function. {A}vailability: {A}vailable on request
	from the authors.},
  doi = {10.1093/bioinformatics/bth369},
  pdf = {../local/Hu2004Developing},
  file = {Hu2004Developing:local/Hu2004Developing:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/17/3080}
}

@article{Hu2004Improved,
  author = {Hu, H. J. and Pan, Y. and Harrison, R. and Tai, P. C.},
  title = {Improved protein secondary structure prediction using support vector
	machine with a new encoding scheme and an advanced tertiary classifier},
  journal = {I{EEE} {T}rans. {N}anobioscience},
  year = {2004},
  volume = {3},
  pages = {265--271},
  number = {4},
  abstract = {Prediction of protein secondary structures is an important problem
	in bioinformatics and has many applications. {T}he recent trend of
	secondary structure prediction studies is mostly based on the neural
	network or the support vector machine ({SVM}). {T}he {SVM} method
	is a comparatively new learning system which has mostly been used
	in pattern recognition problems. {I}n this study, {SVM} is used as
	a machine learning tool for the prediction of secondary structure
	and several encoding schemes, including orthogonal matrix, hydrophobicity
	matrix, {BLOSUM}62 substitution matrix, and combined matrix of these,
	are applied and optimized to improve the prediction accuracy. {A}lso,
	the optimal window length for six {SVM} binary classifiers is established
	by testing different window sizes and our new encoding scheme is
	tested based on this optimal window size via sevenfold cross validation
	tests. {T}he results show 2\% increase in the accuracy of the binary
	classifiers when compared with the instances in which the classical
	orthogonal matrix is used. {F}inally, to combine the results of the
	six {SVM} binary classifiers, a new tertiary classifier which combines
	the results of one-versus-one binary classifiers is introduced and
	the performance is compared with those of existing tertiary classifiers.
	{A}ccording to the results, the {Q}3 prediction accuracy of new tertiary
	classifier reaches 78.8\% and this is better than the best result
	reported in the literature.},
  pdf = {../local/Hu2004Improved.pdf},
  file = {Hu2004Improved.pdf:local/Hu2004Improved.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Hu2012HiCNorm,
  author = {Hu, Ming and Deng, Ke and Selvaraj, Siddarth and Qin, Zhaohui and
	Ren, Bing and Liu, Jun S.},
  title = {{HiCNorm}: removing biases in {Hi-C} data via {Poisson} regression},
  journal = {Bioinformatics},
  year = {2012},
  volume = {28},
  pages = {3131--3133},
  number = {23},
  month = dec,
  abstract = {Summary: We propose a parametric model, HiCNorm, to remove systematic
	biases in the raw Hi-C contact maps, resulting in a simple, fast,
	yet accurate normalization procedure. Compared with the existing
	Hi-C normalization method developed by Yaffe and Tanay, HiCNorm has
	fewer parameters, runs >1000 times faster and achieves higher reproducibility.
	Availability: Freely available on the web at: http://www.people.fas.harvard.edu/\~{}junliu/HiCNorm/.
	Contact: jliu@stat.harvard.edu Supplementary information: Supplementary
	data are available at Bioinformatics online.},
  day = {1},
  doi = {10.1093/bioinformatics/bts570},
  issn = {1460-2059},
  keywords = {hi-c},
  pmid = {23023982},
  posted-at = {2012-12-05 22:45:25},
  priority = {2},
  publisher = {Oxford University Press},
  url = {http://dx.doi.org/10.1093/bioinformatics/bts570}
}

@article{Hu2009Genetic,
  author = {Xiaolan Hu and Howard M Stern and Lin Ge and Carol O'Brien and Lauren
	Haydu and Cynthia D Honchell and Peter M Haverty and Brock A Peters
	and Thomas D Wu and Lukas C Amler and John Chant and David Stokoe
	and Mark R Lackner and Guy Cavet},
  title = {Genetic alterations and oncogenic pathways associated with breast
	cancer subtypes.},
  journal = {Mol Cancer Res},
  year = {2009},
  volume = {7},
  pages = {511--522},
  number = {4},
  month = apr,
  abstract = {Breast cancers can be divided into subtypes with important implications
	for prognosis and treatment. We set out to characterize the genetic
	alterations observed in different breast cancer subtypes and to identify
	specific candidate genes and pathways associated with subtype biology.
	mRNA expression levels of estrogen receptor, progesterone receptor,
	and HER2 were shown to predict marker status determined by immunohistochemistry
	and to be effective at assigning samples to subtypes. HER2(+) cancers
	were shown to have the greatest frequency of high-level amplification
	(independent of the ERBB2 amplicon itself), but triple-negative cancers
	had the highest overall frequencies of copy gain. Triple-negative
	cancers also were shown to have more frequent loss of phosphatase
	and tensin homologue and mutation of RB1, which may contribute to
	genomic instability. We identified and validated seven regions of
	copy number alteration associated with different subtypes, and used
	integrative bioinformatics analysis to identify candidate oncogenes
	and tumor suppressors, including ERBB2, GRB7, MYST2, PPM1D, CCND1,
	HDAC2, FOXA1, and RASA1. We tested the candidate oncogene MYST2 and
	showed that it enhances the anchorage-independent growth of breast
	cancer cells. The genome-wide and region-specific differences between
	subtypes suggest the differential activation of oncogenic pathways.},
  doi = {10.1158/1541-7786.MCR-08-0107},
  pdf = {../local/Hu2009Genetic.pdf},
  file = {Hu2009Genetic.pdf:Hu2009Genetic.pdf:PDF},
  institution = {Department of Bioinformatics, Genentech, Inc., South San Francisco,
	CA, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {7/4/511},
  pmid = {19372580},
  timestamp = {2010.08.05},
  url = {http://dx.doi.org/10.1158/1541-7786.MCR-08-0107}
}

@article{Hu2006molecular,
  author = {Zhiyuan Hu and Cheng Fan and Daniel S Oh and J. S. Marron and Xiaping
	He and Bahjat F Qaqish and Chad Livasy and Lisa A Carey and Evangeline
	Reynolds and Lynn Dressler and Andrew Nobel and Joel Parker and Matthew
	G Ewend and Lynda R Sawyer and Junyuan Wu and Yudong Liu and Rita
	Nanda and Maria Tretiakova and Alejandra Ruiz Orrico and Donna Dreher
	and Juan P Palazzo and Laurent Perreard and Edward Nelson and Mary
	Mone and Heidi Hansen and Michael Mullins and John F Quackenbush
	and Matthew J Ellis and Olufunmilayo I Olopade and Philip S Bernard
	and Charles M Perou},
  title = {The molecular portraits of breast tumors are conserved across microarray
	platforms.},
  journal = {BMC Genomics},
  year = {2006},
  volume = {7},
  pages = {96},
  abstract = {Validation of a novel gene expression signature in independent data
	sets is a critical step in the development of a clinically useful
	test for cancer patient risk-stratification. However, validation
	is often unconvincing because the size of the test set is typically
	small. To overcome this problem we used publicly available breast
	cancer gene expression data sets and a novel approach to data fusion,
	in order to validate a new breast tumor intrinsic list. A 105-tumor
	training set containing 26 sample pairs was used to derive a new
	breast tumor intrinsic gene list. This intrinsic list contained 1300
	genes and a proliferation signature that was not present in previous
	breast intrinsic gene sets. We tested this list as a survival predictor
	on a data set of 311 tumors compiled from three independent microarray
	studies that were fused into a single data set using Distance Weighted
	Discrimination. When the new intrinsic gene set was used to hierarchically
	cluster this combined test set, tumors were grouped into LumA, LumB,
	Basal-like, HER2+/ER-, and Normal Breast-like tumor subtypes that
	we demonstrated in previous datasets. These subtypes were associated
	with significant differences in Relapse-Free and Overall Survival.
	Multivariate Cox analysis of the combined test set showed that the
	intrinsic subtype classifications added significant prognostic information
	that was independent of standard clinical predictors. From the combined
	test set, we developed an objective and unchanging classifier based
	upon five intrinsic subtype mean expression profiles (i.e. centroids),
	which is designed for single sample predictions (SSP). The SSP approach
	was applied to two additional independent data sets and consistently
	predicted survival in both systemically treated and untreated patient
	groups. This study validates the "breast tumor intrinsic" subtype
	classification as an objective means of tumor classification that
	should be translated into a clinical assay for further retrospective
	and prospective validation. In addition, our method of combining
	existing data sets can be used to robustly validate the potential
	clinical value of any new gene expression profile.},
  doi = {10.1186/1471-2164-7-96},
  institution = {Lineberger Comprehensive Cancer Center, University of North Carolina,
	Chapel Hill, NC 27599, USA. zhiyuan_hu@med.unc.edu},
  keywords = {Breast Neoplasms, genetics; Cluster Analysis; Conserved Sequence,
	genetics; Female; Gene Expression Regulation, Neoplastic, genetics;
	Genes, Neoplasm, genetics; Genetic Predisposition to Disease; Humans;
	Oligonucleotide Array Sequence Analysis, methods; Predictive Value
	of Tests; Reproducibility of Results; Research Design; Sample Size;
	Survival Analysis},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2164-7-96},
  pmid = {16643655},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1186/1471-2164-7-96}
}

@article{Hua2005JImmunol,
  author = {Hua, F. and Cornejo, M. G. and Cardone, M. H. and Stokes, C. L. and
	Lauffenburger, D. A.},
  title = {Effects of Bcl-2 levels on Fas signaling-induced caspase-3 activation:
	molecular genetic tests of computational model predictions},
  journal = {J Immunol},
  year = {2005},
  volume = {175},
  pages = {985--995},
  number = {2},
  abstract = {Fas-induced apoptosis is a critical process for normal immune system
	development and function. Although many molecular components in the
	Fas signaling pathway have been identified, a systematic understanding
	of how they work together to determine network dynamics and apoptosis
	itself has remained elusive. To address this, we generated a computational
	model for interpreting and predicting effects of pathway component
	properties. The model integrates current information concerning the
	signaling network downstream of Fas activation, through both type
	I and type II pathways, until activation of caspase-3. Unknown parameter
	values in the model were estimated using experimental data obtained
	from human Jurkat T cells. To elucidate critical signaling network
	properties, we examined the effects of altering the level of Bcl-2
	on the kinetics of caspase-3 activation, using both overexpression
	and knockdown in the model and experimentally. Overexpression was
	used to distinguish among alternative hypotheses for inhibitory binding
	interactions of Bcl-2 with various components in the mitochondrial
	pathway. In comparing model simulations with experimental results,
	we find the best agreement when Bcl-2 blocks the release of cytochrome
	c by binding to both Bax and truncated Bid instead of Bax, truncated
	Bid, or Bid alone. Moreover, although Bcl-2 overexpression strongly
	reduces caspase-3 activation, Bcl-2 knockdown has a negligible effect,
	demonstrating a general model finding that varying the expression
	levels of signal molecules frequently has asymmetric effects on the
	outcome. Finally, we demonstrate that the relative dominance of type
	I vs type II pathways can be switched by varying particular signaling
	component levels without changing network structure.},
  keywords = {csbcbook}
}

@article{Hua2005Optimal,
  author = {Hua, J. and Xiong, Z. and Lowey, J. and Suh, E. and Dougherty, E.
	R.},
  title = {Optimal number of features as a function of sample size for various
	classification rules},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1509--1515},
  number = {8},
  month = apr,
  abstract = {Motivation: {G}iven the joint feature-label distribution, increasing
	the number of features always results in decreased classification
	error; however, this is not the case when a classifier is designed
	via a classification rule from sample data. {T}ypically (but not
	always), for fixed sample size, the error of a designed classifier
	decreases and then increases as the number of features grows. {T}he
	potential downside of using too many features is most critical for
	small samples, which are commonplace for gene-expression-based classifiers
	for phenotype discrimination. {F}or fixed sample size and feature-label
	distribution, the issue is to find an optimal number of features. {R}esults:
	{S}ince only in rare cases is there a known distribution of the error
	as a function of the number of features and sample size, this study
	employs simulation for various feature-label distributions and classification
	rules, and across a wide range of sample and feature-set sizes. {T}o
	achieve the desired end, finding the optimal number of features as
	a function of sample size, it employs massively parallel computation.
	{S}even classifiers are treated: 3-nearest-neighbor, {G}aussian kernel,
	linear support vector machine, polynomial support vector machine,
	perceptron, regular histogram and linear discriminant analysis. {T}hree
	{G}aussian-based models are considered: linear, nonlinear and bimodal.
	{I}n addition, real patient data from a large breast-cancer study
	is considered. {T}o mitigate the combinatorial search for finding
	optimal feature sets, and to model the situation in which subsets
	of genes are co-regulated and correlation is internal to these subsets,
	we assume that the covariance matrix of the features is blocked,
	with each block corresponding to a group of correlated features.
	{A}ltogether there is a large number of error surfaces for the many
	cases. {T}hese are provided in full on a companion web-site, which
	is meant to serve as resource for those working with small-sample
	classification. {A}vailability: {F}or the companion web-site, please
	visit http://public.tgen.org/tamu/ofs/.},
  doi = {10.1093/bioinformatics/bti171},
  pdf = {../local/Hua2005Optimal.pdf},
  file = {Hua2005Optimal.pdf:local/Hua2005Optimal.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/bti171v1}
}

@article{Hua2001Novel,
  author   = {Hua, S. and Sun, Z.},
  title    = {A {Novel} {Method} of {Protein} {Secondary} {Structure} {Prediction}
	with {High} {Segment} {Overlap} {Measure}: {Support} {Vector} {Machine}
	{Approach}},
  journal  = {J. {Mol}. {Biol}.},
  year     = {2001},
  volume   = {308},
  pages    = {397--407},
  number   = {2},
  month    = {April},
  doi      = {10.1006/jmbi.2001.4580},
  pdf      = {../local/Hua2001Novel.pdf},
  file     = {Hua2001Novel.pdf:local/Hua2001Novel.pdf:PDF},
  keywords = {biosvm},
  subject  = {biokernel}
}

@article{Hua2001Support,
  author = {Hua, S. and Sun, Z.},
  title = {Support vector machine approach for protein subcellular localization
	prediction},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {721--728},
  number = {8},
  abstract = {Motivation: {S}ubcellular localization is a key functional characteristic
	of proteins. {A} fully automatic and reliable prediction system for
	protein subcellular localization is needed, especially for the analysis
	of large-scale genome sequences. {R}esults: {I}n this paper, {S}upport
	{V}ector {M}achine has been introduced to predict the subcellular
	localization of proteins from their amino acid compositions. {T}he
	total prediction accuracies reach 91.4\% for three subcellular locations
	in prokaryotic organisms and 79.4\% for four locations in eukaryotic
	organisms. {P}redictions by our approach are robust to errors in
	the protein {N}-terminal sequences. {T}his new approach provides
	superior prediction performance compared with existing algorithms
	based on amino acid composition and can be a complementary method
	to other existing methods based on sorting signals. {A}vailability:
	{A} web server implementing the prediction method is available at
	http://www.bioinfo.tsinghua.edu.cn/{S}ub{L}oc/. {C}ontact: sunzhr@mail.tsinghua.edu.cn;
	huasj00@mails.tsinghua.edu.cn {S}upplementary information: {S}upplementary
	material is available at http://www.bioinfo.tsinghua.edu.cn/{S}ub{L}oc},
  pdf = {../local/Hua2001Support.pdf},
  file = {Hua2001Support.pdf:local/Hua2001Support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/17/8/721}
}

@inproceedings{Huan2004Accurate,
  author = {Huan, J. and Wang, W. and Washington, A. and Prins, J. and Shah,
	R. and Tropsha, A.},
  title = {Accurate classification of protein structural families using coherent
	subgraph analysis.},
  booktitle = {Proceedings of the {P}acific {S}ymposium on {B}iocomputing 2004},
  year = {2004},
  pages = {411--422},
  abstract = {Protein structural annotation and classification is an important problem
	in bioinformatics. {W}e report on the development of an efficient
	subgraph mining technique and its application to finding characteristic
	substructural patterns within protein structural families. {I}n our
	method, protein structures are represented by graphs where the nodes
	are residues and the edges connect residues found within certain
	distance from each other. {A}pplication of subgraph mining to proteins
	is challenging for a number reasons: (1) protein graphs are large
	and complex, (2) current protein databases are large and continue
	to grow rapidly, and (3) only a small fraction of the frequent subgraphs
	among the huge pool of all possible subgraphs could be significant
	in the context of protein classification. {T}o address these challenges,
	we have developed an information theoretic model called coherent
	subgraph mining. {F}rom information theory, the entropy of a random
	variable {X} measures the information content carried by {X} and
	the {M}utual {I}nformation ({MI}) between two random variables {X}
	and {Y} measures the correlation between {X} and {Y}. {W}e define
	a subgraph {X} as coherent if it is strongly correlated with every
	sufficiently large sub-subgraph {Y} embedded in it. {B}ased on the
	{MI} metric, we have designed a search scheme that only reports coherent
	subgraphs. {T}o determine the significance of coherent protein subgraphs,
	we have conducted an experimental study in which all coherent subgraphs
	were identified in several protein structural families annotated
	in the {SCOP} database ({M}urzin et al, 1995). {T}he {S}upport {V}ector
	{M}achine algorithm was used to classify proteins from different
	families under the binary classification scheme. {W}e find that this
	approach identifies spatial motifs unique to individual {SCOP} families
	and affords excellent discrimination between families.},
  pdf = {../local/Huan2004Accurate.pdf},
  file = {Huan2004Accurate.pdf:local/Huan2004Accurate.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Huang2006Ligsite,
  author = {Huang, Bingding and Schroeder, Michael},
  title = {{LIGSITEcsc}: predicting ligand binding sites using the {Connolly} surface
	and degree of conservation.},
  journal = {BMC Struct Biol},
  year = {2006},
  volume = {6},
  pages = {19},
  abstract = {BACKGROUND: Identifying pockets on protein surfaces is of great importance
	for many structure-based drug design applications and protein-ligand
	docking algorithms. Over the last ten years, many geometric methods
	for the prediction of ligand-binding sites have been developed. RESULTS:
	We present LIGSITEcsc, an extension and implementation of the LIGSITE
	algorithm. LIGSITEcsc is based on the notion of surface-solvent-surface
	events and the degree of conservation of the involved surface residues.
	We compare our algorithm to four other approaches, LIGSITE, CAST,
	PASS, and SURFNET, and evaluate all on a dataset of 48 unbound/bound
	structures and 210 bound-structures. LIGSITEcsc performs slightly
	better than the other tools and achieves a success rate of 71\% and
	75\%, respectively. CONCLUSION: The use of the Connolly surface leads
	to slight improvements, the prediction re-ranking by conservation
	to significant improvements of the binding site predictions. A web
	server for LIGSITEcsc and its source code is available at scoppi.biotec.tu-dresden.de/pocket},
  doi = {10.1186/1472-6807-6-19},
  institution = {Bioinformatics Group, Biotechnological Center, Technical University Dresden,
	Germany. bingding.huang@biotec.tu-dresden.de},
  keywords = {Algorithms; Binding Sites; Databases, Protein; Ligands; Models, Molecular;
	Proteins, chemistry},
  owner = {bricehoffmann},
  pii = {1472-6807-6-19},
  pmid = {16995956},
  timestamp = {2009.02.13},
  url = {http://dx.doi.org/10.1186/1472-6807-6-19}
}

@article{Bild2003,
  author = {Huang, E. and Ishida, S. and Pittman, J. and Dressman, H. and Bild,
	A. and Kloos, M. and D'Amico, M. and Pestell, R. G. and West, M.
	and Nevins, J. R.},
  title = {Gene expression phenotypic models that predict the activity of oncogenic
	pathways},
  journal = {Nat {G}enet},
  year = {2003},
  volume = {34},
  pages = {226--230},
  number = {2},
  abstract = {High-density {DNA} microarrays measure expression of large numbers
	of genes in one assay. {T}he ability to find underlying structure
	in complex gene expression data sets and rigorously test association
	of that structure with biological conditions is essential to developing
	multi-faceted views of the gene activity that defines cellular phenotype.
	{W}e sought to connect features of gene expression data with biological
	hypotheses by integrating 'metagene' patterns from {DNA} microarray
	experiments in the characterization and prediction of oncogenic phenotypes.
	{W}e applied these techniques to the analysis of regulatory pathways
	controlled by the genes {HRAS} ({H}arvey rat sarcoma viral oncogene
	homolog), {MYC} (myelocytomatosis viral oncogene homolog) and {E}2{F}1,
	{E}2{F}2 and {E}2{F}3 (encoding {E}2{F} transcription factors 1,
	2 and 3, respectively). {T}he phenotypic models accurately predict
	the activity of these pathways in the context of normal cell proliferation.
	{M}oreover, the metagene models trained with gene expression patterns
	evoked by ectopic production of {M}yc or {R}as proteins in primary
	tissue culture cells properly predict the activity of in vivo tumor
	models that result from deregulation of the {MYC} or {HRAS} pathways.
	{W}e conclude that these gene expression phenotypes have the potential
	to characterize the complex genetic alterations that typify the neoplastic
	state, whether in vitro or in vivo, in a way that truly reflects
	the complexity of the regulatory pathways that are affected.},
  keywords = {Animals *Cell Cycle Proteins *DNA-Binding Proteins E2F Transcription
	Factors E2F1 Transcription Factor E2F2 Transcription Factor E2F3
	Transcription Factor Female *Gene Expression Gene Expression Profiling
	Gene Expression Regulation, Neoplastic Genes, myc Genes, ras Mammary
	Neoplasms, Experimental/genetics Mice Mice, Transgenic *Models, Genetic
	Oligonucleotide Array Sequence Analysis *Oncogenes Phenotype Transcription
	Factors/genetics}
}

@article{Huang2004novel,
  author = {Huang, Jian-qiang and Chen, Xiang-xian and Wang, Le-yu},
  title = {A novel method for tracking pedestrians from real-time video.},
  journal = {J {Z}hejiang {U}niv {S}ci},
  year = {2004},
  volume = {5},
  pages = {99--105},
  number = {1},
  month = {Jan},
  abstract = {This novel method of {P}edestrian {T}racking using {S}upport {V}ector
	({PTSV}) proposed for a video surveillance instrument combines the
	{S}upport {V}ector {M}achine ({SVM}) classifier into an optic-flow
	based tracker. {T}he traditional method using optical flow tracks
	objects by minimizing an intensity difference function between successive
	frames, while {PTSV} tracks objects by maximizing the {SVM} classification
	score. {A}s the {SVM} classifier for object and non-object is pre-trained,
	there is need only to classify an image block as object or non-object
	without having to compare the pixel region of the tracked object
	in the previous frame. {T}o account for large motions between successive
	frames we build pyramids from the support vectors and use a coarse-to-fine
	scan in the classification stage. {T}o accelerate the training of
	{SVM}, a {S}equential {M}inimal {O}ptimization {M}ethod ({SMO}) is
	adopted. {T}he results of using a kernel-{PTSV} for pedestrian tracking
	from real time video are shown at the end. {C}omparative experimental
	results showed that {PTSV} improves the reliability of tracking compared
	to that of traditional tracking method using optical flow.}
}

@article{Huang2005Support,
  author = {Huang, Jing and Shi, Feng},
  title = {Support vector machines for predicting apoptosis proteins types.},
  journal = {Acta {B}iotheor.},
  year = {2005},
  volume = {53},
  pages = {39--47},
  number = {1},
  abstract = {Apoptosis proteins have a central role in the development and homeostasis
	of an organism. {T}hese proteins are very important for understanding
	the mechanism of programmed cell death, and their function is related
	to their types. {A}ccording to the classification scheme by {Z}hou
	and {D}octor (2003), the apoptosis proteins are categorized into
	the following four types: (1) cytoplasmic protein; (2) plasma membrane-bound
	protein; (3) mitochondrial inner and outer proteins; (4) other proteins.
	{A} powerful learning machine, the {S}upport {V}ector {M}achine,
	is applied for predicting the type of a given apoptosis protein by
	incorporating the sqrt-amino acid composition effect. {H}igh success
	rates were obtained by the re-substitute test (98/98 = 100 \%) and
	the jackknife test (89/98 = 90.8\%).},
  doi = {10.1007/s10441-005-7002-5},
  pdf = {../local/Huang2005Support.pdf},
  file = {Huang2005Support.pdf:local/Huang2005Support.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1007/s10441-005-7002-5}
}

@article{Huang2010benefit,
  author    = {Huang, J. and Zhang, T.},
  title     = {The benefit of group sparsity},
  journal   = {Ann. Stat.},
  year      = {2010},
  volume    = {38},
  pages     = {1978--2004},
  number    = {4},
  abstract  = {This paper develops a theory for group Lasso using a concept called
	strong group sparsity. Our result shows that group Lasso is superior
	to standard Lasso for strongly group-sparse signals. This provides
	a convincing theoretical justification for using group sparse regularization
	when the underlying group structure is consistent with the data.
	Moreover, the theory predicts some limitations of the group Lasso
	formulation that are confirmed by simulation studies.},
  doi       = {10.1214/09-AOS778},
  owner     = {jp},
  timestamp = {2011.04.21},
  url       = {http://dx.doi.org/10.1214/09-AOS778}
}

@inproceedings{Huang2009Learning,
  author       = {Huang, J. and Zhang, T. and Metaxas, D.},
  title        = {Learning with structured sparsity},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine
	Learning},
  year         = {2009},
  pages        = {417--424},
  organization = {ACM}
}

@article{Huang2004Boosting,
  author = {Huang, K. and Murphy, R. F.},
  title = {Boosting accuracy of automated classification of fluorescence microscope
	images for location proteomics},
  journal = {{BMC} {B}ioinformatics},
  year = {2004},
  volume = {5},
  pages = {78},
  abstract = {Background {D}etailed knowledge of the subcellular location of each
	expressed protein is critical to a full understanding of its function.
	{F}luorescence microscopy, in combination with methods for fluorescent
	tagging, is the most suitable current method for proteome-wide determination
	of subcellular location. {P}revious work has shown that neural network
	classifiers can distinguish all major protein subcellular location
	patterns in both 2{D} and 3{D} fluorescence microscope images. {B}uilding
	on these results, we evaluate here new classifiers and features to
	improve the recognition of protein subcellular location patterns
	in both 2{D} and 3{D} fluorescence microscope images. {R}esults {W}e
	report here a thorough comparison of the performance on this problem
	of eight different state-of-the-art classification methods, including
	neural networks, support vector machines with linear, polynomial,
	radial basis, and exponential radial basis kernel functions, and
	ensemble methods such as {A}da{B}oost, {B}agging, and {M}ixtures-of-{E}xperts.
	{T}en-fold cross validation was used to evaluate each classifier
	with various parameters on different {S}ubcellular {L}ocation {F}eature
	sets representing both 2{D} and 3{D} fluorescence microscope images,
	including new feature sets incorporating features derived from {G}abor
	and {D}aubechies wavelet transforms. {A}fter optimal parameters were
	chosen for each of the eight classifiers, optimal majority-voting
	ensemble classifiers were formed for each feature set. {C}omparison
	of results for each image for all eight classifiers permits estimation
	of the lower bound classification error rate for each subcellular
	pattern, which we interpret to reflect the fraction of cells whose
	patterns are distorted by mitosis, cell death or acquisition errors.
	{O}verall, we obtained statistically significant improvements in
	classification accuracy over the best previously published results,
	with the overall error rate being reduced by one-third to one-half
	and with the average accuracy for single 2{D} images being higher
	than 90\% for the first time. {I}n particular, the classification
	accuracy for the easily confused endomembrane compartments (endoplasmic
	reticulum, {G}olgi, endosomes, lysosomes) was improved by 5--15\%.
	{W}e achieved further improvements when classification was conducted
	on image sets rather than on individual cell images. {C}onclusions
	{T}he availability of accurate, fast, automated classification systems
	for protein location patterns in conjunction with high throughput
	fluorescence microscope imaging techniques enables a new subfield
	of proteomics, location proteomics. {T}he accuracy and sensitivity
	of this approach represents an important alternative to low-resolution
	assignments by curation or sequence-based prediction.},
  doi = {10.1186/1471-2105-5-78},
  pdf = {../local/Huang2004Boosting.pdf},
  file = {Huang2004Boosting.pdf:local/Huang2004Boosting.pdf:PDF},
  owner = {vert},
  url = {http://www.biomedcentral.com/1471-2105/5/78}
}

@article{Huang2005CTKPred,
  author   = {Huang, N. and Chen, H. and Sun, Z.},
  title    = {{CTKPred}: an {SVM}-based method for the prediction and classification
	of the cytokine superfamily.},
  journal  = {Protein {Eng}. {Des}. {Sel}.},
  year     = {2005},
  month    = {Jun},
  abstract = {Cell proliferation, differentiation and death are controlled by a
	multitude of cell-cell signals and loss of this control has devastating
	consequences. {P}rominent among these regulatory signals is the cytokine
	superfamily, which has crucial functions in the development, differentiation
	and regulation of immune cells. {I}n this study, a support vector
	machine ({SVM})-based method was developed for predicting families
	and subfamilies of cytokines using dipeptide composition. {T}he taxonomy
	of the cytokine superfamily with which our method complies was described
	in the {C}ytokine {F}amily c{DNA} {D}atabase (db{CFC}) and the dataset
	used in this study for training and testing was obtained from the
	db{CFC} and {S}tructural {C}lassification of {P}roteins ({SCOP}).
	{T}he method classified cytokines and non-cytokines with an accuracy
	of 92.5\% by 7-fold cross-validation. {T}he method is further able
	to predict seven major classes of cytokine with an overall accuracy
	of 94.7\%. {A} server for recognition and classification of cytokines
	based on multi-class {SVM}s has been set up at http://bioinfo.tsinghua.edu.cn/~huangni/{CTKP}red/.},
  doi      = {10.1093/protein/gzi041},
  pdf      = {../local/Huang2005CTKPred.pdf},
  file     = {Huang2005CTKPred.pdf:local/Huang2005CTKPred.pdf:PDF},
  keywords = {biosvm},
  pii      = {gzi041},
  url      = {http://dx.doi.org/10.1093/protein/gzi041}
}

@article{Huang2005Computation,
  author = {Huang, Shao-Wei and Hwang, Jenn-Kang},
  title = {Computation of conformational entropy from protein sequences using
	the machine-learning method--application to the study of the relationship
	between structural conservation and local structural stability.},
  journal = {Proteins},
  year = {2005},
  volume = {59},
  pages = {802--809},
  number = {4},
  month = {Jun},
  abstract = {A complete protein sequence can usually determine a unique conformation;
	however, the situation is different for shorter subsequences--some
	of them are able to adopt unique conformations, independent of context;
	while others assume diverse conformations in different contexts.
	{T}he conformations of subsequences are determined by the interplay
	between local and nonlocal interactions. {A} quantitative measure
	of such structural conservation or variability will be useful in
	the understanding of the sequence-structure relationship. {I}n this
	report, we developed an approach using the support vector machine
	method to compute the conformational variability directly from sequences,
	which is referred to as the sequence structural entropy. {A}s a practical
	application, we studied the relationship between sequence structural
	entropy and the hydrogen exchange for a set of well-studied proteins.
	{W}e found that the slowest exchange cores usually comprise amino
	acids of the lowest sequence structural entropy. {O}ur results indicate
	that structural conservation is closely related to the local structural
	stability. {T}his relationship may have interesting implications
	in the protein folding processes, and may be useful in the study
	of the sequence-structure relationship.},
  doi = {10.1002/prot.20462},
  pdf = {../local/Huang2005Computation.pdf},
  file = {Huang2005Computation.pdf:local/Huang2005Computation.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1002/prot.20462}
}

@article{Huang2005Gene,
  author = {Huang, T. M. and Kecman, V.},
  title = {Gene extraction for cancer diagnosis by support vector machines---{A}n
	improvement.},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2005},
  month = {Jul},
  abstract = {{OBJECTIVE}: {T}o improve the performance of gene extraction for
	cancer diagnosis by recursive feature elimination with support vector
	machines ({RFE}-{SVM}s): {A} cancer diagnosis by using the {DNA}
	microarray data faces many challenges the most serious one being
	the presence of thousands of genes and only several dozens (at the
	best) of patient's samples. {T}hus, making any kind of classification
	in high-dimensional spaces from a limited number of data is both
	an extremely difficult and a prone to an error procedure. {T}he improved
	{RFE}-{SVM}s is introduced and used here for an elimination of less
	relevant genes and just for a reduction of the overall number of
	genes used in a medical diagnostic. {METHODS}: {T}he paper shows
	why and how the, usually neglected, penalty parameter {C} and some
	standard data preprocessing techniques (normalizing and scaling)
	influence classification results and the gene selection of {RFE}-{SVM}s.
	{T}he gene selected by {RFE}-{SVM}s is compared with eight other
	gene selection algorithms implemented in the {R}ankgene software
	to investigate whether there is any consensus among the algorithms,
	so the scope of finding the right set of genes can be reduced. {RESULTS}:
	{T}he improved {RFE}-{SVM}s is applied on the two benchmarking colon
	and lymphoma cancer data sets with various {C} parameters and different
	standard preprocessing techniques. {H}ere, decreasing {C} leads to
	the smaller diagnosis error in comparisons to other known methods
	applied to the benchmarking data sets. {W}ith an appropriate parameter
	{C} and with a proper preprocessing procedure, the reduction in a
	diagnosis error is as high as 36\%. {CONCLUSIONS}: {T}he results
	suggest that with a properly chosen parameter {C}, the extracted
	genes and the constructed classifier will ensure less overfitting
	of the training data leading to an increased accuracy in selecting
	relevant genes. {F}inally, comparison in gene ranking obtained by
	different algorithms shows that there is a significant consensus
	among the various algorithms as to which set of genes is relevant.},
  doi = {10.1016/j.artmed.2005.01.006},
  pdf = {../local/Huang2005Gene.pdf},
  file = {Huang2005Gene.pdf:local/Huang2005Gene.pdf:PDF},
  keywords = {biosvm},
  pii = {S0933-3657(05)00051-5},
  url = {http://dx.doi.org/10.1016/j.artmed.2005.01.006}
}

@article{Huang2005Supporta,
  author = {Huang, Yu-Len and Chen, Dar-Ren},
  title = {Support vector machines in sonography: application to decision making
	in the diagnosis of breast cancer.},
  journal = {Clin {I}maging},
  year = {2005},
  volume = {29},
  pages = {179--184},
  number = {3},
  abstract = {We evaluated a series of pathologically proven breast tumors using
	the support vector machine ({SVM}) in the differential diagnosis
	of solid breast tumors. {T}his study evaluated two ultrasonic image
	databases, i.e., {DB}1 and {DB}2. {T}he {DB}1 contained 140 ultrasonic
	images of solid breast nodules (52 malignant and 88 benign). {T}he
	{DB}2 contained 250 ultrasonic images of solid breast nodules (35
	malignant and 215 benign). {T}he physician-located regions of interest
	({ROI}) of sonography and textual features were utilized to classify
	breast tumors. {A}n {SVM} classifier using interpixel textual features
	classified the tumor as benign or malignant. {T}he receiver operating
	characteristic ({ROC}) area index for the proposed system on the
	{DB}1 and the {DB}2 are 0.9695+/-0.0150 and 0.9552+/-0.0161, respectively.
	{T}he proposed system differentiates solid breast nodules with a
	relatively high accuracy and helps inexperienced operators avoid
	misdiagnosis. {T}he main advantage in the proposed system is that
	the training procedure of {SVM} was very fast and stable. {T}he training
	and diagnosis procedure of the proposed system is almost 700 times
	faster than that of multilayer perception neural networks ({MLP}s).
	{W}ith the growth of the database, new ultrasonic images can be collected
	and used as reference cases while performing diagnoses. {T}his study
	reduces the training and diagnosis time dramatically.},
  doi = {10.1016/j.clinimag.2004.08.002},
  pii = {S0899-7071(04)00170-6},
  url = {http://dx.doi.org/10.1016/j.clinimag.2004.08.002}
}

@article{Huber1964Robust,
  author = {Huber, P. J.},
  title = {Robust Estimation of a Location Parameter},
  journal = {Ann. Math. Statist.},
  year = {1964},
  volume = {35},
  pages = {73--101},
  number = {1},
  doi = {10.1214/aoms/1177703732},
  pdf = {../local/Huber1964Robust.pdf},
  file = {Huber1964Robust.pdf:Huber1964Robust.pdf:PDF},
  owner = {jp},
  timestamp = {2011.07.23},
  url = {http://dx.doi.org/10.1214/aoms/1177703732}
}

@article{Hudis2007Trastuzumab,
  author = {Hudis, C. A.},
  title = {Trastuzumab---mechanism of action and use in clinical practice.},
  journal = {N. Engl. J. Med.},
  year = {2007},
  volume = {357},
  pages = {39--51},
  number = {1},
  month = {Jul},
  doi = {10.1056/NEJMra043186},
  pdf = {../local/Hudis2007Trastuzumab.pdf},
  file = {Hudis2007Trastuzumab.pdf:Hudis2007Trastuzumab.pdf:PDF},
  institution = {Breast Cancer Medicine Service, Solid Tumor Division, Department
	of Medicine, Memorial Sloan-Kettering Cancer Center, New York, USA.
	hudisc@mskcc.org},
  keywords = {csbcbook},
  owner = {jp},
  pii = {357/1/39},
  pmid = {17611206},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1056/NEJMra043186}
}

@mastersthesis{Hue2004Semi-supervised,
  author = {Hue, M.},
  title  = {Semi-supervised learning for protein structure prediction},
  school = {Ecole des Mines de Paris},
  year   = {2004},
  owner  = {vert}
}

@inproceedings{Hue2010learning,
  author = {Hue, M. and Vert, J.-P.},
  title = {On learning with kernels for unordered pairs},
  booktitle = {Proceedings of the 27th International Conference on Machine Learning
	({ICML}-10), June 21-24, 2010, Haifa, Israel},
  year = {2010},
  editor = {F{\"u}rnkranz, J. and Joachims, T.},
  pages = {463--470},
  publisher = {Omnipress},
  owner = {jp},
  timestamp = {2010.10.14},
  url = {http://www.icml2010.org/papers/520.pdf}
}

@article{Huebert2006Genome-wide,
  author = {Huebert, Dana J. and Kamal, Michael and O'Donovan, Aisling and Bernstein,
	Bradley E.},
  title = {Genome-wide analysis of histone modifications by {ChIP}-on-chip.},
  journal = {Methods},
  year = {2006},
  volume = {40},
  pages = {365--369},
  number = {4},
  month = {Dec},
  abstract = {Post-translational modifications to histone proteins regulate the
	packaging of genomic DNA into chromatin, gene activity and other
	functions of the genome. They are understood to play key roles in
	embryonic development and disease pathogenesis. Recent advances in
	technology have made it possible to analyze chromatin structure genome-wide
	in mammalian cells. Global patterns of histone modifications can
	be observed using a technique called ChIP-on-chip, which combines
	the specificity of chromatin immunoprecipitation with the unbiased,
	high-throughput capabilities of microarrays. The resulting maps provide
	insight into the functions of, and relationships between, different
	modifications. Here, we provide validated ChIP-on-chip methods for
	analyzing histone modification patterns at genome-scale in mammalian
	cells.},
  doi = {10.1016/j.ymeth.2006.07.032},
  institution = {Molecular Pathology Unit and Center for Cancer Research, Massachusetts
	General Hospital, Charlestown, MA 02129, USA.},
  keywords = {Animals; Chromatin Immunoprecipitation; Chromosomes, Mammalian; Genomics;
	Histone Code; Histones; Oligonucleotide Array Sequence Analysis;
	Protein Processing, Post-Translational},
  owner = {phupe},
  pii = {S1046-2023(06)00227-1},
  pmid = {17101450},
  timestamp = {2010.08.09},
  url = {http://dx.doi.org/10.1016/j.ymeth.2006.07.032}
}

@article{Huesken2005Design,
  author = {Huesken, D. and Lange, J. and Mickanin, C. and Weiler, J. and Asselbergs,
	F. and Warner, J. and Meloon, B. and Engel, S. and Rosenberg, A.
	and Cohen, D. and Labow, M. and Reinhardt, M. and Natt, F. and Hall,
	J.},
  title = {Design of a genome-wide si{RNA} library using an artificial neural
	network.},
  journal = {Nat. {B}iotechnol.},
  year = {2005},
  volume = {23},
  pages = {995--1001},
  number = {8},
  month = {Aug},
  abstract = {The largest gene knock-down experiments performed to date have used
	multiple short interfering/short hairpin (si/sh){RNA}s per gene1,
	2, 3. {T}o overcome this burden for design of a genome-wide si{RNA}
	library, we used the {S}tuttgart {N}eural {N}et {S}imulator to train
	algorithms on a data set of 2,182 randomly selected si{RNA}s targeted
	to 34 m{RNA} species, assayed through a high-throughput fluorescent
	reporter gene system. {T}he algorithm, ({BIOPRED}si), reliably predicted
	activity of 249 si{RNA}s of an independent test set ({P}earson coefficient
	r = 0.66) and si{RNA}s targeting endogenous genes at m{RNA} and protein
	levels. {N}eural networks trained on a complementary 21-nucleotide
	(nt) guide sequence were superior to those trained on a 19-nt sequence.
	{BIOPRED}si was used in the design of a genome-wide si{RNA} collection
	with two potent si{RNA}s per gene. {W}hen this collection of 50,000
	si{RNA}s was used to identify genes involved in the cellular response
	to hypoxia, two of the most potent hits were the key hypoxia transcription
	factors {HIF}1{A} and {ARNT}.},
  doi = {10.1038/nbt1118},
  pdf = {../local/Huesken2005Design.pdf},
  file = {Huesken2005Design.pdf:local/Huesken2005Design.pdf:PDF},
  keywords = {sirna},
  url = {http://dx.doi.org/10.1038/nbt1118}
}

@article{Hughey1996Hidden,
  author = {Hughey, R. and Krogh, A.},
  title = {Hidden {M}arkov models for sequence analysis: {E}xtension and analysis
	of the basic method},
  journal = {{CABIOS}},
  year = {1996},
  volume = {12},
  pages = {95--107},
  number = {2}
}

@article{Huh2003Global,
  author    = {Huh, W.-K. and Falvo, J. V. and Gerke, L. C. and Carroll, A. S. and
	Howson, R. W. and Weissman, J. S. and O'Shea, E. K.},
  title     = {Global analysis of protein localization in budding yeast.},
  journal   = {Nature},
  year      = {2003},
  volume    = {425},
  pages     = {686--691},
  number    = {6959},
  month     = {Oct},
  abstract  = {A fundamental goal of cell biology is to define the functions of proteins
	in the context of compartments that organize them in the cellular
	environment. Here we describe the construction and analysis of a
	collection of yeast strains expressing full-length, chromosomally
	tagged green fluorescent protein fusion proteins. We classify these
	proteins, representing 75\% of the yeast proteome, into 22 distinct
	subcellular localization categories, and provide localization information
	for 70\% of previously unlocalized proteins. Analysis of this high-resolution,
	high-coverage localization data set in the context of transcriptional,
	genetic, and protein-protein interaction data helps reveal the logic
	of transcriptional co-regulation, and provides a comprehensive view
	of interactions within and between organelles in eukaryotic cells.},
  doi       = {10.1038/nature02026},
  pdf       = {../local/Huh2003Global.pdf},
  file      = {Huh2003Global.pdf:Huh2003Global.pdf:PDF},
  pii       = {nature02026},
  pmid      = {14562083},
  timestamp = {2007.02.01},
  url       = {http://dx.doi.org/10.1038/nature02026}
}

@article{Humphrey1996VMD,
  author = {Humphrey, W. and Dalke, A. and Schulten, K.},
  title = {{VMD}: visual molecular dynamics.},
  journal = {J. Mol. Graph.},
  year = {1996},
  volume = {14},
  pages = {33--38, 27--28},
  number = {1},
  month = {Feb},
  abstract = {VMD is a molecular graphics program designed for the display and analysis
	of molecular assemblies, in particular biopolymers such as proteins
	and nucleic acids. VMD can simultaneously display any number of structures
	using a wide variety of rendering styles and coloring methods. Molecules
	are displayed as one or more "representations," in which each representation
	embodies a particular rendering method and coloring scheme for a
	selected subset of atoms. The atoms displayed in each representation
	are chosen using an extensive atom selection syntax, which includes
	Boolean operators and regular expressions. VMD provides a complete
	graphical user interface for program control, as well as a text interface
	using the Tcl embeddable parser to allow for complex scripts with
	variable substitution, control loops, and function calls. Full session
	logging is supported, which produces a VMD command script for later
	playback. High-resolution raster images of displayed molecules may
	be produced by generating input scripts for use by a number of photorealistic
	image-rendering applications. VMD has also been expressly designed
	with the ability to animate molecular dynamics (MD) simulation trajectories,
	imported either from files or from a direct connection to a running
	MD simulation. VMD is the visualization component of MDScope, a set
	of tools for interactive problem solving in structural biology, which
	also includes the parallel MD program NAMD, and the MDCOMM software
	used to connect the visualization and simulation programs. VMD is
	written in C++, using an object-oriented design; the program, including
	source code and extensive documentation, is freely available via
	anonymous ftp and through the World Wide Web.},
  owner = {laurent},
  pii = {0263785596000185},
  pmid = {8744570},
  timestamp = {2008.01.16}
}

@article{Huppi2005Defining,
  author = {Huppi, K. and Martin, S. E. and Caplen, N. J.},
  title = {Defining and assaying {RNA}i in mammalian cells.},
  journal = {Mol. {C}ell},
  year = {2005},
  volume = {17},
  pages = {1--10},
  number = {1},
  month = {Jan},
  abstract = {The investigation of protein function through the inhibition of activity
	has been critical to our understanding of many normal and abnormal
	biological processes. {U}ntil recently, functional inhibition in
	biological systems has been induced using a variety of approaches
	including small molecule antagonists, antibodies, aptamers, ribozymes,
	antisense oligonucleotides or transcripts, morpholinos, dominant-negative
	mutants, and knockout transgenic animals. {A}lthough all of these
	approaches have made substantial advances in our understanding of
	the function of many proteins, a lack of specificity or restricted
	applicability has limited their utility. {R}ecently, exploitation
	of the naturally occurring posttranscriptional gene silencing mechanism
	triggered by double-stranded {RNA} (ds{RNA}), termed {RNA} interference
	({RNA}i), has gained much favor as an alternative means for analyzing
	gene function. {A}spects of the basic biology of {RNA}i, its application
	as a functional genomics tool, and its potential as a therapeutic
	approach have been extensively reviewed ({H}annon and {R}ossi, 2004;
	{M}eister and {T}uschl, 2004); however, there has been only limited
	discussion as to how to design and validate an individual {RNA}i
	effector molecule and how to interpret {RNA}i data overall, particularly
	with reference to experimentation in mammalian cells. {T}his perspective
	will aim to consider some of the issues encountered when conducting
	and interpreting {RNA}i experiments in mammalian cells.},
  doi = {10.1016/j.molcel.2004.12.017},
  keywords = {sirna},
  pii = {S1097276504008032},
  url = {http://dx.doi.org/10.1016/j.molcel.2004.12.017}
}

@article{Hupe2004Analysis,
  author = {Hup{\'e}, P. and Stransky, N. and Thiery, J.-P. and Radvanyi, F.
	and Barillot, E.},
  title = {Analysis of array {CGH} data: from signal ratio to gain and loss
	of {DNA} regions},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {3413--3422},
  number = {18},
  month = {Dec},
  abstract = {MOTIVATION: Genomic DNA regions are frequently lost or gained during
	tumor progression. Array Comparative Genomic Hybridization (array
	CGH) technology makes it possible to assess these changes in DNA
	in cancers, by comparison with a normal reference. The identification
	of systematically deleted or amplified genomic regions in a set of
	tumors enables biologists to identify genes involved in cancer progression
	because tumor suppressor genes are thought to be located in lost
	genomic regions and oncogenes, in gained regions. Array CGH profiles
	should also improve the classification of tumors. The achievement
	of these goals requires a methodology for detecting the breakpoints
	delimiting altered regions in genomic patterns and assigning a status
	(normal, gained or lost) to each chromosomal region. RESULTS: We
	have developed a methodology for the automatic detection of breakpoints
	from array CGH profile, and the assignment of a status to each chromosomal
	region. The breakpoint detection step is based on the Adaptive Weights
	Smoothing (AWS) procedure and provides highly convincing results:
	our algorithm detects 97, 100 and 94\% of breakpoints in simulated
	data, karyotyping results and manually analyzed profiles, respectively.
	The percentage of correctly assigned statuses ranges from 98.9 to
	99.8\% for simulated data and is 100\% for karyotyping results. Our
	algorithm also outperforms other solutions on a public reference
	dataset. AVAILABILITY: The R package GLAD (Gain and Loss Analysis
	of DNA) is available upon request.},
  doi = {10.1093/bioinformatics/bth418},
  pdf = {../local/Hupe2004Analysis.pdf},
  file = {Hupe2004Analysis.pdf:Hupe2004Analysis.pdf:PDF},
  keywords = {cgh},
  owner = {jp},
  pii = {bth418},
  pmid = {15381628},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth418}
}

@article{Hutter2004Prediction,
  author = {Hutter, B. and Schaab, C. and Albrecht, S. and Borgmann, M. and Brunner,
	N. A. and Freiberg, C. and Ziegelbauer, K. and Rock, C. O. and Ivanov,
	I. and Loferer, H.},
  title = {Prediction of {M}echanisms of {A}ction of {A}ntibacterial {C}ompounds
	by {G}ene {E}xpression {P}rofiling},
  journal = {Antimicrob. {A}gents {C}hemother.},
  year = {2004},
  volume = {48},
  pages = {2838--2844},
  number = {8},
  month = {Aug},
  abstract = {We have generated a database of expression profiles carrying the transcriptional
	responses of the model organism {B}acillus subtilis following treatment
	with 37 well-characterized antibacterial compounds of different classes.
	{T}he database was used to build a predictor for the assignment of
	the mechanisms of action ({M}o{A}s) of antibacterial compounds by
	the use of support vector machines. {T}his predictor was able to
	correctly classify the {M}o{A} class for most compounds tested. {F}urthermore,
	we provide evidence that the in vivo {M}o{A} of hexachlorophene does
	not match the {M}o{A} predicted from in vitro data, a situation frequently
	faced in drug discovery. {A} database of this kind may facilitate
	the prioritization of novel antibacterial entities in drug discovery
	programs. {P}otential applications and limitations are discussed.},
  doi = {10.1128/AAC.48.8.2838-2844.2004},
  eprint = {http://aac.asm.org/cgi/reprint/48/8/2838.pdf},
  pdf = {../local/Hutter2004Prediction.pdf},
  file = {Hutter2004Prediction.pdf:local/Hutter2004Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1128/AAC.48.8.2838-2844.2004}
}

@article{Huynh-Thu2010Inferring,
  author = {Huynh-Thu, V. A. and Irrthum, A. and Wehenkel, L. and Geurts, P.},
  title = {Inferring regulatory networks from expression data using tree-based
	methods},
  journal = {PLoS One},
  year = {2010},
  volume = {5},
  pages = {e12776},
  number = {9},
  abstract = {One of the pressing open problems of computational systems biology
	is the elucidation of the topology of genetic regulatory networks
	(GRNs) using high throughput genomic data, in particular microarray
	gene expression data. The Dialogue for Reverse Engineering Assessments
	and Methods (DREAM) challenge aims to evaluate the success of GRN
	inference algorithms on benchmarks of simulated data. In this article,
	we present GENIE3, a new algorithm for the inference of GRNs that
	was best performer in the DREAM4 In Silico Multifactorial challenge.
	GENIE3 decomposes the prediction of a regulatory network between
	p genes into p different regression problems. In each of the regression
	problems, the expression pattern of one of the genes (target gene)
	is predicted from the expression patterns of all the other genes
	(input genes), using tree-based ensemble methods Random Forests or
	Extra-Trees. The importance of an input gene in the prediction of
	the target gene expression pattern is taken as an indication of a
	putative regulatory link. Putative regulatory links are then aggregated
	over all genes to provide a ranking of interactions from which the
	whole network is reconstructed. In addition to performing well on
	the DREAM4 In Silico Multifactorial challenge simulated data, we
	show that GENIE3 compares favorably with existing algorithms to decipher
	the genetic regulatory network of Escherichia coli. It doesn't make
	any assumption about the nature of gene regulation, can deal with
	combinatorial and non-linear interactions, produces directed GRNs,
	and is fast and scalable. In conclusion, we propose a new algorithm
	for GRN inference that performs well on both synthetic and real gene
	expression data. The algorithm, based on feature selection with tree-based
	ensemble methods, is simple and generic, making it adaptable to other
	types of genomic data and interactions.},
  doi = {10.1371/journal.pone.0012776},
  pdf = {../local/Huynh-Thu2010Inferring.pdf},
  file = {Huynh-Thu2010Inferring.pdf:Huynh-Thu2010Inferring.pdf:PDF},
  institution = {Department of Electrical Engineering and Computer Science, Systems
	and Modeling, University of Li{\`e}ge, Li{\`e}ge, Belgium. vahuynh@ulg.ac.be},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pmid = {20927193},
  timestamp = {2010.12.16},
  url = {http://dx.doi.org/10.1371/journal.pone.0012776}
}

@inproceedings{Hwang2010Heterogeneous,
  author    = {Hwang, T. and Kuang, R.},
  title     = {A Heterogeneous Label Propagation Algorithm for Disease Gene Discovery},
  booktitle = {Proceedings of the SIAM International Conference on Data Mining,
	SDM 2010, April 29 - May 1, 2010, Columbus, Ohio, USA},
  pages     = {583--594},
  year      = {2010},
  ee        = {http://www.siam.org/proceedings/datamining/2010/dm10_051_hwangt.pdf},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  timestamp = {2010.10.01},
  owner     = {mordelet}
}

@article{Hyman2002Impact,
  author = {Hyman, Elizabeth and Kauraniemi, P{\"a}ivikki and Hautaniemi, Sampsa
	and Wolf, Maija and Mousses, Spyro and Rozenblum, Ester and Ringn{\'e}r,
	Markus and Sauter, Guido and Monni, Outi and Elkahloun, Abdel and
	Kallioniemi, Olli-P. and Kallioniemi, Anne},
  title = {Impact of {DNA} amplification on gene expression patterns in breast
	cancer.},
  journal = {Cancer Res},
  year = {2002},
  volume = {62},
  pages = {6240--6245},
  number = {21},
  month = {Nov},
  abstract = {Genetic changes underlie tumor progression and may lead to cancer-specific
	expression of critical genes. Over 1100 publications have described
	the use of comparative genomic hybridization (CGH) to analyze the
	pattern of copy number alterations in cancer, but very few of the
	genes affected are known. Here, we performed high-resolution CGH
	analysis on cDNA microarrays in breast cancer and directly compared
	copy number and mRNA expression levels of 13,824 genes to quantitate
	the impact of genomic changes on gene expression. We identified and
	mapped the boundaries of 24 independent amplicons, ranging in size
	from 0.2 to 12 Mb. Throughout the genome, both high- and low-level
	copy number changes had a substantial impact on gene expression,
	with 44\% of the highly amplified genes showing overexpression and
	10.5\% of the highly overexpressed genes being amplified. Statistical
	analysis with random permutation tests identified 270 genes whose
	expression levels across 14 samples were systematically attributable
	to gene amplification. These included most previously described amplified
	genes in breast cancer and many novel targets for genomic alterations,
	including the HOXB7 gene, the presence of which in a novel amplicon
	at 17q21.3 was validated in 10.2\% of primary breast cancers and
	associated with poor patient prognosis. In conclusion, CGH on cDNA
	microarrays revealed hundreds of novel genes whose overexpression
	is attributable to gene amplification. These genes may provide insights
	to the clonal evolution and progression of breast cancer and highlight
	promising therapeutic targets.},
  institution = {Howard Hughes Medical Institute-NIH Research Scholar, Bethesda, Maryland
	20892, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {12414653},
  timestamp = {2012.02.29}
}

@article{Iafrate2004Detection,
  author = {A. John Iafrate and Lars Feuk and Miguel N Rivera and Marc L Listewnik
	and Patricia K Donahoe and Ying Qi and Stephen W Scherer and Charles
	Lee},
  title = {Detection of large-scale variation in the human genome},
  journal = {Nat. Genet.},
  year = {2004},
  volume = {36},
  pages = {949--951},
  number = {9},
  month = {Sep},
  abstract = {We identified 255 loci across the human genome that contain genomic
	imbalances among unrelated individuals. Twenty-four variants are
	present in > 10\% of the individuals that we examined. Half of these
	regions overlap with genes, and many coincide with segmental duplications
	or gaps in the human genome assembly. This previously unappreciated
	heterogeneity may underlie certain human phenotypic variation and
	susceptibility to disease and argues for a more dynamic human genome
	structure.},
  doi = {10.1038/ng1416},
  pdf = {../local/Iafrate2004Detection.pdf},
  file = {Iafrate2004Detection.pdf:Iafrate2004Detection.pdf:PDF},
  institution = {Department of Pathology, Brigham and Women's Hospital, 20 Shattuck
	St., Thorn 6-28, Boston, Massachusetts 02115, USA.},
  keywords = {cgh, csbcbook, csbcbook-ch2},
  owner = {jp},
  pii = {ng1416},
  pmid = {15286789},
  timestamp = {2009.02.08},
  url = {http://dx.doi.org/10.1038/ng1416}
}

@article{Ideker2002Discovering,
  author = {Ideker, T. and Ozier, O. and Schwikowski, B. and Siegel, A. F.},
  title = {Discovering regulatory and signalling circuits in molecular interaction
	networks.},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {S233--S240},
  number = {Suppl 1},
  abstract = {MOTIVATION: In model organisms such as yeast, large databases of protein-protein
	and protein-DNA interactions have become an extremely important resource
	for the study of protein function, evolution, and gene regulatory
	dynamics. In this paper we demonstrate that by integrating these
	interactions with widely-available mRNA expression data, it is possible
	to generate concrete hypotheses for the underlying mechanisms governing
	the observed changes in gene expression. To perform this integration
	systematically and at large scale, we introduce an approach for screening
	a molecular interaction network to identify active subnetworks, i.e.,
	connected regions of the network that show significant changes in
	expression over particular subsets of conditions. The method we present
	here combines a rigorous statistical measure for scoring subnetworks
	with a search algorithm for identifying subnetworks with high score.
	RESULTS: We evaluated our procedure on a small network of 332 genes
	and 362 interactions and a large network of 4160 genes containing
	all 7462 protein-protein and protein-DNA interactions in the yeast
	public databases. In the case of the small network, we identified
	five significant subnetworks that covered 41 out of 77 (53\%) of
	all significant changes in expression. Both network analyses returned
	several top-scoring subnetworks with good correspondence to known
	regulatory mechanisms in the literature. These results demonstrate
	how large-scale genomic approaches may be used to uncover signalling
	and regulatory pathways in a systematic, integrative fashion.},
  pdf = {../local/Ideker2002Discovering.pdf},
  file = {Ideker2002Discovering.pdf:Ideker2002Discovering.pdf:PDF},
  institution = {Whitehead Institute for Biomedical Research, Cambridge, MA 02142,
	USA Institute for Systems Biology, Seattle, WA 98103, USA. trey@wi.mit.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {12169552},
  timestamp = {2011.09.24}
}

@article{Ideker2008Protein,
  author      = {Ideker, T. and Sharan, R.},
  title       = {Protein networks in disease.},
  journal     = {Genome Res},
  year        = {2008},
  month       = {Apr},
  volume      = {18},
  number      = {4},
  pages       = {644--652},
  doi         = {10.1101/gr.071852.107},
  url         = {http://dx.doi.org/10.1101/gr.071852.107},
  pii         = {18/4/644},
  pmid        = {18381899},
  abstract    = {During a decade of proof-of-principle analysis in model organisms,
	protein networks have been used to further the study of molecular
	evolution, to gain insight into the robustness of cells to perturbation,
	and for assignment of new protein functions. Following these analyses,
	and with the recent rise of protein interaction measurements in mammals,
	protein networks are increasingly serving as tools to unravel the
	molecular basis of disease. We review promising applications of protein
	networks to disease in four major areas: identifying new disease
	genes; the study of their network properties; identifying disease-related
	subnetworks; and network-based disease classification. Applications
	in infectious disease, personalized medicine, and pharmacology are
	also forthcoming as the available protein network information improves
	in quality and coverage.},
  institution = {Department of Bioengineering, University of California at San Diego,
	La Jolla, California 92093, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Ideker2008Protein.pdf},
  file        = {Ideker2008Protein.pdf:Ideker2008Protein.pdf:PDF},
  owner       = {jp},
  timestamp   = {2012.03.09}
}

@article{Ifantis2003nonlinear,
  author = {A. Ifantis and S. Papadimitriou},
  title = {The nonlinear predictability of the electrotelluric field variations
	data analyzed with support vector machines as an earthquake precursor.},
  journal = {Int {J} {N}eural {S}yst},
  year = {2003},
  volume = {13},
  pages = {315--332},
  number = {5},
  month = {Oct},
  abstract = {This work investigates the nonlinear predictability of the {E}lectro
	{T}elluric {F}ield ({ETF}) variations data in order to develop new
	intelligent tools for the difficult task of earthquake prediction.
	{S}upport {V}ector {M}achines trained on a signal window have been
	used to predict the next sample. {W}e observe a significant increase
	at this short-term unpredictability of the {ETF} signal at about
	two weeks time period before the major earthquakes that took place
	in regions near the recording devices. {T}he unpredictability increase
	can be attributed to a quick time variation of the dynamics that
	produce the {ETF} signal due to the earthquake generation process.
	{T}hus, this increase can be taken into advantage for signaling for
	an increased possibility of a large earthquake within the next few
	days in the neighboring region of the recording station.},
  keywords = {Air Pollutants, Aircraft, Algorithms, Artificial Intelligence, Automated,
	Base Composition, Comparative Study, Computational Biology, Computer
	Simulation, Computer-Assisted, Computing Methodologies, Cytosine,
	Data Interpretation, Databases, Enhancer Elements (Genetics), Environmental
	Monitoring, Ethanol, Exons, Fourier Transform Infrared, Genetic,
	Guanine, Humans, Image Interpretation, Natural Disasters, Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Online Systems, P.H.S., Pattern
	Recognition, Photography, Probability, Pyrimidines, RNA Precursors,
	RNA Splice Sites, RNA Splicing, Radiation, Reproducibility of Results,
	Research Support, Sensitivity and Specificity, Signal Processing,
	Spectroscopy, Statistical, Subtraction Technique, Thermodynamics,
	Time Factors, U.S. Gov't, Untranslated Regions, Video Recording,
	Walking},
  pii = {S0129065703001674},
  pmid = {14652873}
}

@article{Iizuka2003Oligonucleotide,
  author = {Norio Iizuka and Masaaki Oka and Hisafumi Yamada-Okabe and Minekatsu
	Nishida and Yoshitaka Maeda and Naohide Mori and Takashi Takao and
	Takao Tamesa and Akira Tangoku and Hisahiro Tabuchi and Kenji Hamada
	and Hironobu Nakayama and Hideo Ishitsuka and Takanobu Miyamoto and
	Akira Hirabayashi and Shunji Uchimura and Yoshihiko Hamamoto},
  title = {Oligonucleotide microarray for prediction of early intrahepatic recurrence
	of hepatocellular carcinoma after curative resection.},
  journal = {Lancet},
  year = {2003},
  volume = {361},
  pages = {923--929},
  number = {9361},
  month = {Mar},
  abstract = {B{ACKGROUND}: {H}epatocellular carcinoma has a poor prognosis because
	of the high intrahepatic recurrence rate. {T}here are technological
	limitations to traditional methods such as {TNM} staging for accurate
	prediction of recurrence, suggesting that new techniques are needed.
	{METHODS}: {W}e investigated m{RNA} expression profiles in tissue
	specimens from a training set, comprising 33 patients with hepatocellular
	carcinoma, with high-density oligonucleotide microarrays representing
	about 6000 genes. {W}e used this training set in a supervised learning
	manner to construct a predictive system, consisting of 12 genes,
	with the {F}isher linear classifier. {W}e then compared the predictive
	performance of our system with that of a predictive system with a
	support vector machine ({SVM}-based system) on a blinded set of samples
	from 27 newly enrolled patients. {FINDINGS}: {E}arly intrahepatic
	recurrence within 1 year after curative surgery occurred in 12 (36\%)
	and eight (30\%) patients in the training and blinded sets, respectively.
	{O}ur system correctly predicted early intrahepatic recurrence or
	non-recurrence in 25 (93\%) of 27 samples in the blinded set and
	had a positive predictive value of 88\% and a negative predictive
	value of 95\%. {B}y contrast, the {SVM}-based system predicted early
	intrahepatic recurrence or non-recurrence correctly in only 16 (60\%)
	individuals in the blinded set, and the result yielded a positive
	predictive value of only 38\% and a negative predictive value of
	79\%. {INTERPRETATION}: {O}ur system predicted early intrahepatic
	recurrence or non-recurrence for patients with hepatocellular carcinoma
	much more accurately than the {SVM}-based system, suggesting that
	our system could serve as a new method for characterising the metastatic
	potential of hepatocellular carcinoma.},
  doi = {10.1016/S0140-6736(03)12775-4},
  pdf = {../local/Iizuka2003Oligonucleotide.pdf},
  file = {Iizuka2003Oligonucleotide.pdf:local/Iizuka2003Oligonucleotide.pdf:PDF},
  pii = {S0140673603127754},
  url = {http://dx.doi.org/10.1016/S0140-6736(03)12775-4}
}

@article{Ikeda2005asymptotic,
  author = {Kazushi Ikeda and Tsutomu Aoishi},
  title = {An asymptotic statistical analysis of support vector machines with
	soft margins.},
  journal = {Neural {N}etw},
  year = {2005},
  volume = {18},
  pages = {251--259},
  number = {3},
  month = {Apr},
  abstract = {The generalization properties of support vector machines ({SVM}s)
	are examined. {F}rom a geometrical point of view, the estimated parameter
	of an {SVM} is the one nearest the origin in the convex hull formed
	with given examples. {S}ince introducing soft margins is equivalent
	to reducing the convex hull of the examples, an {SVM} with soft margins
	has a different learning curve from the original. {I}n this paper
	we derive the asymptotic average generalization error of {SVM}s with
	soft margins in simple cases, that is, only when the dimension of
	inputs is one, and quantitatively show that soft margins increase
	the generalization error.},
  doi = {10.1016/j.neunet.2004.11.008},
  pdf = {../local/Ikeda2005asymptotic.pdf},
  file = {Ikeda2005asymptotic.pdf:local/Ikeda2005asymptotic.pdf:PDF},
  keywords = {Apoptosis, Gene Expression Profiling, Humans, Neoplasms, Non-U.S.
	Gov't, Oligonucleotide Array Sequence Analysis, Polymerase Chain
	Reaction, Proteins, Research Support, Subcellular Fractions, Unknown
	Primary},
  pii = {S0893-6080(05)00021-3},
  pmid = {15896573},
  url = {http://dx.doi.org/10.1016/j.neunet.2004.11.008}
}

@article{Ikeda2005[Tongue,
  author = {Naoya Ikeda and Takashi Uozumi},
  title = {[{T}ongue diagnosis support system]},
  journal = {Hokkaido {I}gaku {Z}asshi},
  year = {2005},
  volume = {80},
  pages = {269--277},
  number = {3},
  month = {May},
  abstract = {Tongue diagnosis is one of the most important diagnostic methods in
	{O}riental {M}edical {S}cience ({OMS}). {T}his diagnosis is painless
	and non-invasive method. {H}owever, it is not easy to cultivate skillful
	doctors. {A}s one of the reasons, definition of tongue color is rather
	subjective and sensuous measure and color isn't related to quantitative
	physical value. {I}t is, therefore, necessary to associate tongue
	color with physical numerical value. {T}here are two problems to
	overcome the issue. 1) {I}t is necessary for diagnosis to extract
	a region for diagnosis from entire picture because a tongue picture
	consists of two regions, a tongue and a background. 2) {A}ssociate
	tongue color with physical numerical value. {F}or extracting tongue
	region, we used {P}rogressive {L}ive{W}ire method that is an {A}ctive
	{C}ontour {M}odel. {A}nd, for associating tongue color with physical
	measurement, we propose a hierarchical method. {W}e use static rule
	and support vector machine for clustering colors. {T}he performance
	of developed system is improved compared with an early developed
	one. {I}n addition, the developed system did not make a critical
	incorrect discernment that causes incorrect choice about inspection
	in the layer of rule base. {I}n this research average color appraisal
	is done from the region of 37 points. {B}ut, color judgment in the
	literature with the judgment by the eye of the human, has always
	done average judgment with not to limit, there is also a possibility
	some weight attaching being done. {T}herefore, from either one enabling
	the mass data and the comparison with the group of specialists is
	necessary as an appraisal.},
  keywords = {Color, Computer-Assisted, Diagnosis, English Abstract, Expert Systems,
	Humans, Tongue},
  pmid = {15960161}
}

@article{Imoto2002Estimation,
  author    = {Imoto, S. and Goto, T. and Miyano, S.},
  title     = {Estimation of genetic networks and functional structures between
	genes by using {B}ayesian networks and nonparametric regression.},
  journal   = {Pac. {S}ymp. {B}iocomput.},
  pages     = {175--186},
  year      = {2002},
  url       = {http://helix-web.stanford.edu/psb02/imoto.pdf},
  pmid      = {11928473},
  abstract  = {We propose a new method for constructing genetic network from gene
	expression data by using {B}ayesian networks. {W}e use nonparametric
	regression for capturing nonlinear relationships between genes and
	derive a new criterion for choosing the network in general situations.
	{I}n a theoretical sense, our proposed theory and methodology include
	previous methods based on {B}ayes approach. {W}e applied the proposed
	method to the {S}. cerevisiae cell cycle data and showed the effectiveness
	of our method by comparing with previous methods.},
  keywords  = {biogm},
  pdf       = {../local/Imoto2002Estimation.pdf},
  file      = {Imoto2002Estimation.pdf:local/Imoto2002Estimation.pdf:PDF},
  owner     = {vert},
  timestamp = {2006.02.16}
}

@article{Imoto2003Bayesian,
  author    = {Imoto, S. and Kim, S. and Goto, T. and Miyano, S. and Aburatani,
	S. and Tashiro, K. and Kuhara, S.},
  title     = {Bayesian network and nonparametric heteroscedastic regression for
	nonlinear modeling of genetic network.},
  journal   = {J. {B}ioinform. {C}omput. {B}iol.},
  year      = {2003},
  month     = {Jul},
  volume    = {1},
  number    = {2},
  pages     = {231--252},
  doi       = {10.1142/S0219720003000071},
  url       = {http://dx.doi.org/10.1142/S0219720003000071},
  pii       = {S0219720003000071},
  pmid      = {15290771},
  abstract  = {We propose a new statistical method for constructing a genetic network
	from microarray gene expression data by using a {B}ayesian network.
	{A}n essential point of {B}ayesian network construction is the estimation
	of the conditional distribution of each random variable. {W}e consider
	fitting nonparametric regression models with heterogeneous error
	variances to the microarray gene expression data to capture the nonlinear
	structures between genes. {S}electing the optimal graph, which gives
	the best representation of the system among genes, is still a problem
	to be solved. {W}e theoretically derive a new graph selection criterion
	from {B}ayes approach in general situations. {T}he proposed method
	includes previous methods based on {B}ayesian networks. {W}e demonstrate
	the effectiveness of the proposed method through the analysis of
	{S}accharomyces cerevisiae gene expression data newly obtained by
	disrupting 100 genes.},
  keywords  = {biogm},
  pdf       = {../local/Imoto2003Bayesian.pdf},
  file      = {Imoto2003Bayesian.pdf:local/Imoto2003Bayesian.pdf:PDF},
  owner     = {vert},
  timestamp = {2006.02.16}
}

@article{Imoto2002Bayesian,
  author = {Imoto, S. and Kim, S. and Goto, T. and Aburatani, S. and Tashiro,
	K. and Kuhara, S. and Miyano, S.},
  title = {Bayesian network and nonparametric heteroscedastic regression for
	nonlinear modeling of genetic network.},
  journal = {Proc. {IEEE} {C}omput. {S}oc. {B}ioinform. {C}onf.},
  year = {2002},
  volume = {1},
  pages = {219--227},
  abstract = {We propose a new statistical method for constructing genetic network
	from microarray gene expression data by using a {B}ayesian network.
	{A}n essential point of {B}ayesian network construction is in the
	estimation of the conditional distribution of each random variable.
	{W}e consider fitting nonparametric regression models with heterogeneous
	error variances to the microarray gene expression data to capture
	the nonlinear structures between genes. {A} problem still remains
	to be solved in selecting an optimal graph, which gives the best
	representation of the system among genes. {W}e theoretically derive
	a new graph selection criterion from {B}ayes approach in general
	situations. {T}he proposed method includes previous methods based
	on {B}ayesian networks. {W}e demonstrate the effectiveness of the
	proposed method through the analysis of {S}accharomyces cerevisiae
	gene expression data newly obtained by disrupting 100 genes.},
  doi = {10.1109/CSB.2002.1039344},
  pdf = {../local/Imoto2002Bayesian.pdf},
  file = {Imoto2002Bayesian.pdf:local/Imoto2002Bayesian.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  pmid = {15838138},
  timestamp = {2006.02.16},
  url = {http://dx.doi.org/10.1109/CSB.2002.1039344}
}

@article{Inokuchi2003Complete,
  author = {Inokuchi, A. and Washio, T. and Motoda, H.},
  title = {Complete mining of frequent patterns from graphs: mining graph data},
  journal = {Mach. Learn.},
  year = {2003},
  volume = {50},
  pages = {321--354},
  number = {3},
  timestamp = {2006.08.03}
}

@article{Consortium2003International,
  author      = {{International HapMap Consortium}},
  title       = {The International HapMap Project.},
  journal     = {Nature},
  year        = {2003},
  month       = {Dec},
  volume      = {426},
  number      = {6968},
  pages       = {789--796},
  pmid        = {14685227},
  language    = {eng},
  medline-pst = {ppublish},
  keywords    = {Base Sequence; Continental Population Groups, genetics; DNA, genetics;
	Gene Frequency; Genetic Variation, genetics; Genome, Human; Genomics,
	methods; Haplotypes, genetics; Humans; International Cooperation;
	Polymorphism, Single Nucleotide, genetics; Public Sector},
  abstract    = {The goal of the International HapMap Project is to determine the common
	patterns of DNA sequence variation in the human genome and to make
	this information freely available in the public domain. An international
	consortium is developing a map of these patterns across the genome
	by determining the genotypes of one million or more sequence variants,
	their frequencies and the degree of association between them, in
	DNA samples from populations with ancestry from parts of Africa,
	Asia and Europe. The HapMap will allow the discovery of sequence
	variants that affect common disease, will facilitate development
	of diagnostic tools, and will enhance our ability to choose targets
	for therapeutic intervention.},
  owner       = {philippe},
  timestamp   = {2010.08.01}
}

@article{Consortium2001Physical,
  author = {{International Human Genome Mapping Consortium}},
  title = {A physical map of the human genome},
  journal = {Nature},
  year = {2001},
  volume = {409},
  pages = {934--941},
  number = {6822},
  owner = {philippe},
  timestamp = {2010.07.28}
}

@article{Consortium2004Finishing,
  author = {{International Human Genome Sequencing Consortium}},
  title = {Finishing the euchromatic sequence of the human genome},
  journal = {Nature},
  year = {2004},
  volume = {431},
  pages = {931--945},
  number = {7011},
  owner = {philippe},
  timestamp = {2010.07.28}
}

@article{Consortium2001Initial,
  author = {{International Human Genome Sequencing Consortium}},
  title = {Initial sequencing and analysis of the human genome},
  journal = {Nature},
  year = {2001},
  volume = {409},
  pages = {860--921},
  number = {6822},
  month = {Feb},
  abstract = {The human genome holds an extraordinary trove of information about
	human development, physiology, medicine and evolution. {H}ere we
	report the results of an international collaboration to produce and
	make freely available a draft sequence of the human genome. {W}e
	also present an initial analysis of the data, describing some of
	the insights that can be gleaned from the sequence.},
  doi = {10.1038/35057062},
  pdf = {../local/Consortium2001Initial.pdf},
  file = {Consortium2001Initial.pdf:local/Consortium2001Initial.pdf:PDF},
  keywords = {genomics bio},
  owner = {vert},
  url = {http://dx.doi.org/10.1038/35057062}
}

@article{Ioannidis2005most,
  author = {Ioannidis, J. P. A.},
  title = {Why most published research findings are false},
  journal = {PLoS Medicine},
  year = {2005},
  volume = {2},
  pages = {e124},
  number = {8},
  publisher = {Public Library of Science}
}

@article{Ioannidis2005Microarrays,
  author    = {Ioannidis, J. P. A.},
  title     = {Microarrays and molecular research: noise discovery?},
  journal   = {Lancet},
  year      = {2005},
  volume    = {365},
  number    = {9458},
  pages     = {454},
  keywords  = {microarray},
  pdf       = {../local/Ioannidis2005Microarrays.pdf},
  file      = {Ioannidis2005Microarrays.pdf:Ioannidis2005Microarrays.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.01.12}
}

@article{Ioannidis2009Repeatability,
  author = {John P A Ioannidis and David B Allison and Catherine A Ball and Issa
	Coulibaly and Xiangqin Cui and Aed{\'\i}n C Culhane and Mario Falchi and
	Cesare Furlanello and Laurence Game and Giuseppe Jurman and Jon Mangion
	and Tapan Mehta and Michael Nitzberg and Grier P Page and Enrico
	Petretto and Vera van Noort},
  title = {Repeatability of published microarray gene expression analyses.},
  journal = {Nat Genet},
  year = {2009},
  volume = {41},
  pages = {149--155},
  number = {2},
  month = {Feb},
  abstract = {Given the complexity of microarray-based gene expression studies,
	guidelines encourage transparent design and public data availability.
	Several journals require public data deposition and several public
	databases exist. However, not all data are publicly available, and
	even when available, it is unknown whether the published results
	are reproducible by independent scientists. Here we evaluated the
	replication of data analyses in 18 articles on microarray-based gene
	expression profiling published in Nature Genetics in 2005-2006. One
	table or figure from each article was independently evaluated by
	two teams of analysts. We reproduced two analyses in principle and
	six partially or with some discrepancies; ten could not be reproduced.
	The main reason for failure to reproduce was data unavailability,
	and discrepancies were mostly due to incomplete data annotation or
	specification of data processing and analysis. Repeatability of published
	microarray studies is apparently limited. More strict publication
	rules enforcing public data availability and explicit description
	of data processing and analysis should be considered.},
  doi = {10.1038/ng.295},
  institution = {Clinical and Molecular Epidemiology Unit, Department of Hygiene and
	Epidemiology, University of Ioannina School of Medicine, Ioannina
	45110, Greece. jioannid@cc.uoi.gr},
  keywords = {Animals; Data Interpretation, Statistical; Databases, Genetic; Gene
	Expression Profiling, standards; Genome-Wide Association Study, standards;
	Humans; Oligonucleotide Array Sequence Analysis, standards; Peer
	Review, Research; Publications, standards; Reproducibility of Results},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {ng.295},
  pmid = {19174838},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/ng.295}
}

@article{Irizarry2003Exploration,
  author = {Irizarry, R. A. and Hobbs, B. and Collin, F. and Beazer-Barclay,
	Y. D. and Antonellis, K. J. and Scherf, U. and Speed, T. P.},
  title = {Exploration, normalization, and summaries of high density oligonucleotide
	array probe level data},
  journal = {Biostatistics},
  year = {2003},
  volume = {4},
  pages = {249--264},
  number = {2},
  month = {Apr},
  abstract = {In this paper we report exploratory analyses of high-density oligonucleotide
	array data from the Affymetrix GeneChip system with the objective
	of improving upon currently used measures of gene expression. Our
	analyses make use of three data sets: a small experimental study
	consisting of five MGU74A mouse GeneChip arrays, part of the data
	from an extensive spike-in study conducted by Gene Logic and Wyeth's
	Genetics Institute involving 95 HG-U95A human GeneChip arrays; and
	part of a dilution study conducted by Gene Logic involving 75 HG-U95A
	GeneChip arrays. We display some familiar features of the perfect
	match and mismatch probe (PM and MM) values of these data, and examine
	the variance-mean relationship with probe-level data from probes
	believed to be defective, and so delivering noise only. We explain
	why we need to normalize the arrays to one another using probe level
	intensities. We then examine the behavior of the PM and MM using
	spike-in data and assess three commonly used summary measures: Affymetrix's
	(i) average difference (AvDiff) and (ii) MAS 5.0 signal, and (iii)
	the Li and Wong multiplicative model-based expression index (MBEI).
	The exploratory data analyses of the probe level data motivate a
	new summary measure that is a robust multi-array average (RMA) of
	background-adjusted, normalized, and log-transformed PM values. We
	evaluate the four expression summary measures using the dilution
	study data, assessing their behavior in terms of bias, variance and
	(for MBEI and RMA) model fit. Finally, we evaluate the algorithms
	in terms of their ability to detect known levels of differential
	expression using the spike-in data. We conclude that there is no
	obvious downside to using RMA and attaching a standard error (SE)
	to this quantity using a linear model which removes probe-specific
	affinities.},
  doi = {10.1093/biostatistics/4.2.249},
  pdf = {../local/Irizarry2003Exploration.pdf},
  file = {Irizarry2003Exploration.pdf:Irizarry2003Exploration.pdf:PDF},
  institution = {Department of Biostatistics, Johns Hopkins University, Baltimore,
	MD 21205, USA. rafa@jhu.edu},
  keywords = {csbcbook, csbcbook-ch2},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {4/2/249},
  pmid = {12925520},
  timestamp = {2009.10.18},
  url = {http://dx.doi.org/10.1093/biostatistics/4.2.249}
}

@article{Irizarry2008Comprehensive,
  author      = {Irizarry, Rafael A and Ladd-Acosta, Christine and Carvalho, Benilton
	and Wu, Hao and Brandenburg, Sheri A and Jeddeloh, Jeffrey A and
	Wen, Bo and Feinberg, Andrew P},
  title       = {Comprehensive high-throughput arrays for relative methylation (CHARM).},
  journal     = {Genome Res},
  year        = {2008},
  month       = {May},
  volume      = {18},
  number      = {5},
  pages       = {780--790},
  doi         = {10.1101/gr.7301508},
  url         = {http://dx.doi.org/10.1101/gr.7301508},
  pii         = {gr.7301508},
  pmid        = {18316654},
  institution = {Department of Biostatistics, Johns Hopkins Bloomberg School of Public
	Health, Baltimore, Maryland 21205, USA. rafa@jhu.edu},
  keywords    = {Bias (Epidemiology); CpG Islands; DNA Methylation; Genome, Human;
	Genomics; Humans; Oligonucleotide Array Sequence Analysis; Reference
	Standards; Reproducibility of Results; Sensitivity and Specificity},
  abstract    = {This study was originally conceived to test in a rigorous way the
	specificity of three major approaches to high-throughput array-based
	DNA methylation analysis: (1) MeDIP, or methylated DNA immunoprecipitation,
	an example of antibody-mediated methyl-specific fractionation; (2)
	HELP, or HpaII tiny fragment enrichment by ligation-mediated PCR,
	an example of differential amplification of methylated DNA; and (3)
	fractionation by McrBC, an enzyme that cuts most methylated DNA.
	These results were validated using 1466 Illumina methylation probes
	on the GoldenGate methylation assay and further resolved discrepancies
	among the methods through quantitative methylation pyrosequencing
	analysis. While all three methods provide useful information, there
	were significant limitations to each, specifically bias toward CpG
	islands in MeDIP, relatively incomplete coverage in HELP, and location
	imprecision in McrBC. However, we found that with an original array
	design strategy using tiling arrays and statistical procedures that
	average information from neighboring genomic locations, much improved
	specificity and sensitivity could be achieved, e.g., approximately
	100\% sensitivity at 90\% specificity with McrBC. We term this approach
	"comprehensive high-throughput arrays for relative methylation" (CHARM).
	While this approach was applied to McrBC analysis, the array design
	and computational algorithms are fractionation method-independent
	and make this a simple, general, relatively inexpensive tool suitable
	for genome-wide analysis, and in which individual samples can be
	assayed reliably at very high density, allowing locus-level genome-wide
	epigenetic discrimination of individuals, not just groups of samples.
	Furthermore, unlike the other approaches, CHARM is highly quantitative,
	a substantial advantage in application to the study of human disease.},
  owner       = {phupe},
  timestamp   = {2010.08.10}
}

@article{Irizarry2006Comparison,
  author      = {Irizarry, Rafael A and Wu, Zhijin and Jaffee, Harris A},
  title       = {Comparison of Affymetrix GeneChip expression measures.},
  journal     = {Bioinformatics},
  year        = {2006},
  month       = {Apr},
  volume      = {22},
  number      = {7},
  pages       = {789--794},
  doi         = {10.1093/bioinformatics/btk046},
  url         = {http://dx.doi.org/10.1093/bioinformatics/btk046},
  pii         = {btk046},
  pmid        = {16410320},
  language    = {eng},
  medline-pst = {ppublish},
  institution = {Department of Biostatistics, Johns Hopkins University, 615 N. Wolfe
	Street, Baltimore, MD 21205, USA. rafa@jhu.edu},
  keywords    = {Algorithms; Benchmarking; Gene Expression Profiling, methods; Oligonucleotide
	Array Sequence Analysis, instrumentation/methods; Reproducibility
	of Results; Software},
  abstract    = {MOTIVATION: In the Affymetrix GeneChip system, preprocessing occurs
	before one obtains expression level measurements. Because the number
	of competing preprocessing methods was large and growing we developed
	a benchmark to help users identify the best method for their application.
	A webtool was made available for developers to benchmark their procedures.
	At the time of writing over 50 methods had been submitted. RESULTS:
	We benchmarked 31 probe set algorithms using a U95A dataset of spike
	in controls. Using this dataset, we found that background correction,
	one of the main steps in preprocessing, has the largest effect on
	performance. In particular, background correction appears to improve
	accuracy but, in general, worsen precision. The benchmark results
	put this balance in perspective. Furthermore, we have improved some
	of the original benchmark metrics to provide more detailed information
	regarding precision and accuracy. A handful of methods stand out
	as providing the best balance using spike-in data with the older
	U95A array, although different experiments on more current arrays
	may benchmark differently. AVAILABILITY: The affycomp package, now
	version 1.5.2, continues to be available as part of the Bioconductor
	project (http://www.bioconductor.org). The webtool continues to be
	available at http://affycomp.biostat.jhsph.edu CONTACT: rafa@jhu.edu
	SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics
	online.},
  owner       = {philippe},
  timestamp   = {2010.08.04}
}

@article{Irwin2005ZINC,
  author = {Irwin, J. J. and Shoichet, B. K.},
  title = {{ZINC}--a free database of commercially available compounds for virtual
	screening.},
  journal = {J {C}hem {I}nf {M}odel},
  year = {2005},
  volume = {45},
  pages = {177--182},
  number = {1},
  abstract = {A critical barrier to entry into structure-based virtual screening
	is the lack of a suitable, easy to access database of purchasable
	compounds. {W}e have therefore prepared a library of 727,842 molecules,
	each with 3{D} structure, using catalogs of compounds from vendors
	(the size of this library continues to grow). {T}he molecules have
	been assigned biologically relevant protonation states and are annotated
	with properties such as molecular weight, calculated {L}og{P}, and
	number of rotatable bonds. {E}ach molecule in the library contains
	vendor and purchasing information and is ready for docking using
	a number of popular docking programs. {W}ithin certain limits, the
	molecules are prepared in multiple protonation states and multiple
	tautomeric forms. {I}n one format, multiple conformations are available
	for the molecules. {T}his database is available for free download
	(http://zinc.docking.org) in several common file formats including
	{SMILES}, mol2, 3{D} {SDF}, and {DOCK} flexibase format. {A} {W}eb-based
	query tool incorporating a molecular drawing interface enables the
	database to be searched and browsed and subsets to be created. {U}sers
	can process their own molecules by uploading them to a server. {O}ur
	hope is that this database will bring virtual screening libraries
	to a wide community of structural biologists and medicinal chemists.},
  doi = {10.1021/ci049714+},
  pdf = {../local/Irwin2005ZINC.pdf},
  file = {Irwin2005ZINC.pdf:local/Irwin2005ZINC.pdf:PDF},
  keywords = {Databases, Digital, Drug Design, Factual, Libraries, Molecular Conformation,
	Molecular Structure, P.H.S., Research Support, U.S. Gov't, 15667143},
  url = {http://dx.doi.org/10.1021/ci049714+}
}

@article{Ishkanian2004tiling,
  author      = {Ishkanian, A. S. and Malloff, C. A. and Watson, S. K. and DeLeeuw,
	R. J. and Chi, B. and Coe, B. P. and Snijders, A. and Albertson,
	D. G. and Pinkel, D. and Marra, M. A. and Ling, V. and MacAulay,
	C. and Lam, W. L.},
  title       = {A tiling resolution {DNA} microarray with complete coverage of the
	human genome},
  journal     = {Nat. Genet.},
  year        = {2004},
  month       = {Mar},
  volume      = {36},
  number      = {3},
  pages       = {299--303},
  doi         = {10.1038/ng1307},
  url         = {http://dx.doi.org/10.1038/ng1307},
  pii         = {ng1307},
  pmid        = {14981516},
  institution = {British Columbia Cancer Research Centre, 601 West 10th Avenue, Vancouver,
	British Columbia V5Z 1L3, Canada.},
  keywords    = {csbcbook, microarray},
  abstract    = {We constructed a tiling resolution array consisting of 32,433 overlapping
	BAC clones covering the entire human genome. This increases our ability
	to identify genetic alterations and their boundaries throughout the
	genome in a single comparative genomic hybridization (CGH) experiment.
	At this tiling resolution, we identified minute DNA alterations not
	previously reported. These alterations include microamplifications
	and deletions containing oncogenes, tumor-suppressor genes and new
	genes that may be associated with multiple tumor types. Our findings
	show the need to move beyond conventional marker-based genome comparison
	approaches, that rely on inference of continuity between interval
	markers. Our submegabase resolution tiling set for array CGH (SMRT
	array) allows comprehensive assessment of genomic integrity and thereby
	the identification of new genes associated with disease.},
  pdf         = {../local/Ishkanian2004tiling.pdf},
  file        = {Ishkanian2004tiling.pdf:Ishkanian2004tiling.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.08}
}

@article{Ito2001comprehensive,
  author  = {Ito, T. and Chiba, T. and Ozawa, R. and Yoshida, M. and Hattori,
	M. and Sakaki, Y.},
  title   = {A comprehensive two-hybrid analysis to explore the yeast protein
	interactome},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year    = {2001},
  volume  = {98},
  number  = {8},
  pages   = {4569--4574},
  url     = {http://www.pnas.org/cgi/content/full/98/8/4569},
  pdf     = {../local/ito01.pdf},
  file    = {ito01.pdf:local/ito01.pdf:PDF},
  subject = {bionet}
}

% Volume 97 agrees with the PNAS URL path of this entry (97/3/1143).
@article{Ito2000Toward,
  author = {Ito, T. and Tashiro, K. and Muta, S. and Ozawa, R. and Chiba, T.
	and Nishizawa, M. and Yamamoto, K. and Kuhara, S. and Sakaki, Y.},
  title = {Toward a protein-protein interaction map of the budding yeast: {A}
	comprehensive system to examine two-hybrid interactions in all possible
	combinations between the yeast proteins},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2000},
  volume = {97},
  pages = {1143--1147},
  number = {3},
  pdf = {../local/ito00.pdf},
  file = {ito00.pdf:local/ito00.pdf:PDF},
  subject = {bionet},
  url = {http://www.pnas.org/cgi/content/full/97/3/1143}
}

% Chapter in an edited book-series volume, hence incollection rather than inproceedings.
@incollection{Ivanciuc2007Applications,
  author = {Ivanciuc, O.},
  title = {Applications of Support Vector Machines in Chemistry},
  booktitle = {Reviews in Computational Chemistry},
  year = {2007},
  editor = {Lipkowitz, K. B. and Cundari, T. R.},
  volume = {23},
  pages = {291--400},
  address = {Weinheim},
  publisher = {Wiley-VCH},
  owner = {vert},
  timestamp = {2007.08.02}
}

@book{Ivanov1976theory,
  title = {The theory of approximate methods and their application to the numerical
	solution of singular integral equations},
  publisher = {Noordhoff International},
  year = {1976},
  author = {Ivanov, V. V.},
  address = {Leiden},
  subject = {ml}
}

@article{Ivshina2006Genetic,
  author = {Ivshina, A. V. and George, J. and Senko, O. and Mow, B. and Putti,
	T. C. and Smeds, J. and Lindahl, T. and Pawitan, Y. and Hall, P. and
	Nordgren, H. and others},
  title = {Genetic reclassification of histologic grade delineates new clinical
	subtypes of breast cancer},
  journal = {Cancer Research},
  year = {2006},
  volume = {66},
  pages = {10292--10301},
  number = {21},
  publisher = {AACR}
}

% Two-author paper; the author list is complete without an "and others" suffix.
@article{Iwamoto2010Predicting,
  author = {Iwamoto, T. and Pusztai, L.},
  title = {Predicting prognosis of breast cancer with gene signatures: are we
	lost in a sea of data?},
  journal = {Genome Medicine},
  year = {2010},
  volume = {2},
  pages = {81},
  number = {11},
  publisher = {BioMed Central Ltd}
}

@article{Natraj2005Three,
  author = {Iyer, N. and Jayanti, S. and Lou, K. and Kalyanaraman, Y. and Ramani,
	K.},
  title = {Three-dimensional shape searching: state-of-the-art review and future
	trends},
  journal = {Computer-Aided Design},
  year = {2005},
  volume = {37},
  pages = {509--530},
  number = {5},
  month = {April},
  abstract = {Three-dimensional shape searching is a problem of current interest
	in several different fields. Most techniques have been developed
	for a particular domain and reduce a shape into a simpler shape representation.
	The techniques developed for a particular domain will also find applications
	in other domains. We classify and compare various 3D shape searching
	techniques based on their shape representations. A brief description
	of each technique is provided followed by a detailed survey of the
	state-of-the-art. The paper concludes by identifying gaps in current
	shape search techniques and identifies directions for future research.},
  booktitle = {Geometric Modeling and Processing 2004},
  citeulike-article-id = {670915},
  doi = {10.1016/j.cad.2004.07.002},
  keywords = {3d-feature-extraction, cad, feature-extraction, object-modeling, object-representation,
	object-retrieval, pattern-recognition, search-benchmark, survey},
  posted-at = {2006-05-26 08:28:52},
  priority = {2},
  url = {http://dx.doi.org/10.1016/j.cad.2004.07.002}
}

@article{Jaakkola2000Discriminative,
  author = {Jaakkola, T. and Diekhans, M. and Haussler, D.},
  title = {A {D}iscriminative {F}ramework for {D}etecting {R}emote {P}rotein
	{H}omologies},
  journal = {J. {C}omput. {B}iol.},
  year = {2000},
  volume = {7},
  pages = {95--114},
  number = {1--2},
  pdf = {../local/jaak00.pdf},
  file = {jaak00.pdf:local/jaak00.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernelcasp},
  url = {http://www.cse.ucsc.edu/research/compbio/discriminative/Jaakola2-1998.ps}
}

@inproceedings{Jaakkola1999Maximum,
  author    = {Jaakkola, Tommi and Meila, Marina and Jebara, Tony},
  title     = {Maximum Entropy Discrimination},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  volume    = {12},
  publisher = {MIT Press, Cambridge, MA},
  year      = {1999}
}

@inproceedings{Jaakkola1999Using,
  author    = {Jaakkola, T. S. and Diekhans, M. and Haussler, D.},
  title     = {Using the {F}isher {K}ernel {M}ethod to {D}etect {R}emote {P}rotein
	{H}omologies},
  booktitle = {Proceedings of the {S}eventh {I}nternational {C}onference on {I}ntelligent
	{S}ystems for {M}olecular {B}iology},
  publisher = {AAAI Press},
  year      = {1999},
  pages     = {149--158},
  keywords  = {biosvm},
  owner     = {jeanphilippevert}
}

@inproceedings{Jaakkola1999Exploiting,
  author    = {Jaakkola, T. S. and Haussler, D.},
  title     = {Exploiting generative models in discriminative classifiers},
  booktitle = {Proc. of {T}enth {C}onference on {A}dvances in {N}eural {I}nformation
	{P}rocessing {S}ystems},
  year      = {1999},
  url       = {http://www.cse.ucsc.edu/research/ml/papers/Jaakola.ps},
  keywords  = {biosvm},
  subject   = {kernel},
  pdf       = {../local/jaak99.pdf},
  file      = {jaak99.pdf:local/jaak99.pdf:PDF}
}

@inproceedings{Jaakkola1999Probabilistic,
  author    = {Jaakkola, T. S. and Haussler, D.},
  title     = {Probabilistic kernel regression models},
  booktitle = {Proceedings of the 1999 {C}onference on {AI} and {S}tatistics},
  publisher = {Morgan Kaufmann},
  year      = {1999},
  url       = {http://alpha-bits.ai.mit.edu/people/tommi/publications/probker.ps.gz},
  subject   = {kernel},
  pdf       = {../local/jaak99b.pdf},
  file      = {jaak99b.pdf:local/jaak99b.pdf:PDF}
}

% First author is Eva Jablonka, hence the initial E.
@article{Jablonka2002changing,
  author = {Jablonka, E. and Lamb, M. J.},
  title = {The changing concept of epigenetics},
  journal = {Ann N Y Acad Sci},
  year = {2002},
  volume = {981},
  pages = {82--96},
  month = {Dec},
  abstract = {We discuss the changing use of epigenetics, a term coined by Conrad
	Waddington in the 1940s, and how the epigenetic approach to development
	differs from the genetic approach. Originally, epigenetics referred
	to the study of the way genes and their products bring the phenotype
	into being. Today, it is primarily concerned with the mechanisms
	through which cells become committed to a particular form or function
	and through which that functional or structural state is then transmitted
	in cell lineages. We argue that modern epigenetics is important not
	only because it has practical significance for medicine, agriculture,
	and species conservation, but also because it has implications for
	the way in which we should view heredity and evolution. In particular,
	recognizing that there are epigenetic inheritance systems through
	which non-DNA variations can be transmitted in cell and organismal
	lineages broadens the concept of heredity and challenges the widely
	accepted gene-centered neo-Darwinian version of Darwinism.},
  institution = {Cohn Institute for the History and Philosophy of Science and Ideas,
	Tel Aviv University, Tel Aviv 69978, Israel. jablonka@post.tau.ac.il},
  keywords = {csbcbook},
  owner = {jp},
  pmid = {12547675},
  timestamp = {2009.10.11}
}

@article{Jackson2003Expression,
  author = {Jackson, A. L. and Bartz, S. R. and Schelter, J. and Kobayashi, S.
	V. and Burchard, J. and Mao, M. and Li, B. and Cavet, G. and Linsley,
	P. S.},
  title = {Expression profiling reveals off-target gene regulation by {RNA}i.},
  journal = {Nat. {B}iotechnol.},
  year = {2003},
  volume = {21},
  pages = {635--637},
  number = {6},
  month = {Jun},
  abstract = {R{NA} interference is thought to require near-identity between the
	small interfering {RNA} (si{RNA}) and its cognate m{RNA}. {H}ere,
	we used gene expression profiling to characterize the specificity
	of gene silencing by si{RNA}s in cultured human cells. {T}ranscript
	profiles revealed si{RNA}-specific rather than target-specific signatures,
	including direct silencing of nontargeted genes containing as few
	as eleven contiguous nucleotides of identity to the si{RNA}. {T}hese
	results demonstrate that si{RNA}s may cross-react with targets of
	limited sequence similarity.},
  doi = {10.1038/nbt831},
  keywords = {sirna},
  pii = {nbt831},
  url = {http://dx.doi.org/10.1038/nbt831}
}

@article{Jackson2004Noise,
  author = {Jackson, A. L. and Linsley, P. S.},
  title = {Noise amidst the silence: off-target effects of si{RNA}s?},
  journal = {Trends {G}enet.},
  year = {2004},
  volume = {20},
  pages = {521--524},
  number = {11},
  month = {Nov},
  abstract = {R{NA} interference ({RNA}i), mediated by short interfering {RNA}s
	(si{RNA}s), is widely used to silence gene expression and to define
	gene function in mammalian cells. {I}nitially, this gene silencing
	via transcript degradation was believed to be exquisitely specific,
	requiring near-identity between the si{RNA} and the target m{RNA}.
	{H}owever, several recent reports have suggested that non-specific
	effects can be induced by si{RNA}s, both at the level of m{RNA} and
	protein. {T}hese findings suggest that si{RNA}s can regulate the
	expression of unintended targets, and argue for further experiments
	on the mechanism and extent of off-target gene regulation(s). {I}n
	the meantime, caution is warranted in interpreting gene function
	and phenotypes resulting from {RNA}i experiments.},
  doi = {10.1016/j.tig.2004.08.006},
  keywords = {sirna},
  pii = {S0168-9525(04)00240-9},
  url = {http://dx.doi.org/10.1016/j.tig.2004.08.006}
}

@incollection{Jacob2009Clustered,
  author    = {Jacob, L. and Bach, F. and Vert, J.-P.},
  title     = {Clustered Multi-Task Learning: A Convex Formulation},
  booktitle = {Advances in Neural Information Processing Systems 21},
  pages     = {745--752},
  year      = {2009},
  publisher = {MIT Press},
  url       = {http://books.nips.cc/papers/files/nips21/NIPS2008\_0680.pdf}
}

% Third author initial V. matches the journal version of this work, entry Jacob2008Virtual.
@techreport{Jacob2008VirtualOLD,
  author = {Jacob, L. and Hoffmann, B. and Stoven, V. and Vert, J.-P.},
  title = {Virtual screening of {GPCR}s: an \textit{in silico} chemogenomics
	approach},
  institution = {arXiv},
  year = {2008},
  number = {0801.4301},
  location = {Mines ParisTech},
  timestamp = {2008.01.24}
}

@article{Jacob2008Virtual,
  author = {Jacob, L. and Hoffmann, B. and Stoven, V. and Vert, J.-P.},
  title = {Virtual screening of {GPCR}s: an \textit{in silico} chemogenomics approach},
  journal = {BMC Bioinformatics},
  year = {2008},
  volume = {9},
  pages = {363},
  doi = {10.1186/1471-2105-9-363},
  pdf = {../local/Jacob2008Virtual.pdf},
  file = {Jacob2008Virtual.pdf:Jacob2008Virtual.pdf:PDF},
  keywords = {chemogenomics},
  url = {http://dx.doi.org/10.1186/1471-2105-9-363}
}

@inproceedings{Jacob2009Group,
  author = {Jacob, L. and Obozinski, G. and Vert, J.-P.},
  title = {Group lasso with overlap and graph lasso},
  booktitle = {ICML '09: Proceedings of the 26th Annual International Conference
	on Machine Learning},
  year = {2009},
  pages = {433--440},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1553374.1553431},
  pdf = {../local/Jacob2009Group.pdf},
  file = {Jacob2009Group.pdf:Jacob2009Group.pdf:PDF},
  isbn = {978-1-60558-516-1},
  location = {Montreal, Quebec, Canada}
}

@article{Jacob2008Efficient,
  author    = {Jacob, L. and Vert, J.-P.},
  title     = {Efficient peptide-{MHC}-{I} binding prediction for alleles with few
	known binders.},
  journal   = {Bioinformatics},
  year      = {2008},
  month     = {Feb},
  volume    = {24},
  number    = {3},
  pages     = {358--366},
  doi       = {10.1093/bioinformatics/btm611},
  url       = {http://dx.doi.org/10.1093/bioinformatics/btm611},
  pii       = {btm611},
  pmid      = {18083718},
  keywords  = {chemogenomics immunoinformatics},
  abstract  = {MOTIVATION: In silico methods for the prediction of antigenic peptides
	binding to MHC class I molecules play an increasingly important role
	in the identification of T-cell epitopes. Statistical and machine
	learning methods in particular are widely used to score candidate
	binders based on their similarity with known binders and non-binders.
	The genes coding for the MHC molecules, however, are highly polymorphic,
	and statistical methods have difficulties building models for alleles
	with few known binders. In this context, recent work has demonstrated
	the utility of leveraging information across alleles to improve the
	performance of the prediction. RESULTS: We design a support vector
	machine algorithm that is able to learn peptide-MHC-I binding models
	for many alleles simultaneously, by sharing binding information across
	alleles. The sharing of information is controlled by a user-defined
	measure of similarity between alleles. We show that this similarity
	can be defined in terms of supertypes, or more directly by comparing
	key residues known to play a role in the peptide-MHC binding. We
	illustrate the potential of this approach on various benchmark experiments
	where it outperforms other state-of-the-art methods. AVAILABILITY:
	The method is implemented on a web server: http://cbio.ensmp.fr/kiss.
	All data and codes are freely and publicly available from the authors.},
  pdf       = {../local/Jacob2008Efficient.pdf},
  file      = {Jacob2008Efficient.pdf:Jacob2008Efficient.pdf:PDF},
  owner     = {laurent},
  timestamp = {2008.03.27}
}

@article{Jacob2008Protein,
  author   = {Jacob, L. and Vert, J.-P.},
  title    = {Protein-ligand interaction prediction: an improved chemogenomics
	approach},
  journal  = {Bioinformatics},
  year     = {2008},
  volume   = {24},
  number   = {19},
  pages    = {2149--2156},
  doi      = {10.1093/bioinformatics/btn409},
  url      = {http://bioinformatics.oxfordjournals.org/cgi/reprint/btn409},
  keywords = {chemogenomics},
  pdf      = {../local/Jacob2008Protein.pdf},
  file     = {Jacob2008Protein.pdf:Jacob2008Protein.pdf:PDF}
}

@techreport{Jacob2007Kernel,
  author = {Jacob, L. and Vert, J.-P.},
  title = {Kernel methods for in silico chemogenomics},
  institution = {arXiv},
  year = {2007},
  number = {0709.3931v1},
  archiveprefix = {arXiv},
  eprint = {0709.3931},
  keywords = {chemogenomics},
  timestamp = {2007.10.25},
  url = {https://arxiv.org/abs/0709.3931}
}

@techreport{Jacob2006Epitope,
  author = {Jacob, L. and Vert, J.-P.},
  title = {Epitope prediction improved by multitask support vector machines},
  institution = {arXiv},
  year = {2007},
  number = {q-bio/0702008v1},
  archiveprefix = {arXiv},
  eprint = {q-bio/0702008},
  owner = {jacob},
  url = {https://arxiv.org/abs/q-bio/0702008}
}

@article{Jacoby20067,
  author = {Edgar Jacoby and Rochdi Bouhelal and Marc Gerspacher and Klaus Seuwen},
  title = {The 7 {TM} {G}-protein-coupled receptor target family.},
  journal = {ChemMedChem},
  year = {2006},
  volume = {1},
  pages = {761--782},
  number = {8},
  month = {Aug},
  abstract = {Chemical biology approaches have a long history in the exploration
	of the G-protein-coupled receptor (GPCR) family, which represents
	the largest and most important group of targets for therapeutics.
	The analysis of the human genome revealed a significant number of
	new members with unknown physiological function which are today the
	focus of many reverse pharmacology drug-discovery programs. As the
	seven hydrophobic transmembrane segments are a defining common structural
	feature of these receptors, and as signaling through heterotrimeric
	G proteins is not demonstrated in all cases, these proteins are also
	referred to as seven transmembrane (7 TM) or serpentine receptors.
	This review summarizes important historic milestones of GPCR research,
	from the beginning, when pharmacology was mainly descriptive, to
	the age of modern molecular biology, with the cloning of the first
	receptor and now the availability of the entire human GPCR repertoire
	at the sequence and protein level. It shows how GPCR-directed drug
	discovery was initially based on the careful testing of a few specifically
	made chemical compounds and is today pursued with modern drug-discovery
	approaches, including combinatorial library design, structural biology,
	molecular informatics, and advanced screening technologies for the
	identification of new compounds that activate or inhibit GPCRs specifically.
	Such compounds, in conjunction with other new technologies, allow
	us to study the role of receptors in physiology and medicine, and
	will hopefully result in novel therapies. We also outline how basic
	research on the signaling and regulatory mechanisms of GPCRs is advancing,
	leading to the discovery of new GPCR-interacting proteins and thus
	opening new perspectives for drug development. Practical examples
	from GPCR expression studies, HTS (high-throughput screening), and
	the design of monoamine-related GPCR-focused combinatorial libraries
	illustrate ongoing GPCR chemical biology research. Finally, we outline
	future progress that may relate today's discoveries to the development
	of new medicines.},
  doi = {10.1002/cmdc.200600134},
  owner = {laurent},
  pmid = {16902930},
  timestamp = {2008.01.16},
  url = {http://dx.doi.org/10.1002/cmdc.200600134}
}

@article{Jaenisch2003Epigenetic,
  author      = {Rudolf Jaenisch and Adrian Bird},
  title       = {Epigenetic regulation of gene expression: how the genome integrates
	intrinsic and environmental signals.},
  journal     = {Nat. Genet.},
  year        = {2003},
  month       = {Mar},
  volume      = {33 Suppl},
  pages       = {245--254},
  doi         = {10.1038/ng1089},
  url         = {http://dx.doi.org/10.1038/ng1089},
  pmid        = {12610534},
  pii         = {ng1089},
  institution = {Whitehead Institute for Biomedical Research and Department of Biology,
	Massachusetts Institute of Technology, 9 Cambridge Center, Cambridge,
	MA 02142, USA.},
  keywords    = {Aging; Animals; Cloning, Organism; DNA Methylation; Diet; Dosage Compensation,
	Genetic; Gene Expression Regulation, Developmental; Genetic Diseases,
	Inborn; Genome; Genomic Imprinting; Humans; Mice; Models, Genetic;
	Mutation; Neoplasms; Phenotype; Signal Transduction},
  abstract    = {Cells of a multicellular organism are genetically homogeneous but
	structurally and functionally heterogeneous owing to the differential
	expression of genes. Many of these differences in gene expression
	arise during development and are subsequently retained through mitosis.
	Stable alterations of this kind are said to be 'epigenetic', because
	they are heritable in the short term but do not involve mutations
	of the DNA itself. Research over the past few years has focused on
	two molecular mechanisms that mediate epigenetic phenomena: DNA methylation
	and histone modifications. Here, we review advances in the understanding
	of the mechanism and role of DNA methylation in biological processes.
	Epigenetic effects by means of DNA methylation have an important
	role in development but can also arise stochastically as animals
	age. Identification of proteins that mediate these effects has provided
	insight into this complex process and diseases that occur when it
	is perturbed. External influences on epigenetic processes are seen
	in the effects of diet on long-term diseases such as cancer. Thus,
	epigenetic mechanisms seem to allow an organism to respond to the
	environment through changes in gene expression. The extent to which
	environmental effects can provoke epigenetic responses represents
	an exciting area of future research.},
  owner       = {ljacob},
  timestamp   = {2009.09.14}
}

@article{Jain1999Data,
  author = {Jain, A. K. and Murty, M. N. and Flynn, P. J.},
  title = {Data clustering: a review},
  journal = {ACM Comput. Surv.},
  year = {1999},
  volume = {31},
  pages = {264--323},
  number = {3},
  doi = {10.1145/331499.331504},
  pdf = {../local/Jain1999Data.pdf},
  file = {Jain1999Data.pdf:Jain1999Data.pdf:PDF},
  owner = {jp},
  timestamp = {2011.12.29}
}

@article{Jambon2003New,
  author = {Martin Jambon and Anne Imberty and Gilbert Del{\'e}age and Christophe
	Geourjon},
  title = {A new bioinformatic approach to detect common {3D} sites in protein
	structures.},
  journal = {Proteins},
  year = {2003},
  volume = {52},
  pages = {137--145},
  number = {2},
  month = {Aug},
  abstract = {An innovative bioinformatic method has been designed and implemented
	to detect similar three-dimensional (3D) sites in proteins. This
	approach allows the comparison of protein structures or substructures
	and detects local spatial similarities: this method is completely
	independent from the amino acid sequence and from the backbone structure.
	In contrast to already existing tools, the basis for this method
	is a representation of the protein structure by a set of stereochemical
	groups that are defined independently from the notion of amino acid.
	An efficient heuristic for finding similarities that uses graphs
	of triangles of chemical groups to represent the protein structures
	has been developed. The implementation of this heuristic constitutes
	a software named SuMo (Surfing the Molecules), which allows the dynamic
	definition of chemical groups, the selection of sites in the proteins,
	and the management and screening of databases. To show the relevance
	of this approach, we focused on two extreme examples illustrating
	convergent and divergent evolution. In two unrelated serine proteases,
	SuMo detects one common site, which corresponds to the catalytic
	triad. In the legume lectins family composed of >100 structures that
	share similar sequences and folds but may have lost their ability
	to bind a carbohydrate molecule, SuMo discriminates between functional
	and non-functional lectins with a selectivity of 96\%. The time needed
	for searching a given site in a protein structure is typically 0.1
	s on a PIII 800MHz/Linux computer; thus, in further studies, SuMo
	will be used to screen the PDB.},
  doi = {10.1002/prot.10339},
  institution = {Institut de Biologie et Chimie des Prot{\'e}ines (IBCP), Lyon, France.},
  keywords = {Algorithms; Catalytic Domain; Chymotrypsin, chemistry/genetics; Computational
	Biology, methods; Evolution, Molecular; Fabaceae, chemistry; Models,
	Molecular; Plant Lectins, chemistry/genetics; Protein Conformation;
	Proteins, chemistry; Reproducibility of Results; Subtilisin, chemistry/genetics},
  owner = {bricehoffmann},
  pmid = {12833538},
  timestamp = {2009.02.13},
  url = {http://dx.doi.org/10.1002/prot.10339}
}

@book{Jameson1987Summing,
  author      = {Jameson, G. J. O.},
  title       = {Summing and Nuclear Norms in Banach Space Theory},
  series      = {London Mathematical Society Student Texts},
  number      = {8},
  publisher   = {Cambridge University Press},
  year        = {1987},
  doi         = {10.1017/CBO9780511569166},
  url         = {http://dx.doi.org/10.1017/CBO9780511569166},
  pdf         = {../local/Jameson1987Summing.pdf},
  file        = {Jameson1987Summing.pdf:Jameson1987Summing.pdf:PDF},
  owner       = {jp},
  timestamp   = {2013.03.29}
}

@article{Jamieson2006Medicinal,
  author      = {Jamieson, C. and Moir, E. M. and Rankovic, Z. and Wishart, G.},
  title       = {{M}edicinal chemistry of h{ERG} optimizations: {H}ighlights and hang-ups.},
  journal     = {J. Med. Chem.},
  year        = {2006},
  month       = {Aug},
  volume      = {49},
  number      = {17},
  pages       = {5029--5046},
  doi         = {10.1021/jm060379l},
  url         = {http://dx.doi.org/10.1021/jm060379l},
  pmid        = {16913693},
  keywords    = {herg},
  pdf         = {../local/Jamieson2006Medicinal.pdf},
  file        = {Jamieson2006Medicinal.pdf:local/Jamieson2006Medicinal.pdf:PDF},
  timestamp   = {2007.02.03}
}

@article{Janoueix-Lerosey2005Preferential,
  author = {Isabelle Janoueix-Lerosey and Philippe Hup{\'e} and Zofia Maciorowski
	and Philippe {La Rosa} and Gudrun Schleiermacher and Ga{\"e}lle Pierron
	and St{\'e}phane Liva and Emmanuel Barillot and Olivier Delattre},
  title = {Preferential occurrence of chromosome breakpoints within early replicating
	regions in neuroblastoma.},
  journal = {Cell Cycle},
  year = {2005},
  volume = {4},
  pages = {1842--1846},
  number = {12},
  month = {Dec},
  abstract = {Neuroblastoma (NB) is a frequent paediatric extra cranial solid tumor
	characterized by the occurrence of unbalanced chromosome translocations,
	frequently, but not exclusively, involving chromosomes 1 and 17.
	We have used a 1 Mb resolution BAC array to further refine the mapping
	of breakpoints in NB cell lines. Replication timing profiles were
	evaluated in 7 NB cell lines, using DNAs from G1 and S phases flow
	sorted nuclei hybridised on the same array. Strikingly, these replication
	timing profiles were highly similar between the different NB cell
	lines. Furthermore, a significant level of similarity was also observed
	between NB cell lines and lymphoblastoid cells. A segmentation analysis
	using the Adaptative Weights Smoothing procedure was performed to
	determine regions of coordinate replication. More than 50\% of the
	breakpoints mapped to early replicating regions, which account for
	23.7\% of the total genome. The breakpoints frequency per 10(8) bases
	was therefore 10.84 for early replicating regions, whereas it was
	only 2.94 for late replicating regions, these difference being highly
	significant (p < 10(-4)). This strong association was also observed
	when chromosomes 1 and 17, the two most frequent translocation partners
	in NB were excluded from the statistical analysis. These results
	unambiguously establish a link between unbalanced translocations,
	whose most likely mechanism of occurrence relies on break-induced
	replication, and early replication of the genome.},
  pdf = {../local/Janoueix-Lerosey2005Preferential.pdf},
  file = {Janoueix-Lerosey2005Preferential.pdf:Janoueix-Lerosey2005Preferential.pdf:PDF},
  institution = {Laboratoire de Pathologie Mol{\'e}culaire des Cancers, Institut Curie,
	Paris, France.},
  keywords = {csbcbook, csbcbook-ch2},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {2257},
  pmid = {16294040},
  timestamp = {2009.10.18},
  url = {http://www.landesbioscience.com/journals/cc/article/2257/}
}

@article{Jansen2002Relating,
  author      = {Jansen, R. and Greenbaum, D. and Gerstein, M.},
  title       = {Relating whole-genome expression data with protein-protein interactions},
  journal     = {Genome Res.},
  year        = {2002},
  month       = {Jan},
  volume      = {12},
  number      = {1},
  pages       = {37--46},
  doi         = {10.1101/gr.205602},
  url         = {http://dx.doi.org/10.1101/gr.205602},
  pmid        = {11779829},
  institution = {Department of Molecular Biophysics, Yale University, New Haven, Connecticut
	06520, USA.},
  abstract    = {We investigate the relationship of protein-protein interactions with
	mRNA expression levels, by integrating a variety of data sources
	for yeast. We focus on known protein complexes that have clearly
	defined interactions between their subunits. We find that subunits
	of the same protein complex show significant coexpression, both in
	terms of similarities of absolute mRNA levels and expression profiles,
	e.g., we can often see subunits of a complex having correlated patterns
	of expression over a time course. We classify the yeast protein complexes
	as either permanent or transient, with permanent ones being maintained
	through most cellular conditions. We find that, generally, permanent
	complexes, such as the ribosome and proteasome, have a particularly
	strong relationship with expression, while transient ones do not.
	However, we note that several transient complexes, such as the RNA
	polymerase II holoenzyme and the replication complex, can be subdivided
	into smaller permanent ones, which do have a strong relationship
	to gene expression. We also investigated the interactions in aggregated,
	genome-wide data sets, such as the comprehensive yeast two-hybrid
	experiments, and found them to have only a weak relationship with
	gene expression, similar to that of transient complexes. (Further
	details on genecensus.org/expression/interactions and bioinfo.mbb.yale.edu/expression/interactions.)},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Jansen2002Relating.pdf},
  file        = {Jansen2002Relating.pdf:Jansen2002Relating.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.09.27}
}

@article{Jansen2003Bayesian,
  author = {Jansen, R. and Yu, H. and Greenbaum, D. and Kluger, Y. and Krogan,
	N. J. and Chung, S. and Emili, A. and Snyder, M. and Greenblatt, J. F.
	and Gerstein, M.},
  title = {A {B}ayesian networks approach for predicting protein-protein interactions
	from genomic data},
  journal = {Science},
  year = {2003},
  volume = {302},
  pages = {449--453},
  number = {5644},
  abstract = {We have developed an approach using {B}ayesian networks to predict
	protein-protein interactions genome-wide in yeast. {O}ur method naturally
	weights and combines into reliable predictions genomic features only
	weakly associated with interaction (e.g., m{RNA} coexpression, coessentiality,
	and colocalization). {I}n addition to de novo predictions, it can
	integrate often noisy, experimental interaction data sets. {W}e observe
	that at given levels of sensitivity, our predictions are more accurate
	than the existing high-throughput experimental data sets. {W}e validate
	our predictions with new {TAP}-tagging (tandem affinity purification)
	experiments.},
  doi = {10.1126/science.1087361},
  pdf = {../local/Jansen2003Bayesian.pdf},
  file = {Jansen2003Bayesian.pdf:local/Jansen2003Bayesian.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  url = {http://dx.doi.org/10.1126/science.1087361}
}

@article{Jarzab2005Gene,
  author = {Barbara Jarzab and Malgorzata Wiench and Krzysztof Fujarewicz and
	Krzysztof Simek and Michal Jarzab and Malgorzata Oczko-Wojciechowska
	and Jan Wloch and Agnieszka Czarniecka and Ewa Chmielik and Dariusz
	Lange and Agnieszka Pawlaczek and Sylwia Szpak and Elzbieta Gubala
	and Andrzej Swierniak},
  title = {Gene expression profile of papillary thyroid cancer: sources of variability
	and diagnostic implications.},
  journal = {Cancer {R}es.},
  year = {2005},
  volume = {65},
  pages = {1587--1597},
  number = {4},
  month = {Feb},
  abstract = {The study looked for an optimal set of genes differentiating between
	papillary thyroid cancer ({PTC}) and normal thyroid tissue and assessed
	the sources of variability in gene expression profiles. {T}he analysis
	was done by oligonucleotide microarrays ({G}ene{C}hip {HG}-{U}133{A})
	in 50 tissue samples taken intraoperatively from 33 patients (23
	{PTC} patients and 10 patients with other thyroid disease). {I}n
	the initial group of 16 {PTC} and 16 normal samples, we assessed
	the sources of variability in the gene expression profile by singular
	value decomposition which specified three major patterns of variability.
	{T}he first and the most distinct mode grouped transcripts differentiating
	between tumor and normal tissues. {T}wo consecutive modes contained
	a large proportion of immunity-related genes. {T}o generate a multigene
	classifier for tumor-normal difference, we used support vector machines-based
	technique (recursive feature replacement). {I}t included the following
	19 genes: {DPP}4, {GJB}3, {ST}14, {SERPINA}1, {LRP}4, {MET}, {EVA}1,
	{SPUVE}, {LGALS}3, {HBB}, {MKRN}2, {MRC}2, {IGSF}1, {KIAA}0830, {RXRG},
	{P}4{HA}2, {CDH}3, {IL}13{RA}1, and {MTMR}4, and correctly discriminated
	17 of 18 additional {PTC}/normal thyroid samples and all 16 samples
	published in a previous microarray study. {S}elected novel genes
	({LRP}4, {EVA}1, {TMPRSS}4, {QPCT}, and {SLC}34{A}2) were confirmed
	by {Q}-{PCR}. {O}ur results prove that the gene expression signal
	of {PTC} is easily detectable even when cancer cells do not prevail
	over tumor stroma. {W}e indicate and separate the confounding variability
	related to the immune response. {F}inally, we propose a potent molecular
	classifier able to discriminate between {PTC} and nonmalignant thyroid
	in more than 90\% of investigated samples.},
  doi = {10.1158/0008-5472.CAN-04-3078},
  pdf = {../local/Jarzab2005Gene.pdf},
  file = {Jarzab2005Gene.pdf:local/Jarzab2005Gene.pdf:PDF},
  keywords = {biosvm},
  pii = {65/4/1587},
  url = {http://dx.doi.org/10.1158/0008-5472.CAN-04-3078}
}

@inproceedings{Jebara2004Multi-task,
  author = {Jebara, Tony},
  title = {Multi-task feature and kernel selection for {SVMs}},
  booktitle = {ICML '04: Proceedings of the twenty-first international conference
	on Machine learning},
  year = {2004},
  pages = {55},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1015330.1015426},
  isbn = {1-58113-828-5},
  location = {Banff, Alberta, Canada}
}

@article{Jebara2004Probability,
  author = {Jebara, T. and Kondor, R. and Howard, A.},
  title = {Probability {P}roduct {K}ernels},
  journal = {J. {M}ach. {L}earn. {R}es.},
  year = {2004},
  volume = {5},
  pages = {819--844},
  keywords = {kernel-theory},
  owner = {mahe},
  timestamp = {2006.08.09},
  url = {http://jmlr.csail.mit.edu/papers/v5/jebara04a.html}
}

@techreport{Jenatton2009Structured,
  author = {Jenatton, R. and Audibert, J.-Y. and Bach, F.},
  title = {Structured variable selection with sparsity-inducing norms},
  institution = {arXiv},
  year = {2009},
  number = {0904.3523},
  abstract = {{W}e consider the empirical risk minimization problem for linear supervised
	learning, with regularization by structured sparsity-inducing norms.
	{T}hese are defined as sums of {E}uclidean norms on certain subsets
	of variables, extending the usual $\ell_1$-norm and the group $\ell_1$-norm
	by allowing the subsets to overlap. {T}his leads to a specific set
	of allowed nonzero patterns for the solutions of such problems. {W}e
	first explore the relationship between the groups defining the norm
	and the resulting nonzero patterns, providing both forward and backward
	algorithms to go back and forth from groups to patterns. {T}his allows
	the design of norms adapted to specific prior knowledge expressed
	in terms of nonzero patterns. {W}e also present an efficient active
	set algorithm, and analyze the consistency of variable selection
	for least-squares linear regression in low and high-dimensional settings.},
  archiveprefix = {arXiv},
  eprint = {0904.3523},
  pdf = {../local/Jenatton2009Structured.pdf},
  file = {Jenatton2009Structured.pdf:Jenatton2009Structured.pdf:PDF},
  keywords = {variable selection;sparsity; convex optimization;learning theory},
  language = {{A}nglais},
  pages = {40},
  url = {https://arxiv.org/abs/0904.3523}
}

@article{Jenatton2011Proximal,
  author = {Jenatton, R. and Mairal, J. and Obozinski, G. and Bach, F.},
  title = {Proximal Methods for Hierarchical Sparse Coding},
  journal = {J. Mach. Learn. Res.},
  year = {2011},
  volume = {12},
  pages = {2297--2334},
  month = {Jul},
  url = {http://jmlr.csail.mit.edu/papers/v12/jenatton11a.html}
}

@article{Jensen2009STRING,
  author = {Jensen, L.J. and Kuhn, M. and Stark, M. and Chaffron, S. and Creevey,
	C. and Muller, J. and Doerks, T. and Julien, P. and Roth, A. and
	Simonovic, M. and Bork, P. and von Mering, C.},
  title = {{STRING} 8---a global view on proteins and their functional interactions
	in 630 organisms.},
  journal = {Nucleic Acids Res},
  year = {2009},
  volume = {37},
  pages = {D412--D416},
  number = {Database issue},
  month = {Jan},
  abstract = {Functional partnerships between proteins are at the core of complex
	cellular phenotypes, and the networks formed by interacting proteins
	provide researchers with crucial scaffolds for modeling, data reduction
	and annotation. STRING is a database and web resource dedicated to
	protein-protein interactions, including both physical and functional
	interactions. It weights and integrates information from numerous
	sources, including experimental repositories, computational prediction
	methods and public text collections, thus acting as a meta-database
	that maps all interaction evidence onto a common set of genomes and
	proteins. The most important new developments in STRING 8 over previous
	releases include a URL-based programming interface, which can be
	used to query STRING from other resources, improved interaction prediction
	via genomic neighborhood in prokaryotes, and the inclusion of protein
	structures. Version 8.0 of STRING covers about 2.5 million proteins
	from 630 organisms, providing the most comprehensive view on protein-protein
	interactions currently available. STRING can be reached at http://string-db.org/.},
  doi = {10.1093/nar/gkn760},
  institution = {European Molecular Biology Laboratory, Heidelberg, Germany.},
  keywords = {Databases, Protein; Genomics; Multiprotein Complexes; Protein Interaction
	Mapping; Proteins; User-Computer Interface},
  owner = {fantine},
  pii = {gkn760},
  pmid = {18940858},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/nar/gkn760}
}

@article{Jeong2001Lethality,
  author = {H. Jeong and S. P. Mason and A.-L. Barab{\'a}si and Z. N. Oltvai},
  title = {Lethality and centrality in protein networks},
  journal = {Nature},
  year = {2001},
  volume = {411},
  pages = {41--42},
  number = {6833},
  month = {May},
  doi = {10.1038/35075138},
  pdf = {../local/jeon01.pdf},
  file = {jeon01.pdf:local/jeon01.pdf:PDF},
  subject = {bionet},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v411/n6833/full/411041a0_fs.html&content_filetype=PDF}
}

@article{Jeong2001,
  author = {H. Jeong and S. P. Mason and A.-L. Barab{\'a}si and Z. N. Oltvai},
  title = {Lethality and centrality in protein networks.},
  journal = {Nature},
  year = {2001},
  volume = {411},
  pages = {41--42},
  number = {6833},
  month = {May},
  doi = {10.1038/35075138},
  institution = {Department of Physics, University of Notre Dame, Notre Dame, Indiana
	46556, USA.},
  keywords = {Fungal Proteins, genetics/physiology; Gene Deletion; Protein Binding;
	Proteome; Saccharomyces cerevisiae, genetics/physiology; Signal Transduction},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pii = {35075138},
  pmid = {11333967},
  timestamp = {2011.04.07},
  url = {http://dx.doi.org/10.1038/35075138}
}

@article{Jeong2000large-scale,
  author = {H. Jeong and B. Tombor and R. Albert and Z. N. Oltvai and A.-L. Barab{\'a}si},
  title = {The large-scale organization of metabolic networks},
  journal = {Nature},
  year = {2000},
  volume = {407},
  pages = {651--654},
  number = {6804},
  month = {Oct},
  doi = {10.1038/35036627},
  pdf = {../local/jeon00.pdf},
  file = {jeon00.pdf:local/jeon00.pdf:PDF},
  subject = {bionet},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v407/n6804/full/407651a0_fs.html&content_filetype=PDF}
}

@article{Jerebko2005Support,
  author = {Anna K Jerebko and James D Malley and Marek Franaszek and Ronald
	M Summers},
  title = {Support vector machines committee classification method for computer-aided
	polyp detection in {CT} colonography.},
  journal = {Acad {R}adiol},
  year = {2005},
  volume = {12},
  pages = {479--486},
  number = {4},
  month = {Apr},
  abstract = {{RATIONALE} {AND} {OBJECTIVES}: {A} new classification scheme for
	the computer-aided detection of colonic polyps in computed tomographic
	colonography is proposed. {MATERIALS} {AND} {METHODS}: {T}he scheme
	involves an ensemble of support vector machines ({SVM}s) for classification,
	a smoothed leave-one-out ({SLOO}) cross-validation method for obtaining
	error estimates, and use of a bootstrap aggregation method for training
	and model selection. {O}ur use of an ensemble of {SVM} classifiers
	with bagging (bootstrap aggregation), built on different feature
	subsets, is intended to improve classification performance compared
	with single {SVM}s and reduce the number of false-positive detections.
	{T}he bootstrap-based model-selection technique is used for tuning
	{SVM} parameters. {I}n our first experiment, two independent data
	sets were used: the first, for feature and model selection, and the
	second, for testing to evaluate the generalizability of our model.
	{I}n the second experiment, the test set that contained higher resolution
	data was used for training and testing (using the {SLOO} method)
	to compare {SVM} committee and single {SVM} performance. {RESULTS}:
	{T}he overall sensitivity on independent test set was 75\%, with
	1.5 false-positive detections/study, compared with 76\%-78\% sensitivity
	and 4.5 false-positive detections/study estimated using the {SLOO}
	method on the training set. {T}he sensitivity of the {SVM} ensemble
	retrained on the former test set estimated using the {SLOO} method
	was 81\%, which is 7\%-10\% greater than the sensitivity of a single
	{SVM}. {T}he number of false-positive detections per study was 2.6,
	a 1.5 times reduction compared with a single {SVM}. {CONCLUSION}:
	{T}raining an {SVM} ensemble on one data set and testing it on the
	independent data has shown that the {SVM} committee classification
	method has good generalizability and achieves high sensitivity and
	a low false-positive rate. {T}he model selection and improved error
	estimation method are effective for computer-aided polyp detection.},
  doi = {10.1016/j.acra.2004.04.024},
  pii = {S1076-6332(05)00038-3},
  pmid = {15831422},
  url = {http://dx.doi.org/10.1016/j.acra.2004.04.024}
}

@article{Jia2006Demonstration,
  author      = {Jia, P. and Shi, T. and Cai, Y. and Li, Y.},
  title       = {{D}emonstration of two novel methods for predicting functional si{RNA}
	efficiency.},
  journal     = {BMC Bioinformatics},
  year        = {2006},
  volume      = {7},
  pages       = {271},
  doi         = {10.1186/1471-2105-7-271},
  url         = {http://dx.doi.org/10.1186/1471-2105-7-271},
  pmid        = {16729898},
  pii         = {1471-2105-7-271},
  keywords    = {sirna},
  abstract    = {BACKGROUND: siRNAs are small RNAs that serve as sequence determinants
	during the gene silencing process called RNA interference (RNAi).
	It is well know that siRNA efficiency is crucial in the RNAi pathway,
	and the siRNA efficiency for targeting different sites of a specific
	gene varies greatly. Therefore, there is high demand for reliable
	siRNAs prediction tools and for the design methods able to pick up
	high silencing potential siRNAs. RESULTS: In this paper, two systems
	have been established for the prediction of functional siRNAs: (1)
	a statistical model based on sequence information and (2) a machine
	learning model based on three features of siRNA sequences, namely
	binary description, thermodynamic profile and nucleotide composition.
	Both of the two methods show high performance on the two datasets
	we have constructed for training the model. CONCLUSION: Both of the
	two methods studied in this paper emphasize the importance of sequence
	information for the prediction of functional siRNAs. The way of denoting
	a bio-sequence by binary system in mathematical language might be
	helpful in other analysis work associated with fixed-length bio-sequence.},
  pdf         = {../local/Jia2006Demonstration.pdf},
  file        = {Jia2006Demonstration.pdf:local/Jia2006Demonstration.pdf:PDF},
  timestamp   = {2006.10.12}
}

@article{Jiang2009Compensatory,
  author      = {Jiang, D. and Zhou, S. and Chen, Y.-P. P.},
  title       = {Compensatory ability to null mutation in metabolic networks},
  journal     = {Biotechnol. Bioeng.},
  year        = {2009},
  month       = {Jun},
  volume      = {103},
  number      = {2},
  pages       = {361--369},
  doi         = {10.1002/bit.22237},
  url         = {http://dx.doi.org/10.1002/bit.22237},
  pmid        = {19160379},
  institution = {Shanghai Key Laboratory of Intelligent Information Processing, Fudan
	University, Shanghai, China.},
  abstract    = {Robustness is an inherent property of biological system. It is still
	a limited understanding of how it is accomplished at the cellular
	or molecular level. To this end, this article analyzes the impact
	degree of each reaction to others, which is defined as the number
	of cascading failures of following and/or forward reactions when
	an initial reaction is deleted. By analyzing more than 800 organism's
	metabolic networks, it suggests that the reactions with larger impact
	degrees are likely essential and the universal reactions should also
	be essential. Alternative metabolic pathways compensate null mutations,
	which represents that average impact degrees for all organisms are
	small. Interestingly, average impact degrees of archaea organisms
	are smaller than other two categories of organisms, eukayote and
	bacteria, indicating that archaea organisms have strong robustness
	to resist the various perturbations during the evolution process.
	The results show that scale-free feature and reaction reversibility
	contribute to the robustness in metabolic networks. The optimal growth
	temperature of organism also relates the robust structure of metabolic
	network.},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Jiang2009Compensatory.pdf},
  file        = {Jiang2009Compensatory.pdf:Jiang2009Compensatory.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.11.29}
}

@article{Jiang2009Statistical,
  author      = {Jiang, H. and Wong, W. H.},
  title       = {Statistical inferences for isoform expression in {RNA-Seq}.},
  journal     = {Bioinformatics},
  year        = {2009},
  month       = {Apr},
  volume      = {25},
  number      = {8},
  pages       = {1026--1032},
  doi         = {10.1093/bioinformatics/btp113},
  url         = {http://dx.doi.org/10.1093/bioinformatics/btp113},
  pmid        = {19244387},
  pii         = {btp113},
  institution = {Institute for Computational and Mathematical Engineering and Department
	of Statistics, Stanford University, Stanford, CA 94305, USA.},
  keywords    = {ngs, rnaseq},
  abstract    = {SUMMARY: The development of RNA sequencing (RNA-Seq) makes it possible
	for us to measure transcription at an unprecedented precision and
	throughput. However, challenges remain in understanding the source
	and distribution of the reads, modeling the transcript abundance
	and developing efficient computational methods. In this article,
	we develop a method to deal with the isoform expression estimation
	problem. The count of reads falling into a locus on the genome annotated
	with multiple isoforms is modeled as a Poisson variable. The expression
	of each individual isoform is estimated by solving a convex optimization
	problem and statistical inferences about the parameters are obtained
	from the posterior distribution by importance sampling. Our results
	show that isoform expression inference in RNA-Seq is possible by
	employing appropriate statistical methods.},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Jiang2009Statistical.pdf},
  file        = {Jiang2009Statistical.pdf:Jiang2009Statistical.pdf:PDF},
  owner       = {jp},
  timestamp   = {2012.03.06}
}

@article{Jiang-Ning2004Cooperativity,
  author = {Song, J.-N. and Li, W.-J. and Xu, W.-B.},
  title = {Cooperativity of the oxidization of cysteines in globular proteins.},
  journal = {J. {T}heor. {B}iol.},
  year = {2004},
  volume = {231},
  pages = {85--95},
  number = {1},
  abstract = {Based on the 639 non-homologous proteins with 2910 cysteine-containing
	segments of well-resolved three-dimensional structures, a novel approach
	has been proposed to predict the disulfide-bonding state of cysteines
	in proteins by constructing a two-stage classifier combining a first
	global linear discriminator based on their amino acid composition
	and a second local support vector machine classifier. {T}he overall
	prediction accuracy of this hybrid classifier for the disulfide-bonding
	state of cysteines in proteins has scored 84.1\% and 80.1\%, when measured
	on cysteine and protein basis using the rigorous jack-knife procedure,
	respectively. {I}t shows that whether cysteines should form disulfide
	bonds depends not only on the global structural features of proteins
	but also on the local sequence environment of proteins. {T}he result
	demonstrates the applicability of this novel method and provides
	comparable prediction performance compared with existing methods
	for the prediction of the oxidation states of cysteines in proteins.},
  doi = {10.1016/j.jtbi.2004.06.002},
  pdf = {../local/Jiang-Ning2004Cooperativity.pdf},
  file = {Jiang-Ning2004Cooperativity.pdf:local/Jiang-Ning2004Cooperativity.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.jtbi.2004.06.002}
}

@article{Jin2007yeast,
  author = {Fulai Jin and Larisa Avramova and Jing Huang and Tony Hazbun},
  title = {A yeast two-hybrid smart-pool-array system for protein-interaction
	mapping.},
  journal = {Nat. Methods},
  year = {2007},
  volume = {4},
  pages = {405--407},
  number = {5},
  month = {May},
  abstract = {We present here a new two-hybrid smart pool array (SPA) system in
	which, instead of individual activation domain strains, well-designed
	activation domain pools are screened in an array format that allows
	built-in replication and prey-bait deconvolution. Using this method,
	a Saccharomyces cerevisiae genome SPA increases yeast two-hybrid
	screening efficiency by an order of magnitude.},
  doi = {10.1038/nmeth1042},
  institution = {Department of Molecular and Medical Pharmacology, David Geffen School
	of Medicine, and the Molecular Biology Institute, University of California,
	Los Angeles, California 90095, USA.},
  keywords = {Genome, Fungal; Protein Interaction Mapping; Saccharomyces cerevisiae;
	Saccharomyces cerevisiae Proteins; Two-Hybrid System Techniques},
  owner = {phupe},
  pii = {nmeth1042},
  pmid = {17450148},
  timestamp = {2010.09.01},
  url = {http://dx.doi.org/10.1038/nmeth1042}
}

@incollection{Joachims1999Making,
  author = {Joachims, T.},
  title = {Making Large-Scale {SVM} Learning Practical},
  booktitle = {Advances in {K}ernel {M}ethods - {S}upport {V}ector {L}earning},
  publisher = {MIT Press},
  address = {Cambridge, MA},
  year = {1999},
  editor = {B. Sch{\"o}lkopf and C. Burges and A. Smola},
  pages = {169--184},
  pdf = {../local/Joachims1999Making.pdf},
  file = {Joachims1999Making.pdf:local/Joachims1999Making.pdf:PDF}
}

@book{Joachims2002Learning,
  author    = {T. Joachims},
  title     = {Learning to Classify Text Using Support Vector Machines},
  publisher = {Kluwer Academic Publishers},
  year      = {2002},
  owner     = {mahe},
  timestamp = {2006.09.07}
}

@inproceedings{Joachims1999Transductive,
  author    = {Joachims, T.},
  title     = {Transductive Inference for Text Classification using Support Vector
	Machines},
  booktitle = {{ICML '99}: Proceedings of the Sixteenth International Conference
	on Machine Learning},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  year      = {1999},
  pages     = {200--209},
  isbn      = {1-55860-612-2},
  keywords  = {PUlearning},
  pdf       = {../local/Joachims1999Transductive.pdf},
  file      = {Joachims1999Transductive.pdf:Joachims1999Transductive.pdf:PDF},
  owner     = {fantine},
  timestamp = {2009.06.09}
}

@inproceedings{Joachims97aprobabilistic,
  author = {Joachims, T.},
  title = {A Probabilistic Analysis of the {Rocchio} Algorithm with {TFIDF}
	for Text Categorization},
  booktitle = {{ICML '97}: Proceedings of the Fourteenth International Conference
	on Machine Learning},
  year = {1997},
  pages = {143--151},
  address = {San Francisco, CA, USA},
  publisher = {Morgan Kaufmann Publishers Inc.},
  venue = {Nashville, Tennessee, USA},
  owner = {mordelet},
  timestamp = {2010.07.16}
}

@article{John2004Human,
  author = {Bino John and Anton J Enright and Alexei Aravin and Thomas Tuschl
	and Chris Sander and Debora S Marks},
  title = {Human {MicroRNA} targets.},
  journal = {PLoS Biol},
  year = {2004},
  volume = {2},
  pages = {e363},
  number = {11},
  month = {Nov},
  abstract = {MicroRNAs (miRNAs) interact with target mRNAs at specific sites to
	induce cleavage of the message or inhibit translation. The specific
	function of most mammalian miRNAs is unknown. We have predicted target
	sites on the 3' untranslated regions of human gene transcripts for
	all currently known 218 mammalian miRNAs to facilitate focused experiments.
	We report about 2,000 human genes with miRNA target sites conserved
	in mammals and about 250 human genes conserved as targets between
	mammals and fish. The prediction algorithm optimizes sequence complementarity
	using position-specific rules and relies on strict requirements of
	interspecies conservation. Experimental support for the validity
	of the method comes from known targets and from strong enrichment
	of predicted targets in mRNAs associated with the fragile X mental
	retardation protein in mammals. This is consistent with the hypothesis
	that miRNAs act as sequence-specific adaptors in the interaction
	of ribonuclear particles with translationally regulated messages.
	Overrepresented groups of targets include mRNAs coding for transcription
	factors, components of the miRNA machinery, and other proteins involved
	in translational regulation, as well as components of the ubiquitin
	machinery, representing novel feedback loops in gene regulation.
	Detailed information about target genes, target processes, and open-source
	software for target prediction (miRanda) is available at http://www.microrna.org.
	Our analysis suggests that miRNA genes, which are about 1\% of all
	human genes, regulate protein production for 10\% or more of all
	human genes.},
  doi = {10.1371/journal.pbio.0020363},
  pdf = {../local/John2004Human.pdf},
  file = {John2004Human.pdf:John2004Human.pdf:PDF},
  institution = {Computational Biology Center, Memorial Sloan-Kettering Cancer Center,
	New York, New York, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {15502875},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1371/journal.pbio.0020363}
}

@incollection{John1996Stock,
  author = {John, G. H. and Miller, P. and Kerber, R.},
  title = {Stock selection using {R}econ},
  booktitle = {Neural Networks in Financial Engineering},
  editor = {Abu-Mostafa, Y. and Moody, J. and Refenes, P. and Weigend, A.},
  publisher = {World Scientific},
  year = {1996},
  pages = {303--316},
  pdf = {../local/John1996Stock.pdf},
  file = {John1996Stock.pdf:John1996Stock.pdf:PDF},
  owner = {jp},
  timestamp = {2011.04.08}
}

@incollection{Johnson02Experimental,
  author = {D. S. Johnson and G. Gutin and L. A. McGeoch and A. Yeo and W. Zhang
	and A. Zverovich},
  title = {Experimental Analysis of Heuristics for the {ATSP}},
  booktitle = {The Traveling Salesman Problem and Its Variations},
  editor = {G. Gutin and A. P. Punnen},
  publisher = {Kluwer Academic Publishers},
  year = {2002},
  pages = {445--487}
}

@article{Johnson2007Genome-wide,
  author = {Johnson, D. S. and Mortazavi, A. and Myers, R. M. and Wold, B.},
  title = {Genome-wide mapping of in vivo protein-{DNA} interactions},
  journal = {Science},
  year = {2007},
  volume = {316},
  pages = {1497--1502},
  number = {5830},
  month = {Jun},
  abstract = {In vivo protein-DNA interactions connect each transcription factor
	with its direct targets to form a gene network scaffold. To map these
	protein-DNA interactions comprehensively across entire mammalian
	genomes, we developed a large-scale chromatin immunoprecipitation
	assay (ChIPSeq) based on direct ultrahigh-throughput DNA sequencing.
	This sequence census method was then used to map in vivo binding
	of the neuron-restrictive silencer factor (NRSF; also known as REST,
	for repressor element-1 silencing transcription factor) to 1946 locations
	in the human genome. The data display sharp resolution of binding
	position [+/-50 base pairs (bp)], which facilitated our finding motifs
	and allowed us to identify noncanonical NRSF-binding motifs. These
	ChIPSeq data also have high sensitivity and specificity [ROC (receiver
	operator characteristic) area >/= 0.96] and statistical confidence
	(P <10(-4)), properties that were important for inferring new candidate
	interactions. These include key transcription factors in the gene
	network that regulates pancreatic islet cell development.},
  doi = {10.1126/science.1141319},
  pdf = {../local/Johnson2007Genome-wide.pdf},
  file = {Johnson2007Genome-wide.pdf:Johnson2007Genome-wide.pdf:PDF},
  institution = {Department of Genetics, Stanford University School of Medicine, Stanford,
	CA, 94305-5120, USA.},
  owner = {jp},
  pii = {1141319},
  pmid = {17540862},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1126/science.1141319}
}

@article{Johnson2005Kinomics,
  author      = {Johnson, S. A. and Hunter, T.},
  title       = {Kinomics: methods for deciphering the kinome.},
  journal     = {Nat. Methods},
  year        = {2005},
  month       = {Jan},
  volume      = {2},
  number      = {1},
  pages       = {17--25},
  doi         = {10.1038/nmeth731},
  url         = {http://dx.doi.org/10.1038/nmeth731},
  pmid        = {15789031},
  pii         = {nmeth731},
  abstract    = {Phosphorylation by protein kinases is the most widespread and well-studied
	signaling mechanism in eukaryotic cells. Phosphorylation can regulate
	almost every property of a protein and is involved in all fundamental
	cellular processes. Cataloging and understanding protein phosphorylation
	is no easy task: many kinases may be expressed in a cell, and one-third
	of all intracellular proteins may be phosphorylated, representing
	as many as 20,000 distinct phosphoprotein states. Defining the kinase
	complement of the human genome, the kinome, has provided an excellent
	starting point for understanding the scale of the problem. The kinome
	consists of 518 kinases, and every active protein kinase phosphorylates
	a distinct set of substrates in a regulated manner. Deciphering the
	complex network of phosphorylation-based signaling is necessary for
	a thorough and therapeutically applicable understanding of the functioning
	of a cell in physiological and pathological states. We review contemporary
	techniques for identifying physiological substrates of the protein
	kinases and studying phosphorylation in living cells.},
  institution = {Molecular and Cell Biology Laboratory, Salk Institute, 10010 North
	Torrey Pines Road, La Jolla, California 92037, USA.},
  keywords    = {csbcbook, csbcbook-ch2},
  pdf         = {../local/Johnson2005Kinomics.pdf},
  file        = {Johnson2005Kinomics.pdf:Johnson2005Kinomics.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.13}
}

@article{Johnson2007Adjusting,
  author = {W. Evan Johnson and Cheng Li and Ariel Rabinovic},
  title = {Adjusting batch effects in microarray expression data using empirical
	{Bayes} methods.},
  journal = {Biostatistics},
  year = {2007},
  volume = {8},
  pages = {118--127},
  number = {1},
  month = {Jan},
  abstract = {Non-biological experimental variation or "batch effects" are commonly
	observed across multiple batches of microarray experiments, often
	rendering the task of combining data from these batches difficult.
	The ability to combine microarray data sets is advantageous to researchers
	to increase statistical power to detect biological phenomena from
	studies where logistical considerations restrict sample size or in
	studies that require the sequential hybridization of arrays. In general,
	it is inappropriate to combine data sets without adjusting for batch
	effects. Methods have been proposed to filter batch effects from
	data, but these are often complicated and require large batch sizes
	( > 25) to implement. Because the majority of microarray studies
	are conducted using much smaller sample sizes, existing methods are
	not sufficient. We propose parametric and non-parametric empirical
	Bayes frameworks for adjusting data for batch effects that is robust
	to outliers in small sample sizes and performs comparable to existing
	methods for large samples. We illustrate our methods using two example
	data sets and show that our methods are justifiable, easy to apply,
	and useful in practice. Software for our method is freely available
	at: http://biosun1.harvard.edu/complab/batch/.},
  doi = {10.1093/biostatistics/kxj037},
  institution = {Department of Biostatistics and Computational Biology, Dana-Farber
	Cancer Institute, Boston, MA, USA.},
  keywords = {Bayes Theorem; Data Interpretation, Statistical; Gene Expression Profiling,
	methods; Humans; Oligonucleotide Array Sequence Analysis, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {kxj037},
  pmid = {16632515},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1093/biostatistics/kxj037}
}

@article{Jojic2006Learning,
  author    = {Jojic, N. and Reyes-Gomez, M. and Heckerman, D. and Kadie, C. and
	Schueler-Furman, O.},
  title     = {{L}earning {MHC} {I}--peptide binding.},
  journal   = {Bioinformatics},
  year      = {2006},
  month     = {Jul},
  volume    = {22},
  number    = {14},
  pages     = {e227--e235},
  doi       = {10.1093/bioinformatics/btl255},
  url       = {http://dx.doi.org/10.1093/bioinformatics/btl255},
  pmid      = {16873476},
  pii       = {22/14/e227},
  abstract  = {MOTIVATION AND RESULTS: Motivated by the ability of a simple threading
	approach to predict MHC I--peptide binding, we developed a new and
	improved structure-based model for which parameters can be estimated
	from additional sources of data about MHC-peptide binding. In addition
	to the known 3D structures of a small number of MHC-peptide complexes
	that were used in the original threading approach, we included three
	other sources of information on peptide-MHC binding: (1) MHC class
	I sequences; (2) known binding energies for a large number of MHC-peptide
	complexes; and (3) an even larger binary dataset that contains information
	about strong binders (epitopes) and non-binders (peptides that have
	a low affinity for a particular MHC molecule). Our model significantly
	outperforms the standard threading approach in binding energy prediction.
	In our approach, which we call adaptive double threading, the parameters
	of the threading model are learnable, and both MHC and peptide sequences
	can be threaded onto structures of other alleles. These two properties
	make our model appropriate for predicting binding for alleles for
	which very little data (if any) is available beyond just their sequence,
	including prediction for alleles for which 3D structures are not
	available. The ability of our model to generalize beyond the MHC
	types for which training data is available also separates our approach
	from epitope prediction methods which treat MHC alleles as symbolic
	types, rather than biological sequences. We used the trained binding
	energy predictor to study viral infections in 246 HIV patients from
	the West Australian cohort, and over 1000 sequences in HIV clade
	B from Los Alamos National Laboratory database, capturing the course
	of HIV evolution over the last 20 years. Finally, we illustrate short-,
	medium-, and long-term adaptation of HIV to the human immune system.
	AVAILABILITY: http://www.research.microsoft.com/~jojic/hlaBinding.html.},
  keywords  = {immunoinformatics},
  timestamp = {2007.01.25}
}

@book{Jolliffe1996Principal,
  title = {Principal component analysis},
  publisher = {Springer-Verlag},
  year = {1996},
  author = {Jolliffe, I. T.},
  address = {New York}
}

@article{Jones2004Molecular,
  author = {Jones, C. and Ford, E. and Gillett, C. and Ryder, K. and Merrett,
	S. and Reis-Filho, J. S. and Fulford, L. G. and Hanby, A. and Lakhani,
	S. R.},
  title = {Molecular Cytogenetic Identification of Subgroups of Grade {III} Invasive
	Ductal Breast Carcinomas with Different Clinical Outcomes},
  journal = {Clin. Cancer Res.},
  year = {2004},
  volume = {10},
  pages = {5988--5997},
  number = {18},
  abstract = {Tumor grade is an established indicator of breast cancer outcome,
	although considerable heterogeneity exists even within-grade. Around
	25\% of grade III invasive ductal breast carcinomas are associated
	with a "basal" phenotype, and these tumors are reported to be a distinct
	subgroup. We have investigated whether this group of breast cancers
	has a distinguishing pattern of genetic alterations and which of
	these may relate to the different clinical outcome of these patients.
	We performed comparative genomic hybridization (CGH) analysis on
	43 grade III invasive ductal breast carcinomas positive for basal
	cytokeratin 14, as well as 43 grade- and age-matched CK14-negative
	controls, all with up to 25 years (median, 7 years) of clinical follow-up.
	Significant differences in CGH alterations were seen between the
	two groups in terms of mean number of changes (CK14+ve - 6.5, CK14-ve
	- 10.3; P = 0.0012) and types of alterations at chromosomes 4q, 7q,
	8q, 9p, 13q, 16p, 17p, 17q, 19p, 19q, 20p, 20q and Xp. Supervised
	and unsupervised algorithms separated the two groups on CGH data
	alone with 76\% and 74\% accuracy, respectively. Hierarchical clustering
	revealed distinct subgroups, one of which contained 18 (42\%) of the
	CK14+ve tumors. This subgroup had significantly shorter overall survival
	(P = 0.0414) than other grade III tumors, regardless of CK14 status,
	and was an independent prognostic marker (P = 0.031). These data
	provide evidence that the "basal" phenotype on its own does not convey
	a poor prognosis. Basal tumors are also heterogeneous with only a
	subset, identifiable by pattern of genetic alterations, exhibiting
	a shorter overall survival. Robust characterization of this basal
	group is necessary if it is to have a major impact on management
	of patients with breast cancer.},
  doi = {10.1158/1078-0432.CCR-03-0731},
  eprint = {http://clincancerres.aacrjournals.org/cgi/reprint/10/18/5988.pdf},
  pdf = {../local/Jones2004Molecular.pdf},
  file = {Jones2004Molecular.pdf:Jones2004Molecular.pdf:PDF},
  keywords = {breastcancer, cgh},
  owner = {jp},
  timestamp = {2008.12.08},
  url = {http://clincancerres.aacrjournals.org/cgi/content/abstract/10/18/5988}
}

@article{Jones1997Development,
  author    = {G. Jones and P. Willett and R. C. Glen and A. R. Leach and R. Taylor},
  title     = {{D}evelopment and validation of a genetic algorithm for flexible
	docking.},
  journal   = {J. Mol. Biol.},
  year      = {1997},
  month     = {Apr},
  volume    = {267},
  number    = {3},
  pages     = {727--748},
  doi       = {10.1006/jmbi.1996.0897},
  url       = {http://dx.doi.org/10.1006/jmbi.1996.0897},
  pmid      = {9126849},
  pii       = {97-9},
  abstract  = {Prediction of small molecule binding modes to macromolecules of known
	three-dimensional structure is a problem of paramount importance
	in rational drug design (the "docking" problem). We report the development
	and validation of the program GOLD (Genetic Optimisation for Ligand
	Docking). GOLD is an automated ligand docking program that uses a
	genetic algorithm to explore the full range of ligand conformational
	flexibility with partial flexibility of the protein, and satisfies
	the fundamental requirement that the ligand must displace loosely
	bound water on binding. Numerous enhancements and modifications have
	been applied to the original technique resulting in a substantial
	increase in the reliability and the applicability of the algorithm.
	The advanced algorithm has been tested on a dataset of 100 complexes
	extracted from the Brookhaven Protein DataBank. When used to dock
	the ligand back into the binding site, GOLD achieved a 71\% success
	rate in identifying the experimental binding mode.},
  keywords  = {Algorithms, Binding Sites, Computer Simulation, Crystallography, Genetic,
	Humans, Ligands, Models, Molecular, NADP, Protein Binding, Protein
	Conformation, Proteins, Tetrahydrofolate Dehydrogenase, X-Ray, 9126849},
  owner     = {mahe},
  timestamp = {2006.09.05}
}

@article{Jones2002DNA,
  author = {Peter A Jones},
  title = {{DNA} methylation and cancer.},
  journal = {Oncogene},
  year = {2002},
  volume = {21},
  pages = {5358--5360},
  number = {35},
  month = {Aug},
  abstract = {There is tremendous ferment in the field of epigenetics as the relationships
	between chromatin structure and DNA methylation patterns become clearer.
	Central to this activity is the realization that the 'histone code',
	which involves the post-translational modification of histones and
	which has important ramifications for chromatin structure, may be
	linked to the DNA cytosine methylation pattern. New discoveries have
	suggested that histone lysine 9 methylation is implicated in the
	spread of heterochromatin in Drosophila and other organisms. Very
	recently it has been found that histone lysine 9 methylation is also
	necessary for some DNA methylation in Neurospora and plants. There
	is therefore the possibility that these two processes are closely
	linked, suggesting ways in which DNA methylation patterns may be
	established during normal development. Understanding these processes
	is fundamental to understanding what goes awry during the process
	of aging and carcinogenesis where DNA methylation patterns become
	substantially altered and contribute to the malignant phenotype.},
  doi = {10.1038/sj.onc.1205597},
  institution = {USC/Norris Comprehensive Cancer Center, Department of Urology, Keck
	School of Medicine of the University of Southern California, 1441
	Eastlake Avenue, MS 8302L, Los Angeles, California, CA 90089-9181.
	jones_p@ccnt.hsc.usc.edu},
  keywords = {Animals; Chromatin; CpG Islands; DNA Methylation; DNA, Neoplasm; Gene
	Expression Regulation; Gene Silencing; Histone-Lysine N-Methyltransferase;
	Humans; Neoplasms; Plants; Transcription, Genetic},
  owner = {ljacob},
  pmid = {12154398},
  timestamp = {2009.09.14},
  url = {http://dx.doi.org/10.1038/sj.onc.1205597}
}

@article{Jones2002fundamental,
  author      = {Jones, P. A. and Baylin, S. B.},
  title       = {The fundamental role of epigenetic events in cancer},
  journal     = {Nat. Rev. Genet.},
  year        = {2002},
  month       = {Jun},
  volume      = {3},
  number      = {6},
  pages       = {415--428},
  doi         = {10.1038/nrg816},
  url         = {http://dx.doi.org/10.1038/nrg816},
  pmid        = {12042769},
  pii         = {nrg816},
  abstract    = {Patterns of DNA methylation and chromatin structure are profoundly
	altered in neoplasia and include genome-wide losses of, and regional
	gains in, DNA methylation. The recent explosion in our knowledge
	of how chromatin organization modulates gene transcription has further
	highlighted the importance of epigenetic mechanisms in the initiation
	and progression of human cancer. These epigenetic changes -- in particular,
	aberrant promoter hypermethylation that is associated with inappropriate
	gene silencing -- affect virtually every step in tumour progression.
	In this review, we discuss these epigenetic events and the molecular
	alterations that might cause them and/or underlie altered gene expression
	in cancer.},
  institution = {USC/Norris Comprehensive Cancer Center, Department of Urology, Keck
	School of Medicine, University of Southern California, 1441 Eastlake
	Avenue, MS 8302L, Los Angeles, California 90089-9181, USA. jones_p@ccnt.hsc.usc.edu},
  pdf         = {../local/Jones2002fundamental.pdf},
  file        = {Jones2002fundamental.pdf:Jones2002fundamental.pdf:PDF},
  owner       = {jp},
  timestamp   = {2008.12.09}
}

@article{Jones-Rhoades2004Computational,
  author = {Matthew W Jones-Rhoades and David P Bartel},
  title = {Computational identification of plant {microRNAs} and their targets,
	including a stress-induced {miRNA}.},
  journal = {Mol Cell},
  year = {2004},
  volume = {14},
  pages = {787--799},
  number = {6},
  month = {Jun},
  abstract = {MicroRNAs (miRNAs) are approximately 21-nucleotide RNAs, some of which
	have been shown to play important gene-regulatory roles during plant
	development. We developed comparative genomic approaches to systematically
	identify both miRNAs and their targets that are conserved in Arabidopsis
	thaliana and rice (Oryza sativa). Twenty-three miRNA candidates,
	representing seven newly identified gene families, were experimentally
	validated in Arabidopsis, bringing the total number of reported miRNA
	genes to 92, representing 22 families. Nineteen newly identified
	target candidates were confirmed by detecting mRNA fragments diagnostic
	of miRNA-directed cleavage in plants. Overall, plant miRNAs have
	a strong propensity to target genes controlling development, particularly
	those of transcription factors and F-box proteins. However, plant
	miRNAs have conserved regulatory functions extending beyond development,
	in that they also target superoxide dismutases, laccases, and ATP
	sulfurylases. The expression of miR395, the sulfurylase-targeting
	miRNA, increases upon sulfate starvation, showing that miRNAs can
	be induced by environmental stress.},
  doi = {10.1016/j.molcel.2004.05.027},
  pdf = {../local/Jones-Rhoades2004Computational.pdf},
  file = {Jones-Rhoades2004Computational.pdf:Jones-Rhoades2004Computational.pdf:PDF},
  institution = {Whitehead Institute for Biomedical Research and Department of Biology,
	Massachusetts Institute of Technology, 9 Cambridge Center, Cambridge,
	MA 02142, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S1097276504003284},
  pmid = {15200956},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1016/j.molcel.2004.05.027}
}

@article{Jong2004Breakpoint,
  author = {Jong, K. and Marchiori, E. and Meijer, G. and van der Vaart, A. and
	Ylstra, B.},
  title = {Breakpoint identification and smoothing of array comparative genomic
	hybridization data.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {3636--3637},
  number = {18},
  month = {Dec},
  abstract = {SUMMARY: We describe a tool, called aCGH-Smooth, for the automated
	identification of breakpoints and smoothing of microarray comparative
	genomic hybridization (array CGH) data. aCGH-Smooth is written in
	visual C++, has a user-friendly interface including a visualization
	of the results and user-defined parameters adapting the performance
	of data smoothing and breakpoint recognition. aCGH-Smooth can handle
	array-CGH data generated by all array-CGH platforms: BAC, PAC, cosmid,
	cDNA and oligo CGH arrays. The tool has been successfully applied
	to real-life data. AVAILABILITY: aCGH-Smooth is free for researchers
	at academic and non-profit institutions at http://www.few.vu.nl/~vumarray/.},
  doi = {10.1093/bioinformatics/bth355},
  pdf = {../local/Jong2004Breakpoint.pdf},
  file = {Jong2004Breakpoint.pdf:Jong2004Breakpoint.pdf:PDF},
  keywords = {cgh},
  owner = {jp},
  pii = {bth355},
  pmid = {15201182},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth355}
}

@article{Jonsson1999Xpose,
  author = {E. N. Jonsson and M. O. Karlsson},
  title = {{X}pose--an {S}-{PLUS} based population pharmacokinetic/pharmacodynamic
	model building aid for {NONMEM}},
  journal = {Comput Meth Prog Bio},
  year = {1999},
  volume = {58},
  pages = {51--64},
  number = {1}
}

@incollection{Jorgensen2003Sense,
  author    = {Jorgensen, R. A.},
  title     = {Sense cosuppression in plants: Past, present, and future},
  booktitle = {RNAi: A guide to gene silencing},
  editor    = {Hannon, G. J.},
  publisher = {Cold Spring Harbor Laboratory Press},
  address   = {Cold Spring Harbor, NY},
  year      = {2003},
  keywords  = {sirna},
  owner     = {vert},
  timestamp = {2006.03.29}
}

@article{Jorgensen2004many,
  author    = {W. L. Jorgensen},
  title     = {{T}he many roles of computation in drug discovery.},
  journal   = {Science},
  year      = {2004},
  month     = {Mar},
  volume    = {303},
  number    = {5665},
  pages     = {1813--1818},
  doi       = {10.1126/science.1096361},
  url       = {http://dx.doi.org/10.1126/science.1096361},
  pmid      = {15031495},
  pii       = {303/5665/1813},
  abstract  = {An overview is given on the diverse uses of computational chemistry
	in drug discovery. Particular emphasis is placed on virtual screening,
	de novo design, evaluation of drug-likeness, and advanced methods
	for determining protein-ligand binding.},
  keywords  = {chemoinformatics},
  pdf       = {../local/Jorgensen2004many.pdf},
  file      = {Jorgensen2004many.pdf:Jorgensen2004many.pdf:PDF},
  owner     = {mahe},
  timestamp = {2006.08.15}
}

@article{Jorissen2005Virtual,
  author = {R. N. Jorissen and M. K. Gilson},
  title = {Virtual screening of molecular databases using a support vector machine.},
  journal = {J {C}hem {I}nf {M}odel},
  year = {2005},
  volume = {45},
  pages = {549--561},
  number = {3},
  abstract = {The {S}upport {V}ector {M}achine ({SVM}) is an algorithm that derives
	a model used for the classification of data into two categories and
	which has good generalization properties. {T}his study applies the
	{SVM} algorithm to the problem of virtual screening for molecules
	with a desired activity. {I}n contrast to typical applications of
	the {SVM}, we emphasize not classification but enrichment of actives
	by using a modified version of the standard {SVM} function to rank
	molecules. {T}he method employs a simple and novel criterion for
	picking molecular descriptors and uses cross-validation to select
	{SVM} parameters. {T}he resulting method is more effective at enriching
	for active compounds with novel chemistries than binary fingerprint-based
	methods such as binary kernel discrimination.},
  doi = {10.1021/ci049641u},
  pdf = {../local/Jorissen2005Virtual.pdf},
  file = {Jorissen2005Virtual.pdf:local/Jorissen2005Virtual.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci049641u}
}

@article{Reactome2005,
  author = {Joshi-Tope, G. and Gillespie, M. and Vastrik, I. and D'Eustachio,
	P. and Schmidt, E. and de Bono, B. and Jassal, B. and Gopinath, G.
	R. and Wu, G. R. and Matthews, L. and Lewis, S. and Birney, E. and
	Stein, L.},
  title = {Reactome: a knowledgebase of biological pathways},
  journal = {Nucleic {A}cids {R}es},
  year = {2005},
  volume = {33},
  pages = {D428--D432},
  number = {Database issue},
  issn = {1362-4962},
  abstract = {Reactome, located at http://www.reactome.org is a curated, peer-reviewed
	resource of human biological processes. {G}iven the genetic makeup
	of an organism, the complete set of possible reactions constitutes
	its reactome. {T}he basic unit of the {R}eactome database is a reaction;
	reactions are then grouped into causal chains to form pathways. {T}he
	{R}eactome data model allows us to represent many diverse processes
	in the human system, including the pathways of intermediary metabolism,
	regulatory pathways, and signal transduction, and high-level processes,
	such as the cell cycle. {R}eactome provides a qualitative framework,
	on which quantitative data can be superimposed. {T}ools have been
	developed to facilitate custom data entry and annotation by expert
	biologists, and to allow visualization and exploration of the finished
	dataset as an interactive process map. {A}lthough our primary curational
	domain is pathways from {H}omo sapiens, we regularly create electronic
	projections of human pathways onto other organisms via putative orthologs,
	thus making {R}eactome relevant to model organism research communities.
	{T}he database is publicly available under open source terms, which
	allows both its content and its software infrastructure to be freely
	used and redistributed.},
  keywords = {Animals *Databases, Factual Gene Expression Profiling Humans Metabolism
	*Physiological Processes Research Support, Non-U.S. Gov't Research
	Support, U.S. Gov't, P.H.S. Signal Transduction User-Computer Interface}
}

@article{Jovanovic2010epigenetics,
  author = {Jovana Jovanovic and Jo Anders R{\o}nneberg and J{\"o}rg Tost and Vessela
	Kristensen},
  title = {The epigenetics of breast cancer.},
  journal = {Mol Oncol},
  year = {2010},
  volume = {4},
  pages = {242--254},
  number = {3},
  month = {Jun},
  abstract = {Epigenetic changes can be defined as stable molecular alterations
	of a cellular phenotype such as the gene expression profile of a
	cell that are heritable during somatic cell divisions (and sometimes
	germ line transmissions) but do not involve changes of the DNA sequence
	itself. Epigenetic phenomena are mediated by several molecular mechanisms
	comprising histone modifications, polycomb/trithorax protein complexes,
	small non-coding or antisense RNAs and DNA methylation. These different
	modifications are closely interconnected. Epigenetic regulation is
	critical in normal growth and development and closely conditions
	the transcriptional potential of genes. Epigenetic mechanisms convey
	genomic adaption to an environment thereby ultimately contributing
	towards given phenotype. In this review we will describe the various
	aspects of epigenetics and in particular DNA methylation in breast
	carcinogenesis and their potential application for diagnosis, prognosis
	and treatment decision.},
  doi = {10.1016/j.molonc.2010.04.002},
  institution = {Department for Clinical Molecular Biology (EpiGen), Institute for
	Clinical Medicine, Akershus University Hospital, University of Oslo,
	Norway.},
  keywords = {Breast Neoplasms, diagnosis/genetics/pathology/therapy; Chromatin,
	chemistry/metabolism; DNA Methylation; DNA Modification Methylases,
	metabolism; DNA, chemistry/metabolism; Epigenesis, Genetic; Female;
	Gene Expression Regulation, Neoplastic; Histones, metabolism; Humans;
	MicroRNAs, genetics/metabolism; Molecular Structure; Prognosis; Receptors,
	Estrogen, genetics/metabolism; Tumor Markers, Biological, metabolism},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {S1574-7891(10)00024-4},
  pmid = {20627830},
  timestamp = {2011.06.04},
  url = {http://dx.doi.org/10.1016/j.molonc.2010.04.002}
}

@article{Juditsky2000Functional,
  author = {Juditsky, A. and Nemirovski, A.},
  title = {Functional {A}ggregation for {N}onparametric {E}stimation},
  journal = {Ann. {S}tat.},
  year = {2000},
  volume = {28},
  pages = {681--712},
  number = {3},
  month = {June},
  pdf = {../local/judi00.pdf},
  file = {judi00.pdf:local/judi00.pdf:PDF},
  subject = {stat},
  url = {ftp://ftp.irisa.fr/techreports/1996/PI-993.ps.gz}
}

@article{Jonsdottir2005Prediction,
  author = {Svava Osk J{\'o}nsd{\'o}ttir and Flemming Steen J{\o}rgensen and S{\o}ren Brunak},
  title = {Prediction methods and databases within chemoinformatics: emphasis
	on drugs and drug candidates.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2145--2160},
  number = {10},
  month = {May},
  abstract = {MOTIVATION: To gather information about available databases and chemoinformatics
	methods for prediction of properties relevant to the drug discovery
	and optimization process. RESULTS: We present an overview of the
	most important databases with 2-dimensional and 3-dimensional structural
	information about drugs and drug candidates, and of databases with
	relevant properties. Access to experimental data and numerical methods
	for selecting and utilizing these data is crucial for developing
	accurate predictive in silico models. Many interesting predictive
	methods for classifying the suitability of chemical compounds as
	potential drugs, as well as for predicting their physico-chemical
	and ADMET properties have been proposed in recent years. These methods
	are discussed, and some possible future directions in this rapidly
	developing field are described.},
  doi = {10.1093/bioinformatics/bti314},
  keywords = {Chemistry, Pharmaceutical; Computational Biology; Databases, Factual;
	Drug Design; Models, Chemical; Models, Molecular; Pharmaceutical
	Preparations; Structure-Activity Relationship},
  owner = {vert},
  pii = {bti314},
  pmid = {15713739},
  timestamp = {2007.08.02},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti314}
}

@article{Kueffner2012Inferring,
  author = {K{\"u}ffner, R. and Petri, T. and Tavakkolkhah, P. and Windhager,
	L. and Zimmer, R.},
  title = {Inferring gene regulatory networks by {ANOVA}},
  journal = {Bioinformatics},
  year = {2012},
  doi = {10.1093/bioinformatics/bts143},
  owner = {anne-clairehaury},
  timestamp = {2012.03.23},
  url = {http://dx.doi.org/10.1093/bioinformatics/bts143}
}

@article{Kahraman2007Shape,
  author = {A. Kahraman and R. J. Morris and R. A. Laskowski and J. M. Thornton},
  title = {Shape variation in protein binding pockets and their ligands.},
  journal = {J. Mol. Biol.},
  year = {2007},
  volume = {368},
  pages = {283--301},
  number = {1},
  month = {Apr},
  abstract = {A common assumption about the shape of protein binding pockets is
	that they are related to the shape of the small ligand molecules
	that can bind there. But to what extent is that assumption true?
	Here we use a recently developed shape matching method to compare
	the shapes of protein binding pockets to the shapes of their ligands.
	We find that pockets binding the same ligand show greater variation
	in their shapes than can be accounted for by the conformational variability
	of the ligand. This suggests that geometrical complementarity in
	general is not sufficient to drive molecular recognition. Nevertheless,
	we show when considering only shape and size that a significant proportion
	of the recognition power of a binding pocket for its ligand resides
	in its shape. Additionally, we observe a "buffer zone" or a region
	of free space between the ligand and protein, which results in binding
	pockets being on average three times larger than the ligand that
	they bind.},
  doi = {10.1016/j.jmb.2007.01.086},
  keywords = {Binding Sites; Computer Simulation; Ligands; Models, Molecular; Models,
	Statistical; Protein Binding; Protein Conformation; Protein Folding},
  owner = {laurent},
  pii = {S0022-2836(07)00164-7},
  pmid = {17337005},
  timestamp = {2008.07.08},
  url = {http://dx.doi.org/10.1016/j.jmb.2007.01.086}
}

@article{Kalatzis2003Support,
  author = {I. Kalatzis and D. Pappas and N. Piliouras and D. Cavouras},
  title = {Support vector machines based analysis of brain {SPECT} images for
	determining cerebral abnormalities in asymptomatic diabetic patients.},
  journal = {Med {I}nform {I}nternet {M}ed},
  year = {2003},
  volume = {28},
  pages = {221--230},
  number = {3},
  month = {Sep},
  abstract = {Purpose: {A}n image processing method was developed to investigate
	whether brain {SPECT} images of patients with diabetes mellitus type
	{II} ({DMII}) and no brain damage differ from those of normal subjects.
	{M}aterials and methods: {T}wenty-five {DMII} patients and eight
	healthy volunteers underwent brain 99m{T}c-{B}icisate {SPECT} examination.
	{A} semi-automatic method, allowing for physician's interaction,
	was developed to delineate specific brain regions ({ROI}s) on the
	{SPECT} images. {T}wenty-eight features from the grey-level histogram
	and the spatial-dependence matrix were computed from numerous small
	image-samples collected from each specific {ROI}. {C}lassification
	into 'diabetics' and 'non-diabetics' was performed for each {ROI}
	separately. {T}he classical least squares-minimum distance ({LSMD})
	classifier and the recently developed support vector machines ({SVM})
	classifier were used. {S}ystem performance was evaluated by means
	of the leave-one-out method; one sample was left out, the classifier
	was trained by the rest of the samples, and the left-out sample was
	classified. {B}y repeating for all samples, the classifier's performance
	could be tested on data not incorporated in its design. {R}esults:
	{H}ighest classification accuracies ({LSMD}: 97.8\%, {SVM}: 99.1\%)
	were achieved at the right occipital lobule employing two features,
	the standard deviation and entropy. {F}or the rest of the {ROI}s
	classification accuracies ranged between 84.5 and 98.6\%. {C}onclusion:
	{O}ur findings indicate cerebral blood flow disruption in patients
	with {DMII}. {T}he proposed system may assist physicians in evaluating
	cerebral blood flow in patients with {DMII} undergoing brain {SPECT}.},
  doi = {10.1080/14639230310001613449},
  pii = {YC711HD68JH0RXQY},
  url = {http://dx.doi.org/10.1080/14639230310001613449}
}

@article{Kalatzis2004Design,
  author = {I. Kalatzis and N. Piliouras and E. Ventouras and C. C. Papageorgiou
	and A. D. Rabavilas and D. Cavouras},
  title = {Design and implementation of an {SVM}-based computer classification
	system for discriminating depressive patients from healthy controls
	using the {P}600 component of {ERP} signals.},
  journal = {Comput {M}ethods {P}rograms {B}iomed},
  year = {2004},
  volume = {75},
  pages = {11--22},
  number = {1},
  month = {Jul},
  abstract = {A computer-based classification system has been designed capable of
	distinguishing patients with depression from normal controls by event-related
	potential ({ERP}) signals using the {P}600 component. {C}linical
	material comprised 25 patients with depression and an equal number
	of gender and aged-matched healthy controls. {A}ll subjects were
	evaluated by a computerized version of the digit span {W}echsler
	test. {EEG} activity was recorded and digitized from 15 scalp electrodes
	(leads). {S}eventeen features related to the shape of the waveform
	were generated and were employed in the design of an optimum support
	vector machine ({SVM}) classifier at each lead. {T}he outcomes of
	those {SVM} classifiers were selected by a majority-vote engine ({MVE}),
	which assigned each subject to either the normal or depressive classes.
	{MVE} classification accuracy was 94\% when using all leads and 92\%
	or 82\% when using only the right or left scalp leads, respectively.
	{T}hese findings support the hypothesis that depression is associated
	with dysfunction of right hemisphere mechanisms mediating the processing
	of information that assigns a specific response to a specific stimulus,
	as those mechanisms are reflected by the {P}600 component of {ERP}s.
	{O}ur method may aid the further understanding of the neurophysiology
	underlying depression, due to its potentiality to integrate theories
	of depression and psychophysiology.},
  doi = {10.1016/j.cmpb.2003.09.003},
  pdf = {../local/Kalatzis2004Design.pdf},
  file = {Kalatzis2004Design.pdf:local/Kalatzis2004Design.pdf:PDF},
  pii = {S0169260703001305},
  url = {http://dx.doi.org/10.1016/j.cmpb.2003.09.003}
}

@article{Kallioniemi2010DNA,
  author = {Anne Kallioniemi},
  title = {{DNA} copy number analysis on tissue microarrays.},
  journal = {Methods Mol Biol},
  year = {2010},
  volume = {664},
  pages = {127--134},
  abstract = {Detection of DNA sequence copy number changes is essential in both
	clinical practice and basic research, especially in cancer research.
	The combination of fluorescence in situ hybridization (FISH) and
	tissue microarray (TMA) technology provides high-throughput means
	for the evaluation of genetic aberrations in a large number of tissue
	samples. FISH on TMA is technically demanding and several protocols
	that include a variety of tissue pretreatment steps have been developed
	to improve the success of this methodology. Despite of the technical
	difficulties, FISH analysis on TMA has been successfully used not
	only to uncover genetic alterations in various malignancies but to
	also rapidly establish the clinical significance of such changes.},
  doi = {10.1007/978-1-60761-806-5_13},
  institution = {Institute of Medical Technology, University of Tampere and Tampere
	University Hospital, Tampere, Finland, anne.kallioniemi@uta.fi.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {20690059},
  timestamp = {2010.08.08},
  url = {http://dx.doi.org/10.1007/978-1-60761-806-5_13}
}

@article{Kallioniemi2008CGH,
  author = {Kallioniemi, A.},
  title = {{CGH} microarrays and cancer.},
  journal = {Curr Opin Biotechnol},
  year = {2008},
  volume = {19},
  pages = {36--40},
  number = {1},
  month = {Feb},
  abstract = {Genetic alterations are a key feature of cancer cells and typically
	target biological processes and pathways that contribute to cancer
	pathogenesis. Array-based comparative genomic hybridization (aCGH)
	has provided a wealth of new information on copy number changes in
	cancer on a genome-wide level and aCGH data have also been utilized
	in cancer classification. More importantly, aCGH analyses have allowed
	highly accurate localization of specific genetic alterations that,
	for example, are associated with tumor progression, therapy response,
	or patient outcome. The genes involved in these aberrations are likely
	to contribute to cancer pathogenesis, and the high-resolution mapping
	by aCGH greatly facilitates the subsequent identification of these
	cancer-associated genes.},
  doi = {10.1016/j.copbio.2007.11.004},
  pdf = {../local/Kallioniemi2008CGH.pdf},
  file = {Kallioniemi2008CGH.pdf:Kallioniemi2008CGH.pdf:PDF},
  institution = {Laboratory of Cancer Genetics, Tampere University Hospital and Institute
	of Medical Technology, University of Tampere, Biokatu 6, Tampere
	FI-33014, Finland. anne.kallioniemi@uta.fi},
  keywords = {csbcbook, csbcbook-ch2, cgh},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0958-1669(07)00148-6},
  pmid = {18162393},
  timestamp = {2009.10.18},
  url = {http://dx.doi.org/10.1016/j.copbio.2007.11.004}
}

@article{Kallioniemi1992Comparative,
  author = {A. Kallioniemi and O. P. Kallioniemi and D. Sudar and D. Rutovitz
	and J. W. Gray and F. Waldman and D. Pinkel},
  title = {Comparative genomic hybridization for molecular cytogenetic analysis
	of solid tumors.},
  journal = {Science},
  year = {1992},
  volume = {258},
  pages = {818--821},
  number = {5083},
  month = {Oct},
  abstract = {Comparative genomic hybridization produces a map of DNA sequence copy
	number as a function of chromosomal location throughout the entire
	genome. Differentially labeled test DNA and normal reference DNA
	are hybridized simultaneously to normal chromosome spreads. The hybridization
	is detected with two different fluorochromes. Regions of gain or
	loss of DNA sequences, such as deletions, duplications, or amplifications,
	are seen as changes in the ratio of the intensities of the two fluorochromes
	along the target chromosomes. Analysis of tumor cell lines and primary
	bladder tumors identified 16 different regions of amplification,
	many in loci not previously known to be amplified.},
  pdf = {../local/Kallioniemi1992Comparative.pdf},
  file = {Kallioniemi1992Comparative.pdf:Kallioniemi1992Comparative.pdf:PDF},
  institution = {Department of Laboratory Medicine, University of California, San
	Francisco 94143.},
  keywords = {csbcbook, csbcbook-ch2, cgh},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {1359641},
  timestamp = {2009.10.18},
  url = {http://www.sciencemag.org/cgi/reprint/258/5083/818}
}

@article{Kallioniemi2001Tissue,
  author = {O. P. Kallioniemi and U. Wagner and J. Kononen and G. Sauter},
  title = {Tissue microarray technology for high-throughput molecular profiling
	of cancer.},
  journal = {Hum Mol Genet},
  year = {2001},
  volume = {10},
  pages = {657--662},
  number = {7},
  month = {Apr},
  abstract = {Tissue microarray (TMA) technology allows rapid visualization of molecular
	targets in thousands of tissue specimens at a time, either at the
	DNA, RNA or protein level. The technique facilitates rapid translation
	of molecular discoveries to clinical applications. By revealing the
	cellular localization, prevalence and clinical significance of candidate
	genes, TMAs are ideally suitable for genomics-based diagnostic and
	drug target discovery. TMAs have a number of advantages compared
	with conventional techniques. The speed of molecular analyses is
	increased by more than 100-fold, precious tissues are not destroyed
	and a very large number of molecular targets can be analyzed from
	consecutive TMA sections. The ability to study archival tissue specimens
	is an important advantage as such specimens are usually not applicable
	in other high-throughput genomic and proteomic surveys. Construction
	and analysis of TMAs can be automated, increasing the throughput
	even further. Most of the applications of the TMA technology have
	come from the field of cancer research. Examples include analysis
	of the frequency of molecular alterations in large tumor materials,
	exploration of tumor progression, identification of predictive or
	prognostic factors and validation of newly discovered genes as diagnostic
	and therapeutic targets.},
  institution = {Cancer Genetics Branch, National Human Genome Research Institute,
	National Institutes of Health, 49 Convent Drive, Room 4A24, MSC 4465,
	Bethesda, MD 20892, USA. okalli@nhgri.nih.gov},
  keywords = {Animals; Genetic Techniques; Humans; In Situ Hybridization, methods;
	Neoplasms, metabolism/pathology; Oligonucleotide Array Sequence Analysis;
	Tissue Distribution},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {11257096},
  timestamp = {2010.08.08}
}

@article{Kalousis2007Stability,
  author = {Kalousis, A. and Prados, J. and Hilario, M.},
  title = {Stability of feature selection algorithms: a study on high-dimensional
	spaces},
  journal = {Knowledge and information systems},
  year = {2007},
  volume = {12},
  pages = {95--116},
  number = {1},
  publisher = {Springer}
}

@article{Kam96documentimage,
  author = {A. C. Kam and G. E. Kopec},
  title = {Document image decoding by heuristic search},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {1996},
  volume = {18},
  pages = {945--950}
}

@article{Kamangar2006Patterns,
  author = {Kamangar, F. and Dores, G. M. and Anderson, W. F.},
  title = {Patterns of cancer incidence, mortality, and prevalence across five
	continents: defining priorities to reduce cancer disparities in different
	geographic regions of the world},
  journal = {J. Clin. Oncol.},
  year = {2006},
  volume = {24},
  pages = {2137--2150},
  number = {14},
  month = {May},
  abstract = {Efforts to reduce global cancer disparities begin with an understanding
	of geographic patterns in cancer incidence, mortality, and prevalence.
	Using the GLOBOCAN (2002) and Cancer Incidence in Five Continents
	databases, we describe overall cancer incidence, mortality, and prevalence,
	age-adjusted temporal trends, and age-specific incidence patterns
	in selected geographic regions of the world. For the eight most common
	malignancies-cancers of lung, breast, colon and rectum, stomach,
	prostate, liver, cervix, and esophagus-the most important risk factors,
	cancer prevention and control measures are briefly reviewed. In 2002,
	an estimated 11 million new cancer cases and 7 million cancer deaths
	were reported worldwide; nearly 25 million persons were living with
	cancer. Among the eight most common cancers, global disparities in
	cancer incidence, mortality, and prevalence are evident, likely due
	to complex interactions of nonmodifiable (ie, genetic susceptibility
	and aging) and modifiable risk factors (ie, tobacco, infectious agents,
	diet, and physical activity). Indeed, when risk factors among populations
	are intertwined with differences in individual behaviors, cultural
	beliefs and practices, socioeconomic conditions, and health care
	systems, global cancer disparities are inevitable. For the eight
	most common cancers, priorities for reducing cancer disparities are
	discussed.},
  doi = {10.1200/JCO.2005.05.2308},
  institution = {Epidemiology and Biostatistics Branches, Division of Cancer Epidemiology and
	Genetics, National Institutes of Health, Department of Health and
	Human Services, Rockville, MD 20852-7244, USA.},
  owner = {jp},
  pii = {24/14/2137},
  pmid = {16682732},
  timestamp = {2008.11.26},
  url = {http://dx.doi.org/10.1200/JCO.2005.05.2308}
}

@article{Kamath2003Systematic,
  author = {Kamath, R. S. and Fraser, A. G. and Dong, Y. and Poulin, G. and Durbin,
	R. and Gotta, M. and Kanapin, A. and Le Bot, N. and Moreno, S. and
	Sohrmann, M. and Welchman, D. P. and Zipperlen, P. and Ahringer, J.},
  title = {Systematic functional analysis of the {C}aenorhabditis elegans genome
	using {RNA}i},
  journal = {Nature},
  year = {2003},
  volume = {421},
  pages = {231--237},
  number = {6920},
  month = {Jan},
  abstract = {A principal challenge currently facing biologists is how to connect
	the complete {DNA} sequence of an organism to its development and
	behaviour. {L}arge-scale targeted-deletions have been successful
	in defining gene functions in the single-celled yeast {S}accharomyces
	cerevisiae, but comparable analyses have yet to be performed in an
	animal. {H}ere we describe the use of {RNA} interference to inhibit
	the function of approximately 86\% of the 19,427 predicted genes of
	{C}. elegans. {W}e identified mutant phenotypes for 1,722 genes,
	about two-thirds of which were not previously associated with a phenotype.
	{W}e find that genes of similar functions are clustered in distinct,
	multi-megabase regions of individual chromosomes; genes in these
	regions tend to share transcriptional profiles. {O}ur resulting data
	set and reusable {RNA}i library of 16,757 bacterial clones will facilitate
	systematic analyses of the connections among gene sequence, chromosomal
	location and gene function in {C}. elegans.},
  doi = {10.1038/nature01278},
  pdf = {../local/Kamath2003Systematic.pdf},
  file = {Kamath2003Systematic.pdf:local/Kamath2003Systematic.pdf:PDF},
  owner = {vert},
  url = {http://dx.doi.org/10.1038/nature01278}
}

@inproceedings{Kandola2003Learning,
  author = {Kandola, J. and Shawe-Taylor, J. and Cristianini, N.},
  title = {Learning {S}emantic {S}imilarity},
  booktitle = {Advances in {N}eural {I}nformation {P}rocessing {S}ystems 15},
  year = {2003},
  editor = {Suzanna Becker and Sebastian Thrun and Klaus Obermayer},
  publisher = {MIT Press}
}

@techreport{Kandola2002On,
  author = {Kandola, J. and Shawe-Taylor, J. and Cristianini, N.},
  title = {On the application of diffusion kernel to text data},
  institution = {NeuroCOLT},
  year = {2002},
  note = {NeuroCOLT Technical Report NC-TR-02-122},
  pdf = {../local/kand02.ps.gz},
  file = {kand02.ps.gz:local/kand02.ps.gz:PostScript},
  subject = {kernel},
  url = {http://www.neurocolt.com/abs/2002/abs02122.html}
}

@article{Kanehisa2001Prediction,
  author = {Kanehisa, M.},
  title = {Prediction of higher order functional networks from genomic data},
  journal = {Pharmacogenomics},
  year = {2001},
  volume = {2},
  pages = {373--385},
  number = {4},
  doi = {10.1517/14622416.2.4.373},
  owner = {vert},
  url = {http://dx.doi.org/10.1517/14622416.2.4.373}
}

@article{Kanehisa1997database,
  author = {M. Kanehisa},
  title = {A database for post-genome analysis},
  journal = {Trends {G}enet.},
  year = {1997},
  volume = {13},
  pages = {375--376},
  doi = {10.1016/S0168-9525(97)01223-7},
  pdf = {../local/Kanehisa1997database.pdf},
  file = {Kanehisa1997database.pdf:local/Kanehisa1997database.pdf:PDF},
  subject = {bionet},
  url = {http://dx.doi.org/10.1016/S0168-9525(97)01223-7}
}

@article{Kanehisa2002KEGG,
  author = {M. Kanehisa and S. Goto and S. Kawashima and A. Nakaya},
  title = {The {KEGG} databases at {G}enome{N}et},
  journal = {Nucleic {A}cids {R}es.},
  year = {2002},
  volume = {30},
  pages = {42--46},
  pdf = {../local/kane02.pdf},
  file = {kane02.pdf:local/kane02.pdf:PDF},
  subject = {bionet},
  url = {http://nar.oupjournals.org/cgi/content/full/30/1/42}
}

@article{Kanehisa2004KEGG,
  author = {Kanehisa, M. and Goto, S. and Kawashima, S. and Okuno, Y. and Hattori,
	M.},
  title = {The {KEGG} resource for deciphering the genome.},
  journal = {Nucleic {A}cids {R}es.},
  year = {2004},
  volume = {32},
  pages = {D277--D280},
  number = {Database issue},
  month = {Jan},
  abstract = {A grand challenge in the post-genomic era is a complete computer representation
	of the cell and the organism, which will enable computational prediction
	of higher-level complexity of cellular processes and organism behavior
	from genomic information. {T}oward this end we have been developing
	a knowledge-based approach for network prediction, which is to predict,
	given a complete set of genes in the genome, the protein interaction
	networks that are responsible for various cellular processes. {KEGG}
	at http://www.genome.ad.jp/kegg/ is the reference knowledge base
	that integrates current knowledge on molecular interaction networks
	such as pathways and complexes ({PATHWAY} database), information
	about genes and proteins generated by genome projects ({GENES}/{SSDB}/{KO}
	databases) and information about biochemical compounds and reactions
	({COMPOUND}/{GLYCAN}/{REACTION} databases). {T}hese three types of
	database actually represent three graph objects, called the protein
	network, the gene universe and the chemical universe. {N}ew efforts
	are being made to abstract knowledge, both computationally and manually,
	about ortholog clusters in the {KO} ({KEGG} {O}rthology) database,
	and to collect and analyze carbohydrate structures in the {GLYCAN}
	database.},
  doi = {10.1093/nar/gkh063},
  keywords = {glycans},
  pii = {32/suppl_1/D277},
  url = {http://dx.doi.org/10.1093/nar/gkh063}
}

@article{Kaper2004BCI,
  author = {Matthias Kaper and Peter Meinicke and Ulf Grossekathoefer and Thomas
	Lingner and Helge Ritter},
  title = {B{CI} {C}ompetition 2003--{D}ata set {II}b: support vector machines
	for the {P}300 speller paradigm.},
  journal = {I{EEE} {T}rans {B}iomed {E}ng},
  year = {2004},
  volume = {51},
  pages = {1073--1076},
  number = {6},
  month = {Jun},
  abstract = {We propose an approach to analyze data from the {P}300 speller paradigm
	using the machine-learning technique support vector machines. {I}n
	a conservative classification scheme, we found the correct solution
	after five repetitions. {W}hile the classification within the competition
	is designed for offline analysis, our approach is also well-suited
	for a real-world online solution: {I}t is fast, requires only 10
	electrode positions and demands only a small amount of preprocessing.},
  keywords = {Algorithms, Animals, Antisense, Artificial Intelligence, Automated,
	Autonomic Nervous System, Brain, Cell Line, Child, Cluster Analysis,
	Cognition, Comparative Study, Computational Biology, Computer Simulation,
	Computer-Assisted, DNA Fingerprinting, Databases, Drug Evaluation,
	Electroencephalography, Emotions, Event-Related Potentials, Factual,
	Fluorescence, Fuzzy Logic, Gene Silencing, Gene Targeting, Genetic,
	Hela Cells, Humans, Imaging, Intracellular Space, Microscopy, Models,
	Monitoring, Neoplasms, Neural Networks (Computer), Non-U.S. Gov't,
	Oligonucleotides, P.H.S., P300, Pattern Recognition, Peptides, Physiologic,
	Preclinical, Predictive Value of Tests, Preschool, Prognosis, Protein
	Interaction Mapping, Protein Structure, Proteins, Proteomics, Quantitative
	Structure-Activity Relationship, Quaternary, RNA, RNA Interference,
	Recognition (Psychology), Reproducibility of Results, Research Support,
	Sensitivity and Specificity, Signal Processing, Small Interfering,
	Software, Thionucleotides, Three-Dimensional, Tumor, U.S. Gov't,
	User-Computer Interface, Word Processing, 15188881}
}

@article{Kapetanovic2004Overview,
  author = {Izet M Kapetanovic and Simon Rosenfeld and Grant Izmirlian},
  title = {Overview of commonly used bioinformatics methods and their applications.},
  journal = {Ann {N} {Y} {A}cad {S}ci},
  year = {2004},
  volume = {1020},
  pages = {10--21},
  month = {May},
  abstract = {Bioinformatics, in its broad sense, involves application of computer
	processes to solve biological problems. {A} wide range of computational
	tools are needed to effectively and efficiently process large amounts
	of data being generated as a result of recent technological innovations
	in biology and medicine. {A} number of computational tools have been
	developed or adapted to deal with the experimental riches of complex
	and multivariate data and transition from data collection to information
	or knowledge. {T}hese include a wide variety of clustering and classification
	algorithms, including self-organized maps ({SOM}), artificial neural
	networks ({ANN}), support vector machines ({SVM}), fuzzy logic, and
	even hyphenated techniques as neuro-fuzzy networks. {T}hese bioinformatics
	tools are being evaluated and applied in various medical areas including
	early detection, risk assessment, classification, and prognosis of
	cancer. {T}he goal of these efforts is to develop and identify bioinformatics
	methods with optimal sensitivity, specificity, and predictive capabilities.},
  doi = {10.1196/annals.1310.003},
  pdf = {../local/Kapetanovic2004Overview.pdf},
  file = {Kapetanovic2004Overview.pdf:local/Kapetanovic2004Overview.pdf:PDF},
  keywords = {Computational Biology, Fuzzy Logic, Humans, Neoplasms, Neural Networks
	(Computer), Prognosis, 15208179},
  pii = {1020/1/10},
  url = {http://dx.doi.org/10.1196/annals.1310.003}
}

@article{Kaplunovsky2009Statistics,
  author = {Kaplunovsky, A. and Khailenko, V. and Bolshoy, A. and Atambayeva,
	S. and Ivashchenko, A.},
  title = {Statistics of Exon Lengths in Animals, Plants, Fungi, and Protists},
  year = {2009},
  journal = {World Academy of Science, Engineering and Technology},
  volume = {52}
}

@article{Kapp2006Discovery,
  author = {Amy V Kapp and Stefanie S Jeffrey and Anita Langer{\o}d and Anne-Lise
	B{\o}rresen-Dale and Wonshik Han and Dong-Young Noh and Ida R K Bukholm
	and Monica Nicolau and Patrick O Brown and Robert Tibshirani},
  title = {Discovery and validation of breast cancer subtypes.},
  journal = {BMC Genomics},
  year = {2006},
  volume = {7},
  pages = {231},
  abstract = {Previous studies demonstrated breast cancer tumor tissue samples could
	be classified into different subtypes based upon DNA microarray profiles.
	The most recent study presented evidence for the existence of five
	different subtypes: normal breast-like, basal, luminal A, luminal
	B, and ERBB2+. Based upon the analysis of 599 microarrays (five separate
	cDNA microarray datasets) using a novel approach, we present evidence
	in support of the most consistently identifiable subtypes of breast
	cancer tumor tissue microarrays being: ESR1+/ERBB2-, ESR1-/ERBB2-,
	and ERBB2+ (collectively called the ESR1/ERBB2 subtypes). We validate
	all three subtypes statistically and show the subtype to which a
	sample belongs is a significant predictor of overall survival and
	distant-metastasis free probability. As a consequence of the statistical
	validation procedure we have a set of centroids which can be applied
	to any microarray (indexed by UniGene Cluster ID) to classify it
	to one of the ESR1/ERBB2 subtypes. Moreover, the method used to define
	the ESR1/ERBB2 subtypes is not specific to the disease. The method
	can be used to identify subtypes in any disease for which there are
	at least two independent microarray datasets of disease samples.},
  doi = {10.1186/1471-2164-7-231},
  institution = {Department of Statistics, Stanford University, Stanford, CA, USA.
	AKapp@stanford.edu},
  keywords = {Algorithms; Breast Neoplasms, classification/genetics/pathology; Female;
	Gene Expression Profiling, methods/statistics /&/ numerical data;
	Humans; Multivariate Analysis; Oligonucleotide Array Sequence Analysis,
	methods/statistics /&/ numerical data; Proportional Hazards Models;
	Risk Factors; Survival Analysis},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2164-7-231},
  pmid = {16965636},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1186/1471-2164-7-231}
}

@article{Karchin2002Classifying,
  author = {Karchin, R. and Karplus, K. and Haussler, D.},
  title = {Classifying {G}-protein coupled receptors with support vector machines},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {147--159},
  number = {1},
  abstract = {Motivation: {T}he enormous amount of protein sequence data uncovered
	by genome research has increased the demand for computer software
	that can automate the recognition of new proteins. {W}e discuss the
	relative merits of various automated methods for recognizing {G}-{P}rotein
	{C}oupled {R}eceptors ({GPCR}s), a superfamily of cell membrane proteins.
	{GPCR}s are found in a wide range of organisms and are central to
	a cellular signalling network that regulates many basic physiological
	processes. {T}hey are the focus of a significant amount of current
	pharmaceutical research because they play a key role in many diseases.
	{H}owever, their tertiary structures remain largely unsolved. {T}he
	methods described in this paper use only primary sequence information
	to make their predictions. {W}e compare a simple nearest neighbor
	approach ({BLAST}), methods based on multiple alignments generated
	by a statistical profile {H}idden {M}arkov {M}odel ({HMM}), and methods,
	including {S}upport {V}ector {M}achines ({SVM}s), that transform
	protein sequences into fixed-length feature vectors. {R}esults: {T}he
	last is the most computationally expensive method, but our experiments
	show that, for those interested in annotation-quality classification,
	the results are worth the effort. {I}n two-fold cross-validation
	experiments testing recognition of {GPCR} subfamilies that bind a
	specific ligand (such as a histamine molecule), the errors per sequence
	at the {M}inimum {E}rror {P}oint ({MEP}) were 13.7\% for multi-class
	{SVM}s, 17.1\% for our {SVM}tree method of hierarchical multi-class
	{SVM} classification, 25.5\% for {BLAST}, 30\% for profile {HMM}s,
	and 49\% for classification based on nearest neighbor feature vector
	{K}ernel {N}earest {N}eighbor (kern{NN}). {T}he percentage of true
	positives recognized before the first false positive was 65\% for
	both {SVM} methods, 13\% for {BLAST}, 5\% for profile {HMM}s and 4\%
	for kern{NN}. {A}vailability: {W}e have set up a web server for {GPCR}
	subfamily classification based on hierarchical multi-class {SVM}s
	at http://www.soe.ucsc.edu/research/compbio/gpcr-subclass. {B}y scanning
	predicted peptides found in the human genome with the {SVM}tree server,
	we have identified a large number of genes that encode {GPCR}s. {A}
	list of our predictions for human {GPCR}s is available at http://www.soe.ucsc.edu/research/compbio/gpcr\_hg/class\_results.
	{W}e also provide suggested subfamily classification for 18 sequences
	previously identified as unclassified {C}lass {A} (rhodopsin-like)
	{GPCR}s in {GPCRDB} ({H}orn et al. , {N}ucleic {A}cids {R}es. , 26,
	277--281, 1998), available at http://www.soe.ucsc.edu/research/compbio/gpcr/class{A}\_unclassified/},
  comment = {Un papier int{\'e}ressant sur l'utilisation du Fisher kernel pour classer
	les GPCR, une famille de prot{\'e}ines importante pour l'industrie pharmaceutique.},
  pdf = {../local/Karchin2002Classifying.pdf},
  file = {Karchin2002Classifying.pdf:local/Karchin2002Classifying.pdf:PDF},
  keywords = {fisher-kernel sequence-classification biosvm},
  subject = {biokernel},
  url = {http://bioinformatics.oupjournals.org/cgi/reprint/18/1/147}
}

@article{Karchin2005Improving,
  author = {R. Karchin and L. Kelly and A. Sali},
  title = {Improving functional annotation of non-synonomous {SNP}s with information
	theory.},
  journal = {Pac {S}ymp {B}iocomput},
  year = {2005},
  pages = {397--408},
  abstract = {Automated functional annotation of ns{SNP}s requires that amino-acid
	residue changes are represented by a set of descriptive features,
	such as evolutionary conservation, side-chain volume change, effect
	on ligand-binding, and residue structural rigidity. {I}dentifying
	the most informative combinations of features is critical to the
	success of a computational prediction method. {W}e rank 32 features
	according to their mutual information with functional effects of
	amino-acid substitutions, as measured by in vivo assays. {I}n addition,
	we use a greedy algorithm to identify a subset of highly informative
	features. {T}he method is simple to implement and provides a quantitative
	measure for selecting the best predictive features given a set of
	features that a human expert believes to be informative. {W}e demonstrate
	the usefulness of the selected highly informative features by cross-validated
	tests of a computational classifier, a support vector machine ({SVM}).
	{T}he {SVM}'s classification accuracy is highly correlated with the
	ranking of the input features by their mutual information. {T}wo
	features describing the solvent accessibility of "wild-type" and
	"mutant" amino-acid residues and one evolutionary feature based on
	superfamily-level multiple alignments produce comparable overall
	accuracy and 6\% fewer false positives than a 32-feature set that
	considers physiochemical properties of amino acids, protein electrostatics,
	amino-acid residue flexibility, and binding interactions.},
  keywords = {biosvm}
}

@article{Karklin2005Classification,
  author = {Karklin, Y. and Meraz, R. F. and Holbrook, S. R.},
  title = {Classification of non-coding {RNA} using graph representations of
	secondary structure.},
  journal = {Pac. {S}ymp. {B}iocomput.},
  year = {2005},
  pages = {4--15},
  abstract = {Some genes produce transcripts that function directly in regulatory,
	catalytic, or structural roles in the cell. {T}hese non-coding {RNA}s
	are prevalent in all living organisms, and methods that aid the understanding
	of their functional roles are essential. {RNA} secondary structure,
	the pattern of base-pairing, contains the critical information for
	determining the three dimensional structure and function of the molecule.
	{I}n this work we examine whether the basic geometric and topological
	properties of secondary structure are sufficient to distinguish between
	{RNA} families in a learning framework. {F}irst, we develop a labeled
	dual graph representation of {RNA} secondary structure by adding
	biologically meaningful labels to the dual graphs proposed by {G}an
	et al [1]. {N}ext, we define a similarity measure directly on the
	labeled dual graphs using the recently developed marginalized kernels
	[2]. {U}sing this similarity measure, we were able to train {S}upport
	{V}ector {M}achine classifiers to distinguish {RNA}s of known families
	from random {RNA}s with similar statistics. {F}or 22 of the 25 families
	tested, the classifier achieved better than 70\% accuracy, with much
	higher accuracy rates for some families. {T}raining a set of classifiers
	to automatically assign family labels to {RNA}s using a one vs. all
	multi-class scheme also yielded encouraging results. {F}rom these
	initial learning experiments, we suggest that the labeled dual graph
	representation, together with kernel machine methods, has potential
	for use in automated analysis and classification of uncharacterized
	{RNA} molecules or efficient genome-wide screens for {RNA} molecules
	from existing families.},
  keywords = {biosvm},
  url = {http://helix-web.stanford.edu/psb05/karklin.pdf}
}

@article{Karklin2005Classificationa,
  author = {Yan Karklin and Richard F Meraz and Stephen R Holbrook},
  title = {{C}lassification of non-coding {RNA} using graph representations
	of secondary structure.},
  journal = {Pac Symp Biocomput},
  year = {2005},
  pages = {4--15},
  pmid = {15759609},
  timestamp = {2006.08.03},
  keywords = {Base Sequence, Models, Molecular, Non-, Nucleic Acid Conformation,
	P.H.S., RNA, Research Support, U.S. Gov't, Untranslated, 15759609},
  abstract = {Some genes produce transcripts that function directly in regulatory,
	catalytic, or structural roles in the cell. These non-coding RNAs
	are prevalent in all living organisms, and methods that aid the understanding
	of their functional roles are essential. RNA secondary structure,
	the pattern of base-pairing, contains the critical information for
	determining the three dimensional structure and function of the molecule.
	In this work we examine whether the basic geometric and topological
	properties of secondary structure are sufficient to distinguish between
	RNA families in a learning framework. First, we develop a labeled
	dual graph representation of RNA secondary structure by adding biologically
	meaningful labels to the dual graphs proposed by Gan et al [1]. Next,
	we define a similarity measure directly on the labeled dual graphs
	using the recently developed marginalized kernels [2]. Using this
	similarity measure, we were able to train Support Vector Machine
	classifiers to distinguish RNAs of known families from random RNAs
	with similar statistics. For 22 of the 25 families tested, the classifier
	achieved better than 70\% accuracy, with much higher accuracy rates
	for some families. Training a set of classifiers to automatically
	assign family labels to RNAs using a one vs. all multi-class scheme
	also yielded encouraging results. From these initial learning experiments,
	we suggest that the labeled dual graph representation, together with
	kernel machine methods, has potential for use in automated analysis
	and classification of uncharacterized RNA molecules or efficient
	genome-wide screens for RNA molecules from existing families.}
}

@incollection{Baringhaus2005A,
  author = {Baringhaus, Karl-Heinz and Hessler, Gerhard},
  title = {A Chemical Genomics Approach for Ion Channel Modulators},
  booktitle = {Chemogenomics in Drug Discovery},
  publisher = {Wiley-VCH},
  year = {2005},
  chapter = {8},
  pages = {221--242}
}

@article{Karni2009network-based,
  author = {Karni, S. and Soreq, H. and Sharan, R.},
  title = {A network-based method for predicting disease-causing genes},
  journal = {J. Comput. Biol.},
  year = {2009},
  volume = {16},
  number = {2},
  pages = {181--189},
  month = {Feb},
  doi = {10.1089/cmb.2008.05TT},
  url = {http://dx.doi.org/10.1089/cmb.2008.05TT},
  pii = {10.1089/cmb.2008.05TT},
  pmid = {19193144},
  institution = {Blavatnik School of Computer Science, Tel-Aviv University, Tel-Aviv,
	Israel.},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Karni2009network-based.pdf},
  file = {Karni2009network-based.pdf:Karni2009network-based.pdf:PDF},
  owner = {jp},
  timestamp = {2011.09.24},
  abstract = {A fundamental problem in human health is the inference of disease-causing
	genes, with important applications to diagnosis and treatment. Previous
	work in this direction relied on knowledge of multiple loci associated
	with the disease, or causal genes for similar diseases, which limited
	its applicability. Here we present a new approach to causal gene
	prediction that is based on integrating protein-protein interaction
	network data with gene expression data under a condition of interest.
	The latter are used to derive a set of disease-related genes which
	is assumed to be in close proximity in the network to the causal
	genes. Our method applies a set-cover-like heuristic to identify
	a small set of genes that best "cover" the disease-related genes.
	We perform comprehensive simulations to validate our method and test
	its robustness to noise. In addition, we validate our method on real
	gene expression data and on gene specific knockouts. Finally, we
	apply it to suggest possible genes that are involved in myasthenia
	gravis.}
}

@article{BioCyc2005,
  author = {Karp, P. D. and Ouzounis, C. A. and Moore-Kochlacs, C. and Goldovsky,
	L. and Kaipa, P. and Ahren, D. and Tsoka, S. and Darzentas, N. and
	Kunin, V. and Lopez-Bigas, N.},
  title = {Expansion of the {B}io{C}yc collection of pathway/genome databases
	to 160 genomes},
  journal = {Nucleic {A}cids {R}es},
  year = {2005},
  volume = {33},
  pages = {6083--6089},
  number = {19},
  abstract = {The {B}io{C}yc database collection is a set of 160 pathway/genome
	databases ({PGDB}s) for most eukaryotic and prokaryotic species whose
	genomes have been completely sequenced to date. {E}ach {PGDB} in
	the {B}io{C}yc collection describes the genome and predicted metabolic
	network of a single organism, inferred from the {M}eta{C}yc database,
	which is a reference source on metabolic pathways from multiple organisms.
	{I}n addition, each bacterial {PGDB} includes predicted operons for
	the corresponding species. {T}he {B}io{C}yc collection provides a
	unique resource for computational systems biology, namely global
	and comparative analyses of genomes and metabolic networks, and a
	supplement to the {B}io{C}yc resource of curated {PGDB}s. {T}he {O}mics
	viewer available through the {B}io{C}yc website allows scientists
	to visualize combinations of gene expression, proteomics and metabolomics
	data on the metabolic maps of these organisms. {T}his paper discusses
	the computational methodology by which the {B}io{C}yc collection
	has been expanded, and presents an aggregate analysis of the collection
	that includes the range of number of pathways present in these organisms,
	and the most frequently observed pathways. {W}e seek scientists to
	adopt and curate individual {PGDB}s within the {B}io{C}yc collection.
	{O}nly by harnessing the expertise of many scientists we can hope
	to produce biological databases, which accurately reflect the depth
	and breadth of knowledge that the biomedical research community is
	producing.},
  keywords = {Animals Computational Biology *Databases, Genetic *Genome Genome,
	Archaeal Genome, Bacterial Genomics Humans Metabolism/genetics Research
	Support, N.I.H., Extramural Research Support, Non-U.S. Gov't Research
	Support, U.S. Gov't, P.H.S.}
}

@article{Karplus1998Hidden,
  author = {Karplus, K. and Barrett, C. and Hughey, R.},
  title = {Hidden {M}arkov {M}odels for {D}etecting {R}emote {P}rotein {H}omologies},
  journal = {Bioinformatics},
  year = {1998},
  volume = {14},
  number = {10},
  pages = {846--856},
  url = {http://www.cse.ucsc.edu/research/compbio/papers/w9824.ps},
  subject = {biocasp},
  pdf = {../local/karp98.pdf},
  file = {karp98.pdf:local/karp98.pdf:PDF}
}

@article{Karr2012whole,
  author = {Karr, J. R. and Sanghvi, J. C. and Macklin, D. N. and Gutschow, M.
	V. and Jacobs, J. M. and Bolival, B. and Assad-Garcia, N. and Glass,
	J. I. and Covert, M. W.},
  title = {A whole-cell computational model predicts phenotype from genotype.},
  journal = {Cell},
  year = {2012},
  volume = {150},
  number = {2},
  pages = {389--401},
  month = {Jul},
  doi = {10.1016/j.cell.2012.05.044},
  url = {http://dx.doi.org/10.1016/j.cell.2012.05.044},
  pii = {S0092-8674(12)00776-3},
  pmid = {22817898},
  institution = {Graduate Program in Biophysics, Stanford University, Stanford, CA
	94305, USA.},
  keywords = {Phenotype},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Karr2012whole.pdf},
  file = {Karr2012whole.pdf:Karr2012whole.pdf:PDF},
  owner = {jp},
  timestamp = {2012.10.24},
  abstract = {Understanding how complex phenotypes arise from individual molecules
	and their interactions is a primary challenge in biology that computational
	approaches are poised to tackle. We report a whole-cell computational
	model of the life cycle of the human pathogen Mycoplasma genitalium
	that includes all of its molecular components and their interactions.
	An integrative approach to modeling that combines diverse mathematics
	enabled the simultaneous inclusion of fundamentally different cellular
	processes and experimental measurements. Our whole-cell model accounts
	for all annotated gene functions and was validated against a broad
	range of data. The model provides insights into many previously unobserved
	cellular behaviors, including in vivo rates of protein-DNA association
	and an inverse relationship between the durations of DNA replication
	initiation and replication. In addition, experimental analysis directed
	by model predictions identified previously undetected kinetic parameters
	and biological functions. We conclude that comprehensive whole-cell
	models can be used to facilitate biological discovery.}
}

@inproceedings{Kashima2003Marginalized,
  author = {Kashima, H. and Tsuda, K. and Inokuchi, A.},
  title = {Marginalized {K}ernels between {L}abeled {G}raphs},
  booktitle = {Proceedings of the {T}wentieth {I}nternational {C}onference on {M}achine
	{L}earning},
  year = {2003},
  editor = {Fawcett, T. and Mishra, N.},
  pages = {321--328},
  address = {New York, NY, USA},
  publisher = {AAAI Press},
  pdf = {../local/Kashima2003Marginalized.pdf},
  file = {Kashima2003Marginalized.pdf:local/Kashima2003Marginalized.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@incollection{Kashima2004Kernels,
  author = {Kashima, H. and Tsuda, K. and Inokuchi, A.},
  title = {Kernels for graphs},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J. P.},
  pages = {155--170},
  address = {Cambridge, Massachusetts},
  keywords = {biosvm chemoinformatics},
  owner = {vert}
}

@article{Kass1995Reference,
  author = {Kass, R. E. and Wasserman, L.},
  title = {A Reference Bayesian Test for Nested Hypotheses and its Relationship
	to the Schwarz Criterion},
  journal = {J. Am. Stat. Assoc.},
  year = {1995},
  volume = {90},
  number = {431},
  pages = {928--934},
  doi = {10.2307/2291327},
  url = {http://dx.doi.org/10.2307/2291327},
  pdf = {../local/Kass1995Reference.pdf},
  file = {Kass1995Reference.pdf:Kass1995Reference.pdf:PDF},
  owner = {jp},
  timestamp = {2011.12.29}
}

@techreport{Kato2001Operator,
  author = {Kato, T.},
  title = {Operator dynamics in molecular biology},
  institution = {I.H.E.S.},
  year = {2001},
  number = {IHES/M/01/41},
  pdf = {../local/Kato2001Operator.pdf},
  file = {Kato2001Operator.pdf:local/Kato2001Operator.pdf:PDF},
  url = {http://www.ihes.fr/PREPRINTS/M01/Resu/resu-M01-41.html}
}

@article{Kato2005Selective,
  author = {Kato, T. and Tsuda, K. and Asai, K.},
  title = {{S}elective integration of multiple biological data for supervised
	network inference.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  number = {10},
  pages = {2488--2495},
  month = {May},
  doi = {10.1093/bioinformatics/bti339},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti339},
  pii = {bti339},
  pmid = {15728114},
  timestamp = {2007.02.01},
  abstract = {MOTIVATION: Inferring networks of proteins from biological data is
	a central issue of computational biology. Most network inference
	methods, including Bayesian networks, take unsupervised approaches
	in which the network is totally unknown in the beginning, and all
	the edges have to be predicted. A more realistic supervised framework,
	proposed recently, assumes that a substantial part of the network
	is known. We propose a new kernel-based method for supervised graph
	inference based on multiple types of biological datasets such as
	gene expression, phylogenetic profiles and amino acid sequences.
	Notably, our method assigns a weight to each type of dataset and
	thereby selects informative ones. Data selection is useful for reducing
	data collection costs. For example, when a similar network inference
	problem must be solved for other organisms, the dataset excluded
	by our algorithm need not be collected. RESULTS: First, we formulate
	supervised network inference as a kernel matrix completion problem,
	where the inference of edges boils down to estimation of missing
	entries of a kernel matrix. Then, an expectation-maximization algorithm
	is proposed to simultaneously infer the missing entries of the kernel
	matrix and the weights of multiple datasets. By introducing the weights,
	we can integrate multiple datasets selectively and thereby exclude
	irrelevant and noisy datasets. Our approach is favorably tested in
	two biological networks: a metabolic network and a protein interaction
	network. AVAILABILITY: Software is available on request.}
}

@book{Kay1998Fundamentals,
  title = {{F}undamentals of {S}tatistical {S}ignal {P}rocessing},
  publisher = {Prentice-Hall},
  year = {1998},
  author = {Kay, S. M.},
  volume = {2},
  address = {Englewood Cliffs, NJ},
  owner = {kb}
}

@inproceedings{Kazhdan2003Rotation,
  author = {Michael Kazhdan and Thomas Funkhouser and Szymon Rusinkiewicz},
  title = {Rotation invariant spherical harmonic representation of {3D} shape
	descriptors},
  booktitle = {SGP '03: Proceedings of the 2003 Eurographics/ACM SIGGRAPH symposium
	on Geometry processing},
  year = {2003},
  pages = {156--164},
  address = {Aire-la-Ville, Switzerland},
  publisher = {Eurographics Association},
  isbn = {1-58113-687-0},
  location = {Aachen, Germany}
}

@inproceedings{Kearns1993Efficient,
  author = {Kearns, M.},
  title = {Efficient noise-tolerant learning from statistical queries},
  booktitle = {Proceedings of the Twenty-Fifth Annual {ACM} Symposium on Theory
	of Computing},
  year = {1993},
  pages = {392--401},
  owner = {fantine},
  timestamp = {2009.07.21},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.49.669}
}

@article{Keerthi2003Asymptotic,
  author = {S. Sathiya Keerthi and Chih-Jen Lin},
  title = {Asymptotic behaviors of support vector machines with {G}aussian kernel.},
  journal = {Neural {C}omput},
  year = {2003},
  volume = {15},
  pages = {1667--1689},
  number = {7},
  month = {Jul},
  abstract = {Support vector machines ({SVM}s) with the gaussian ({RBF}) kernel
	have been popular for practical use. {M}odel selection in this class
	of {SVM}s involves two hyperparameters: the penalty parameter {C}
	and the kernel width sigma. {T}his letter analyzes the behavior of
	the {SVM} classifier when these hyperparameters take very small or
	very large values. {O}ur results help in understanding the hyperparameter
	space that leads to an efficient heuristic method of searching for
	hyperparameter values with small generalization errors. {T}he analysis
	also indicates that if complete model selection using the gaussian
	kernel has been conducted, there is no need to consider linear {SVM}.},
  doi = {10.1162/089976603321891855},
  url = {http://dx.doi.org/10.1162/089976603321891855}
}

@article{Keerthi2003SMO,
  author = {S. S. Keerthi and S. K. Shevade},
  title = {{SMO} algorithm for least-squares {SVM} formulations.},
  journal = {Neural {C}omput},
  year = {2003},
  volume = {15},
  pages = {487--507},
  number = {2},
  month = {Feb},
  abstract = {This article extends the well-known {SMO} algorithm of support vector
	machines ({SVM}s) to least-squares {SVM} formulations that include
	{LS}-{SVM} classification, kernel ridge regression, and a particular
	form of regularized kernel {F}isher discriminant. {T}he algorithm
	is shown to be asymptotically convergent. {I}t is also extremely
	easy to implement. {C}omputational experiments show that the algorithm
	is fast and scales efficiently (quadratically) as a function of the
	number of examples.},
  doi = {10.1162/089976603762553013},
  url = {http://dx.doi.org/10.1162/089976603762553013}
}

@article{Kellenberger2004Comparative,
  author = {E. Kellenberger and J. Rodrigo and P. Muller and D. Rognan},
  title = {Comparative evaluation of eight docking tools for docking and virtual
	screening accuracy.},
  journal = {Proteins},
  year = {2004},
  volume = {57},
  number = {2},
  pages = {225--242},
  month = {Nov},
  doi = {10.1002/prot.20149},
  url = {http://dx.doi.org/10.1002/prot.20149},
  pmid = {15340911},
  owner = {mahe},
  timestamp = {2006.09.07},
  abstract = {Eight docking programs (DOCK, FLEXX, FRED, GLIDE, GOLD, SLIDE, SURFLEX,
	and QXP) that can be used for either single-ligand docking or database
	screening have been compared for their propensity to recover the
	X-ray pose of 100 small-molecular-weight ligands, and for their capacity
	to discriminate known inhibitors of an enzyme (thymidine kinase)
	from randomly chosen "drug-like" molecules. Interestingly, both properties
	are found to be correlated, since the tools showing the best docking
	accuracy (GLIDE, GOLD, and SURFLEX) are also the most successful
	in ranking known inhibitors in a virtual screening experiment. Moreover,
	the current study pinpoints some physicochemical descriptors of either
	the ligand or its cognate protein-binding site that generally lead
	to docking/scoring inaccuracies.}
}

@article{Kellenberger2008How,
  author = {Kellenberger, E. and Schalon, C. and Rognan, D.},
  title = {How to Measure the Similarity Between Protein Ligand-Binding Sites?},
  journal = {Current Computer-Aided Drug Design},
  year = {2008},
  volume = {4},
  pages = {209--220},
  number = {3},
  month = {Sep},
  abstract = {Quantification of local similarity between protein 3D structures is
	a promising tool in computer-aided drug design and prediction of
	biological function. Over the last ten years, several computational
	methods were proposed, mostly based on geometrical comparisons. This
	review summarizes the recent literature and gives an overview of
	available programs.
	
	A particular interest is given to the underlying methodologies. Our
	analysis points out strengths and weaknesses of the various approaches.
	If all described methods work relatively well when two binding sites
	obviously resemble each other, scoring potential solutions remains
	a difficult issue, especially if the similarity is low. The other
	challenging question is the protein flexibility, which is indeed
	difficult to evaluate from a static representation. Last, most of
	recently developed techniques are fast and can be applied to large
	amounts of data.
	
	Examples were carefully chosen to illustrate the wide applicability
	domain of the most popular methods: detection of common structural
	motifs, identification of secondary targets for a drug-like compound,
	comparison of binding sites across a functional family, comparison
	of homology models, database screening.},
  doi = {10.2174/157340908785747401},
  pdf = {../local/Kellenberger2008How.pdf},
  file = {Kellenberger2008How.pdf:Kellenberger2008How.pdf:PDF},
  keywords = {chemogenomics},
  owner = {jp},
  timestamp = {2009.10.30},
  url = {http://dx.doi.org/10.2174/157340908785747401}
}

@article{Kelley2003Conserved,
  author = {Kelley, B. P. and Sharan, R. and Karp, R. M. and Sittler, T. and Root,
	D. E. and Stockwell, B. R. and Ideker, T.},
  title = {Conserved pathways within bacteria and yeast as revealed by global
	protein network alignment.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2003},
  volume = {100},
  pages = {11394--11399},
  number = {20},
  month = {Sep},
  abstract = {We implement a strategy for aligning two protein-protein interaction
	networks that combines interaction topology and protein sequence
	similarity to identify conserved interaction pathways and complexes.
	Using this approach we show that the protein-protein interaction
	networks of two distantly related species, Saccharomyces cerevisiae
	and Helicobacter pylori, harbor a large complement of evolutionarily
	conserved pathways, and that a large number of pathways appears to
	have duplicated and specialized within yeast. Analysis of these findings
	reveals many well characterized interaction pathways as well as many
	unanticipated pathways, the significance of which is reinforced by
	their presence in the networks of both species.},
  doi = {10.1073/pnas.1534710100},
  pdf = {../local/Kelley2003Conserved.pdf},
  file = {Kelley2003Conserved.pdf:local/Kelley2003Conserved.pdf:PDF},
  institution = {Whitehead Institute for Biomedical Research, 9 Cambridge Center,
	Cambridge, MA 02142, USA.},
  owner = {jp},
  pii = {1534710100},
  pmid = {14504397},
  timestamp = {2008.10.03},
  url = {http://dx.doi.org/10.1073/pnas.1534710100}
}

@article{Kelley2004PathBLAST,
  author = {Kelley, B. P. and Yuan, B. and Lewitter, F. and Sharan, R. and Stockwell,
	B. R. and Ideker, T.},
  title = {{PathBLAST}: a tool for alignment of protein interaction networks.},
  journal = {Nucleic Acids Res.},
  year = {2004},
  volume = {32},
  pages = {W83--W88},
  number = {Web Server issue},
  month = {Jul},
  abstract = {PathBLAST is a network alignment and search tool for comparing protein
	interaction networks across species to identify protein pathways
	and complexes that have been conserved by evolution. The basic method
	searches for high-scoring alignments between pairs of protein interaction
	paths, for which proteins of the first path are paired with putative
	orthologs occurring in the same order in the second path. This technique
	discriminates between true- and false-positive interactions and allows
	for functional annotation of protein interaction pathways based on
	similarity to the network of another, well-characterized species.
	PathBLAST is now available at http://www.pathblast.org/ as a web-based
	query. In this implementation, the user specifies a short protein
	interaction path for query against a target protein-protein interaction
	network selected from a network database. PathBLAST returns a ranked
	list of matching paths from the target network along with a graphical
	view of these paths and the overlap among them. Target protein-protein
	interaction networks are currently available for Helicobacter pylori,
	Saccharomyces cerevisiae, Caenorhabditis elegans and Drosophila melanogaster.
	Just as BLAST enables rapid comparison of protein sequences between
	genomes, tools such as PathBLAST are enabling comparative genomics
	at the network level.},
  doi = {10.1093/nar/gkh411},
  institution = {Whitehead Institute for Biomedical Research, Cambridge, MA 02142,
	USA.},
  owner = {jp},
  pii = {32/suppl_2/W83},
  pmid = {15215356},
  timestamp = {2008.10.03},
  url = {http://dx.doi.org/10.1093/nar/gkh411}
}

@article{Kelley2005Systematic,
  author = {Kelley, R. and Ideker, T.},
  title = {{S}ystematic interpretation of genetic interactions using protein
	networks.},
  journal = {Nat. Biotechnol.},
  year = {2005},
  volume = {23},
  pages = {561--566},
  number = {5},
  month = {May},
  abstract = {Genetic interaction analysis, in which two mutations have a combined
	effect not exhibited by either mutation alone, is a powerful and
	widespread tool for establishing functional linkages between genes.
	In the yeast Saccharomyces cerevisiae, ongoing screens have generated
	>4,800 such genetic interaction data. We demonstrate that by combining
	these data with information on protein-protein, protein-DNA or metabolic
	networks, it is possible to uncover physical mechanisms behind many
	of the observed genetic effects. Using a probabilistic model, we
	found that 1,922 genetic interactions are significantly associated
	with either between- or within-pathway explanations encoded in the
	physical networks, covering approximately 40\% of known genetic interactions.
	These models predict new functions for 343 proteins and suggest that
	between-pathway explanations are better than within-pathway explanations
	at interpreting genetic interactions identified in systematic screens.
	This study provides a road map for how genetic and physical interactions
	can be integrated to reveal pathway organization and function.},
  doi = {10.1038/nbt1096},
  pii = {nbt1096},
  pmid = {15877074},
  timestamp = {2006.11.21},
  url = {http://dx.doi.org/10.1038/nbt1096}
}

@article{Kerr2001Experimental,
  author = {M. K. Kerr and G. A. Churchill},
  title = {Experimental design for gene expression microarrays.},
  journal = {Biostatistics},
  year = {2001},
  volume = {2},
  pages = {183--201},
  number = {2},
  month = {Jun},
  abstract = {We examine experimental design issues arising with gene expression
	microarray technology. Microarray experiments have multiple sources
	of variation, and experimental plans should ensure that effects of
	interest are not confounded with ancillary effects. A commonly used
	design is shown to violate this principle and to be generally inefficient.
	We explore the connection between microarray designs and classical
	block design and use a family of ANOVA models as a guide to choosing
	a design. We combine principles of good design and A-optimality to
	give a general set of recommendations for design with microarrays.
	These recommendations are illustrated in detail for one kind of experimental
	objective, where we also give the results of a computer search for
	good designs.},
  doi = {10.1093/biostatistics/2.2.183},
  institution = {The Jackson Laboratory, Bar Harbor, ME, USA. garyc@jax.org},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {2/2/183},
  pmid = {12933549},
  timestamp = {2010.08.15},
  url = {http://dx.doi.org/10.1093/biostatistics/2.2.183}
}

@article{Kertesz2007role,
  author = {Michael Kertesz and Nicola Iovino and Ulrich Unnerstall and Ulrike
	Gaul and Eran Segal},
  title = {The role of site accessibility in microRNA target recognition.},
  journal = {Nat Genet},
  year = {2007},
  volume = {39},
  pages = {1278--1284},
  number = {10},
  month = {Oct},
  abstract = {MicroRNAs are key regulators of gene expression, but the precise mechanisms
	underlying their interaction with their mRNA targets are still poorly
	understood. Here, we systematically investigate the role of target-site
	accessibility, as determined by base-pairing interactions within
	the mRNA, in microRNA target recognition. We experimentally show
	that mutations diminishing target accessibility substantially reduce
	microRNA-mediated translational repression, with effects comparable
	to those of mutations that disrupt sequence complementarity. We devise
	a parameter-free model for microRNA-target interaction that computes
	the difference between the free energy gained from the formation
	of the microRNA-target duplex and the energetic cost of unpairing
	the target to make it accessible to the microRNA. This model explains
	the variability in our experiments, predicts validated targets more
	accurately than existing algorithms, and shows that genomes accommodate
	site accessibility by preferentially positioning targets in highly
	accessible regions. Our study thus demonstrates that target accessibility
	is a critical factor in microRNA function.},
  doi = {10.1038/ng2135},
  pdf = {../local/Kertesz2007role.pdf},
  file = {Kertesz2007role.pdf:Kertesz2007role.pdf:PDF},
  institution = {Computer Science and Applied Mathematics, Weizmann Institute of Science,
	Rehovot 76100, Israel.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {ng2135},
  pmid = {17893677},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1038/ng2135}
}

@inproceedings{Keselman03many-to-many,
  author = {Y. Keselman and A. Shokoufandeh and M. F. Demirci and S. Dickinson},
  title = {Many-to-Many Graph Matching via Metric Embedding},
  booktitle = {CVPR},
  year = {2003},
  pages = {850--857}
}

@article{Keserue2003Prediction,
  author = {Keser{\"u}, G. M.},
  title = {{P}rediction of h{ERG} potassium channel affinity by traditional
	and hologram {QSAR} methods.},
  journal = {Bioorg. Med. Chem. Lett.},
  year = {2003},
  volume = {13},
  pages = {2773--2775},
  number = {16},
  month = {Aug},
  abstract = {Traditional and hologram QSAR (HQSAR) models were developed for the
	prediction of hERG potassium channel affinities. The models were
	validated on three different test sets including compounds with published
	patch-clamp IC(50) data and two subsets from the World Drug Index
	(compounds indicated to have ECG modifying adverse effect and drugs
	marked to be approved, respectively). Discriminant analysis performed
	on the full set of hERG data resulted in a traditional QSAR model
	that classified 83\% of actives and 87\% of inactives correctly.
	Analysis of our HQSAR model revealed it to be predictive in both
	IC(50) and discrimination studies.},
  keywords = {chemoinformatics herg},
  pii = {S0960894X0300492X},
  pmid = {12873512},
  timestamp = {2006.10.06}
}

@article{Khan2005Proteome,
  author = {Shahid M Khan and Blandine Franke-Fayard and Gunnar R Mair and Edwin
	Lasonder and Chris J Janse and Matthias Mann and Andrew P Waters},
  title = {{P}roteome analysis of separated male and female gametocytes reveals
	novel sex-specific {P}lasmodium biology.},
  journal = {Cell},
  year = {2005},
  volume = {121},
  pages = {675--687},
  number = {5},
  month = {Jun},
  abstract = {Gametocytes, the precursor cells of malaria-parasite gametes, circulate
	in the blood and are responsible for transmission from host to mosquito
	vector. The individual proteomes of male and female gametocytes were
	analyzed using mass spectrometry, following separation by flow sorting
	of transgenic parasites expressing green fluorescent protein, in
	a sex-specific manner. Promoter tagging in transgenic parasites confirmed
	the designation of stage and sex specificity of the proteins. The
	male proteome contained 36\% (236 of 650) male-specific and the female
	proteome 19\% (101 of 541) female-specific proteins, but they share
	only 69 proteins, emphasizing the diverged features of the sexes.
	Of all the malaria life-cycle stages analyzed, the male gametocyte
	has the most distinct proteome, containing many proteins involved
	in flagellar-based motility and rapid genome replication. By identification
	of gender-specific protein kinases and phosphatases and using targeted
	gene disruption of two kinases, new sex-specific regulatory pathways
	were defined.},
  doi = {10.1016/j.cell.2005.03.027},
  pdf = {../local/Khan2005Proteome.pdf},
  file = {Khan2005Proteome.pdf:local/Khan2005Proteome.pdf:PDF},
  keywords = {plasmodium},
  pii = {S0092-8674(05)00299-0},
  pmid = {15935749},
  timestamp = {2006.04.13},
  url = {http://dx.doi.org/10.1016/j.cell.2005.03.027}
}

@article{Kharchenko2006Identifying,
  author = {Kharchenko, P. and Chen, L. and Freund, Y. and Vitkup, D. and Church,
	G. M.},
  title = {{I}dentifying metabolic enzymes with multiple types of association
	evidence.},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {177},
  abstract = {BACKGROUND: Existing large-scale metabolic models of sequenced organisms
	commonly include enzymatic functions which can not be attributed
	to any gene in that organism. Existing computational strategies for
	identifying such missing genes rely primarily on sequence homology
	to known enzyme-encoding genes. RESULTS: We present a novel method
	for identifying genes encoding for a specific metabolic function
	based on a local structure of metabolic network and multiple types
	of functional association evidence, including clustering of genes
	on the chromosome, similarity of phylogenetic profiles, gene expression,
	protein fusion events and others. Using E. coli and S. cerevisiae
	metabolic networks, we illustrate predictive ability of each individual
	type of association evidence and show that significantly better predictions
	can be obtained based on the combination of all data. In this way
	our method is able to predict 60\% of enzyme-encoding genes of E.
	coli metabolism within the top 10 (out of 3551) candidates for their
	enzymatic function, and as a top candidate within 43\% of the cases.
	CONCLUSION: We illustrate that a combination of genome context and
	other functional association evidence is effective in predicting
	genes encoding metabolic enzymes. Our approach does not rely on direct
	sequence homology to known enzyme-encoding genes, and can be used
	in conjunction with traditional homology-based metabolic reconstruction
	methods. The method can also be used to target orphan metabolic activities.},
  doi = {10.1186/1471-2105-7-177},
  pii = {1471-2105-7-177},
  pmid = {16571130},
  timestamp = {2006.11.21},
  url = {http://dx.doi.org/10.1186/1471-2105-7-177}
}

@article{Kharchenko2004Filling,
  author = {Kharchenko, P. and Vitkup, D. and Church, G. M.},
  title = {{F}illing gaps in a metabolic network using expression information.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20 Suppl 1},
  pages = {I178--I185},
  month = {Aug},
  abstract = {MOTIVATION: The metabolic models of both newly sequenced and well-studied
	organisms contain reactions for which the enzymes have not been identified
	yet. We present a computational approach for identifying genes encoding
	such missing metabolic enzymes in a partially reconstructed metabolic
	network. RESULTS: The metabolic expression placement (MEP) method
	relies on the coexpression properties of the metabolic network and
	is complementary to the sequence homology and genome context methods
	that are currently being used to identify missing metabolic genes.
	The MEP algorithm predicts over 20\% of all known Saccharomyces cerevisiae
	metabolic enzyme-encoding genes within the top 50 out of 5594 candidates
	for their enzymatic function, and 70\% of metabolic genes whose expression
	level has been significantly perturbed across the conditions of the
	expression dataset used. AVAILABILITY: Freely available (in Supplementary
	information). SUPPLEMENTARY INFORMATION: Available at the following
	URL http://arep.med.harvard.edu/kharchenko/mep/supplements.html},
  doi = {10.1093/bioinformatics/bth930},
  keywords = {Bacterial, Binding Sites, Biological, Comparative Study, DNA, Energy
	Metabolism, Enzyme Induction, Enzymes, Escherichia coli Proteins,
	Fungal, Gene Expression Regulation, Genes, Genetic, Genome, Models,
	Non-P.H.S., Non-U.S. Gov't, Phylogeny, Promoter Regions (Genetics),
	Protein, Research Support, Saccharomyces cerevisiae, Saccharomyces
	cerevisiae Proteins, Sequence Analysis, Systems Biology, Transcription
	Factors, U.S. Gov't, 15262797},
  pii = {20/suppl_1/i178},
  pmid = {15262797},
  timestamp = {2006.11.21},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth930}
}

@article{Khvorova2003Functional,
  author = {Khvorova, A. and Reynolds, A. and Jayasena, S.D.},
  title = {Functional si{RNA}s and mi{RNA}s exhibit strand bias.},
  journal = {Cell},
  year = {2003},
  volume = {115},
  pages = {209--216},
  number = {2},
  month = {Oct},
  abstract = {Both micro{RNA}s (mi{RNA}) and small interfering {RNA}s (si{RNA})
	share a common set of cellular proteins ({D}icer and the {RNA}-induced
	silencing complex [{RISC}]) to elicit {RNA} interference. {I}n the
	following work, a statistical analysis of the internal stability
	of published mi{RNA} sequences in the context of mi{RNA} precursor
	hairpins revealed enhanced flexibility of mi{RNA} precursors, especially
	at the 5'-anti-sense ({AS}) terminal base pair. {T}he same trend
	was observed in si{RNA}, with functional duplexes displaying a lower
	internal stability ($\Delta$0.5 kcal/mol) at the 5'-{AS} end than nonfunctional
	duplexes. {A}verage internal stability of si{RNA} molecules retrieved
	from plant cells after introduction of long {RNA} sequences also
	shows this characteristic thermodynamic signature. {T}ogether, these
	results suggest that the thermodynamic properties of si{RNA} play
	a critical role in determining the molecule's function and longevity,
	possibly biasing the steps involved in duplex unwinding and strand
	retention by {RISC}.},
  doi = {10.1016/S0092-8674(03)00801-8},
  pdf = {../local/Khvorova2003Functional.pdf},
  file = {Khvorova2003Functional.pdf:local/Khvorova2003Functional.pdf:PDF},
  keywords = {sirna},
  url = {http://dx.doi.org/10.1016/S0092-8674(03)00801-8}
}

@article{Kieffer1978unified,
  author = {Kieffer, J.},
  title = {A unified approach to weak universal source coding},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1978},
  volume = {24},
  pages = {674--682},
  number = {6},
  month = {Nov},
  abstract = { {A} new method of constructing a universal sequence of block codes
	for coding a class of ergodic sources is given. {W}ith this method,
	a weakly universal sequence of codes is constructed for variable-rate
	noiseless coding and for fixed- and variable-rate coding with respect
	to a fidelity criterion. {I}n this way a unified approach to weak
	universal block source coding is obtained. {F}or the noiseless variable-rate
	coding and the fixed-rate coding with respect to fidelity criterion,
	the assumptions made on the alphabets, distortion measures, and class
	of sources are both necessary and sufficient. {F}or fixed-rate coding
	with respect to a fidelity criterion, the sample distortion of the
	universal code sequence converges in $L^{1}$ norm for each source to
	the optimum distortion for that source. {F}or both variable-rate
	noiseless coding and variable-rate coding with respect to a fidelity
	criterion, the sample rate of the universal code sequence converges
	in $L^{1}$ norm for each source to the optimum rate for that source.
	{U}sing this fact, a universal sequence of codes for fixed-rate noiseless
	coding is obtained. {S}ome applications to stationary nonergodic
	sources are also considered. {T}he results of {D}avisson, {Z}iv,
	{N}euhoff, {G}ray, {P}ursley, and {M}ackenthun are extended. },
  pdf = {../local/Kieffer1978unified.pdf},
  file = {Kieffer1978unified.pdf:local/Kieffer1978unified.pdf:PDF},
  keywords = {universal-coding information-theory},
  owner = {vert}
}

@article{Kim2008Insights,
  author = {Eddo Kim and Amir Goren and Gil Ast},
  title = {Insights into the connection between cancer and alternative splicing},
  journal = {Trends Genet.},
  year = {2008},
  volume = {24},
  pages = {7--10},
  keywords = {csbcbook}
}

@article{Kim2007Sparse,
  author = {Hyunsoo Kim and Haesun Park},
  title = {Sparse non-negative matrix factorizations via alternating non-negativity-constrained
	least squares for microarray data analysis.},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {1495--1502},
  number = {12},
  month = {Jun},
  abstract = {Many practical pattern recognition problems require non-negativity
	constraints. For example, pixels in digital images and chemical concentrations
	in bioinformatics are non-negative. Sparse non-negative matrix factorizations
	(NMFs) are useful when the degree of sparseness in the non-negative
	basis matrix or the non-negative coefficient matrix in an NMF needs
	to be controlled in approximating high-dimensional data in a lower
	dimensional space. In this article, we introduce a novel formulation
	of sparse NMF and show how the new formulation leads to a convergent
	sparse NMF algorithm via alternating non-negativity-constrained least
	squares. We apply our sparse NMF algorithm to cancer-class discovery
	and gene expression data analysis and offer biological analysis of
	the results obtained. Our experimental results illustrate that the
	proposed sparse NMF algorithm often achieves better clustering performance
	with shorter computing time compared to other existing NMF algorithms. The
	software is available as supplementary material.},
  doi = {10.1093/bioinformatics/btm134},
  pdf = {../local/Kim2007Sparse.pdf},
  file = {Kim2007Sparse.pdf:Kim2007Sparse.pdf:PDF},
  institution = {College of Computing, Georgia Institute of Technology, Atlanta, GA
	30332, USA. hskim@cc.gatech.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btm134},
  pmid = {17483501},
  timestamp = {2012.02.28},
  url = {http://dx.doi.org/10.1093/bioinformatics/btm134}
}

@article{Kim2004Predictiona,
  author = {Kim, H. and Park, H.},
  title = {Prediction of protein relative solvent accessibility with support
	vector machines and long-range interaction 3{D} local descriptor},
  journal = {Proteins},
  year = {2004},
  volume = {54},
  pages = {557--562},
  number = {3},
  month = {Feb},
  abstract = {The prediction of protein relative solvent accessibility gives us
	helpful information for the prediction of tertiary structure of a
	protein. {T}he {SVM}psi method, which uses support vector machines
	({SVM}s), and the position-specific scoring matrix ({PSSM}) generated
	from {PSI}-{BLAST} have been applied to achieve better prediction
	accuracy of the relative solvent accessibility. {W}e have introduced
	a three-dimensional local descriptor that contains information about
	the expected remote contacts by both the long-range interaction matrix
	and neighbor sequences. {M}oreover, we applied feature weights to
	kernels in {SVM}s in order to consider the degree of significance
	that depends on the distance from the specific amino acid. {R}elative
	solvent accessibility based on a two state-model, for 25\%, 16\%, 5\%,
	and 0\% accessibility are predicted at 78.7\%, 80.7\%, 82.4\%, and 87.4\%
	accuracy, respectively. {T}hree-state prediction results provide
	a 64.5\% accuracy with 9\%; 36\% threshold. {T}he support vector machine
	approach has successfully been applied for solvent accessibility
	prediction by considering long-range interaction and handling unbalanced
	data.},
  doi = {10.1002/prot.10602},
  pdf = {../local/Kim2004Predictiona.pdf},
  file = {Kim2004Predictiona.pdf:local/Kim2004Predictiona.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/prot.10602}
}

@article{Kim2003Protein,
  author = {Kim, H. and Park, H.},
  title = {Protein secondary structure prediction based on an improved support
	vector machines approach},
  journal = {Protein {E}ng.},
  year = {2003},
  volume = {16},
  pages = {553--560},
  number = {8},
  month = {Aug},
  abstract = {The prediction of protein secondary structure is an important step
	in the prediction of protein tertiary structure. {A} new protein
	secondary structure prediction method, {SVM}psi, was developed to
	improve the current level of prediction by incorporating new tertiary
	classifiers and their jury decision system, and the {PSI}-{BLAST}
	{PSSM} profiles. {A}dditionally, efficient methods to handle unbalanced
	data and a new optimization strategy for maximizing the {Q}3 measure
	were developed. {T}he {SVM}psi produces the highest published {Q}3
	and {SOV}94 scores on both the {RS}126 and {CB}513 data sets to date.
	{F}or a new {KP}480 set, the prediction accuracy of {SVM}psi was
	{Q}3 = 78.5\% and {SOV}94 = 82.8\%. {M}oreover, the blind test results
	for 136 non-redundant protein sequences which do not contain homologues
	of training data sets were {Q}3 = 77.2\% and {SOV}94 = 81.8\%. {T}he
	{SVM}psi results in {CASP}5 illustrate that it is another competitive
	method to predict protein secondary structure.},
  pdf = {../local/Kim2003Protein.pdf},
  file = {Kim2003Protein.pdf:local/Kim2003Protein.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://peds.oupjournals.org/cgi/content/abstract/16/8/553}
}

@unpublished{Kim2001Evolving,
  author = {J. Kim and P.L. Krapivsky and B. Kahng and S. Redner},
  title = {Evolving protein interaction networks},
  note = {E-print cond-mat/0203167},
  year = {2002},
  pdf = {../local/kim02.pdf},
  file = {kim02.pdf:local/kim02.pdf:PDF},
  subject = {bionetprot},
  url = {http://xxx.lanl.gov/abs/cond-mat/0203167}
}

@article{Kim2004Prediction,
  author = {Kim, J. H. and Lee, J. and Oh, B. and Kimm, K. and Koh, I.},
  title = {Prediction of phosphorylation sites using {SVM}s},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {3179--3184},
  number = {17},
  abstract = {Motivation: {P}hosphorylation is involved in diverse signal transduction
	pathways. {B}y predicting phosphorylation sites and their kinases
	from primary protein sequences, we can obtain much valuable information
	that can form the basis for further research. {U}sing support vector
	machines, we attempted to predict phosphorylation sites and the type
	of kinase that acts at each site. {R}esults: {O}ur prediction system
	was limited to phosphorylation sites catalyzed by four protein kinase
	families and four protein kinase groups. {T}he accuracy of the predictions
	ranged from 83 to 95\% at the kinase family level, and 76-91\% at the
	kinase group level. {T}he prediction system used--{P}red{P}hospho--can
	be applied to the functional study of proteins, and can help predict
	the changes in phosphorylation sites caused by amino acid variations
	at intra- and interspecies levels. {A}vailability: {P}red{P}hospho
	is available at http://www.ngri.re.kr/proteo/{P}red{P}hospho.htm.
	{S}upplementary information: http://www.ngri.re.kr/proteo/supplementary.doc},
  doi = {10.1093/bioinformatics/bth382},
  pdf = {../local/Kim2004Prediction.pdf},
  file = {Kim2004Prediction.pdf:local/Kim2004Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/17/3179}
}

@inproceedings{Kim2008Robust,
  author = {Kim, J. S. and Scott, C.},
  title = {Robust kernel density estimation},
  booktitle = {Proc. IEEE Int. Conf. Acoustics, Speech and Signal Processing ICASSP
	2008},
  year = {2008},
  pages = {3381--3384},
  doi = {10.1109/ICASSP.2008.4518376},
  pdf = {../local/Kim2008Robust.pdf},
  file = {Kim2008Robust.pdf:Kim2008Robust.pdf:PDF},
  keywords = {kernelbook},
  owner = {jp},
  timestamp = {2011.07.23},
  url = {http://dx.doi.org/10.1109/ICASSP.2008.4518376}
}

@article{Kim2012Robust,
  author = {Kim, J. S. and Scott, C. D.},
  title = {Robust kernel density estimation},
  journal = {J. Mach. Learn. Res.},
  year = {2012},
  volume = {13},
  pages = {2529--2565},
  pdf = {../local/Kim2012Robust.pdf},
  file = {Kim2012Robust.pdf:Kim2012Robust.pdf:PDF},
  owner = {jp},
  timestamp = {2012.10.19},
  url = {http://www.jmlr.org/papers/volume13/kim12b/kim12b.pdf}
}

@article{Kim2004Emotion,
  author = {K. H. Kim and S. W. Bang and S. R. Kim},
  title = {Emotion recognition system using short-term monitoring of physiological
	signals.},
  journal = {Med {B}iol {E}ng {C}omput},
  year = {2004},
  volume = {42},
  pages = {419--427},
  number = {3},
  month = {May},
  abstract = {A physiological signal-based emotion recognition system is reported.
	{T}he system was developed to operate as a user-independent system,
	based on physiological signal databases obtained from multiple subjects.
	{T}he input signals were electrocardiogram, skin temperature variation
	and electrodermal activity, all of which were acquired without much
	discomfort from the body surface, and can reflect the influence of
	emotion on the autonomic nervous system. {T}he system consisted of
	preprocessing, feature extraction and pattern classification stages.
	{P}reprocessing and feature extraction methods were devised so that
	emotion-specific characteristics could be extracted from short-segment
	signals. {A}lthough the features were carefully extracted, their
	distribution formed a classification problem, with large overlap
	among clusters and large variance within clusters. {A} support vector
	machine was adopted as a pattern classifier to resolve this difficulty.
	{C}orrect-classification ratios for 50 subjects were 78.4\% and 61.8\%,
	for the recognition of three and four categories, respectively.},
  keywords = {Algorithms, Animals, Antisense, Artificial Intelligence, Autonomic
	Nervous System, Cell Line, Child, Cluster Analysis, Comparative Study,
	Computational Biology, Computer Simulation, Computer-Assisted, DNA
	Fingerprinting, Drug Evaluation, Emotions, Fluorescence, Fuzzy Logic,
	Gene Silencing, Gene Targeting, Genetic, Hela Cells, Humans, Imaging,
	Intracellular Space, Microscopy, Models, Monitoring, Neoplasms, Neural
	Networks (Computer), Non-U.S. Gov't, Oligonucleotides, P.H.S., Physiologic,
	Preclinical, Preschool, Prognosis, Proteomics, Quantitative Structure-Activity
	Relationship, RNA, RNA Interference, Recognition (Psychology), Research
	Support, Sensitivity and Specificity, Signal Processing, Small Interfering,
	Thionucleotides, Three-Dimensional, Tumor, U.S. Gov't, User-Computer
	Interface, 15191089}
}

@article{Kim2009Effects,
  author = {Kim, S.Y.},
  title = {Effects of sample size on robustness and prediction accuracy of a
	prognostic gene signature},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {147},
  number = {1},
  publisher = {BioMed Central Ltd}
}

@article{Kim2004Enhancing,
  author = {Sang-Woon Kim and B. John Oommen},
  title = {Enhancing prototype reduction schemes with recursion: a method applicable
	for "large" data sets.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2004},
  volume = {34},
  pages = {1384--1397},
  number = {3},
  month = {Jun},
  abstract = {Most of the prototype reduction schemes ({PRS}), which have been reported
	in the literature, process the data in its entirety to yield a subset
	of prototypes that are useful in nearest-neighbor-like classification.
	{F}oremost among these are the prototypes for nearest neighbor classifiers,
	the vector quantization technique, and the support vector machines.
	{T}hese methods suffer from a major disadvantage, namely, that of
	the excessive computational burden encountered by processing all
	the data. {I}n this paper, we suggest a recursive and computationally
	superior mechanism referred to as adaptive recursive partitioning
	({ARP})\_{PRS}. {R}ather than process all the data using a {PRS},
	we propose that the data be recursively subdivided into smaller subsets.
	{T}his recursive subdivision can be arbitrary, and need not utilize
	any underlying clustering philosophy. {T}he advantage of {ARP}\_{PRS}
	is that the {PRS} processes subsets of data points that effectively
	sample the entire space to yield smaller subsets of prototypes. {T}hese
	prototypes are then, in turn, gathered and processed by the {PRS}
	to yield more refined prototypes. {I}n this manner, prototypes which
	are in the interior of the {V}oronoi spaces, and thus ineffective
	in the classification, are eliminated at the subsequent invocations
	of the {PRS}. {W}e are unaware of any {PRS} that employs such a recursive
	philosophy. {A}lthough we marginally forfeit accuracy in return for
	computational efficiency, our experimental results demonstrate that
	the proposed recursive mechanism yields classification comparable
	to the best reported prototype condensation schemes reported to-date.
	{I}ndeed, this is true for both artificial data sets and for samples
	involving real-life data sets. {T}he results especially demonstrate
	that a fair computational advantage can be obtained by using such
	a recursive strategy for "large" data sets, such as those involved
	in data mining and text categorization applications.}
}

@article{Kim2005Locally,
  author = {Tae-Kyun Kim and Josef Kittler},
  title = {Locally linear discriminant analysis for multimodally distributed
	classes for face recognition with a single model image.},
  journal = {I{EEE} {T}rans {P}attern {A}nal {M}ach {I}ntell},
  year = {2005},
  volume = {27},
  pages = {318--327},
  number = {3},
  month = {Mar},
  abstract = {We present a novel method of nonlinear discriminant analysis involving
	a set of locally linear transformations called "{L}ocally {L}inear
	{D}iscriminant {A}nalysis ({LLDA})." {T}he underlying idea is that
	global nonlinear data structures are locally linear and local structures
	can be linearly aligned. {I}nput vectors are projected into each
	local feature space by linear transformations found to yield locally
	linearly transformed classes that maximize the between-class covariance
	while minimizing the within-class covariance. {I}n face recognition,
	linear discriminant analysis ({LDA}) has been widely adopted owing
	to its efficiency, but it does not capture nonlinear manifolds of
	faces which exhibit pose variations. {C}onventional nonlinear classification
	methods based on kernels such as generalized discriminant analysis
	({GDA}) and support vector machine ({SVM}) have been developed to
	overcome the shortcomings of the linear method, but they have the
	drawback of high computational cost of classification and overfitting.
	{O}ur method is for multiclass nonlinear discrimination and it is
	computationally highly efficient as compared to {GDA}. {T}he method
	does not suffer from overfitting by virtue of the linear base structure
	of the solution. {A} novel gradient-based learning algorithm is proposed
	for finding the optimal set of local linear bases. {T}he optimization
	does not exhibit a local-maxima problem. {T}he transformation functions
	facilitate robust face recognition in a low-dimensional subspace,
	under pose variations, using a single model image. {T}he classification
	results are given for both synthetic and real face data.}
}

@article{Kim2006Blockwise,
  author = {Kim, Y. and Kim, J. and Kim, K.},
  title = {Blockwise sparse regression},
  journal = {Statistica Sinica},
  year = {2006},
  volume = {16},
  pages = {375--390},
  pdf = {../local/Kim2006Blockwise.pdf},
  file = {Kim2006Blockwise.pdf:Kim2006Blockwise.pdf:PDF},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2010.06.26}
}

@article{Kimeldorf1971Some,
  author = {G. S. Kimeldorf and G. Wahba},
  title = {Some results on {T}chebycheffian spline functions},
  journal = {J. {M}ath. {A}nal. {A}ppl.},
  year = {1971},
  volume = {33},
  pages = {82--95}
}

@inproceedings{Kin2002Marginalized,
  author = {Kin, T. and Tsuda, K. and Asai, K.},
  title = {Marginalized kernels for {RNA} sequence data analysis},
  booktitle = {Genome {I}nformatics 2002},
  year = {2002},
  editor = {Lathrop, R.H. and Nakai, K. and Miyano, S. and Takagi, T. and Kanehisa,
	M.},
  pages = {112--122},
  publisher = {Universal Academic Press},
  abstract = {We present novel kernels that measure similarity of two {RNA} sequences,
	taking account of their secondary structures. {T}wo types of kernels
	are presented. {O}ne is for {RNA} sequences with known secondary
	structures, the other for those without known secondary structures.
	{T}he latter employs stochastic context-free grammar ({SCFG}) for
	estimating the secondary structure. {W}e call the latter the {\it
	marginalized count kernel} ({MCK}). {W}e show computational experiments
	for {MCK} using 74 sets of human t{RNA} sequence data: (i) kernel
	principal component analysis ({PCA}) for visualizing t{RNA} similarities,
	(ii) supervised classification with support vector machines ({SVM}s).
	{B}oth types of experiment show promising results for {MCK}s.},
  pdf = {../local/Kin2002Marginalized.pdf},
  file = {Kin2002Marginalized.pdf:local/Kin2002Marginalized.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.jsbi.org/journal/GIW02/GIW02F012.html}
}

@article{King1992Drug,
  author = {R. D. King and S. Muggleton and R. A. Lewis and M. J. Sternberg},
  title = {{D}rug design by machine learning: the use of inductive logic programming
	to model the structure-activity relationships of trimethoprim analogues
	binding to dihydrofolate reductase.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {1992},
  volume = {89},
  pages = {11322--11326},
  number = {23},
  month = {Dec},
  abstract = {The machine learning program GOLEM from the field of inductive logic
	programming was applied to the drug design problem of modeling structure-activity
	relationships. The training data for the program were 44 trimethoprim
	analogues and their observed inhibition of Escherichia coli dihydrofolate
	reductase. A further 11 compounds were used as unseen test data.
	GOLEM obtained rules that were statistically more accurate on the
	training data and also better on the test data than a Hansch linear
	regression model. Importantly machine learning yields understandable
	rules that characterized the chemistry of favored inhibitors in terms
	of polarity, flexibility, and hydrogen-bonding character. These rules
	agree with the stereochemistry of the interaction observed crystallographically.},
  keywords = {Algorithms, Artificial Intelligence, Drug Design, Escherichia coli,
	Folic Acid Antagonists, Molecular Structure, Mutagens, Nitroso Compounds,
	Non-U.S. Gov't, Research Support, Structure-Activity Relationship,
	Trimethoprim, 1454814},
  owner = {mahe},
  pmid = {1454814},
  timestamp = {2006.09.06}
}

@article{King1996Structure-activity,
  author = {King, R. D. and Muggleton, S. H. and Srinivasan, A. and Sternberg,
	M. J.},
  title = {{S}tructure-activity relationships derived by machine learning: the
	use of atoms and their bond connectivities to predict mutagenicity
	by inductive logic programming},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {1996},
  month = {Jan},
  volume = {93},
  number = {1},
  pages = {438--442},
  pmid = {8552655},
  owner = {mahe},
  timestamp = {2006.09.06},
  abstract = {We present a general approach to forming structure-activity relationships
	(SARs). This approach is based on representing chemical structure
	by atoms and their bond connectivities in combination with the inductive
	logic programming (ILP) algorithm PROGOL. Existing SAR methods describe
	chemical structure by using attributes which are general properties
	of an object. It is not possible to map chemical structure directly
	to attribute-based descriptions, as such descriptions have no internal
	organization. A more natural and general way to describe chemical
	structure is to use a relational description, where the internal
	construction of the description maps that of the object described.
	Our atom and bond connectivities representation is a relational description.
	ILP algorithms can form SARs with relational descriptions. We have
	tested the relational approach by investigating the SARs of 230 aromatic
	and heteroaromatic nitro compounds. These compounds had been split
	previously into two subsets, 188 compounds that were amenable to
	regression and 42 that were not. For the 188 compounds, a SAR was
	found that was as accurate as the best statistical or neural network-generated
	SARs. The PROGOL SAR has the advantages that it did not need the
	use of any indicator variables handcrafted by an expert, and the
	generated rules were easily comprehensible. For the 42 compounds,
	PROGOL formed a SAR that was significantly (P < 0.025) more accurate
	than linear regression, quadratic regression, and back-propagation.
	This SAR is based on an automatically generated structural alert
	for mutagenicity.}
}

@article{Kirby1990Application,
  author = {Kirby, M. and Sirovich, L.},
  title = {Application of the {K}arhunen-{L}o{\`e}ve procedure for the characterization
	of human faces},
  journal = {I{EEE} {T}rans. {P}attern {A}nal. {M}ach. {I}ntell.},
  year = {1990},
  volume = {12},
  number = {1},
  pages = {103--108},
  doi = {10.1109/34.41390},
  url = {http://dx.doi.org/10.1109/34.41390},
  pdf = {../local/Kirby1990Application.pdf},
  file = {Kirby1990Application.pdf:local/Kirby1990Application.pdf:PDF},
  subject = {ml}
}

@article{Kitano2004Cancer,
  author = {Kitano, H.},
  title = {Cancer as a robust system: implications for anticancer therapy},
  journal = {Nat. {R}ev. {C}ancer},
  year = {2004},
  volume = {4},
  pages = {227--235},
  abstract = {Cancers are extremely complex, heterogeneous diseases. {M}any approaches
	to anticancer treatment have had limited success --- cures are still
	rare. {A} fundamental hurdle to cancer therapy is acquired tumour
	'robustness'. {T}he goal of this article is to present a perspective
	on cancer as a robust system to provide a framework from which the
	complexity of tumours can be approached to yield novel therapies.},
  doi = {10.1038/nrc1300},
  pdf = {../local/Kitano2004Cancer.pdf},
  file = {Kitano2004Cancer.pdf:local/Kitano2004Cancer.pdf:PDF},
  url = {http://dx.doi.org/10.1038/nrc1300}
}

@article{Kitano2002Computational,
  author = {Kitano, H.},
  title = {Computational systems biology},
  journal = {Nature},
  year = {2002},
  volume = {420},
  pages = {206--210},
  abstract = {To understand complex biological systems requires the integration
	of experimental and computational research --- in other words a systems
	biology approach. {C}omputational biology, through pragmatic modelling
	and theoretical exploration, provides a powerful foundation from
	which to address critical scientific questions head-on. {T}he reviews
	in this {I}nsight cover many different aspects of this energetic
	field, although all, in one way or another, illuminate the functioning
	of modular circuits, including their robustness, design and manipulation.
	{C}omputational systems biology addresses questions fundamental to
	our understanding of life, yet progress here will lead to practical
	innovations in medicine, drug discovery and engineering.},
  doi = {10.1038/nature01254},
  pdf = {../local/Kitano2002Computational.pdf},
  file = {Kitano2002Computational.pdf:local/Kitano2002Computational.pdf:PDF},
  url = {http://dx.doi.org/10.1038/nature01254}
}

@book{Kitano2001Foundations,
  author = {Kitano, H.},
  title = {Foundations of {S}ystems {B}iology},
  year = {2001},
  publisher = {MIT Press},
  owner = {vert}
}

@article{Kitano2005NatBiotechnol,
  author = {Kitano, H. and Funahashi, A. and Matsuoka, Y. and Oda, K.},
  title = {Using process diagrams for the graphical representation of biological
	networks},
  journal = {Nat. {B}iotechnol.},
  year = {2005},
  volume = {23},
  pages = {961--966},
  number = {8},
  abstract = {With the increased interest in understanding biological networks,
	such as protein-protein interaction networks and gene regulatory
	networks, methods for representing and communicating such networks
	in both human- and machine-readable form have become increasingly
	important. Although there has been significant progress in machine-readable
	representation of networks, as exemplified by the Systems Biology
	Mark-up Language (SBML) (http://www.sbml.org) issues in human-readable
	representation have been largely ignored. This article discusses
	human-readable diagrammatic representations and proposes a set of
	notations that enhances the formality and richness of the information
	represented. The process diagram is a fully state transition-based
	diagram that can be translated into machine-readable forms such as
	SBML in a straightforward way. It is supported by CellDesigner, a
	diagrammatic network editing software (http://www.celldesigner.org/),
	and has been used to represent a variety of networks of various sizes
	(from only a few components to several hundred components).},
  doi = {10.1038/nbt1111},
  keywords = {csbcbook}
}

@article{Kitchen2004Docking,
  author = {D. B. Kitchen and H. Decornez and J. R. Furr and J. Bajorath},
  title = {{D}ocking and scoring in virtual screening for drug discovery: methods
	and applications.},
  journal = {Nat Rev Drug Discov},
  year = {2004},
  month = {Nov},
  volume = {3},
  number = {11},
  pages = {935--949},
  doi = {10.1038/nrd1549},
  url = {http://dx.doi.org/10.1038/nrd1549},
  pii = {nrd1549},
  pmid = {15520816},
  owner = {mahe},
  timestamp = {2006.08.16},
  abstract = {Computational approaches that 'dock' small molecules into the structures
	of macromolecular targets and 'score' their potential complementarity
	to binding sites are widely used in hit identification and lead optimization.
	Indeed, there are now a number of drugs whose development was heavily
	influenced by or based on structure-based design and screening strategies,
	such as HIV protease inhibitors. Nevertheless, there remain significant
	challenges in the application of these approaches, in particular
	in relation to current scoring schemes. Here, we review key concepts
	and specific features of small-molecule-protein docking methods,
	highlight selected applications and discuss recent advances that
	aim to address the acknowledged limitations of established approaches.}
}

@article{Klabunde2007Chemogenomic,
  author = {Klabunde, T.},
  title = {Chemogenomic approaches to drug discovery: similar receptors bind
	similar ligands.},
  journal = {Br. J. Pharmacol.},
  year = {2007},
  volume = {152},
  pages = {5--7},
  month = {May},
  abstract = {Within recent years, a paradigm shift from traditional receptor-specific
	studies to a cross-receptor view has taken place within pharmaceutical
	research to increase the efficiency of modern drug discovery. Receptors
	are no longer viewed as single entities but grouped into sets of
	related proteins or receptor families that are explored in a systematic
	manner. This interdisciplinary approach attempting to derive predictive
	links between the chemical structures of bioactive molecules and
	the receptors with which these molecules interact is referred to
	as chemogenomics. Insights from chemogenomics are used for the rational
	compilation of screening sets and for the rational design and synthesis
	of directed chemical libraries to accelerate drug discovery. British
	Journal of Pharmacology advance online publication, 29 May 2007;
	doi:10.1038/sj.bjp.0707308.},
  doi = {10.1038/sj.bjp.0707308},
  keywords = {chemogenomics},
  owner = {laurent},
  pii = {0707308},
  pmid = {17533415},
  timestamp = {2007.07.30},
  url = {http://dx.doi.org/10.1038/sj.bjp.0707308}
}

@incollection{Klabunde2006ChemogenomicsA,
  author = {Klabunde, T.},
  title = {Chemogenomics Approaches to Ligand Design},
  booktitle = {Ligand Design for G Protein-coupled Receptors},
  publisher = {Wiley-VCH},
  year = {2006},
  chapter = {7},
  pages = {115--135},
  address = {Great Britain}
}

@article{Klabunde2006Chemogenomics,
  author = {T. Klabunde and R. J{\"a}ger},
  title = {Chemogenomics approaches to G-protein coupled receptor lead finding.},
  journal = {Ernst Schering Res Found Workshop},
  year = {2006},
  volume = {58},
  pages = {31--46},
  pmid = {16708997},
  keywords = {chemogenomics},
  owner = {laurent},
  timestamp = {2007.07.30},
  abstract = {G-protein coupled receptors (GPCRs) are promising targets for the
	discovery of novel drugs. In order to identify novel chemical series,
	high-throughput screening (HTS) is often complemented by rational
	chemogenomics lead finding approaches. We have compiled a GPCR directed
	screening set by ligand-based virtual screening of our corporate
	compound database. This set of compounds is supplemented with novel
	libraries synthesized around proprietary scaffolds. These target-directed
	libraries are designed using the knowledge of privileged fragments
	and pharmacophores to address specific GPCR subfamilies (e.g., purinergic
	or chemokine-binding GPCRs). Experimental testing of the GPCR collection
	has provided novel chemical series for several GPCR targets including
	the adenosine A1, the P2Y12, and the chemokine CCR1 receptor. In
	addition, GPCR sequence motifs linked to the recognition of GPCR
	ligands (termed chemoprints) are identified using homology modeling,
	molecular docking, and experimental profiling. These chemoprints
	can support the design and synthesis of compound libraries tailor-made
	for a novel GPCR target.}
}

@article{Klamt2004Minimal,
  author = {Klamt, S. and Gilles, E. D.},
  title = {Minimal cut sets in biochemical reaction networks.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {226--234},
  number = {2},
  month = {Jan},
  abstract = {Structural studies of metabolic networks yield deeper insight into
	topology, functionality and capabilities of the metabolisms of different
	organisms. Here, we address the analysis of potential failure modes
	in metabolic networks whose occurrence will render the network structurally
	incapable of performing certain functions. Such studies will help
	to identify crucial parts in the network structure and to find suitable
	targets for repressing undesired metabolic functions. We introduce
	the concept of minimal cut sets for biochemical networks. A minimal
	cut set (MCS) is a minimal (irreducible) set of reactions in the
	network whose inactivation will definitely lead to a failure in certain
	network functions. We present an algorithm which enables the computation
	of the MCSs in a given network related to user-defined objective
	reactions. This algorithm operates on elementary modes. A number
	of potential applications are outlined, including network verifications,
	phenotype predictions, assessing structural robustness and fragility,
	metabolic flux analysis and target identification in drug discovery.
	Applications are illustrated by the MCSs in the central metabolism
	of Escherichia coli for growth on different substrates. Computation
	and analysis of MCSs is an additional feature of the FluxAnalyzer
	(freely available for academic users upon request, special contracts
	for industrial companies; see web page below). Supplementary information:
	http://www.mpi-magdeburg.mpg.de/projects/fluxanalyzer},
  doi = {10.1093/bioinformatics/btg395},
  institution = {Max Planck Institute for Dynamics of Complex Technical Systems, Sandtorstr.1,
	D-39106 Magdeburg, Germany. klamt@mpi-magdeburg.mpg.de},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {14734314},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1093/bioinformatics/btg395}
}

@article{Klebe2000Recent,
  author = {G. Klebe},
  title = {{R}ecent developments in structure-based drug design.},
  journal = {J Mol Med},
  year = {2000},
  volume = {78},
  number = {5},
  pages = {269--281},
  pmid = {10954196},
  keywords = {Animals, Chemistry, Computer Simulation, Cross-Over Studies, Crystallography,
	Deglutition, Deglutition Disorders, Drug Design, Endoscopy, Enzyme
	Inhibitors, Female, Fluoroscopy, Glossopharyngeal Nerve, HIV Protease
	Inhibitors, Horse Diseases, Horses, Male, Models, Molecular, Nerve
	Block, Non-U.S. Gov't, P.H.S., Pharmaceutical, Proteins, Quantitative
	Structure-Activity Relationship, Random Allocation, Research Support,
	Thrombin, Thymidylate Synthase, U.S. Gov't, X-Ray, 10954196},
  owner = {mahe},
  timestamp = {2006.09.05},
  abstract = {Structure-based design has emerged as a new tool in medicinal chemistry.
	A prerequisite for this new approach is an understanding of the principles
	of molecular recognition in protein-ligand complexes. If the three-dimensional
	structure of a given protein is known, this information can be directly
	exploited for the retrieval and design of new ligands. Structure-based
	ligand design is an iterative approach. First of all, it requires
	the crystal structure or a model derived from the crystal structure
	of a closely related homolog of the target protein, preferentially
	complexed with a ligand. This complex unravels the binding mode and
	conformation of a ligand under investigation and indicates the essential
	aspects determining its binding affinity. It is then used to generate
	new ideas about ways of improving an existing ligand or of developing
	new alternative bonding skeletons. Computational methods supplemented
	by molecular graphics are applied to assist this step of hypothesis
	generation. The features of the protein binding pocket can be translated
	into queries used for virtual computer screening of large compound
	libraries or to design novel ligands de novo. These initial proposals
	must be confirmed experimentally. Subsequently they are optimized
	toward higher affinity and better selectivity. The latter aspect
	is of utmost importance in defining and controlling the pharmacological
	profile of a ligand. A prerequisite to tailoring selectivity by rational
	design is a detailed understanding of molecular parameters determining
	selectivity. Taking examples from current drug development programs
	(HIV proteinase, t-RNA transglycosylase, thymidylate synthase, thrombin
	and, related serine proteinases), we describe recent advances in
	lead discovery via computer screening, iterative design, and understanding
	of selectivity discrimination.}
}

@article{Knight99Decoding,
  author = {K. Knight},
  title = {Decoding complexity in word-replacement translation models},
  journal = {Computational Linguistics},
  volume = {25},
  pages = {607--615},
  year = {1999}
}

@article{Knight2000Asymptotics,
  author = {Knight, K. and Fu, W.},
  title = {Asymptotics for lasso-type estimators},
  journal = {Ann. Stat.},
  year = {2000},
  volume = {28},
  pages = {1356--1378},
  number = {5},
  doi = {10.1214/aos/1015957397},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2009.01.25},
  url = {http://dx.doi.org/10.1214/aos/1015957397}
}

@misc{Knight-Yamada-02,
  author = {Knight, K. and Yamada, K.},
  title = {Integer Programming Decoder for Machine Translation},
  year = {2007},
  howpublished = {Patent US 7,177,792},
  note = {Application filed in 2002}
}

@article{Knudson1971Mutation,
  author = {Alfred G. Knudson},
  title = {Mutation and Cancer: Statistical Study of Retinoblastoma},
  journal = {Proceedings of the National Academy of Sciences},
  year = {1971},
  volume = {68},
  pages = {820--823},
  keywords = {csbcbook}
}

@article{Kobilka2007G,
  author = {Kobilka, B. K.},
  title = {G protein coupled receptor structure and activation.},
  journal = {Biochim. Biophys. Acta},
  year = {2007},
  month = {Apr},
  volume = {1768},
  number = {4},
  pages = {794--807},
  doi = {10.1016/j.bbamem.2006.10.021},
  url = {http://dx.doi.org/10.1016/j.bbamem.2006.10.021},
  pii = {S0005-2736(06)00398-1},
  pmid = {17188232},
  keywords = {chemogenomics},
  owner = {laurent},
  timestamp = {2008.07.16},
  abstract = {G protein coupled receptors (GPCRs) are remarkably versatile signaling
	molecules. The members of this large family of membrane proteins
	are activated by a spectrum of structurally diverse ligands, and
	have been shown to modulate the activity of different signaling pathways
	in a ligand specific manner. In this manuscript I will review what
	is known about the structure and mechanism of activation of GPCRs
	focusing primarily on two model systems, rhodopsin and the beta(2)
	adrenoceptor.}
}

@inproceedings{Koehn-05,
  author = {Koehn, P.},
  title = {Europarl: A Parallel Corpus for Statistical Machine Translation},
  year = {2005},
  booktitle = {{MT} Summit}
}

@inproceedings{Koehn-et-al-03,
  author = {Koehn, P. and Och, F. J. and Marcu, D.},
  title = {Statistical phrase-based translation},
  booktitle = {NAACL 2003},
  year = {2003},
  pages = {48--54},
  address = {Morristown, NJ, USA},
  publisher = {Association for Computational Linguistics},
  doi = {10.3115/1073445.1073462},
  location = {Edmonton, Canada}
}

@article{Koh2007interior,
  author = {Koh, K. and Kim, S.-J. and Boyd, S.},
  title = {An interior-point method for large-scale l1-regularized logistic
	regression},
  journal = {Journal of Machine Learning Research},
  year = {2007},
  volume = {8},
  pages = {1519--1555},
  number = {8}
}

@article{Kohavi1997Wrappers,
  author = {Kohavi, R. and John, G. H.},
  title = {Wrappers for feature subset selection},
  journal = {Artificial Intelligence},
  year = {1997},
  volume = {97},
  pages = {273--324},
  number = {1--2},
  owner = {jp},
  timestamp = {2011.01.11}
}

@article{Kohlmann2004Pediatric,
  author = {Kohlmann, A. and Schoch, C. and Schnittger, S. and Dugas, M. and
	Hiddemann, W. and Kern, W. and Haferlach, T.},
  title = {Pediatric acute lymphoblastic leukemia ({ALL}) gene expression signatures
	classify an independent cohort of adult {ALL} patients},
  journal = {Leukemia},
  year = {2004},
  volume = {18},
  pages = {63--71},
  number = {1},
  abstract = {Recent reports support a possible future application of gene expression
	profiling for the diagnosis of leukemias. {H}owever, the robustness
	of subtype-specific gene expression signatures has to be proven on
	independent patient samples. {H}ere, we present gene expression data
	of 34 adult acute lymphoblastic leukemia ({ALL}) patients ({A}ffymetrix
	{U}133{A} microarrays). {S}upport {V}ector {M}achines ({SVM}s) were
	applied to stratify our samples based on given gene lists reported
	to predict {MLL}, {BCR}-{ABL}, and {T}-{ALL}, as well as {MLL} and
	non-{MLL} gene rearrangement positive pediatric {ALL}. {I}n addition,
	seven other {B}-precursor {ALL} cases not bearing t(9;22) or t(11q23)/{MLL}
	chromosomal aberrations were analyzed. {U}sing top differentially
	expressed genes, hierarchical cluster and principal component analyses
	demonstrate that the genetically more heterogeneous {B}-precursor
	{ALL} samples intercalate with {BCR}-{ABL}-positive cases, but were
	clearly distinct from {T}-{ALL} and {MLL} profiles. {S}imilar expression
	signatures were observed for both heterogeneous {B}-precursor {ALL}
	and for {BCR}-{ABL}-positive cases. {A}s an unrelated laboratory,
	we demonstrate that gene signatures defined for childhood {ALL} were
	also capable of stratifying distinct subtypes in our cohort of adult
	{ALL} patients. {A}s such, previously reported gene expression patterns
	identified by microarray technology are validated and confirmed on
	truly independent leukemia patient samples.},
  doi = {10.1038/sj.leu.2403167},
  pdf = {../local/Kohlmann2004Pediatric.pdf},
  file = {Kohlmann2004Pediatric.pdf:local/Kohlmann2004Pediatric.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1038/sj.leu.2403167}
}

@article{Koike2004Prediction,
  author = {Koike, A. and Takagi, T.},
  title = {Prediction of protein-protein interaction sites using support vector
	machines},
  journal = {Protein {E}ng. {D}es. {S}el.},
  year = {2004},
  volume = {17},
  pages = {165--173},
  number = {2},
  month = {Feb},
  abstract = {The identification of protein-protein interaction sites is essential
	for the mutant design and prediction of protein-protein networks.
	{T}he interaction sites of residue units were predicted using support
	vector machines ({SVM}) and the profiles of sequentially/spatially
	neighboring residues, plus additional information. {W}hen only sequence
	information was used, prediction performance was highest using the
	feature vectors, sequentially neighboring profiles and predicted
	interaction site ratios, which were calculated by {SVM} regression
	using amino acid compositions. {W}hen structural information was
	also used, prediction performance was highest using the feature vectors,
	spatially neighboring residue profiles, accessible surface areas,
	and the with/without protein interaction sites ratios predicted by
	{SVM} regression and amino acid compositions. {I}n the latter case,
	the precision at recall = 50\% was 54-56\% for a homo-hetero mixed
	test set and >20\% higher than for random prediction. {A}pproximately
	30\% of the residues wrongly predicted as interaction sites were the
	closest sequentially/spatially neighboring on the interaction site
	residues. {T}he predicted residues covered 86-87\% of the actual interfaces
	(96-97\% of interfaces with over 20 residues). {T}his prediction performance
	appeared to be slightly higher than a previously reported study.
	{C}omparing the prediction accuracy of each molecule, it seems to
	be easier to predict interaction sites for stable complexes.},
  doi = {10.1093/protein/gzh020},
  pdf = {../local/Koike2004Prediction.pdf},
  file = {Koike2004Prediction.pdf:local/Koike2004Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/protein/gzh020}
}

@book{Koller2009Probabilistic,
  author = {Koller, D. and Friedman, N.},
  title = {Probabilistic Graphical Models},
  year = {2009},
  publisher = {MIT Press},
  owner = {jp},
  timestamp = {2010.10.13}
}

@unpublished{Koltchinskii2003Localized,
  author = {Koltchinskii, V.},
  title = {Localized {R}ademacher complexities},
  note = {Manuscript},
  month = sep,
  year = {2003}
}

@article{Komura2005Multidimensional,
  author = {Komura, D. and Nakamura, H. and Tsutsumi, S. and Aburatani, H. and
	Ihara, S.},
  title = {Multidimensional support vector machines for visualization of gene
	expression data},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {439--444},
  number = {4},
  month = {Feb},
  abstract = {Motivation: {S}ince {DNA} microarray experiments provide us with huge
	amount of gene expression data, they should be analyzed with statistical
	methods to extract the meanings of experimental results. {S}ome dimensionality
	reduction methods such as {P}rincipal {C}omponent {A}nalysis ({PCA})
	are used to roughly visualize the distribution of high dimensional
	gene expression data. {H}owever, in the case of binary classification
	of gene expression data, {PCA} does not utilize class information
	when choosing axes. {T}hus clearly separable data in the original
	space may not be so in the reduced space used in {PCA}. {R}esults:
	{F}or visualization and class prediction of gene expression data,
	we have developed a new {SVM}-based method called multidimensional
	{SVM}s, that generate multiple orthogonal axes. {T}his method projects
	high dimensional data into lower dimensional space to exhibit properties
	of the data clearly and to visualize a distribution of the data roughly.
	{F}urthermore, the multiple axes can be used for class prediction.
	{T}he basic properties of conventional {SVM}s are retained in our
	method: solutions of mathematical programming are sparse, and nonlinear
	classification is implemented implicitly through the use of kernel
	functions. {T}he application of our method to the experimentally
	obtained gene expression datasets for patients' samples indicates
	that our algorithm is efficient and useful for visualization and
	class prediction.},
  doi = {10.1093/bioinformatics/bti188},
  pdf = {../local/Komura2005Multidimensional.pdf},
  file = {Komura2005Multidimensional.pdf:local/Komura2005Multidimensional.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/bti188v1}
}

@inproceedings{Kondor2008skew,
  author = {Kondor, R. and Borgwardt, K. M.},
  title = {The skew spectrum of graphs},
  booktitle = {ICML '08: Proceedings of the 25th international conference on Machine
	learning},
  year = {2008},
  pages = {496--503},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1390156.1390219},
  pdf = {../local/Kondor2008skew.pdf},
  file = {Kondor2008skew.pdf:Kondor2008skew.pdf:PDF},
  isbn = {978-1-60558-205-4},
  location = {Helsinki, Finland}
}

@inproceedings{Kondor2003Kernel,
  author = {R. Kondor and T. Jebara},
  title = {A kernel between sets of vectors},
  year = {2003},
  booktitle = {ICML '03: Proceedings of the 20th international conference on Machine
	learning}
}

@inproceedings{Kondor2009graphlet,
  author = {Kondor, R. and Shervashidze, N. and Borgwardt, K. M.},
  title = {The graphlet spectrum},
  booktitle = {ICML '09: Proceedings of the 26th Annual International Conference
	on Machine Learning},
  year = {2009},
  pages = {529--536},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1553374.1553443},
  isbn = {978-1-60558-516-1},
  location = {Montreal, Quebec, Canada}
}

@incollection{Kondor2004Diffusion,
  author = {Kondor, R. and Vert, J.-P.},
  title = {Diffusion kernels},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  pages = {171--192},
  pdf = {../local/saigo.pdf},
  file = {saigo.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/saigo.pdf:PDF;saigo.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/saigo.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@inproceedings{Kondor2010Ranking,
  author = {Kondor, R. I. and Barbosa, M. S.},
  title = {Ranking with Kernels in Fourier space},
  booktitle = {COLT 2010 - The 23rd Conference on Learning Theory, Haifa,
	Israel, June 27--29, 2010},
  year = {2010},
  editor = {Kalai, A. T. and Mohri, M.},
  pages = {451--463},
  publisher = {Omnipress},
  pdf = {../local/Kondor2010Ranking.pdf},
  file = {Kondor2010Ranking.pdf:Kondor2010Ranking.pdf:PDF},
  owner = {jp},
  timestamp = {2011.04.08}
}

@inproceedings{Kondor2002Diffusion,
  author = {R. I. Kondor and J. Lafferty},
  title = {Diffusion kernels on graphs and other discrete input spaces},
  booktitle = {Proceedings of the Nineteenth International Conference on Machine
	Learning},
  year = {2002},
  pages = {315--322},
  address = {San Francisco, CA, USA},
  publisher = {Morgan Kaufmann Publishers Inc.},
  pdf = {../local/Kondor2002Diffusion.pdf},
  file = {Kondor2002Diffusion.pdf:Kondor2002Diffusion.pdf:PDF},
  keywords = {biosvm},
  subject = {kernelnet}
}

@article{Kononen1998Tissue,
  author = {J. Kononen and L. Bubendorf and A. Kallioniemi and M. B{\"a}rlund and
	P. Schraml and S. Leighton and J. Torhorst and M. J. Mihatsch and
	G. Sauter and O. P. Kallioniemi},
  title = {Tissue microarrays for high-throughput molecular profiling of tumor
	specimens.},
  journal = {Nat Med},
  year = {1998},
  volume = {4},
  pages = {844--847},
  number = {7},
  month = {Jul},
  abstract = {Many genes and signalling pathways controlling cell proliferation,
	death and differentiation, as well as genomic integrity, are involved
	in cancer development. New techniques, such as serial analysis of
	gene expression and cDNA microarrays, have enabled measurement of
	the expression of thousands of genes in a single experiment, revealing
	many new, potentially important cancer genes. These genome screening
	tools can comprehensively survey one tumor at a time; however, analysis
	of hundreds of specimens from patients in different stages of disease
	is needed to establish the diagnostic, prognostic and therapeutic
	importance of each of the emerging cancer gene candidates. Here we
	have developed an array-based high-throughput technique that facilitates
	gene expression and copy number surveys of very large numbers of
	tumors. As many as 1000 cylindrical tissue biopsies from individual
	tumors can be distributed in a single tumor tissue microarray. Sections
	of the microarray provide targets for parallel in situ detection
	of DNA, RNA and protein targets in each specimen on the array, and
	consecutive sections allow the rapid analysis of hundreds of molecular
	markers in the same set of specimens. Our detection of six gene amplifications
	as well as p53 and estrogen receptor expression in breast cancer
	demonstrates the power of this technique for defining new subgroups
	of tumors.},
  institution = {Laboratory of Cancer Genetics, National Human Genome Research Institute,
	National Institutes of Health, Bethesda, MD 20892-4470, USA.},
  keywords = {Animals; Breast Neoplasms, genetics/metabolism/pathology; Cyclin D1,
	genetics/metabolism; Female; Genetic Techniques; Humans; Immunoenzyme
	Techniques; In Situ Hybridization, Fluorescence; Mice; Oncogene Proteins
	v-myb; Proto-Oncogene Proteins c-myc, genetics/metabolism; Rabbits;
	Receptor, erbB-2, genetics/metabolism; Receptors, Estrogen, genetics/metabolism;
	Retroviridae Proteins, Oncogenic, genetics/metabolism; Tumor Markers,
	Biological, genetics/metabolism; Tumor Suppressor Protein p53, genetics/metabolism},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {9662379},
  timestamp = {2010.08.08}
}

@article{Korbel2009PEMer,
  author = {Korbel, J. and Abyzov, A. and Mu, X. and Carriero, N. and Cayting,
	P. and Zhang, Z. and Snyder, M. and Gerstein, M.},
  title = {{PEMer}: a computational framework with simulation-based error models
	for inferring genomic structural variants from massive paired-end
	sequencing data.},
  journal = {Genome Biol.},
  year = {2009},
  volume = {10},
  pages = {R23},
  number = {2},
  month = {Feb},
  abstract = {ABSTRACT: Personal-genomics endeavors, such as the 1000 Genomes project,
	are generating maps of genomic structural variants by analyzing ends
	of massively sequenced genome fragments. To process these we developed
	Paired-End Mapper (PEMer; http://sv.gersteinlab.org/pemer). This
	comprises an analysis pipeline, compatible with several next-generation
	sequencing platforms; simulation-based error models, yielding confidence-values
	for each structural variant; and a back-end database. The simulations
	demonstrated high structural variant reconstruction efficiency for
	PEMer's coverage-adjusted multi-cutoff scoring-strategy and showed
	its relative insensitivity to base-calling errors.},
  doi = {10.1186/gb-2009-10-2-r23},
  pdf = {../local/Korbel2009PEMer.pdf},
  file = {Korbel2009PEMer.pdf:Korbel2009PEMer.pdf:PDF},
  institution = {Gene Expression Unit, European Molecular Biology Laboratory (EMBL),
	Meyerhofstr., Heidelberg, 69117, Germany. korbel@embl.de.},
  keywords = {ngs},
  owner = {jp},
  pii = {gb-2009-10-2-r23},
  pmid = {19236709},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1186/gb-2009-10-2-r23}
}

@article{Korbel2007Paired-end,
  author      = {Jan O Korbel and Alexander Eckehart Urban and Jason P Affourtit and
    Brian Godwin and Fabian Grubert and Jan Fredrik Simons and Philip
    M Kim and Dean Palejev and Nicholas J Carriero and Lei Du and Bruce
    E Taillon and Zhoutao Chen and Andrea Tanzer and A. C Eugenia Saunders
    and Jianxiang Chi and Fengtang Yang and Nigel P Carter and Matthew
    E Hurles and Sherman M Weissman and Timothy T Harkins and Mark B
    Gerstein and Michael Egholm and Michael Snyder},
  title       = {Paired-end mapping reveals extensive structural variation in the
    human genome.},
  journal     = {Science},
  year        = {2007},
  volume      = {318},
  number      = {5849},
  pages       = {420--426},
  month       = {Oct},
  doi         = {10.1126/science.1149504},
  url         = {http://dx.doi.org/10.1126/science.1149504},
  pmid        = {17901297},
  pii         = {1149504},
  abstract    = {Structural variation of the genome involves kilobase- to megabase-sized
    deletions, duplications, insertions, inversions, and complex combinations
    of rearrangements. We introduce high-throughput and massive paired-end
    mapping (PEM), a large-scale genome-sequencing method to identify
    structural variants (SVs) approximately 3 kilobases (kb) or larger
    that combines the rescue and capture of paired ends of 3-kb fragments,
    massive 454 sequencing, and a computational approach to map DNA reads
    onto a reference genome. PEM was used to map SVs in an African and
    in a putatively European individual and identified shared and divergent
    SVs relative to the reference genome. Overall, we fine-mapped more
    than 1000 SVs and documented that the number of SVs among humans
    is much larger than initially hypothesized; many of the SVs potentially
    affect gene function. The breakpoint junction sequences of more than
    200 SVs were determined with a novel pooling strategy and computational
    analysis. Our analysis provided insights into the mechanisms of SV
    formation in humans.},
  institution = {Molecular Biophysics and Biochemistry Department, Yale University,
    New Haven, CT 06520, USA.},
  keywords    = {ngs},
  pdf         = {../local/Korbel2007Paired-end.pdf},
  file        = {Korbel2007Paired-end.pdf:Korbel2007Paired-end.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.09}
}

@article{Korber2006Immunoinformatics,
  author    = {Bette Korber and Montiago LaBute and Karina Yusim},
  title     = {Immunoinformatics comes of age.},
  journal   = {PLoS Comput. Biol.},
  year      = {2006},
  volume    = {2},
  number    = {6},
  pages     = {e71},
  month     = {Jun},
  doi       = {10.1371/journal.pcbi.0020071},
  url       = {http://dx.doi.org/10.1371/journal.pcbi.0020071},
  pmid      = {16846250},
  pii       = {06-PLCB-RV-0068},
  abstract  = {With the burgeoning immunological data in the scientific literature,
    scientists must increasingly rely on Internet resources to inform
    and enhance their work. Here we provide a brief overview of the adaptive
    immune response and summaries of immunoinformatics resources, emphasizing
    those with Web interfaces. These resources include searchable databases
    of epitopes and immune-related molecules, and analysis tools for
    T cell and B cell epitope prediction, vaccine design, and protein
    structure comparisons. There is an agreeable synergy between the
    growing collections in immune-related databases and the growing sophistication
    of analysis software; the databases provide the foundation for developing
    predictive computational tools, which in turn enable more rapid identification
    of immune responses to populate the databases. Collectively, these
    resources contribute to improved understanding of immune responses
    and escape, and evolution of pathogens under immune pressure. The
    public health implications are vast, including designing vaccines,
    understanding autoimmune diseases, and defining the correlates of
    immune protection.},
  keywords  = {Amino Acid Sequence; Animals; Computational Biology; Databases, Factual;
    Epitopes, B-Lymphocyte; Epitopes, T-Lymphocyte; Humans; Immunity},
  owner     = {laurent},
  timestamp = {2007.08.23}
}

@article{Koren2007Autocorrelation,
  author = {Amnon Koren and Itay Tirosh and Naama Barkai},
  title = {Autocorrelation analysis reveals widespread spatial biases in microarray
	experiments.},
  journal = {BMC Genomics},
  year = {2007},
  volume = {8},
  pages = {164},
  abstract = {BACKGROUND: DNA microarrays provide the ability to interrogate multiple
	genes in a single experiment and have revolutionized genomic research.
	However, the microarray technology suffers from various forms of
	biases and relatively low reproducibility. A particular source of
	false data has been described, in which non-random placement of gene
	probes on the microarray surface is associated with spurious correlations
	between genes. RESULTS: In order to assess the prevalence of this
	effect and better understand its origins, we applied an autocorrelation
	analysis of the relationship between chromosomal position and expression
	level to a database of over 2000 individual yeast microarray experiments.
	We show that at least 60\% of these experiments exhibit spurious
	chromosomal position-dependent gene correlations, which nonetheless
	appear in a stochastic manner within each experimental dataset. Using
	computer simulations, we show that large spatial biases caused in
	the microarray hybridization step and independently of printing procedures
	can exclusively account for the observed spurious correlations, in
	contrast to previous suggestions. Our data suggest that such biases
	may generate more than 15\% false data per experiment. Importantly,
	spatial biases are expected to occur regardless of microarray design
	and over a wide range of microarray platforms, organisms and experimental
	procedures. CONCLUSIONS: Spatial biases comprise a major source of
	noise in microarray studies; revision of routine experimental practices
	and normalizations to account for these biases may significantly
	and comprehensively improve the quality of new as well as existing
	DNA microarray data.},
  doi = {10.1186/1471-2164-8-164},
  keywords = {DNA Probes; Diagnostic Errors; Oligonucleotide Array Sequence Analysis,
	methods/standards; Reproducibility of Results; Research, methods/standards;
	Yeasts},
  language = {eng},
  medline-pst = {epublish},
  owner = {philippe},
  pii = {1471-2164-8-164},
  pmid = {17565680},
  timestamp = {2010.08.04},
  url = {http://dx.doi.org/10.1186/1471-2164-8-164}
}

@article{Korn2007Cell-based,
  author      = {Korn, K. and Krausz, E.},
  title       = {Cell-based high-content screening of small-molecule libraries.},
  journal     = {Curr. Opin. Chem. Biol.},
  year        = {2007},
  volume      = {11},
  number      = {5},
  pages       = {503--510},
  month       = {Oct},
  doi         = {10.1016/j.cbpa.2007.08.030},
  url         = {http://dx.doi.org/10.1016/j.cbpa.2007.08.030},
  pmid        = {17931958},
  pii         = {S1367-5931(07)00114-7},
  abstract    = {Advanced microscopy and the corresponding image analysis have been
    developed in recent years into a powerful tool for studying molecular
    and morphological events in cells and tissues. Cell-based high-content
    screening (HCS) is an upcoming methodology for the investigation
    of cellular processes and their alteration by multiple chemical or
    genetic perturbations. Multiparametric characterization of responses
    to such changes can be analyzed using intact live cells as reporter.
    These disturbances are screened for effects on a variety of molecular
    and cellular targets, including subcellular localization and redistribution
    of proteins. In contrast to biochemical screening, they detect the
    responses within the context of the intercellular structural and
    functional networks of normal and diseased cells, respectively. As
    cell-based HCS of small-molecule libraries is applied to identify
    and characterize new therapeutic lead compounds, large pharmaceutical
    companies are major drivers of the technology and have already shown
    image-based screens using more than 100,000 compounds.},
  institution = {HT-Technology Development Studio (TDS), Max Planck Institute of Molecular
    Cell Biology and Genetics (MPI-CBG), Pfotenhauerstrasse 108, D-01307
    Dresden, Germany.},
  owner       = {jp},
  timestamp   = {2008.12.09}
}

@article{Kosorok2007Marginal,
  author = {Kosorok, M. R. and Ma, S.},
  title = {Marginal asymptotics for the ``large p, small n'' paradigm: With applications
	to microarray data},
  journal = {Ann. Stat.},
  year = {2007},
  volume = {35},
  pages = {1456--1486},
  number = {4},
  abstract = {The ``large p, small n'' paradigm arises in microarray studies, image
	analysis, high throughput molecular screening, astronomy, and in
	many other high dimensional applications. False discovery rate (FDR)
	methods are useful for resolving the accompanying multiple testing
	problems. In cDNA microarray studies, for example, p-values may be
	computed for each of p genes using data from n arrays, where typically
	p is in the thousands and n is less than 30. For FDR methods to be
	valid in identifying differentially expressed genes, the p-values
	for the nondifferentially expressed genes must simultaneously have
	uniform distributions marginally. While feasible for permutation
	p-values, this uniformity is problematic for asymptotic based p-values
	since the number of p-values involved goes to infinity and intuition
	suggests that at least some of the p-values should behave erratically.
	We examine this neglected issue when n is moderately large but p
	is almost exponentially large relative to n. We show the somewhat
	surprising result that, under very general dependence structures
	and for both mean and median tests, the p-values are simultaneously
	valid. A small simulation study and data analysis are used for illustration.},
  doi = {10.1214/009053606000001433},
  owner = {jp},
  timestamp = {2010.01.10},
  url = {http://dx.doi.org/10.1214/009053606000001433}
}

@article{Kote-Jarai2004Gene,
  author = {Zsofia Kote-Jarai and Richard D Williams and Nicola Cattini and Maria
	Copeland and Ian Giddings and Richard Wooster and Robert H tePoele
	and Paul Workman and Barry Gusterson and John Peacock and Gerald
	Gui and Colin Campbell and Ros Eeles},
  title = {Gene expression profiling after radiation-induced {DNA} damage is
	strongly predictive of {BRCA}1 mutation carrier status.},
  journal = {Clin. {C}ancer {R}es.},
  year = {2004},
  volume = {10},
  pages = {958--963},
  number = {3},
  month = {Feb},
  abstract = {P{URPOSE}: {T}he impact of the presence of a germ-line {BRCA}1 mutation
	on gene expression in normal breast fibroblasts after radiation-induced
	{DNA} damage has been investigated. {EXPERIMENTAL} {DESIGN}: {H}igh-density
	c{DNA} microarray technology was used to identify differential responses
	to {DNA} damage in fibroblasts from nine heterozygous {BRCA}1 mutation
	carriers compared with five control samples without personal or family
	history of any cancer. {F}ibroblast cultures were irradiated, and
	their expression profile was compared using intensity ratios of the
	c{DNA} microarrays representing 5603 {IMAGE} clones. {RESULTS}: {C}lass
	comparison and class prediction analysis has shown that {BRCA}1 mutation
	carriers can be distinguished from controls with high probability
	(approximately 85\%). {S}ignificance analysis of microarrays and
	the support vector machine classifier identified gene sets that discriminate
	the samples according to their mutation status. {T}hese include genes
	already known to interact with {BRCA}1 such as {CDKN}1{B}, {ATR},
	and {RAD}51. {CONCLUSIONS}: {T}he results of this initial study suggest
	that normal cells from heterozygous {BRCA}1 mutation carriers display
	a different gene expression profile from controls in response to
	{DNA} damage. {A}daptations of this pilot result to other cell types
	could result in the development of a functional assay for {BRCA}1
	mutation status.},
  pdf = {../local/Kote-Jarai2004Gene.pdf},
  file = {Kote-Jarai2004Gene.pdf:local/Kote-Jarai2004Gene.pdf:PDF},
  keywords = {biosvm, breastcancer},
  url = {http://clincancerres.aacrjournals.org/cgi/content/abstract/10/3/958}
}

@article{Kou2002Karyotyping,
  author = {Zhenzhen Kou and Liang Ji and Xuegong Zhang},
  title = {Karyotyping of comparative genomic hybridization human metaphases
	by using support vector machines.},
  journal = {Cytometry},
  year = {2002},
  volume = {47},
  pages = {17--23},
  number = {1},
  month = {Jan},
  abstract = {B{ACKGROUND}: {C}omparative genomic hybridization ({CGH}) is a relatively
	new molecular cytogenetic method for detecting chromosomal imbalance.
	{K}aryotyping of human metaphases is an important step to assign
	each chromosome to one of 23 or 24 classes (22 autosomes and two
	sex chromosomes). {A}utomatic karyotyping in {CGH} analysis is needed.
	{H}owever, conventional karyotyping approaches based on {DAPI} images
	require complex image enhancement procedures. {METHODS}: {T}his paper
	proposes a simple feature extraction method, one that generates density
	profiles from original true color {CGH} images and uses normalized
	profiles as feature vectors without quantization. {A} classifier
	is developed by using support vector machine ({SVM}). {I}t has good
	generalization ability and needs only limited training samples. {RESULTS}:
	{E}xperiment results show that the feature extraction method of using
	color information in {CGH} images can improve greatly the classification
	success rate. {T}he {SVM} classifier is able to acquire knowledge
	about human chromosomes from relatively few samples and has good
	generalization ability. {A} success rate of more than 90\% has been
	achieved and the time for training and testing is very short. {CONCLUSIONS}:
	{T}he feature extraction method proposed here and the {SVM}-based
	classifier offer a promising computerized intelligent system for
	automatic karyotyping of {CGH} human chromosomes.},
  doi = {10.1002/cyto.10027},
  pdf = {../local/Kou2002Karyotyping.pdf},
  file = {Kou2002Karyotyping.pdf:local/Kou2002Karyotyping.pdf:PDF},
  keywords = {cgh},
  pii = {10.1002/cyto.10027},
  url = {http://dx.doi.org/10.1002/cyto.10027}
}

@article{Kovatcheva2004Combinatorial,
  author = {Assia Kovatcheva and Alexander Golbraikh and Scott Oloff and Yun-De
	Xiao and Weifan Zheng and Peter Wolschann and Gerhard Buchbauer and
	Alexander Tropsha},
  title = {Combinatorial {QSAR} of ambergris fragrance compounds.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {582--595},
  number = {2},
  abstract = {A combinatorial quantitative structure-activity relationships ({C}ombi-{QSAR})
	approach has been developed and applied to a data set of 98 ambergris
	fragrance compounds with complex stereochemistry. {T}he {C}ombi-{QSAR}
	approach explores all possible combinations of different independent
	descriptor collections and various individual correlation methods
	to obtain statistically significant models with high internal (for
	the training set) and external (for the test set) accuracy. {S}even
	different descriptor collections were generated with commercially
	available {MOE}, {C}o{MFA}, {C}o{MMA}, {D}ragon, {V}ol{S}urf, and
	{M}olconn{Z} programs; we also included chirality topological descriptors
	recently developed in our laboratory ({G}olbraikh, {A}.; {B}onchev,
	{D}.; {T}ropsha, {A}. {J}. {C}hem. {I}nf. {C}omput. {S}ci. 2001,
	41, 147-158). {C}o{MMA} descriptors were used in combination with
	{MOE} descriptors. {M}olconn{Z} descriptors were used in combination
	with chirality descriptors. {E}ach descriptor collection was combined
	individually with four correlation methods, including k-nearest neighbors
	(k{NN}) classification, {S}upport {V}ector {M}achines ({SVM}), decision
	trees, and binary {QSAR}, giving rise to 28 different types of {QSAR}
	models. {M}ultiple diverse and representative training and test sets
	were generated by the divisions of the original data set in two.
	{E}ach model with high values of leave-one-out cross-validated correct
	classification rate for the training set was subjected to extensive
	internal and external validation to avoid overfitting and achieve
	reliable predictive power. {T}wo validation techniques were employed,
	i.e., the randomization of the target property (in this case, odor
	intensity) also known as the {Y}-randomization test and the assessment
	of external prediction accuracy using test sets. {W}e demonstrate
	that not every combination of the data modeling technique and the
	descriptor collection yields a validated and predictive {QSAR} model.
	k{NN} classification in combination with {C}o{MFA} descriptors was
	found to be the best {QSAR} approach overall since predictive models
	with correct classification rates for both training and test sets
	of 0.7 and higher were obtained for all divisions of the ambergris
	data set into the training and test sets. {M}any predictive {QSAR}
	models were also found using a combination of k{NN} classification
	method with other collections of descriptors. {T}he combinatorial
	{QSAR} affords automation, computational efficiency, and higher probability
	of identifying significant {QSAR} models for experimental data sets
	than the traditional approaches that rely on a single {QSAR} method.},
  doi = {10.1021/ci034203t},
  pdf = {../local/Kovatcheva2004Combinatorial.pdf},
  file = {Kovatcheva2004Combinatorial.pdf:local/Kovatcheva2004Combinatorial.pdf:PDF},
  keywords = {Algorithms, Ambergris, Combinatorial Chemistry Techniques, Models,
	Molecular, Molecular Conformation, Odors, P.H.S., Perfume, Predictive
	Value of Tests, Quantitative Structure-Activity Relationship, Research
	Support, U.S. Gov't},
  pmid = {15032539},
  url = {http://dx.doi.org/10.1021/ci034203t}
}

@article{Koyutuerk2006Pairwise,
  author      = {Koyut{\"u}rk, M. and Kim, Y. and Topkara, U. and Subramaniam, S.
    and Szpankowski, W. and Grama, A.},
  title       = {Pairwise alignment of protein interaction networks},
  journal     = {J. Comput. Biol.},
  year        = {2006},
  volume      = {13},
  number      = {2},
  pages       = {182--199},
  month       = {Mar},
  doi         = {10.1089/cmb.2006.13.182},
  url         = {http://dx.doi.org/10.1089/cmb.2006.13.182},
  pmid        = {16597234},
  abstract    = {With an ever-increasing amount of available data on protein-protein
    interaction (PPI) networks and research revealing that these networks
    evolve at a modular level, discovery of conserved patterns in these
    networks becomes an important problem. Although available data on
    protein-protein interactions is currently limited, recently developed
    algorithms have been shown to convey novel biological insights through
    employment of elegant mathematical models. The main challenge in
    aligning PPI networks is to define a graph theoretical measure of
    similarity between graph structures that captures underlying biological
    phenomena accurately. In this respect, modeling of conservation and
    divergence of interactions, as well as the interpretation of resulting
    alignments, are important design parameters. In this paper, we develop
    a framework for comprehensive alignment of PPI networks, which is
    inspired by duplication/divergence models that focus on understanding
    the evolution of protein interactions. We propose a mathematical
    model that extends the concepts of match, mismatch, and gap in sequence
    alignment to that of match, mismatch, and duplication in network
    alignment and evaluates similarity between graph structures through
    a scoring function that accounts for evolutionary events. By relying
    on evolutionary models, the proposed framework facilitates interpretation
    of resulting alignments in terms of not only conservation but also
    divergence of modularity in PPI networks. Furthermore, as in the
    case of sequence alignment, our model allows flexibility in adjusting
    parameters to quantify underlying evolutionary relationships. Based
    on the proposed model, we formulate PPI network alignment as an optimization
    problem and present fast algorithms to solve this problem. Detailed
    experimental results from an implementation of the proposed framework
    show that our algorithm is able to discover conserved interaction
    patterns very effectively, in terms of both accuracies and computational
    cost.},
  institution = {Department of Computer Sciences, Purdue University, West Lafayette,
    IN 47907, USA. koyuturk@cs.purdue.edu},
  owner       = {jp},
  timestamp   = {2008.10.03}
}

@inproceedings{Kramer2001Feature,
  author = {Kramer, S. and De Raedt, L.},
  title = {Feature {C}onstruction with {V}ersion {S}paces for {B}iochemical
	{A}pplications},
  booktitle = {Proceedings of the {E}ighteenth {I}nternational {C}onference on {M}achine
	{L}earning},
  year = {2001},
  editor = {Brodley, C. E. and Pohoreckyj Danyluk, A.},
  pages = {258--265},
  publisher = {Morgan Kaufmann},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@article{Kramer2002Fragment,
  author = {S. Kramer and E. Frank and C. Helma},
  title = {Fragment generation and support vector machines for inducing {SAR}s.},
  journal = {S{AR} {QSAR} {E}nviron {R}es},
  year = {2002},
  volume = {13},
  pages = {509--523},
  number = {5},
  month = {Jul},
  abstract = {We present a new approach to the induction of {SAR}s based on the
	generation of structural fragments and support vector machines ({SVM}s).
	{I}t is tailored for bio-chemical databases, where the examples are
	two-dimensional descriptions of chemical compounds. {T}he fragment
	generator finds all fragments (i.e. linearly connected atoms) that
	satisfy user-specified constraints regarding their frequency and
	generality. {I}n this paper, we are querying for fragments within
	a minimum and a maximum frequency in the dataset. {A}fter fragment
	generation, we propose to apply {SVM}s to the problem of inducing
	{SAR}s from these fragments. {W}e conjecture that the {SVM}s are
	particularly useful in this context, as they can deal with a large
	number of features. {E}xperiments in the domains of carcinogenicity
	and mutagenicity prediction show that the minimum and the maximum
	frequency queries for fragments can be answered within a reasonable
	time, and that the predictive accuracy obtained using these fragments
	is satisfactory. {H}owever, further experiments will have to confirm
	that this is a viable approach to inducing {SAR}s.},
  doi = {10.1080/10629360290023340},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1080/10629360290023340}
}

@article{Kratochwil2005automated,
  author    = {Kratochwil, N. A. and Malherbe, P. and Lindemann, L. and Ebeling,
    M. and Hoener, M. C. and M{\"u}hlemann, A. and Porter, R. H. P. and
    Stahl, M. and Gerber, P. R.},
  title     = {An automated system for the analysis of {G} protein-coupled receptor
    transmembrane binding pockets: alignment, receptor-based pharmacophores,
    and their application.},
  journal   = {J. Chem. Inf. Model.},
  year      = {2005},
  volume    = {45},
  number    = {5},
  pages     = {1324--1336},
  doi       = {10.1021/ci050221u},
  url       = {http://dx.doi.org/10.1021/ci050221u},
  pmid      = {16180909},
  abstract  = {G protein-coupled receptors (GPCRs) share a common architecture consisting
    of seven transmembrane (TM) domains. Various lines of evidence suggest
    that this fold provides a generic binding pocket within the TM region
    for hosting agonists, antagonists, and allosteric modulators. Here,
    a comprehensive and automated method allowing fast analysis and comparison
    of these putative binding pockets across the entire GPCR family is
    presented. The method relies on a robust alignment algorithm based
    on conservation indices, focusing on pharmacophore-like relationships
    between amino acids. Analysis of conservation patterns across the
    GPCR family and alignment to the rhodopsin X-ray structure allows
    the extraction of the amino acids lining the TM binding pocket in
    a so-called ligand binding pocket vector (LPV). In a second step,
    LPVs are translated to simple 3D receptor pharmacophore models, where
    each amino acid is represented by a single spherical pharmacophore
    feature and all atomic detail is omitted. Applications of the method
    include the assessment of selectivity issues, support of mutagenesis
    studies, and the derivation of rules for focused screening to identify
    chemical starting points in early drug discovery projects. Because
    of the coarseness of this 3D receptor pharmacophore model, however,
    meaningful scoring and ranking procedures of large sets of molecules
    are not justified. The LPV analysis of the trace amine-associated
    receptor family and its experimental validation is discussed as an
    example. The value of the 3D receptor model is demonstrated for a
    class C GPCR family, the metabotropic glutamate receptors.},
  keywords  = {chemogenomics},
  pdf       = {../local/Kratochwil2005automated.pdf},
  file      = {Kratochwil2005automated.pdf:Kratochwil2005automated.pdf:PDF},
  owner     = {laurent},
  timestamp = {2007.09.22}
}

@article{Krichevskiy1998Laplace's,
  author = {Krichevskiy, R. E.},
  title = {Laplace's law of succession and universal encoding},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1998},
  volume = {44},
  pages = {296--303},
  number = {1},
  month = {Jan},
  pdf = {../local/Krichevskiy1998Laplace's.pdf},
  file = {Krichevskiy1998Laplace's.pdf:local/Krichevskiy1998Laplace's.pdf:PDF},
  keywords = {information-theory source-coding},
  owner = {vert}
}

@article{Krichevsky1981performance,
  author   = {Krichevsky, R. and Trofimov, V.},
  title    = {The performance of universal encoding},
  journal  = {I{EEE} {T}rans. {I}nform. {T}heory},
  year     = {1981},
  volume   = {27},
  number   = {2},
  pages    = {199--207},
  month    = {Mar},
  abstract = {Universal coding theory is surveyed from the viewpoint of the interplay
    between delay and redundancy. {T}he price for universality turns
    out to be acceptably small.},
  keywords = {information-theory source-coding},
  pdf      = {../local/Krichevsky1981performance.pdf},
  file     = {Krichevsky1981performance.pdf:local/Krichevsky1981performance.pdf:PDF},
  owner    = {vert}
}

@article{Krishnan2003comparative,
  author = {Krishnan, V. G. and Westhead, D. R.},
  title = {A comparative study of machine-learning methods to predict the effects
	of single nucleotide polymorphisms on protein function},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {2199--2209},
  number = {17},
  abstract = {Motivation: {T}he large volume of single nucleotide polymorphism data
	now available motivates the development of methods for distinguishing
	neutral changes from those which have real biological effects. {H}ere,
	two different machine-learning methods, decision trees and support
	vector machines ({SVM}s), are applied for the first time to this
	problem. {I}n common with most other methods, only non-synonymous
	changes in protein coding regions of the genome are considered. {R}esults:
	{I}n detailed cross-validation analysis, both learning methods are
	shown to compete well with existing methods, and to out-perform them
	in some key tests. {SVM}s show better generalization performance,
	but decision trees have the advantage of generating interpretable
	rules with robust estimates of prediction confidence. {I}t is shown
	that the inclusion of protein structure information produces more
	accurate methods, in agreement with other recent studies, and the
	effect of using predicted rather than actual structure is evaluated.
	{A}vailability: {S}oftware is available on request from the authors.},
  pdf = {../local/Krishnan2003comparative.pdf},
  file = {Krishnan2003comparative.pdf:local/Krishnan2003comparative.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/17/2199}
}

@incollection{Krishnapuram2004Gene,
  author = {Krishnapuram, B. and Carin, L. and Hartemink, A.},
  title = {Gene expression analysis: joint feature selection and classifier
	design},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J. P.},
  pages = {299--317},
  pdf = {../local/heterogeneous.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/heterogeneous.pdf:PDF;heterogeneous.pdf:http\},
  file = {heterogeneous.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/heterogeneous.pdf:PDF;heterogeneous.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/heterogeneous.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@article{Krishnapuram2004Joint,
  author = {Krishnapuram, B. and Carin, L. and Hartemink, A.},
  title = {Joint {C}lassifier and {F}eature {O}ptimization for {C}omprehensive
	{C}ancer {D}iagnosis {U}sing {G}ene {E}xpression {D}ata},
  journal = {J. {C}omput. {B}iol.},
  year = {2004},
  volume = {11},
  pages = {227--242},
  number = {2-3},
  abstract = {Recent research has demonstrated quite convincingly that accurate cancer
	diagnosis can be achieved by constructing classifiers that are designed
	to compare the gene expression profile of a tissue of unknown cancer
	status to a database of stored expression profiles from tissues of
	known cancer status. {T}his paper introduces the {JCFO}, a novel
	algorithm that uses a sparse {B}ayesian approach to jointly identify
	both the optimal nonlinear classifier for diagnosis and the optimal
	set of genes on which to base that diagnosis. {W}e show that the
	diagnostic classification accuracy of the proposed algorithm is superior
	to a number of current state-of-the-art methods in a full leave-one-out
	cross-validation study of five widely used benchmark datasets. {I}n
	addition to its superior classification accuracy, the algorithm is
	designed to automatically identify a small subset of genes (typically
	around twenty in our experiments) that are capable of providing complete
	discriminatory information for diagnosis. {F}ocusing attention on
	a small subset of genes is useful not only because it produces a
	classifier with good generalization capacity, but also because this
	set of genes may provide insights into the mechanisms responsible
	for the disease itself. {A} number of the genes identified by the
	{JCFO} in our experiments are already in use as clinical markers
	for cancer diagnosis; some of the remaining genes may be excellent
	candidates for further clinical investigation. {I}f it is possible
	to identify a small set of genes that is indeed capable of providing
	complete discrimination, inexpensive diagnostic assays might be widely
	deployable in clinical settings.},
  doi = {10.1089/1066527041410463},
  pdf = {../local/Krishnapuram2004Joint.pdf},
  file = {Krishnapuram2004Joint.pdf:local/Krishnapuram2004Joint.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1089/1066527041410463}
}

@article{Krishnapuram2004bayesian,
  author = {Krishnapuram, B. and Hartemink, A. J. and Carin, L. and Figueiredo,
	M. A. T.},
  title = {A {Bayesian} approach to joint feature selection and classifier design},
  journal = {IEEE T. Pattern. Anal.},
  year = {2004},
  volume = {26},
  pages = {1105--1111},
  number = {9},
  month = {Sep},
  abstract = {This paper adopts a {B}ayesian approach to simultaneously learn both
	an optimal nonlinear classifier and a subset of predictor variables
	(or features) that are most relevant to the classification task.
	{T}he approach uses heavy-tailed priors to promote sparsity in the
	utilization of both basis functions and features; these priors act
	as regularizers for the likelihood function that rewards good classification
	on the training data. {W}e derive an expectation-maximization ({EM})
	algorithm to efficiently compute a maximum a posteriori ({MAP}) point
	estimate of the various parameters. {T}he algorithm is an extension
	of recent state-of-the-art sparse {B}ayesian classifiers, which in
	turn can be seen as {B}ayesian counterparts of support vector machines.
	{E}xperimental comparisons using kernel classifiers demonstrate both
	parsimonious feature selection and excellent classification accuracy
	on a range of synthetic and benchmark data sets.},
  doi = {10.1109/TPAMI.2004.55},
  pdf = {../local/Krishnapuram2004bayesian.pdf},
  file = {Krishnapuram2004bayesian.pdf:local/Krishnapuram2004bayesian.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1109/TPAMI.2004.55}
}

@article{Kristiansen1996database,
  author = {K. Kristiansen and S. G. Dahl and O. Edvardsen},
  title = {A database of mutants and effects of site-directed mutagenesis experiments
	on {G} protein-coupled receptors.},
  journal = {Proteins},
  year = {1996},
  volume = {26},
  pages = {81--94},
  number = {1},
  month = {Sep},
  abstract = {A database system and computer programs for storage and retrieval
	of information about guanine nucleotide-binding protein (G protein)-coupled
	receptor mutants and associated biological effects have
	been developed. Mutation data on the receptors were collected from
	the literature and a database of mutants and effects of mutations
	was developed. The G protein-coupled receptor, family A, point mutation
	database (GRAP) provides detailed information on ligand-binding and
	signal transduction properties of more than 2130 receptor mutants.
	The amino acid sequences of receptors for which mutation experiments
	have been reported were aligned, and from this alignment mutation
	data may be retrieved. Alternatively, a search form allowing detailed
	specification of which mutants to retrieve may be used, for example,
	to search for specific amino acid substitutions, substitutions in
	specific protein domains or reported biological effects. Furthermore,
	ligand and bibliographic oriented queries may be performed. GRAP
	is available on the Internet (URL: http://www-grap.fagmed.uit.no/GRAP/homepage.html)
	using the World-Wide Web system.},
  doi = {10.1002/(SICI)1097-0134(199609)26:1<81::AID-PROT8>3.0.CO;2-J},
  keywords = {Amino Acid Sequence; Computer Communication Networks; Computers; GTP-Binding
	Proteins; Information Systems; Molecular Sequence Data; Mutagenesis,
	Site-Directed; Mutation; Receptors, Cell Surface; Sequence Alignment},
  owner = {laurent},
  pii = {3.0.CO;2-J},
  pmid = {8880932},
  timestamp = {2008.01.16},
  url = {https://pubmed.ncbi.nlm.nih.gov/8880932/}
}

@article{Kroemer2007Structure,
  author      = {Kroemer, Romano T},
  title       = {Structure-based drug design: docking and scoring.},
  journal     = {Curr. Protein Pept. Sci.},
  volume      = {8},
  number      = {4},
  pages       = {312--328},
  month       = {Aug},
  year        = {2007},
  abstract    = {This review gives an introduction into ligand - receptor docking and
	illustrates the basic underlying concepts. An overview of different
	approaches and algorithms is provided. Although the application of
	docking and scoring has led to some remarkable successes, there are
	still some major challenges ahead, which are outlined here as well.
	Approaches to address some of these challenges and the latest developments
	in the area are presented. Some aspects of the assessment of docking
	program performance are discussed. A number of successful applications
	of structure-based virtual screening are described.},
  institution = {ciences, Department of Chemistry, Nerviano Medical Sciences, Viale
	Pasteur 10, 20014 Nerviano (MI), Italy. romano.kroemer@sanofi-aventis.com},
  keywords    = {Algorithms; Artificial Intelligence; Computational Biology; Computer
	Simulation; Computer-Aided Design; Drug Design; Imaging, Three-Dimensional;
	Ligands; Models, Molecular; Protein Binding; Protein Conformation;
	Software; Structure-Activity Relationship},
  pmid        = {17696866},
  owner       = {bricehoffmann},
  timestamp   = {2009.02.13}
}

@article{Krogh1994Hidden,
  author = {Krogh, A. and Brown, M. and Mian, I. and Sj{\"o}lander, K. and Haussler,
	D.},
  title = {Hidden {M}arkov models in computational biology: {A}pplications to
	protein modeling},
  journal = {J. Mol. Biol.},
  year = {1994},
  volume = {235},
  pages = {1501--1531},
  number = {5}
}

@article{TransPath2006,
  author = {Krull, M. and Pistor, S. and Voss, N. and Kel, A. and Reuter, I.
	and Kronenberg, D. and Michael, H. and Schwarzer, K. and Potapov,
	A. and Choi, C. and Kel-Margoulis, O. and Wingender, E.},
  title = {{TRANSPATH}: an information resource for storing and visualizing
	signaling pathways and their pathological aberrations},
  journal = {Nucleic {A}cids {R}es},
  year = {2006},
  volume = {34},
  pages = {D546--D551},
  number = {Database issue},
  abstract = {{TRANSPATH} is a database about signal transduction events. {I}t provides
	information about signaling molecules, their reactions and the pathways
	these reactions constitute. {T}he representation of signaling molecules
	is organized in a number of orthogonal hierarchies reflecting the
	classification of the molecules, their species-specific or generic
	features, and their post-translational modifications. {R}eactions
	are similarly hierarchically organized in a three-layer architecture,
	differentiating between reactions that are evidenced by individual
	publications, generalizations of these reactions to construct species-independent
	'reference pathways' and the 'semantic projections' of these pathways.
	{A} number of search and browse options allow easy access to the
	database contents, which can be visualized with the tool {P}athway{B}uilder(TM).
	{T}he module {P}atho{S}ign adds data about pathologically relevant
	mutations in signaling components, including their genotypes and
	phenotypes. {TRANSPATH} and {P}atho{S}ign can be used as encyclopaedia,
	in the educational process, for visualization and modeling of signal
	transduction networks and for the analysis of gene expression data.
	{TRANSPATH} {P}ublic 6.0 is freely accessible for users from non-profit
	organizations under http://www.gene-regulation.com/pub/databases.html.}
}

@article{Kuang2005Profile-based,
  author    = {Kuang, R. and Ie, E. and Wang, K. and Wang, K. and Siddiqi, M. and
	Freund, Y. and Leslie, C.},
  title     = {Profile-based string kernels for remote homology detection and motif
	extraction.},
  journal   = {J. Bioinform. Comput. Biol.},
  volume    = {3},
  number    = {3},
  pages     = {527--550},
  month     = {Jun},
  year      = {2005},
  abstract  = {We introduce novel profile-based string kernels for use with support
	vector machines (SVMs) for the problems of protein classification
	and remote homology detection. These kernels use probabilistic profiles,
	such as those produced by the PSI-BLAST algorithm, to define position-dependent
	mutation neighborhoods along protein sequences for inexact matching
	of k-length subsequences ("k-mers") in the data. By use of an efficient
	data structure, the kernels are fast to compute once the profiles
	have been obtained. For example, the time needed to run PSI-BLAST
	in order to build the profiles is significantly longer than both
	the kernel computation time and the SVM training time. We present
	remote homology detection experiments based on the SCOP database
	where we show that profile-based string kernels used with SVM classifiers
	strongly outperform all recently presented supervised SVM methods.
	We further examine how to incorporate predicted secondary structure
	information into the profile kernel to obtain a small but significant
	performance improvement. We also show how we can use the learned
	SVM classifier to extract "discriminative sequence motifs"--short
	regions of the original profile that contribute almost all the weight
	of the SVM classification score--and show that these discriminative
	motifs correspond to meaningful structural features in the protein
	data. The use of PSI-BLAST profiles can be seen as a semi-supervised
	learning technique, since PSI-BLAST leverages unlabeled data from
	a large sequence database to build more informative profiles. Recently
	presented "cluster kernels" give general semi-supervised methods
	for improving SVM protein classification performance. We show that
	our profile kernel results also outperform cluster kernels while
	providing much better scalability to large datasets.},
  keywords  = {biosvm},
  pii       = {S021972000500120X},
  pmid      = {16108083},
  owner     = {vert},
  timestamp = {2007.08.01}
}

@article{Kuang2004Profile-based,
  author = {Kuang, R. and Ie, E. and Wang, K. and Wang, K. and Siddiqi, M. and
	Freund, Y. and Leslie, C.},
  title = {Profile-based string kernels for remote homology detection and motif
	extraction.},
  journal = {Proc IEEE Comput Syst Bioinform Conf},
  year = {2004},
  pages = {152--160},
  abstract = {We introduce novel profile-based string kernels for use with support
	vector machines (SVMs) for the problems of protein classification
	and remote homology detection. These kernels use probabilistic profiles,
	such as those produced by the PSI-BLAST algorithm, to define position-dependent
	mutation neighborhoods along protein sequences for inexact matching
	of k-length subsequences ("k-mers") in the data. By use of an efficient
	data structure, the kernels are fast to compute once the profiles
	have been obtained. For example, the time needed to run PSI-BLAST
	in order to build the profiles is significantly longer than both
	the kernel computation time and the SVM training time. We present
	remote homology detection experiments based on the SCOP database
	where we show that profile-based string kernels used with SVM classifiers
	strongly outperform all recently presented supervised SVM methods.
	We also show how we can use the learned SVM classifier to extract
	"discriminative sequence motifs" -- short regions of the original
	profile that contribute almost all the weight of the SVM classification
	score -- and show that these discriminative motifs correspond to
	meaningful structural features in the protein data. The use of PSI-BLAST
	profiles can be seen as a semi-supervised learning technique, since
	PSI-BLAST leverages unlabeled data from a large sequence database
	to build more informative profiles. Recently presented "cluster kernels"
	give general semi-supervised methods for improving SVM protein classification
	performance. We show that our profile kernel results are comparable
	to cluster kernels while providing much better scalability to large
	datasets.},
  keywords = {biosvm},
  owner = {vert},
  pmid = {16448009},
  timestamp = {2007.08.01}
}

@article{Kuang2004Protein,
  author = {Kuang, R. and Leslie, C. S. and Yang, A.-S.},
  title = {Protein backbone angle prediction with machine learning approaches},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1612--1621},
  number = {10},
  abstract = {Motivation: {P}rotein backbone torsion angle prediction provides useful
	local structural information that goes beyond conventional three-state
	({alpha}, {beta} and coil) secondary structure predictions. {A}ccurate
	prediction of protein backbone torsion angles will substantially
	improve modeling procedures for local structures of protein sequence
	segments, especially in modeling loop conformations that do not form
	regular structures as in {alpha}-helices or {beta}-strands. {R}esults:
	{W}e have devised two novel automated methods in protein backbone
	conformational state prediction: one method is based on support vector
	machines ({SVM}s); the other method combines a standard feed-forward
	back-propagation artificial neural network ({NN}) with a local structure-based
	sequence profile database ({LSBSP}1). {E}xtensive benchmark experiments
	demonstrate that both methods have improved the prediction accuracy
	rate over the previously published methods for conformation state
	prediction when using an alphabet of three or four states. {A}vailability:
	{LSBSP}1 and the {NN} algorithm have been implemented in {P}r{ISM}.1,
	which is available from www.columbia.edu/~ay1/. {S}upplementary information:
	{S}upplementary data for the {SVM} method can be downloaded from
	the {W}ebsite www.cs.columbia.edu/compbio/backbone.},
  pdf = {../local/Kuang2004Protein.pdf},
  file = {Kuang2004Protein.pdf:local/Kuang2004Protein.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/10/1612}
}

@incollection{Kubinyi2003Comparative,
  author = {H. Kubinyi},
  title = {Comparative {M}olecular {F}ield {A}nalysis},
  booktitle = {Handbook of Chemoinformatics. From Data to Knowledge, Volume 4},
  year = {2003},
  editor = {J. Gasteiger},
  pages = {1555--1574},
  publisher = {Wiley-VCH},
  address = {Weinheim},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.31}
}

@article{Kubinyi2006Chemogenomics,
  author    = {Kubinyi, H.},
  title     = {Chemogenomics in drug discovery.},
  journal   = {Ernst Schering Res Found Workshop},
  volume    = {58},
  pages     = {1--19},
  year      = {2006},
  abstract  = {Chemogenomics is a new strategy in drug discovery which, in principle,
	searches for all molecules that are capable of interacting with any
	biological target. Because of the almost infinite number of drug-like
	organic molecules, this is an impossible task. Therefore chemogenomics
	has been defined as the investigation of classes of compounds (libraries)
	against families of functionally related proteins. In this definition,
	chemogenomics deals with the systematic analysis of chemical-biological
	interactions. Congeneric series of chemical analogs are probes to
	investigate their action on specific target classes, e.g., GPCRs,
	kinases, phosphodiesterases, ion channels, serine proteases, and
	others. Whereas such a strategy developed in pharmaceutical industry
	almost 20 years ago, it is now more systematically applied in the
	search for target- and subtype-specific ligands. The term "privileged
	structures" has been defined for scaffolds, such as the benzodiazepines,
	which very often produce biologically active analogs in a target
	family, in this case in the class of G-protein-coupled receptors.
	The SOSA approach is a strategy to modify the selectivity of biologically
	active compounds, generating new drug candidates from the side activities
	of therapeutically used drugs.},
  keywords  = {Animals; Chemistry, Pharmaceutical; Combinatorial Chemistry Techniques;
	Drug Design; Drug Industry; Genomics; Humans; Models, Chemical; Molecular
	Structure; Mutation; Pharmacogenetics; Protein Binding},
  pmid      = {16708995},
  owner     = {laurent},
  timestamp = {2007.09.22}
}

@article{Kuhn1955Hungarian,
  author = {Kuhn, H. W.},
  title = {The {H}ungarian method for the assignment problem},
  journal = {Naval Research Logistics Quarterly},
  year = {1955},
  volume = {2},
  pages = {83--97},
  number = {1-2},
  publisher = {INFORMS}
}

@article{Kuhn2001Global,
  author  = {Kuhn, K. M. and DeRisi, J. L. and Brown, P. O. and Sarnow, P.},
  title   = {Global and specific translational regulation in the genomic response
	of {S}accharomyces cerevisiae to a rapid transfer from a fermentable
	to a nonfermentable carbon source},
  journal = {Mol. {C}ell. {B}iol.},
  volume  = {21},
  number  = {3},
  pages   = {916--927},
  year    = {2001},
  url     = {http://mcb.asm.org/cgi/content/full/21/3/916?view=full&pmid=11154278},
  pdf     = {../local/kuhn01.pdf},
  file    = {kuhn01.pdf:local/kuhn01.pdf:PDF},
  subject = {microarray}
}

@book{Kuich1986Semirings,
  author = {Kuich, W. and Salomaa, A.},
  title = {Semirings, {A}utomata, {L}anguages},
  series = {{EATCS} Monographs on Theoretical Computer Science},
  publisher = {Springer-Verlag},
  year = {1986},
  volume = {5},
  owner = {vert}
}

@inproceedings{Kuksa2008Scalable,
  author = {Pavel P. Kuksa and Pai-Hsi Huang and Vladimir Pavlovic},
  title = {Scalable Algorithms for String Kernels with Inexact Matching.},
  booktitle = {NIPS},
  year = {2008},
  editor = {Daphne Koller and Dale Schuurmans and Yoshua Bengio and L{\'e}on Bottou},
  pages = {881--888},
  publisher = {MIT Press},
  date = {2009-04-15},
  ee = {http://books.nips.cc/papers/files/nips21/NIPS2008_0373.pdf},
  interhash = {e690e43f62810b0b94bf2db9f4993638},
  intrahash = {107ac82150af9f121cee9968b1cf05e1},
  url = {http://dblp.uni-trier.de/db/conf/nips/nips2008.html#KuksaHP08}
}

@article{Kumagai1980An,
  author = {Kumagai, S.},
  title = {An implicit function theorem: Comment},
  journal = {Journal of Optimization Theory and Applications},
  year = {1980},
  volume = {31},
  pages = {285--288},
  number = {2},
  month = {Jun}
}

@article{Kumar2005BhairPred,
  author = {Kumar, M. and Bhasin, M. and Natt, N. K. and Raghava, G. P. S.},
  title = {Bhair{P}red: prediction of beta-hairpins in a protein from multiple
	alignment information using {ANN} and {SVM} techniques.},
  journal = {Nucleic {A}cids {R}es},
  year = {2005},
  volume = {33},
  pages = {W154--W159},
  number = {Web Server issue},
  month = {Jul},
  abstract = {This paper describes a method for predicting a supersecondary structural
	motif, beta-hairpins, in a protein sequence. {T}he method was trained
	and tested on a set of 5102 hairpins and 5131 non-hairpins, obtained
	from a non-redundant dataset of 2880 proteins using the {DSSP} and
	{PROMOTIF} programs. {T}wo machine-learning techniques, an artificial
	neural network ({ANN}) and a support vector machine ({SVM}), were
	used to predict beta-hairpins. {A}n accuracy of 65.5\% was achieved
	using {ANN} when an amino acid sequence was used as the input. {T}he
	accuracy improved from 65.5 to 69.1\% when evolutionary information
	({PSI}-{BLAST} profile), observed secondary structure and surface
	accessibility were used as the inputs. {T}he accuracy of the method
	further improved from 69.1 to 79.2\% when the {SVM} was used for
	classification instead of the {ANN}. {T}he performances of the methods
	developed were assessed in a test case, where predicted secondary
	structure and surface accessibility were used instead of the observed
	structure. {T}he highest accuracy achieved by the {SVM} based method
	in the test case was 77.9\%. {A} maximum accuracy of 71.1\% with
	{M}atthew's correlation coefficient of 0.41 in the test case was
	obtained on a dataset previously used by {X}. {C}ruz, {E}. {G}. {H}utchinson,
	{A}. {S}hephard and {J}. {M}. {T}hornton (2002) {P}roc. {N}atl {A}cad.
	{S}ci. {USA}, 99, 11157-11162. {T}he performance of the method was
	also evaluated on proteins used in the '6th community-wide experiment
	on the critical assessment of techniques for protein structure prediction
	({CASP}6)'. {B}ased on the algorithm described, a web server, {B}hair{P}red
	(http://www.imtech.res.in/raghava/bhairpred/), has been developed,
	which can be used to predict beta-hairpins in a protein using the
	{SVM} approach.},
  doi = {10.1093/nar/gki588},
  pdf = {../local/Kumar2005BhairPred.pdf},
  file = {Kumar2005BhairPred.pdf:local/Kumar2005BhairPred.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1093/nar/gki588}
}

@book{Kuncheva2004Combining,
  author    = {Kuncheva, Ludmila I.},
  title     = {Combining Pattern Classifiers: Methods and Algorithms},
  publisher = {Wiley-Interscience},
  year      = {2004},
  isbn      = {0471210781}
}

@article{Kurata2006PlosCompBio,
  author      = {Kurata, Hiroyuki and El-Samad, Hana and Iwasaki, Rei and Ohtake, Hisao
	and Doyle, John C and Grigorova, Irina and Gross, Carol A and Khammash,
	Mustafa},
  title       = {Module-based analysis of robustness tradeoffs in the heat shock response
	system.},
  journal     = {PLoS Comput Biol},
  volume      = {2},
  number      = {7},
  pages       = {e59},
  month       = {Jul},
  year        = {2006},
  abstract    = {Biological systems have evolved complex regulatory mechanisms, even
	in situations where much simpler designs seem to be sufficient for
	generating nominal functionality. Using module-based analysis coupled
	with rigorous mathematical comparisons, we propose that in analogy
	to control engineering architectures, the complexity of cellular
	systems and the presence of hierarchical modular structures can be
	attributed to the necessity of achieving robustness. We employ the
	Escherichia coli heat shock response system, a strongly conserved
	cellular mechanism, as an example to explore the design principles
	of such modular architectures. In the heat shock response system,
	the sigma-factor sigma32 is a central regulator that integrates multiple
	feedforward and feedback modules. Each of these modules provides
	a different type of robustness with its inherent tradeoffs in terms
	of transient response and efficiency. We demonstrate how the overall
	architecture of the system balances such tradeoffs. An extensive
	mathematical exploration nevertheless points to the existence of
	an array of alternative strategies for the existing heat shock response
	that could exhibit similar behavior. We therefore deduce that the
	evolutionary constraints facing the system might have steered its
	architecture toward one of many robustly functional solutions.},
  doi         = {10.1371/journal.pcbi.0020059},
  url         = {http://dx.doi.org/10.1371/journal.pcbi.0020059},
  institution = {Department of Bioscience and Bioinformatics, Kyushu Institute of
	Technology, Iizuka, Fukuoka, Japan. kurata@bio.kyutech.ac.jp},
  keywords    = {Computer Simulation; Escherichia coli Proteins, metabolism; Escherichia
	coli, metabolism; Feedback, physiology; Gene Expression Regulation,
	Bacterial, physiology; Heat-Shock Proteins, metabolism; Heat-Shock
	Response, physiology; Models, Biological; Oxidative Stress, physiology;
	Signal Transduction, physiology; Systems Biology, methods},
  language    = {eng},
  medline-pst = {ppublish},
  pii         = {05-PLCB-RA-0264R4},
  pmid        = {16863396},
  owner       = {Andrei Zinovyev},
  timestamp   = {2011.04.08}
}

@article{Kwon2000candidate,
  author      = {Kwon, J. M. and Goate, A. M.},
  title       = {The candidate gene approach.},
  journal     = {Alcohol Res Health},
  volume      = {24},
  number      = {3},
  pages       = {164--168},
  year        = {2000},
  abstract    = {Alcoholism has a significant genetic basis, and identifying genes
	that confer a susceptibility to alcoholism will aid clinicians in
	preventing and effectively treating the disease. One commonly used
	technique to identify genetic risk factors for complex disorders
	such as alcoholism is the candidate gene approach, which directly
	tests the effects of genetic variants of a potentially contributing
	gene in an association study. These studies, which may include members
	of an affected family or unrelated cases and controls, can be performed
	relatively quickly and inexpensively and may allow identification
	of genes with small effects. However, the candidate gene approach
	is limited by how much is known of the biology of the disease being
	investigated. As researchers identify potential candidate genes using
	animal studies or linking them to DNA regions implicated through
	other analyses, the candidate gene approach will continue to be commonly
	used.},
  institution = {Washington University School of Medicine, St. Louis, Missouri, USA.},
  keywords    = {Alcoholism; Animals; Chromosome Mapping; Disease Models, Animal; Genetic
	Predisposition to Disease; Genetic Variation; Humans; Mice; Mutation;
	Pedigree; Polymorphism, Genetic; Quantitative Trait, Heritable},
  pmid        = {11199286},
  owner       = {fantine},
  timestamp   = {2010.10.20}
}

@article{Kohler2008Walking,
  author = {K{\"o}hler, S. and Bauer, S. and Horn, D. and Robinson, P. N.},
  title = {Walking the interactome for prioritization of candidate disease genes.},
  journal = {Am. J. Hum. Genet.},
  year = {2008},
  volume = {82},
  pages = {949--958},
  number = {4},
  month = {Apr},
  abstract = {The identification of genes associated with hereditary disorders has
	contributed to improving medical care and to a better understanding
	of gene functions, interactions, and pathways. However, there are
	well over 1500 Mendelian disorders whose molecular basis remains
	unknown. At present, methods such as linkage analysis can identify
	the chromosomal region in which unknown disease genes are located,
	but the regions could contain up to hundreds of candidate genes.
	In this work, we present a method for prioritization of candidate
	genes by use of a global network distance measure, random walk analysis,
	for definition of similarities in protein-protein interaction networks.
	We tested our method on 110 disease-gene families with a total of
	783 genes and achieved an area under the ROC curve of up to 98\%
	on simulated linkage intervals of 100 genes surrounding the disease
	gene, significantly outperforming previous methods based on local
	distance measures. Our results not only provide an improved tool
	for positional-cloning projects but also add weight to the assumption
	that phenotypically similar diseases are associated with disturbances
	of subnetworks within the larger protein interactome that extend
	beyond the disease proteins themselves.},
  doi = {10.1016/j.ajhg.2008.02.013},
  institution = {Institute for Medical Genetics, Charit{\'e} Universit{\"a}tsmedizin Berlin,
	Augustenburger Platz 1, 13353 Berlin, Germany.},
  keywords = {Animals; Chromosome Mapping; Computational Biology; Databases, Genetic;
	Genetic Diseases, Inborn; Genetic Predisposition to Disease; Humans;
	Internet; Linkage (Genetics); Mice; Pedigree; Protein Interaction
	Mapping; Software},
  owner = {mordelet},
  pii = {S0002-9297(08)00172-9},
  pmid = {18371930},
  timestamp = {2010.09.28},
  url = {http://dx.doi.org/10.1016/j.ajhg.2008.02.013}
}

@article{LHeureux2004Locally,
  author = {L'Heureux, P. J. and Carreau, J. and Bengio, Y. and Delalleau, O.
	and Yue, S. Y.},
  title = {Locally linear embedding for dimensionality reduction in {QSAR}.},
  journal = {J. {C}omput. {A}ided {M}ol. {D}es.},
  year = {2004},
  volume = {18},
  pages = {475--482},
  number = {7-9},
  abstract = {Current practice in {Q}uantitative {S}tructure {A}ctivity {R}elationship
	({QSAR}) methods usually involves generating a great number of chemical
	descriptors and then cutting them back with variable selection techniques.
	{V}ariable selection is an effective method to reduce the dimensionality
	but may discard some valuable information. {T}his paper introduces
	{L}ocally {L}inear {E}mbedding ({LLE}), a local non-linear dimensionality
	reduction technique, that can statistically discover a low-dimensional
	representation of the chemical data. {LLE} is shown to create more
	stable representations than other non-linear dimensionality reduction
	algorithms, and to be capable of capturing non-linearity in chemical
	data.},
  doi = {10.1007/s10822-004-5319-9},
  pdf = {../local/LHeureux2004Locally.pdf},
  file = {LHeureux2004Locally.pdf:local/LHeureux2004Locally.pdf:PDF},
  keywords = {dimred},
  url = {http://dx.doi.org/10.1007/s10822-004-5319-9}
}

@article{LeCao2009Sparse,
  author = {{L\^e Cao}, K.-A. and Martin, P. G. P. and Robert-Grani\'e, C. and
	Besse, P.},
  title = {Sparse canonical methods for biological data integration: application
	to a cross-platform study.},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {34},
  abstract = {In the context of systems biology, few sparse approaches have been
	proposed so far to integrate several data sets. It is however an
	important and fundamental issue that will be widely encountered in
	post genomic studies, when simultaneously analyzing transcriptomics,
	proteomics and metabolomics data using different platforms, so as
	to understand the mutual interactions between the different data
	sets. In this high dimensional setting, variable selection is crucial
	to give interpretable results. We focus on a sparse Partial Least
	Squares approach (sPLS) to handle two-block data sets, where the
	relationship between the two types of variables is known to be symmetric.
	Sparse PLS has been developed either for a regression or a canonical
	correlation framework and includes a built-in procedure to select
	variables while integrating data. To illustrate the canonical mode
	approach, we analyzed the NCI60 data sets, where two different platforms
	(cDNA and Affymetrix chips) were used to study the transcriptome
	of sixty cancer cell lines. We compare the results obtained with two
	other sparse or related canonical correlation approaches: CCA with
	Elastic Net penalization (CCA-EN) and Co-Inertia Analysis (CIA).
	The latter does not include a built-in procedure for variable selection
	and requires a two-step analysis. We stress the lack of statistical
	criteria to evaluate canonical correlation methods, which makes biological
	interpretation absolutely necessary to compare the different gene
	selections. We also propose comprehensive graphical representations
	of both samples and variables to facilitate the interpretation of
	the results. sPLS and CCA-EN selected highly relevant genes and complementary
	findings from the two data sets, which enabled a detailed understanding
	of the molecular characteristics of several groups of cell lines.
	These two approaches were found to bring similar results, although
	they highlighted the same phenomenons with a different priority.
	They outperformed CIA that tended to select redundant information.},
  doi = {10.1186/1471-2105-10-34},
  institution = {Station d'Am{\'e}lioration G{\'e}n{\'e}tique des Animaux UR 631, Institut National
	de Recherche Agronomique, F-31326 Castanet, France. k.lecao@imb.uq.edu.au},
  keywords = {Computational Biology, methods; Genomics; Metabolomics; Proteomics;
	Systems Biology, methods},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-10-34},
  pmid = {19171069},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1186/1471-2105-10-34}
}

@article{LaBaer2005Protein,
  author      = {LaBaer, Joshua and Ramachandran, Niroshan},
  title       = {Protein microarrays as tools for functional proteomics.},
  journal     = {Curr Opin Chem Biol},
  volume      = {9},
  number      = {1},
  pages       = {14--19},
  month       = {Feb},
  year        = {2005},
  abstract    = {Protein microarrays present an innovative and versatile approach to
	study protein abundance and function at an unprecedented scale. Given
	the chemical and structural complexity of the proteome, the development
	of protein microarrays has been challenging. Despite these challenges
	there has been a marked increase in the use of protein microarrays
	to map interactions of proteins with various other molecules, and
	to identify potential disease biomarkers, especially in the area
	of cancer biology. In this review, we discuss some of the promising
	advances made in the development and use of protein microarrays.},
  doi         = {10.1016/j.cbpa.2004.12.006},
  url         = {http://dx.doi.org/10.1016/j.cbpa.2004.12.006},
  institution = {Harvard Institute of Proteomics, Department of Biological Chemistry
	and Molecular Pharmacology, Harvard Medical School, 320 Charles Street,
	Cambridge, Massachusetts 02141, USA. joshua_labaer@hms.harvard.edu},
  keywords    = {Protein Array Analysis; Proteins; Proteomics; Surface Properties},
  pii         = {S1367-5931(04)00165-6},
  pmid        = {15701447},
  owner       = {phupe},
  timestamp   = {2010.08.12}
}

@article{LaConte2005Support,
  author = {Stephen LaConte and Stephen Strother and Vladimir Cherkassky and
	Jon Anderson and Xiaoping Hu},
  title = {Support vector machines for temporal classification of block design
	f{MRI} data.},
  journal = {Neuroimage},
  year = {2005},
  volume = {26},
  pages = {317--329},
  number = {2},
  month = {Jun},
  abstract = {This paper treats support vector machine ({SVM}) classification applied
	to block design f{MRI}, extending our previous work with linear discriminant
	analysis [{L}a{C}onte, {S}., {A}nderson, {J}., {M}uley, {S}., {A}she,
	{J}., {F}rutiger, {S}., {R}ehm, {K}., {H}ansen, {L}.{K}., {Y}acoub,
	{E}., {H}u, {X}., {R}ottenberg, {D}., {S}trother {S}., 2003a. {T}he
	evaluation of preprocessing choices in single-subject {BOLD} f{MRI}
	using {NPAIRS} performance metrics. {N}euro{I}mage 18, 10-27; {S}trother,
	{S}.{C}., {A}nderson, {J}., {H}ansen, {L}.{K}., {K}jems, {U}., {K}ustra,
	{R}., {S}iditis, {J}., {F}rutiger, {S}., {M}uley, {S}., {L}a{C}onte,
	{S}., {R}ottenberg, {D}. 2002. {T}he quantitative evaluation of functional
	neuroimaging experiments: the {NPAIRS} data analysis framework. {N}euro{I}mage
	15, 747-771]. {W}e compare {SVM} to canonical variates analysis ({CVA})
	by examining the relative sensitivity of each method to ten combinations
	of preprocessing choices consisting of spatial smoothing, temporal
	detrending, and motion correction. {I}mportant to the discussion
	are the issues of classification performance, model interpretation,
	and validation in the context of f{MRI}. {A}s the {SVM} has many
	unique properties, we examine the interpretation of support vector
	models with respect to neuroimaging data. {W}e propose four methods
	for extracting activation maps from {SVM} models, and we examine
	one of these in detail. {F}or both {CVA} and {SVM}, we have classified
	individual time samples of whole brain data, with {TR}s of roughly
	4 s, thirty slices, and nearly 30,000 brain voxels, with no averaging
	of scans or prior feature selection.},
  doi = {10.1016/j.neuroimage.2005.01.048},
  pii = {S1053-8119(05)00089-3},
  url = {http://dx.doi.org/10.1016/j.neuroimage.2005.01.048}
}

@unpublished{Lacoste-Julien2003introduction,
  author   = {Lacoste-Julien, S.},
  title    = {An introduction to {M}ax-{M}argin {M}arkov {N}etworks},
  year     = {2003},
  month    = {December},
  note     = {UC Berkeley cs281a project report},
  keywords = {conditional-random-field},
  owner    = {vert},
  pdf      = {../local/Lacoste-Julien2003introduction.pdf},
  file     = {Lacoste-Julien2003introduction.pdf:local/Lacoste-Julien2003introduction.pdf:PDF}
}

@article{LaCount2005protein,
  author    = {Douglas J LaCount and Marissa Vignali and Rakesh Chettier and Amit
    Phansalkar and Russell Bell and Jay R Hesselberth and Lori W Schoenfeld
    and Irene Ota and Sudhir Sahasrabudhe and Cornelia Kurschner and
    Stanley Fields and Robert E Hughes},
  title     = {{A} protein interaction network of the malaria parasite {P}lasmodium
    falciparum.},
  journal   = {Nature},
  year      = {2005},
  month     = {Nov},
  volume    = {438},
  number    = {7064},
  pages     = {103--107},
  abstract  = {Plasmodium falciparum causes the most severe form of malaria and kills
    up to 2.7 million people annually. Despite the global importance
    of P. falciparum, the vast majority of its proteins have not been
    characterized experimentally. Here we identify P. falciparum protein-protein
    interactions using a high-throughput version of the yeast two-hybrid
    assay that circumvents the difficulties in expressing P. falciparum
    proteins in Saccharomyces cerevisiae. From more than 32,000 yeast
    two-hybrid screens with P. falciparum protein fragments, we identified
    2,846 unique interactions, most of which include at least one previously
    uncharacterized protein. Informatic analyses of network connectivity,
    coexpression of the genes encoding interacting fragments, and enrichment
    of specific protein domains or Gene Ontology annotations were used
    to identify groups of interacting proteins, including one implicated
    in chromatin modification, transcription, messenger RNA stability
    and ubiquitination, and another implicated in the invasion of host
    cells. These data constitute the first extensive description of the
    protein interaction network for this important human pathogen.},
  doi       = {10.1038/nature04104},
  url       = {http://dx.doi.org/10.1038/nature04104},
  pii       = {nature04104},
  pmid      = {16267556},
  keywords  = {plasmodium},
  pdf       = {../local/LaCount2005protein.pdf},
  file      = {LaCount2005protein.pdf:local/LaCount2005protein.pdf:PDF},
  timestamp = {2006.04.13}
}

@inproceedings{Lafferty2001Conditional,
  author = {Lafferty, J. and McCallum, A. and Pereira, F.},
  title = {Conditional {R}andom {F}ields: {P}robabilistic {M}odels for {S}egmenting
	and {L}abeling {S}equence {D}ata},
  booktitle = {Proc. 18th {I}nternational {C}onf. on {M}achine {L}earning},
  year = {2001},
  pages = {282--289},
  publisher = {Morgan Kaufmann},
  address = {San Francisco, CA},
  citeseerurl = {http://citeseer.ist.psu.edu/lafferty01conditional.html},
  pdf = {../local/Lafferty2001Conditional.pdf},
  file = {Lafferty2001Conditional.pdf:local/Lafferty2001Conditional.pdf:PDF},
  keywords = {conditional-random-field},
  url = {http://citeseer.ist.psu.edu/lafferty01conditional.html}
}

@article{Lage2007human,
  author = {Lage, K. and Karlberg, E.O. and St{\o}rling, Z.M. and Olason, P.I. and
	Pedersen, A.G. and Rigina, O. and Hinsby, A.M. and T{\"u}mer, Z. and
	Pociot, F. and Tommerup, N. and Moreau, Y. and Brunak, S.},
  title = {A human phenome-interactome network of protein complexes implicated
	in genetic disorders},
  journal = {Nat. Biotechnol.},
  year = {2007},
  volume = {25},
  pages = {309--316},
  number = {3},
  month = {Mar},
  abstract = {We performed a systematic, large-scale analysis of human protein complexes
	comprising gene products implicated in many different categories
	of human disease to create a phenome-interactome network. This was
	done by integrating quality-controlled interactions of human proteins
	with a validated, computationally derived phenotype similarity score,
	permitting identification of previously unknown complexes likely
	to be associated with disease. Using a phenomic ranking of protein
	complexes linked to human disease, we developed a Bayesian predictor
	that in 298 of 669 linkage intervals correctly ranks the known disease-causing
	protein as the top candidate, and in 870 intervals with no identified
	disease-causing gene, provides novel candidates implicated in disorders
	such as retinitis pigmentosa, epithelial ovarian cancer, inflammatory
	bowel disease, amyotrophic lateral sclerosis, Alzheimer disease,
	type 2 diabetes and coronary heart disease. Our publicly available
	draft of protein complexes associated with pathology comprises 506
	complexes, which reveal functional relationships between disease-promoting
	genes that will inform future experimentation.},
  doi = {10.1038/nbt1295},
  pdf = {../local/Lage2007human.pdf},
  file = {Lage2007human.pdf:Lage2007human.pdf:PDF},
  institution = {Center for Biological Sequence Analysis, BioCentrum-DTU, Technical
	University of Denmark, Building 208, DK-2800 Lyngby, Denmark.},
  owner = {mordelet},
  pii = {nbt1295},
  pmid = {17344885},
  timestamp = {2010.09.28},
  url = {http://dx.doi.org/10.1038/nbt1295}
}

@article{Lahav2004NatGenet,
  author = {Lahav, G. and Rosenfeld, N. and Sigal, A. and Geva-Zatorsky, N. and
	Levine, A. J. and Elowitz, M. B. and Alon, U.},
  title = {Dynamics of the {p53-Mdm2} feedback loop in individual cells},
  journal = {Nat Genet},
  year = {2004},
  volume = {36},
  pages = {147--150},
  number = {2},
  abstract = {The tumor suppressor p53, one of the most intensely investigated proteins,
	is usually studied by experiments that are averaged over cell populations,
	potentially masking the dynamic behavior in individual cells. We
	present a system for following, in individual living cells, the dynamics
	of p53 and its negative regulator Mdm2 (refs. 1,4-7): this system
	uses functional p53-CFP and Mdm2-YFP fusion proteins and time-lapse
	fluorescence microscopy. We found that p53 was expressed in a series
	of discrete pulses after DNA damage. Genetically identical cells
	had different numbers of pulses: zero, one, two or more. The mean
	height and duration of each pulse were fixed and did not depend on
	the amount of DNA damage. The mean number of pulses, however, increased
	with DNA damage. This approach can be used to study other signaling
	systems and suggests that the p53-Mdm2 feedback loop generates a
	'digital' clock that releases well-timed quanta of p53 until damage
	is repaired or the cell dies.},
  keywords = {csbcbook}
}

@article{Lai2006comparison,
  author = {Lai, C. and Reinders, M. J. T. and {van't Veer}, L. J. and Wessels,
	L. F. A.},
  title = {A comparison of univariate and multivariate gene selection techniques
	for classification of cancer datasets},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {235},
  number = {1},
  doi = {10.1186/1471-2105-7-235},
  pdf = {../local/Lai2006comparison.pdf},
  file = {Lai2006comparison.pdf:Lai2006comparison.pdf:PDF},
  issn = {1471-2105},
  owner = {jp},
  publisher = {BioMed Central Ltd},
  timestamp = {2011.01.14},
  url = {http://dx.doi.org/10.1186/1471-2105-7-235}
}

@article{Lai2000Kernel,
  author  = {P.L. Lai and C. Fyfe},
  title   = {Kernel and nonlinear canonical correlation analysis},
  journal = {Int. {J}. {N}eural {S}yst.},
  year    = {2000},
  volume  = {10},
  number  = {5},
  pages   = {365--377},
  url     = {http://www.worldscinet.com/journals/ijns/10/sample/S012906570000034X.html},
  subject = {kernel},
  pdf     = {../local/lai00.pdf},
  file    = {lai00.pdf:local/lai00.pdf:PDF}
}

@article{Laird2010Principles,
  author = {Peter W Laird},
  title = {Principles and challenges of genome-wide {DNA} methylation analysis.},
  journal = {Nat Rev Genet},
  year = {2010},
  volume = {11},
  pages = {191--203},
  number = {3},
  month = {Feb},
  abstract = {Methylation of cytosine bases in DNA provides a layer of epigenetic
	control in many eukaryotes that has important implications for normal
	biology and disease. Therefore, profiling DNA methylation across
	the genome is vital to understanding the influence of epigenetics.
	There has been a revolution in DNA methylation analysis technology
	over the past decade: analyses that previously were restricted to
	specific loci can now be performed on a genome-scale and entire methylomes
	can be characterized at single-base-pair resolution. However, there
	is such a diversity of DNA methylation profiling techniques that
	it can be challenging to select one. This Review discusses the different
	approaches and their relative merits and introduces considerations
	for data analysis.},
  doi = {10.1038/nrg2732},
  institution = {USC Epigenome Center, University of Southern California, Keck School
	of Medicine, 1450 Biggy Street, Room G511B, Los Angeles, California 90089-9601,
	USA. plaird@usc.edu.},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {philippe},
  pii = {nrg2732},
  pmid = {20125086},
  timestamp = {2010.08.01},
  url = {http://dx.doi.org/10.1038/nrg2732}
}

@article{Lal2004Support,
  author = {Thomas Navin Lal and Michael Schr{\"o}der and Thilo Hinterberger and
	Jason Weston and Martin Bogdan and Niels Birbaumer and Bernhard Sch{\"o}lkopf},
  title = {Support vector channel selection in {BCI}.},
  journal = {I{EEE} {T}rans {B}iomed {E}ng},
  year = {2004},
  volume = {51},
  pages = {1003--1010},
  number = {6},
  month = {Jun},
  abstract = {Designing a brain computer interface ({BCI}) system one can choose
	from a variety of features that may be useful for classifying brain
	activity during a mental task. {F}or the special case of classifying
	electroencephalogram ({EEG}) signals we propose the usage of the
	state of the art feature selection algorithms {R}ecursive {F}eature
	{E}limination and {Z}ero-{N}orm {O}ptimization which are based on
	the training of support vector machines ({SVM}). {T}hese algorithms
	can provide more accurate solutions than standard filter methods
	for feature selection. {W}e adapt the methods for the purpose of
	selecting {EEG} channels. {F}or a motor imagery paradigm we show
	that the number of used channels can be reduced significantly without
	increasing the classification error. {T}he resulting best channels
	agree well with the expected underlying cortical activity patterns
	during the mental tasks. {F}urthermore we show how time dependent
	task specific information can be visualized.},
  keywords = {Algorithms, Animals, Antisense, Artificial Intelligence, Automated,
	Autonomic Nervous System, Brain, Cell Line, Cerebral Cortex, Child,
	Cluster Analysis, Cognition, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, DNA Fingerprinting, Databases,
	Drug Evaluation, Electroencephalography, Emotions, Event-Related
	Potentials, Evoked Potentials, Factual, Fluorescence, Fuzzy Logic,
	Gene Silencing, Gene Targeting, Genetic, Hand, Hela Cells, Humans,
	Imaging, Intracellular Space, Male, Microscopy, Models, Monitoring,
	Motor, Neoplasms, Neural Networks (Computer), Non-U.S. Gov't, Oligonucleotides,
	P.H.S., P300, Pattern Recognition, Peptides, Physiologic, Preclinical,
	Predictive Value of Tests, Preschool, Prognosis, Protein Interaction
	Mapping, Protein Structure, Proteins, Proteomics, Quantitative Structure-Activity
	Relationship, Quaternary, RNA, RNA Interference, Recognition (Psychology),
	Reproducibility of Results, Research Support, Sensitivity and Specificity,
	Signal Processing, Small Interfering, Software, Thionucleotides,
	Three-Dimensional, Tumor, U.S. Gov't, User-Computer Interface, Word
	Processing, 15188871}
}

@article{Lanckriet2004Learning,
  author = {Lanckriet, G.R.G. and Cristianini, N. and Bartlett, P. and El Ghaoui,
	L. and Jordan, M.I.},
  title = {Learning the kernel matrix with semidefinite programming},
  journal = {J. Mach. Learn. Res.},
  year = {2004},
  volume = {5},
  pages = {27--72},
  pdf = {../local/Lanckriet2004Learning.pdf},
  file = {Lanckriet2004Learning.pdf:Lanckriet2004Learning.pdf:PDF},
  owner = {vert},
  subject = {kernel},
  url = {http://www.jmlr.org/papers/v5/lanckriet04a.html}
}

@incollection{Lanckriet2004Kernel-based,
  author = {Lanckriet, G.R.G. and Cristianini, N. and Jordan, M.I. and Noble,
	W.S.},
  title = {Kernel-based integration of genomic data using semidefinite programming},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.P.},
  pages = {231--259},
  keywords = {biosvm},
  owner = {vert}
}

@inproceedings{Lanckriet2004Kernel-baseda,
  author = {Lanckriet, G.R. and Deng, M. and Cristianini, N. and Jordan, M.I.
	and Noble, W.S.},
  title = {Kernel-based data fusion and its application to protein function
	prediction in yeast.},
  booktitle = {Proceedings of the {P}acific {S}ymposium on {B}iocomputing},
  year = {2004},
  pages = {300--311},
  abstract = {Kernel methods provide a principled framework in which to represent
	many types of data, including vectors, strings, trees and graphs.
	{A}s such, these methods are useful for drawing inferences about
	biological phenomena. {W}e describe a method for combining multiple
	kernel representations in an optimal fashion, by formulating the
	problem as a convex optimization problem that can be solved using
	semidefinite programming techniques. {T}he method is applied to the
	problem of predicting yeast protein functional classifications using
	a support vector machine ({SVM}) trained on five types of data. {F}or
	this problem, the new method performs better than a previously-described
	{M}arkov random field method, and better than the {SVM} trained on
	any single type of data.},
  pdf = {../local/Lanckriet2004Kernel-baseda.pdf},
  file = {Lanckriet2004Kernel-baseda.pdf:local/Lanckriet2004Kernel-baseda.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Lanckriet2004statistical,
  author = {Lanckriet, G. R. G. and De Bie, T. and Cristianini, N. and Jordan,
	M. I. and Noble, W. S.},
  title = {A statistical framework for genomic data fusion},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {2626--2635},
  number = {16},
  abstract = {Motivation: {D}uring the past decade, the new focus on genomics has
	highlighted a particular challenge: to integrate the different views
	of the genome that are provided by various types of experimental
	data. {R}esults: {T}his paper describes a computational framework
	for integrating and drawing inferences from a collection of genome-wide
	measurements. {E}ach dataset is represented via a kernel function,
	which defines generalized similarity relationships between pairs
	of entities, such as genes or proteins. {T}he kernel representation
	is both flexible and efficient, and can be applied to many different
	types of data. {F}urthermore, kernel functions derived from different
	types of data can be combined in a straightforward fashion. {R}ecent
	advances in the theory of kernel methods have provided efficient
	algorithms to perform such combinations in a way that minimizes a
	statistical loss function. {T}hese methods exploit semidefinite programming
	techniques to reduce the problem of finding optimizing kernel combinations
	to a convex optimization problem. {C}omputational experiments performed
	using yeast genome-wide datasets, including amino acid sequences,
	hydropathy profiles, gene expression data and known protein-protein
	interactions, demonstrate the utility of this approach. {A} statistical
	learning algorithm trained from all of these data to recognize particular
	classes of proteins--membrane proteins and ribosomal proteins--performs
	significantly better than the same algorithm trained on any single
	type of data. {A}vailability: {S}upplementary data at http://noble.gs.washington.edu/proj/sdp-svm},
  doi = {10.1093/bioinformatics/bth294},
  pdf = {../local/Lanckriet2004statistical.pdf},
  file = {Lanckriet2004statistical.pdf:local/Lanckriet2004statistical.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/16/2626}
}

@techreport{Land1997Variable,
  author      = {Land, S. R. and Friedman, J. H.},
  title       = {Variable fusion: A new adaptive signal regression method},
  year        = {1997},
  number      = {656},
  institution = {Department of Statistics, Carnegie Mellon University Pittsburgh},
  keywords    = {lasso, ordinal, regression}
}

@article{Lander1999Array,
  author = {E. S. Lander},
  title = {Array of hope.},
  journal = {Nat Genet},
  year = {1999},
  volume = {21},
  pages = {3--4},
  number = {1 Suppl},
  month = {Jan},
  doi = {10.1038/4427},
  institution = {Whitehead Institute for Biomedical Research and Department of Biology,
	Massachusetts Institute of Technology, Cambridge 02142, USA. lander@genome.wi.mit.edu},
  keywords = {Animals; DNA, analysis; Gene Expression; Genetic Variation; Genome;
	Humans; Molecular Probe Techniques, trends; Oligonucleotide Array
	Sequence Analysis, methods; RNA, Messenger, analysis; Saccharomyces
	cerevisiae, genetics},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {9915492},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1038/4427}
}

@article{Langford2001property,
  author    = {Langford, E. and Schwertman, N. and Owens, M.},
  title     = {Is the property of being positively correlated transitive?},
  journal   = {The American Statistician},
  publisher = {Taylor \& Francis},
  year      = {2001},
  volume    = {55},
  number    = {4},
  pages     = {322--325}
}

@article{Langmead2009Ultrafast,
  author        = {Langmead, B. and Trapnell, C. and Pop, M. and Salzberg, S. L.},
  title         = {Ultrafast and memory-efficient alignment of short {DNA} sequences
    to the human genome.},
  journal       = {Genome Biol},
  year          = {2009},
  volume        = {10},
  number        = {3},
  pages         = {R25},
  abstract      = {Bowtie is an ultrafast, memory-efficient alignment program for aligning
    short DNA sequence reads to large genomes. For the human genome,
    Burrows-Wheeler indexing allows Bowtie to align more than 25 million
    reads per CPU hour with a memory footprint of approximately 1.3 gigabytes.
    Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware
    backtracking algorithm that permits mismatches. Multiple processor
    cores can be used simultaneously to achieve even greater alignment
    speeds. Bowtie is open source (http://bowtie.cbcb.umd.edu).},
  doi           = {10.1186/gb-2009-10-3-r25},
  url           = {http://dx.doi.org/10.1186/gb-2009-10-3-r25},
  pii           = {gb-2009-10-3-r25},
  pmid          = {19261174},
  institution   = {Center for Bioinformatics and Computational Biology, Institute for
    Advanced Computer Studies, University of Maryland, College Park,
    MD 20742, USA. langmead@cs.umd.edu},
  keywords      = {ngs},
  language      = {eng},
  medline-pst   = {ppublish},
  pdf           = {../local/Langmead2009Ultrafast.pdf},
  file          = {Langmead2009Ultrafast.pdf:Langmead2009Ultrafast.pdf:PDF},
  owner         = {jp},
  timestamp     = {2013.03.29},
  __markedentry = {[jp:]}
}

@article{Lao2004Morphological,
  author = {Zhiqiang Lao and Dinggang Shen and Zhong Xue and Bilge Karacali and
	Susan M Resnick and Christos Davatzikos},
  title = {Morphological classification of brains via high-dimensional shape
	transformations and machine learning methods.},
  journal = {Neuroimage},
  year = {2004},
  volume = {21},
  pages = {46--57},
  number = {1},
  month = {Jan},
  abstract = {A high-dimensional shape transformation posed in a mass-preserving
	framework is used as a morphological signature of a brain image.
	{P}opulation differences with complex spatial patterns are then determined
	by applying a nonlinear support vector machine ({SVM}) pattern classification
	method to the morphological signatures. {S}ignificant reduction of
	the dimensionality of the morphological signatures is achieved via
	wavelet decomposition and feature reduction methods. {A}pplying the
	method to {MR} images with simulated atrophy shows that the method
	can correctly detect subtle and spatially complex atrophy, even when
	the simulated atrophy represents only a 5\% variation from the original
	image. {A}pplying this method to actual {MR} images shows that brains
	can be correctly determined to be male or female with a successful
	classification rate of 97\%, using the leave-one-out method. {T}his
	proposed method also shows a high classification rate for old adults'
	age classification, even under difficult test scenarios. {T}he main
	characteristic of the proposed methodology is that, by applying multivariate
	pattern classification methods, it can detect subtle and spatially
	complex patterns of morphological group differences which are often
	not detectable by voxel-based morphometric methods, because these
	methods analyze morphological measurements voxel-by-voxel and do
	not consider the entirety of the data simultaneously.},
  pii = {S1053811903005731}
}

@article{Lapinsh2001Development,
  author = {Lapinsh, M. and Prusis, P. and Gutcaits, A. and Lundstedt, T. and
	Wikberg, J. E. S.},
  title = {Development of proteo-chemometrics: A novel technology of use for
	analysis of drug-receptor interactions},
  journal = {Biochim. Biophys. Acta},
  year = {2001},
  volume = {1525},
  pages = {180--190},
  owner = {vert},
  timestamp = {2007.08.02}
}

@article{Lapinsh2005Improved,
  author    = {Lapinsh, M. and Prusis, P. and Uhl{\'e}n, S. and Wikberg, J. E. S.},
  title     = {Improved approach for proteochemometrics modeling: application to
    organic compound--amine {G} protein-coupled receptor interactions.},
  journal   = {Bioinformatics},
  year      = {2005},
  month     = {Dec},
  volume    = {21},
  number    = {23},
  pages     = {4289--4296},
  abstract  = {MOTIVATION: Proteochemometrics is a novel technology for the analysis
    of interactions of series of proteins with series of ligands. We
    have here customized it for analysis of large datasets and evaluated
    it for the modeling of the interaction of psychoactive organic amines
    with all the five known families of amine G protein-coupled receptors
    (GPCRs). RESULTS: The model exploited data for the binding of 22
    compounds to 31 amine GPCRs, correlating chemical descriptions and
    cross-descriptions of compounds and receptors to binding affinity
    using a novel strategy. A highly valid model (q2 = 0.76) was obtained
    which was further validated by external predictions using data for
    10 other entirely independent compounds, yielding the high q2ext
    = 0.67. Interpretation of the model reveals molecular interactions
    that govern psychoactive organic amines overall affinity for amine
    GPCRs, as well as their selectivity for particular amine GPCRs. The
    new modeling procedure allows us to obtain fully interpretable proteochemometrics
    models using essentially unlimited number of ligand and protein descriptors.},
  doi       = {10.1093/bioinformatics/bti703},
  url       = {http://dx.doi.org/10.1093/bioinformatics/bti703},
  pii       = {bti703},
  pmid      = {16204343},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.07.16}
}

@article{Larsen2005integrative,
  author = {Mette Voldby Larsen and Claus Lundegaard and Kasper Lamberth and
	S{\o}ren Buus and S{\o}ren Brunak and Ole Lund and Morten Nielsen},
  title = {An integrative approach to {CTL} epitope prediction: a combined algorithm
	integrating {MHC} class {I} binding, {TAP} transport efficiency,
	and proteasomal cleavage predictions.},
  journal = {Eur. J. Immunol.},
  year = {2005},
  volume = {35},
  pages = {2295--2303},
  number = {8},
  month = {Aug},
  abstract = {Reverse immunogenetic approaches attempt to optimize the selection
	of candidate epitopes, and thus minimize the experimental effort
	needed to identify new epitopes. When predicting cytotoxic T cell
	epitopes, the main focus has been on the highly specific MHC class
	I binding event. Methods have also been developed for predicting
	the antigen-processing steps preceding MHC class I binding, including
	proteasomal cleavage and transporter associated with antigen processing
	(TAP) transport efficiency. Here, we use a dataset obtained from
	the SYFPEITHI database to show that a method integrating predictions
	of MHC class I binding affinity, TAP transport efficiency, and C-terminal
	proteasomal cleavage outperforms any of the individual methods. Using
	an independent evaluation dataset of HIV epitopes from the Los Alamos
	database, the validity of the integrated method is confirmed. The
	performance of the integrated method is found to be significantly
	higher than that of the two publicly available prediction methods
	BIMAS and SYFPEITHI. To identify 85\% of the epitopes in the HIV
	dataset, 9\% and 10\% of all possible nonamers in the HIV proteins
	must be tested when using the BIMAS and SYFPEITHI methods, respectively,
	for the selection of candidate epitopes. This number is reduced to
	7\% when using the integrated method. In practical terms, this means
	that the experimental effort needed to identify an epitope in a hypothetical
	protein with 85\% probability is reduced by 20-30\% when using the
	integrated method. The method is available at http://www.cbs.dtu.dk/services/NetCTL.
	Supplementary material is available at http://www.cbs.dtu.dk/suppl/immunology/CTL.php.},
  doi = {10.1002/eji.200425811},
  keywords = {Algorithms; Data Interpretation, Statistical; Epitopes, T-Lymphocyte;
	Histocompatibility Antigens Class I; Humans; Hydrolysis; Predictive
	Value of Tests; Proteasome Endopeptidase Complex; Protein Binding;
	Research Support, N.I.H., Extramural; Research Support, Non-U.S.
	Gov't; Research Support, U.S. Gov't, P.H.S.; T-Lymphocytes, Cytotoxic},
  owner = {jacob},
  pmid = {15997466},
  timestamp = {2006.08.30},
  url = {http://dx.doi.org/10.1002/eji.200425811}
}

@article{Lasonder2002Analysis,
  author    = {Edwin Lasonder and Yasushi Ishihama and Jens S Andersen and Adriaan
    M W Vermunt and Arnab Pain and Robert W Sauerwein and Wijnand M C
    Eling and Neil Hall and Andrew P Waters and Hendrik G Stunnenberg
    and Matthias Mann},
  title     = {{A}nalysis of the {P}lasmodium falciparum proteome by high-accuracy
    mass spectrometry.},
  journal   = {Nature},
  year      = {2002},
  month     = {Oct},
  volume    = {419},
  number    = {6906},
  pages     = {537--542},
  abstract  = {The annotated genomes of organisms define a 'blueprint' of their possible
    gene products. Post-genome analyses attempt to confirm and modify
    the annotation and impose a sense of the spatial, temporal and developmental
    usage of genetic information by the organism. Here we describe a
    large-scale, high-accuracy (average deviation less than 0.02 Da at
    1,000 Da) mass spectrometric proteome analysis of selected stages
    of the human malaria parasite Plasmodium falciparum. The analysis
    revealed 1,289 proteins of which 714 proteins were identified in
    asexual blood stages, 931 in gametocytes and 645 in gametes. The
    last two groups provide insights into the biology of the sexual stages
    of the parasite, and include conserved, stage-specific, secreted
    and membrane-associated proteins. A subset of these proteins contain
    domains that indicate a role in cell-cell interactions, and therefore
    can be evaluated as potential components of a malaria vaccine formulation.
    We also report a set of peptides with significant matches in the
    parasite genome but not in the protein set predicted by computational
    methods.},
  doi       = {10.1038/nature01111},
  url       = {http://dx.doi.org/10.1038/nature01111},
  pii       = {nature01111},
  pmid      = {12368862},
  keywords  = {plasmodium},
  pdf       = {../local/Lasonder2002Analysis.pdf},
  file      = {Lasonder2002Analysis.pdf:local/Lasonder2002Analysis.pdf:PDF},
  timestamp = {2006.04.13}
}

@article{Lasso2005Vessel,
  author = {Andr{\'a}s Lass{\'o} and Emanuele Trucco},
  title = {Vessel enhancement in digital {X}-ray angiographic sequences by temporal
	statistical learning.},
  journal = {Comput {M}ed {I}maging {G}raph},
  year = {2005},
  volume = {29},
  pages = {343--355},
  number = {5},
  month = {Jul},
  doi = {10.1016/j.compmedimag.2005.02.002},
  keywords = {Apoptosis, Gene Expression Profiling, Humans, Neoplasms, Non-U.S.
	Gov't, Oligonucleotide Array Sequence Analysis, Polymerase Chain
	Reaction, Proteins, Research Support, Subcellular Fractions, Unknown
	Primary, 15893453},
  pii = {S0895-6111(05)00032-7},
  url = {http://dx.doi.org/10.1016/j.compmedimag.2005.02.002}
}

@article{Launay2008Homology,
  author      = {G. Launay and T. Simonson},
  title       = {Homology modelling of protein-protein complexes: a simple method
    and its possibilities and limitations.},
  journal     = {BMC Bioinformatics},
  year        = {2008},
  volume      = {9},
  pages       = {427},
  abstract    = {BACKGROUND: Structure-based computational methods are needed to help
    identify and characterize protein-protein complexes and their function.
    For individual proteins, the most successful technique is homology
    modelling. We investigate a simple extension of this technique to
    protein-protein complexes. We consider a large set of complexes of
    known structures, involving pairs of single-domain proteins. The
    complexes are compared with each other to establish their sequence
    and structural similarities and the relation between the two. Compared
    to earlier studies, a simpler dataset, a simpler structural alignment
    procedure, and an additional energy criterion are used. Next, we
    compare the Xray structures to models obtained by threading the native
    sequence onto other, homologous complexes. An elementary requirement
    for a successful energy function is to rank the native structure
    above any threaded structure. We use the DFIREbeta energy function,
    whose quality and complexity are typical of the models used today.
    Finally, we compare near-native models to distinctly non-native models.
    RESULTS: If weakly stable complexes are excluded (defined by a binding
    energy cutoff), as well as a few unusual complexes, a simple homology
    principle holds: complexes that share more than 35\% sequence identity
    share similar structures and interaction modes; this principle was
    less clearcut in earlier studies. The energy function was then tested
    for its ability to identify experimental structures among sets of
    decoys, produced by a simple threading procedure. On average, the
    experimental structure is ranked above 92\% of the alternate structures.
    Thus, discrimination of the native structure is good but not perfect.
    The discrimination of near-native structures is fair. Typically,
    a single, alternate, non-native binding mode exists that has a native-like
    energy. Some of the associated failures may correspond to genuine,
    alternate binding modes and/or native complexes that are artefacts
    of the crystal environment. In other cases, additional model filtering
    with more sophisticated tools is needed. CONCLUSION: The results
    suggest that the simple modelling procedure applied here could help
    identify and characterize protein-protein complexes. The next step
    is to apply it on a genomic scale.},
  doi         = {10.1186/1471-2105-9-427},
  url         = {http://dx.doi.org/10.1186/1471-2105-9-427},
  pii         = {1471-2105-9-427},
  pmid        = {18844985},
  institution = {Laboratoire de Biochimie (UMR CNRS 7654), Department of Biology,
    Ecole Polytechnique, 91128, Palaiseau, France. guillaume.launay@irisa.fr},
  keywords    = {Algorithms; Protein Binding; Protein Conformation; Protein Interaction
    Domains and Motifs; Proteins, chemistry/metabolism; Structural Homology,
    Protein},
  owner       = {bricehoffmann},
  timestamp   = {2009.02.13}
}

@article{Laurie2005Q-Site,
  author = {Laurie, A. T. R. and Jackson, R. M.},
  title = {{Q-SiteFinder}: an energy-based method for the prediction of protein--ligand
	binding sites},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1908--1916},
  number = {9},
  address = {Oxford, UK},
  doi = {10.1093/bioinformatics/bti315},
  issn = {1367-4803},
  publisher = {Oxford University Press}
}

@book{Lauritzen1996Graphical,
  author = {Lauritzen, S.},
  title = {Graphical {M}odels},
  year = {1996},
  publisher = {Oxford},
  subject = {stat}
}

@article{Lavielle2005Using,
  author = {Lavielle, M.},
  title = {Using penalized contrasts for the change-point problem},
  journal = {Signal Process.},
  year = {2005},
  volume = {85},
  number = {8},
  pages = {1501--1510},
  doi = {10.1016/j.sigpro.2005.01.012},
  owner = {kb},
  timestamp = {2011.04.19},
  pdf = {../local/Lavielle2005Using.pdf},
  file = {Lavielle2005Using.pdf:Lavielle2005Using.pdf:PDF}
}

@inproceedings{Lavielle2005Adaptive,
  author = {Lavielle, M. and Teyssi{\`e}re, G.},
  title = {Adaptive detection of multiple change-points in asset price volatility},
  booktitle = {Long-Memory in Economics},
  year = {2005},
  editor = {Teyssi{\`e}re, G. and Kirman, A.},
  pages = {129--156},
  publisher = {Springer Verlag},
  address = {Berlin},
  owner = {jp},
  timestamp = {2010.06.02}
}

@article{Lavielle2006Detection,
  author = {Lavielle, M. and Teyssi{\`e}re, G.},
  title = {Detection of multiple change-points in multivariate time series},
  journal = {Lithuanian Mathematical Journal},
  year = {2006},
  volume = {46},
  number = {3},
  pages = {287--306},
  doi = {10.1007/s10986-006-0028-9},
  url = {http://dx.doi.org/10.1007/s10986-006-0028-9},
  owner = {jp},
  timestamp = {2010.06.02},
  pdf = {../local/Lavielle2006Detection.pdf},
  file = {Lavielle2006Detection.pdf:Lavielle2006Detection.pdf:PDF}
}

@article{Lavrik2007JBC,
  author = {Lavrik, I. N. and Golks, A. and Riess, D. and Bentele, M. and Eils,
	R. and Krammer, P. H.},
  title = {Analysis of {CD95} threshold signaling: triggering of {CD95} ({FAS/APO-1})
	at low concentrations primarily results in survival signaling},
  journal = {J Biol Chem},
  year = {2007},
  volume = {282},
  pages = {13664--13671},
  number = {18},
  abstract = {Recently we generated a mathematical model (Bentele, M., Lavrik, I.,
	Ulrich, M., Stosser, S., Heermann, D. W., Kalthoff, H., Krammer,
	P. H., and Eils, R. (2004) J. Cell Biol. 166, 839-851) of signaling
	in CD95(Fas/APO-1)-mediated apoptosis. Mathematical modeling in combination
	with experimental data provided new insights into CD95-mediated apoptosis
	and allowed us to establish a threshold mechanism of life and death.
	Here, we further assessed the predictability of the model experimentally
	by a detailed analysis of the threshold behavior of CD95 signaling.
	Using the model predictions for the mechanism of the threshold behavior
	we found that the CD95 DISC (death-inducing signaling complex) is
	formed at the cell membrane upon stimulation with low concentrations
	of agonistic anti-APO-1 monoclonal antibodies; however, activation
	of procaspase-8 at the DISC is blocked due to high cellular FLICE-inhibitory
	protein recruitment into the DISC. Given that death signaling does
	not occur upon CD95 stimulation at low (threshold) anti-APO-1 concentrations,
	we also analyzed survival signaling, focusing on mitogen-activated
	protein kinase activation. Interestingly, we found that mitogen-activated
	protein kinase activation takes place under threshold conditions.
	These findings show that triggering of CD95 can signal both life
	or death, depending on the strength of the stimulus.},
  keywords = {csbcbook}
}

@article{Lazar2012survey,
  author = {Lazar, C. and Taminau, J. and Meganck, S. and Steenhoff, D. and Coletta,
	A. and Molter, C. and de Schaetzen, V. and Duque, R. and Bersini,
	H. and Now{\'e}, A.},
  title = {A survey on filter techniques for feature selection in gene expression
	microarray analysis},
  journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics
	(TCBB)},
  publisher = {IEEE Computer Society Press},
  year = {2012},
  volume = {9},
  number = {4},
  pages = {1106--1119}
}

@article{Lazo2000Combinatorial,
  author = {Lazo, J. S. and Wipf, P.},
  title = {{C}ombinatorial chemistry and contemporary pharmacology.},
  journal = {J. Pharmacol. Exp. Ther.},
  year = {2000},
  volume = {293},
  number = {3},
  pages = {705--709},
  month = {Jun},
  pmid = {10869367},
  abstract = {Both solid- and liquid-phase combinatorial chemistry have emerged
	as powerful tools for identifying pharmacologically active compounds
	and optimizing the biological activity of a lead compound. Complementary
	high-throughput in vitro assays are essential for compound evaluation.
	Cell-based assays that use optical endpoints permit investigation
	of a wide variety of functional properties of these compounds including
	specific intracellular biochemical pathways, protein-protein interactions,
	and the subcellular localization of targets. Integration of combinatorial
	chemistry with contemporary pharmacology now represents an important
	factor in drug discovery and development.},
  keywords = {Alzheimer Disease, Animals, Antineoplastic Agents, Biological, Bleomycin,
	Cell Cycle, Cell Cycle Proteins, Cell Death, Cell Line, Cell Nucleus,
	Cell Shape, Cell Transformation, Combinatorial Chemistry Techniques,
	Cultured, Drug Delivery Systems, Drug Design, Drug Evaluation, Enzyme
	Inhibitors, Formazans, Gene Expression, Humans, Inhibitory Concentration
	50, Kinetics, Magnetic Resonance Spectroscopy, Mass, Mitochondria,
	Models, Molecular, Neoplasms, Neoplastic, Non-P.H.S., Non-U.S. Gov't,
	P.H.S., Paclitaxel, Peptide Library, Pharmaceutical Preparations,
	Pharmacology, Phosphoprotein Phosphatase, Preclinical, Protease Inhibitors,
	Protein-Tyrosine-Phosphatase, Research Support, Sensitivity and Specificity,
	Signal Transduction, Spectrum Analysis, Stereoisomerism, Structure-Activity
	Relationship, Sulfonic Acids, Tetrazolium Salts, Thiazoles, Toxicity
	Tests, Tumor, Tumor Cells, U.S. Gov't, cdc25 Phosphatase, 10869367},
  owner = {mahe},
  timestamp = {2006.08.22}
}

@article{Lenovere2001MELTING,
  author = {Le Nov{\`e}re, N.},
  title = {{MELTING}, computing the melting temperature of nucleic acid duplex.},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {1226--1227},
  number = {12},
  month = {Dec},
  abstract = {M{ELTING} computes the enthalpy and entropy of an oligonucleotide
	duplex helix-coil transition, and then its melting temperature. {T}he
	program uses the method of nearest-neighbours. {T}he set of thermodynamic
	parameters can be easily customized. {T}he program provides several
	correction methods for the concentration of salt. {MELTING} is a
	free program, available at no cost and open-source. {P}erl scripts
	are provided to show how {MELTING} can be used to construct more
	ambitious programs. {AVAILABILITY}: {MELTING} is available for several
	platforms (http://www.pasteur.fr/recherche/unites/neubiomol/meltinghome.html)
	and is accessible via a www server (http://bioweb.pasteur.fr/seqanal/interfaces/melting.html).
	{CONTACT}: nl223@cus.cam.ac.uk}
}

@article{Lenovere2009The,
  author = {Le Nov{\`e}re, Nicolas and Michael Hucka and Huaiyu Mi and Stuart Moodie
	and Falk Schreiber and Anatoly Sorokin and Emek Demir and Katja Wegner
	and Mirit I Aladjem and Sarala M Wimalaratne and Frank T Bergman
	and Ralph Gauges and Peter Ghazal and Hideya Kawaji and Lu Li and
	Yukiko Matsuoka and Alice Vill{\'e}ger and Sarah E Boyd and Laurence
	Calzone and Melanie Courtot and Ugur Dogrusoz and Tom C Freeman and
	Akira Funahashi and Samik Ghosh and Akiya Jouraku and Sohyoung Kim
	and Fedor Kolpakov and Augustin Luna and Sven Sahle and Esther Schmidt
	and Steven Watterson and Guanming Wu and Igor Goryanin and Douglas
	B Kell and Chris Sander and Herbert Sauro and Jacky L Snoep and Kurt
	Kohn and Hiroaki Kitano},
  title = {The Systems Biology Graphical Notation.},
  journal = {Nat Biotechnol},
  year = {2009},
  volume = {27},
  pages = {735--741},
  number = {8},
  month = {Aug},
  abstract = {Circuit diagrams and Unified Modeling Language diagrams are just two
	examples of standard visual languages that help accelerate work by
	promoting regularity, removing ambiguity and enabling software tool
	support for communication of complex information. Ironically, despite
	having one of the highest ratios of graphical to textual information,
	biology still lacks standard graphical notations. The recent deluge
	of biological knowledge makes addressing this deficit a pressing
	concern. Toward this goal, we present the Systems Biology Graphical
	Notation (SBGN), a visual language developed by a community of biochemists,
	modelers and computer scientists. SBGN consists of three complementary
	languages: process diagram, entity relationship diagram and activity
	flow diagram. Together they enable scientists to represent networks
	of biochemical interactions in a standard, unambiguous way. We believe
	that SBGN will foster efficient and accurate representation, visualization,
	storage, exchange and reuse of information on all kinds of biological
	knowledge, from gene regulation, to metabolism, to cellular signaling.},
  doi = {10.1038/nbt.1558},
  institution = {EMBL European Bioinformatics Institute, Hinxton, UK. lenov@ebi.ac.uk},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nbt.1558},
  pmid = {19668183},
  timestamp = {2010.08.04},
  url = {http://dx.doi.org/10.1038/nbt.1558}
}

@article{Le2008systematic,
  author = {Le Roch, K. G. and Johnson, J. R. and Ahiboh, H. and Chung, D.-W.
	D. and Prudhomme, J. and Plouffe, D. and Henson, K. and Zhou, Y.
	and Witola, W. and Yates, J. R. and Ben Mamoun, C. and Winzeler,
	E. A. and Vial, H.},
  title = {A systematic approach to understand the mechanism of action of the
	bisthiazolium compound {T4} on the human malaria parasite, {P}lasmodium
	falciparum.},
  journal = {BMC Genomics},
  year = {2008},
  volume = {9},
  pages = {513},
  doi = {10.1186/1471-2164-9-513},
  url = {http://dx.doi.org/10.1186/1471-2164-9-513},
  pii = {1471-2164-9-513},
  pmid = {18973684},
  abstract = {BACKGROUND: In recent years, a major increase in the occurrence of
	drug resistant falciparum malaria has been reported. Choline analogs,
	such as the bisthiazolium T4, represent a novel class of compounds
	with strong potency against drug sensitive and resistant P. falciparum
	clones. Although T4 and its analogs are presumed to target the parasite's
	lipid metabolism, their exact mechanism of action remains unknown.
	Here we have employed transcriptome and proteome profiling analyses
	to characterize the global response of P. falciparum to T4 during
	the intraerythrocytic cycle of this parasite. RESULTS: No significant
	transcriptional changes were detected immediately after addition
	of T4 despite the drug's effect on the parasite metabolism. Using
	the Ontology-based Pattern Identification (OPI) algorithm with an
	increased T4 incubation time, we demonstrated cell cycle arrest and
	a general induction of genes involved in gametocytogenesis. Proteomic
	analysis revealed a significant decrease in the level of the choline/ethanolamine-phosphotransferase
	(PfCEPT), a key enzyme involved in the final step of synthesis of
	phosphatidylcholine (PC). This effect was further supported by metabolic
	studies, which showed a major alteration in the synthesis of PC from
	choline and ethanolamine by the compound. CONCLUSION: Our studies
	demonstrate that the bisthiazolium compound T4 inhibits the pathways
	of synthesis of phosphatidylcholine from choline and ethanolamine
	in P. falciparum, and provide evidence for post-transcriptional regulations
	of parasite metabolism in response to external stimuli.},
  keywords = {lasso},
  institution = {Department of Cell Biology and Neuroscience, University of California,
	Riverside, 900 University Avenue, Riverside, CA 92521, USA. karine.leroch@ucr.edu},
  owner = {jp},
  timestamp = {2009.01.21}
}

@article{LeRoch2003Discovery,
  author = {Le Roch, K. G. and Zhou, Y. and Blair, P. L. and Grainger, M. and
	Moch, J. K. and Haynes, J. D. and De la Vega, P. and Holder, A. A.
	and Batalov, S. and Carucci, D. J. and Winzeler, E. A.},
  title = {Discovery of Gene Function by Expression Profiling of the Malaria
	Parasite Life Cycle},
  journal = {Science},
  year = {2003},
  volume = {301},
  pages = {1503--1508},
  number = {5639},
  abstract = {The completion of the genome sequence for {P}lasmodium falciparum,
	the species responsible for most malaria human deaths, has the potential
	to reveal hundreds of new drug targets and proteins involved in pathogenesis.
	{H}owever, only approximately 35\% of the genes code for proteins
	with an identifiable function. {T}he absence of routine genetic tools
	for studying {P}lasmodium parasites suggests that this number is
	unlikely to change quickly if conventional serial methods are used
	to characterize encoded proteins. {H}ere, we use a high-density oligonucleotide
	array to generate expression profiles of human and mosquito stages
	of the malaria parasite's life cycle. {G}enes with highly correlated
	levels and temporal patterns of expression were often involved in
	similar functions or cellular processes.},
  doi = {10.1126/science.1087025},
  pdf = {../local/LeRoch2003Discovery.pdf},
  file = {LeRoch2003Discovery.pdf:LeRoch2003Discovery.pdf:PDF},
  keywords = {microarray plasmodium},
  owner = {vert},
  url = {http://www.sciencemag.org/cgi/content/full/301/5639/1503}
}

@article{LeRoux2008Representational,
  author = {{Le Roux}, Nicolas and Bengio, Yoshua},
  title = {Representational power of restricted {Boltzmann} machines and deep
	belief networks.},
  journal = {Neural Comput},
  year = {2008},
  volume = {20},
  pages = {1631--1649},
  number = {6},
  month = {Jun},
  abstract = {Deep belief networks (DBN) are generative neural network models with
	many layers of hidden explanatory factors, recently introduced by
	Hinton, Osindero, and Teh (2006) along with a greedy layer-wise unsupervised
	learning algorithm. The building block of a DBN is a probabilistic
	model called a restricted Boltzmann machine (RBM), used to represent
	one layer of the model. Restricted Boltzmann machines are interesting
	because inference is easy in them and because they have been successfully
	used as building blocks for training deeper models. We first prove
	that adding hidden units yields strictly improved modeling power,
	while a second theorem shows that RBMs are universal approximators
	of discrete distributions. We then study the question of whether
	DBNs with more layers are strictly more powerful in terms of representational
	power. This suggests a new and less greedy criterion for training
	RBMs within DBNs.},
  doi = {10.1162/neco.2008.04-07-510},
  institution = {Département Informatique et Recherche Opérationnelle, Université
	de Montréal, Montréal, Québec, H3C 3J7, Canada. lerouxni@iro.umontreal.ca},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {18254699},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1162/neco.2008.04-07-510}
}

@book{Leach2003introduction,
  author = {Leach, A. R. and Gillet, V. J.},
  title = {An introduction to chemoinformatics},
  year = {2003},
  publisher = {Kluwer Academic Publishers}
}

@article{Leach2006Prediction,
  author = {Leach, A. R. and Shoichet, B. K. and Peishoff, C. E.},
  title = {Prediction of protein-ligand interactions. Docking and scoring: successes
	and gaps.},
  journal = {J. Med. Chem.},
  year = {2006},
  volume = {49},
  number = {20},
  pages = {5851--5855},
  month = {Oct},
  doi = {10.1021/jm060999m},
  url = {http://dx.doi.org/10.1021/jm060999m},
  pmid = {17004700},
  keywords = {Binding Sites; Drug Design; Ligands; Models, Molecular; Protein Binding;
	Proteins, chemistry; Quantitative Structure-Activity Relationship},
  institution = {GlaxoSmithKline Pharmaceuticals, 1250 South Collegeville Road, Collegeville,
	Pennsylvania 19426, USA.},
  owner = {bricehoffmann},
  timestamp = {2009.02.13}
}

@article{Leclerc2008Survival,
  author = {Leclerc, R. D.},
  title = {Survival of the sparsest: robust gene networks are parsimonious.},
  journal = {Mol Syst Biol},
  year = {2008},
  volume = {4},
  pages = {213},
  abstract = {Biological gene networks appear to be dynamically robust to mutation,
	stochasticity, and changes in the environment and also appear to
	be sparsely connected. Studies with computational models, however,
	have suggested that denser gene networks evolve to be more dynamically
	robust than sparser networks. We resolve this discrepancy by showing
	that misassumptions about how to measure robustness in artificial
	networks have inadvertently discounted the costs of network complexity.
	We show that when the costs of complexity are taken into account,
	that robustness implies a parsimonious network structure that is
	sparsely connected and not unnecessarily complex; and that selection
	will favor sparse networks when network topology is free to evolve.
	Because a robust system of heredity is necessary for the adaptive
	evolution of complex phenotypes, the maintenance of frugal network
	complexity is likely a crucial design constraint that underlies biological
	organization.},
  doi = {10.1038/msb.2008.52},
  institution = {Wagner Lab, Department of Ecology and Evolutionary Biology, Yale
	University, New Haven, CT 06520, USA. robert.leclerc@yale.edu},
  keywords = {Adaptation, Physiological; Computer Simulation; Evolution, Molecular;
	Gene Regulatory Networks; Genotype; Heredity; Humans; Male; Models,
	Genetic; Mutation; Reproducibility of Results; Selection, Genetic;
	Stochastic Processes; Transcription, Genetic},
  owner = {mordelet},
  pii = {msb200852},
  pmid = {18682703},
  timestamp = {2010.10.18},
  url = {http://dx.doi.org/10.1038/msb.2008.52}
}

@article{Lee2003Discovery,
  author = {Dongkwon Lee and Sang Wook Choi and Myengsoo Kim and Jin Hyun Park
	and Moonkyu Kim and Jungchul Kim and In-Beum Lee},
  title = {Discovery of differentially expressed genes related to histological
	subtype of hepatocellular carcinoma.},
  journal = {Biotechnol {P}rog.},
  year = {2003},
  volume = {19},
  pages = {1011--1015},
  number = {3},
  abstract = {Hepatocellular carcinoma ({HCC}) is one of the most common human malignancies
	in the world. {T}o identify the histological subtype-specific genes
	of {HCC}, we analyzed the gene expression profile of 10 {HCC} patients
	by means of c{DNA} microarray. {W}e proposed a systematic approach
	for determining the discriminatory genes and revealing the biological
	phenomena of {HCC} with c{DNA} microarray data. {F}irst, normalization
	of c{DNA} microarray data was performed to reduce or minimize systematic
	variations. {O}n the basis of the suitably normalized data, we identified
	specific genes involved in histological subtype of {HCC}. {T}wo classification
	methods, {F}isher's discriminant analysis ({FDA}) and support vector
	machine ({SVM}), were used to evaluate the reliability of the selected
	genes and discriminate the histological subtypes of {HCC}. {T}his
	study may provide a clue for the needs of different chemotherapy
	and the reason for heterogeneity of the clinical responses according
	to histological subtypes.},
  doi = {10.1021/bp025746a},
  pdf = {../local/Lee2003Discovery.pdf},
  file = {Lee2003Discovery.pdf:local/Lee2003Discovery.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/bp025746a}
}

@article{Lee1999Learning,
  author = {Lee, D. D. and Seung, H. S.},
  title = {Learning the parts of objects by non-negative matrix factorization.},
  journal = {Nature},
  year = {1999},
  volume = {401},
  number = {6755},
  pages = {788--791},
  month = {Oct},
  doi = {10.1038/44565},
  url = {http://dx.doi.org/10.1038/44565},
  pmid = {10548103},
  abstract = {Is perception of the whole based on perception of its parts? There
	is psychological and physiological evidence for parts-based representations
	in the brain, and certain computational theories of object recognition
	rely on such representations. But little is known about how brains
	or computers might learn the parts of objects. Here we demonstrate
	an algorithm for non-negative matrix factorization that is able to
	learn parts of faces and semantic features of text. This is in contrast
	to other methods, such as principal components analysis and vector
	quantization, that learn holistic, not parts-based, representations.
	Non-negative matrix factorization is distinguished from the other
	methods by its use of non-negativity constraints. These constraints
	lead to a parts-based representation because they allow only additive,
	not subtractive, combinations. When non-negative matrix factorization
	is implemented as a neural network, parts-based representations emerge
	by virtue of two properties: the firing rates of neurons are never
	negative and synaptic strengths do not change sign.},
  institution = {Bell Laboratories, Lucent Technologies, Murray Hill, New Jersey 07974,
	USA.},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Lee1999Learning.pdf},
  file = {Lee1999Learning.pdf:Lee1999Learning.pdf:PDF},
  owner = {jp},
  timestamp = {2011.12.30}
}

@article{Lee2008Inferring,
  author = {Lee, E. and Chuang, H.-Y. and Kim, J. W. and Ideker, T. and Lee,
	D.},
  title = {Inferring pathway activity toward precise disease classification.},
  journal = {PLoS Comput Biol},
  year = {2008},
  volume = {4},
  number = {11},
  pages = {e1000217},
  month = {Nov},
  doi = {10.1371/journal.pcbi.1000217},
  url = {http://dx.doi.org/10.1371/journal.pcbi.1000217},
  pmid = {18989396},
  abstract = {The advent of microarray technology has made it possible to classify
	disease states based on gene expression profiles of patients. Typically,
	marker genes are selected by measuring the power of their expression
	profiles to discriminate among patients of different disease states.
	However, expression-based classification can be challenging in complex
	diseases due to factors such as cellular heterogeneity within a tissue
	sample and genetic heterogeneity across patients. A promising technique
	for coping with these challenges is to incorporate pathway information
	into the disease classification procedure in order to classify disease
	based on the activity of entire signaling pathways or protein complexes
	rather than on the expression levels of individual genes or proteins.
	We propose a new classification method based on pathway activities
	inferred for each patient. For each pathway, an activity level is
	summarized from the gene expression levels of its condition-responsive
	genes (CORGs), defined as the subset of genes in the pathway whose
	combined expression delivers optimal discriminative power for the
	disease phenotype. We show that classifiers using pathway activity
	achieve better performance than classifiers based on individual gene
	expression, for both simple and complex case-control studies including
	differentiation of perturbed from non-perturbed cells and subtyping
	of several different kinds of cancer. Moreover, the new method outperforms
	several previous approaches that use a static (i.e., non-conditional)
	definition of pathways. Within a pathway, the identified CORGs may
	facilitate the development of better diagnostic markers and the discovery
	of core alterations in human disease.},
  institution = {Department of Bio and Brain Engineering, KAIST, Daejeon, South Korea.},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Lee2008Inferring.pdf},
  file = {Lee2008Inferring.pdf:Lee2008Inferring.pdf:PDF},
  owner = {jp},
  timestamp = {2011.08.07}
}

@article{Lee2005improved,
  author = {Jaewook Lee and Daewon Lee},
  title = {An improved cluster labeling method for support vector clustering.},
  journal = {I{EEE} {T}rans {P}attern {A}nal {M}ach {I}ntell},
  year = {2005},
  volume = {27},
  pages = {461--464},
  number = {3},
  month = {Mar},
  abstract = {The support vector clustering ({SVC}) algorithm is a recently emerged
	unsupervised learning method inspired by support vector machines.
	{O}ne key step involved in the {SVC} algorithm is the cluster assignment
	of each data point. {A} new cluster labeling method for {SVC} is
	developed based on some invariant topological properties of a trained
	kernel radius function. {B}enchmark results show that the proposed
	method outperforms previously reported labeling techniques.}
}

@article{Lee2005extensive,
  author = {Lee, J. W. and Lee, J. B. and Park, M. and Song, S. H.},
  title = {An extensive comparison of recent classification tools applied to
	microarray data},
  journal = {Comput. Stat. Data An.},
  year = {2005},
  volume = {48},
  pages = {869--885},
  pdf = {../local/Lee2005extensive.pdf},
  file = {Lee2005extensive.pdf:Lee2005extensive.pdf:PDF},
  owner = {jp},
  timestamp = {2012.03.04}
}

@article{Lee2004efficient,
  author = {Martin M S Lee and S. Sathiya Keerthi and Chong Jin Ong and Dennis
	DeCoste},
  title = {An efficient method for computing leave-one-out error in support
	vector machines with {G}aussian kernels.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2004},
  volume = {15},
  pages = {750--757},
  number = {3},
  month = {May},
  abstract = {In this paper, we give an efficient method for computing the leave-one-out
	({LOO}) error for support vector machines ({SVM}s) with {G}aussian
	kernels quite accurately. {I}t is particularly suitable for iterative
	decomposition methods of solving {SVM}s. {T}he importance of various
	steps of the method is illustrated in detail by showing the performance
	on six benchmark datasets. {T}he new method often leads to speedups
	of 10-50 times compared to standard {LOO} error computation. {I}t
	has good promise for use in hyperparameter tuning and model comparison},
  keywords = {Algorithms, Bayes Theorem, Computing Methodologies, Models, Neural
	Networks (Computer), Non-U.S. Gov't, Normal Distribution, Research
	Design, Research Support, Theoretical, 15384561}
}

@inproceedings{Lee1999oscillatory,
  author = {R. S. T. Lee and J. N. K. Liu},
  title = {An oscillatory elastic graph matching model for recognition of offline
	handwritten {Chinese} characters},
  booktitle = {KES},
  year = {1999},
  pages = {284--287},
  doi = {10.1109/KES.1999.820179},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://dx.doi.org/10.1109/KES.1999.820179}
}

@article{Lee2003Application,
  author = {Lee, S.-I. and Batzoglou, S.},
  title = {Application of independent component analysis to microarrays.},
  journal = {Genome Biol.},
  year = {2003},
  volume = {4},
  number = {11},
  pages = {R76},
  doi = {10.1186/gb-2003-4-11-r76},
  url = {http://dx.doi.org/10.1186/gb-2003-4-11-r76},
  pii = {gb-2003-4-11-r76},
  pmid = {14611662},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Lee2003Application.pdf},
  file = {Lee2003Application.pdf:Lee2003Application.pdf:PDF},
  owner = {jp},
  timestamp = {2011.12.30}
}

@inproceedings{Lee2003Learning,
  author = {Lee, W. S. and Liu, B.},
  title = {Learning with Positive and Unlabeled Examples Using Weighted Logistic
	Regression},
  booktitle = {Machine Learning, Proceedings of the Twentieth International Conference
	({ICML} 2003)},
  year = {2003},
  editor = {Fawcett, T. and Mishra, N.},
  pages = {448--455},
  publisher = {AAAI Press},
  owner = {jp},
  timestamp = {2010.02.01}
}

@article{Lee2003Classification,
  author = {Lee, Y. and Lee, C.-K.},
  title = {Classification of multiple cancer types by multicategory support
	vector machines using gene expression data},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1132--1139},
  number = {9},
  abstract = {Motivation: {H}igh-density {DNA} microarray measures the activities
	of several thousand genes simultaneously and the gene expression
	profiles have been used for the cancer classification recently. {T}his
	new approach promises to give better therapeutic measurements to
	cancer patients by diagnosing cancer types with improved accuracy.
	{T}he {S}upport {V}ector {M}achine ({SVM}) is one of the classification
	methods successfully applied to the cancer diagnosis problems. {H}owever,
	its optimal extension to more than two classes was not obvious, which
	might impose limitations in its application to multiple tumor types.
	{W}e briefly introduce the {M}ulticategory {SVM}, which is a recently
	proposed extension of the binary {SVM}, and apply it to multiclass
	cancer diagnosis problems {R}esults: {I}ts applicability is demonstrated
	on the leukemia data ({G}olub et al., 1999) and the small round blue
	cell tumors of childhood data ({K}han et al., 2001). {C}omparable
	classification accuracy shown in the applications and its flexibility
	render the {MSVM} a viable alternative to other classification methods
	{S}upplementary {I}nformation: http://www.stat.ohio-state.edu/~yklee/msvm.html
	{C}ontact: yklee@stat.ohio-state.edu},
  pdf = {../local/Lee2003Classification.pdf},
  file = {Lee2003Classification.pdf:local/Lee2003Classification.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/9/1132}
}

@article{Lefkowitz2008crystal,
  author = {Lefkowitz, R. J. and Sun, J.-P. and Shukla, A. K.},
  title = {A crystal clear view of the beta2-adrenergic receptor},
  journal = {Nat. Biotechnol.},
  year = {2008},
  volume = {26},
  number = {2},
  pages = {189--191},
  month = {Feb},
  doi = {10.1038/nbt0208-189},
  url = {http://dx.doi.org/10.1038/nbt0208-189},
  pii = {nbt0208-189},
  pmid = {18259173},
  keywords = {chemogenomics},
  owner = {laurent},
  timestamp = {2008.07.16}
}

@article{Legewie2006PLOSCompBiol,
  author = {Legewie, S. and Bl{\"u}thgen, N. and Herzel, H.},
  title = {Mathematical Modeling Identifies Inhibitors of Apoptosis as Mediators
	of Positive Feedback and Bistability},
  journal = {PLoS Comput Biol},
  year = {2006},
  volume = {2},
  pages = {e120},
  number = {9},
  month = {Sep},
  abstract = {The intrinsic, or mitochondrial, pathway of caspase activation is
	essential for apoptosis induction by various stimuli including cytotoxic
	stress. It depends on the cellular context, whether cytochrome c
	released from mitochondria induces caspase activation gradually or
	in an all-or-none fashion, and whether caspase activation irreversibly
	commits cells to apoptosis. By analyzing a quantitative kinetic model,
	we show that inhibition of caspase-3 (Casp3) and Casp9 by inhibitors
	of apoptosis (IAPs) results in an implicit positive feedback, since
	cleaved Casp3 augments its own activation by sequestering IAPs away
	from Casp9. We demonstrate that this positive feedback brings about
	bistability (i.e., all-or-none behaviour), and that it cooperates
	with Casp3-mediated feedback cleavage of Casp9 to generate irreversibility
	in caspase activation. Our calculations also unravel how cell-specific
	protein expression brings about the observed qualitative differences
	in caspase activation (gradual versus all-or-none and reversible
	versus irreversible). Finally, known regulators of the pathway are
	shown to efficiently shift the apoptotic threshold stimulus, suggesting
	that the bistable caspase cascade computes multiple inputs into an
	all-or-none caspase output. As cellular inhibitory proteins (e.g.,
	IAPs) frequently inhibit consecutive intermediates in cellular signaling
	cascades (e.g., Casp3 and Casp9), the feedback mechanism described
	in this paper is likely to be a widespread principle on how cells
	achieve ultrasensitivity, bistability, and irreversibility.},
  doi = {10.1371/journal.pcbi.0020120},
  keywords = {csbcbook},
  publisher = {Public Library of Science},
  url = {http://dx.plos.org/10.1371/journal.pcbi.0020120}
}

@article{Leighton1986Estimating,
  author = {Leighton, F. and Rivest, R.},
  title = {Estimating a probability using finite memory},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1986},
  volume = {32},
  pages = {733--742},
  number = {6},
  month = {Nov},
  abstract = {{L}et $\lbrace X_i \rbrace_{i=1}^{\infty}$ be a sequence of independent
	{B}ernoulli random variables with probability $p$ that $X_i = 1$ and
	probability $q = 1-p$ that $X_i = 0$ for all $i \geq 1$. {T}ime-invariant
	finite-memory (i.e., finite-state) estimation procedures for the
	parameter $p$ are considered which take $X_1, \cdots$ as an input
	sequence. {I}n particular, an $n$-state deterministic estimation
	procedure is described which can estimate $p$ with mean-square error
	$O(\log n/n)$ and an $n$-state probabilistic estimation procedure
	which can estimate $p$ with mean-square error $O(1/n)$. {I}t is proved
	that the $O(1/n)$ bound is optimal to within a constant factor. {I}n
	addition, it is shown that linear estimation procedures are just
	as powerful (up to the measure of mean-square error) as arbitrary
	estimation procedures. {T}he proofs are based on an analog of the
	well-known matrix tree theorem that is called the {M}arkov chain
	tree theorem.},
  pdf = {../local/Leighton1986Estimating.pdf},
  file = {Leighton1986Estimating.pdf:local/Leighton1986Estimating.pdf:PDF},
  owner = {vert}
}

@article{Lemarechal1997Practical,
  author = {Lemar{\'e}chal, Claude and Sagastiz{\'a}bal, Claudia},
  title = {Practical Aspects of the Moreau-Yosida Regularization: Theoretical
	Preliminaries},
  journal = {SIAM Journal on Optimization},
  year = {1997},
  volume = {7},
  pages = {367--385}
}

@article{Lemmen2000Computational,
  author = {C. Lemmen and T. Lengauer},
  title = {Computational methods for the structural alignment of molecules.},
  journal = {J. {C}omput. {A}ided {M}ol. {D}es.},
  year = {2000},
  volume = {14},
  pages = {215--232},
  number = {3},
  month = {Mar},
  abstract = {In drug design, often enough, no structural information on a particular
	receptor protein is available. {H}owever, frequently a considerable
	number of different ligands is known together with their measured
	binding affinities towards a receptor under consideration. {I}n such
	a situation, a set of plausible relative superpositions of different
	ligands, hopefully approximating their putative binding geometry,
	is usually the method of choice for preparing data for the subsequent
	application of 3{D} methods that analyze the similarity or diversity
	of the ligands. {E}xamples are 3{D}-{QSAR} studies, pharmacophore
	elucidation, and receptor modeling. {A}n aggravating fact is that
	ligands are usually quite flexible and a rigorous analysis has to
	incorporate molecular flexibility. {W}e review the past six years
	of scientific publishing on molecular superposition. {O}ur focus
	lies on automatic procedures to be performed on arbitrary molecular
	structures. {M}ethodical aspects are our main concern here. {A}ccordingly,
	plain application studies with few methodical elements are omitted
	in this presentation. {W}hile this review cannot mention every contribution
	to this actively developing field, we intend to provide pointers
	to the recent literature providing important contributions to computational
	methods for the structural alignment of molecules. {F}inally we provide
	a perspective on how superposition methods can effectively be used
	for the purpose of virtual database screening. {I}n our opinion it
	is the ultimate goal to detect analogues in structure databases of
	nontrivial size in order to narrow down the search space for subsequent
	experiments.},
  keywords = {chemoinformatics},
  owner = {mahe},
  pmid = {10756477},
  timestamp = {2006.02.03}
}

@article{Leng2004note,
  author = {Leng, C. and Lin, Y. and Wahba, G.},
  title = {A note on the {L}asso and related procedures in model selection},
  journal = {Statistica Sinica},
  year = {2006},
  volume = {16},
  pages = {1273--1284},
  number = {4},
  pdf = {../local/Leng2004note.pdf},
  file = {Leng2004note.pdf:Leng2004note.pdf:PDF}
}

@inproceedings{Leordeanu2005Spectral,
  author = {M. Leordeanu and M. Hebert},
  title = {A Spectral Technique for Correspondence Problems using Pairwise Constraints},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2005},
  volume = {2},
  pages = {1482--1489},
  month = {October}
}

@inproceedings{Leordeanu2007Beyond,
  author    = {M. Leordeanu and M. Hebert and R. Sukthankar},
  title     = {Beyond Local Appearance: Category Recognition from Pairwise Interactions
    of Simple Features},
  booktitle = {Proceedings of CVPR},
  year      = {2007},
  month     = {June}
}

@inproceedings{Leslie2002spectrum,
  author = {Leslie, C. and Eskin, E. and Noble, W. S.},
  title = {The spectrum kernel: a string kernel for {SVM} protein classification},
  booktitle = {Proceedings of the {P}acific {S}ymposium on {B}iocomputing 2002},
  year = {2002},
  editor = {Russ B. Altman and A. Keith Dunker and Lawrence Hunter and Kevin
	Lauderdale and Teri E. Klein},
  pages = {564--575},
  address = {Singapore},
  publisher = {World Scientific},
  pdf = {../local/lesl02.pdf},
  file = {lesl02.pdf:local/lesl02.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel}
}

@inproceedings{Leslie2003Mismatch,
  author = {Leslie, C. and Eskin, E. and Weston, J. and Noble, W. S.},
  title = {Mismatch {S}tring {K}ernels for {SVM} {P}rotein {C}lassification},
  booktitle = {Advances in {N}eural {I}nformation {P}rocessing {S}ystems 15},
  year = {2003},
  editor = {Suzanna Becker and Sebastian Thrun and Klaus Obermayer},
  publisher = {MIT Press},
  pdf = {../local/lesl02b.pdf},
  file = {lesl02b.pdf:local/lesl02b.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://www.cs.columbia.edu/~cleslie/papers/mismatch-short.pdf}
}

@article{Leslie2004Fast,
  author    = {Leslie, C. and Kuang, R.},
  title     = {Fast string kernels using inexact matching for protein sequences},
  journal   = {J. Mach. Learn. Res.},
  year      = {2004},
  volume    = {5},
  pages     = {1435--1455},
  owner     = {vert},
  timestamp = {2007.08.01}
}

@article{Leslie2004Mismatch,
  author = {Leslie, C. S. and Eskin, E. and Cohen, A. and Weston, J. and Noble,
	W. S.},
  title = {Mismatch string kernels for discriminative protein classification},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {467--476},
  number = {4},
  abstract = {Motivation: {C}lassification of proteins sequences into functional
	and structural families based on sequence homology is a central problem
	in computational biology. {D}iscriminative supervised machine learning
	approaches provide good performance, but simplicity and computational
	efficiency of training and prediction are also important concerns.
	{R}esults: {W}e introduce a class of string kernels, called mismatch
	kernels, for use with support vector machines ({SVM}s) in a discriminative
	approach to the problem of protein classification and remote homology
	detection. {T}hese kernels measure sequence similarity based on shared
	occurrences of fixed-length patterns in the data, allowing for mutations
	between patterns. {T}hus, the kernels provide a biologically well-motivated
	way to compare protein sequences without relying on family-based
	generative models such as hidden {M}arkov models. {W}e compute the
	kernels efficiently using a mismatch tree data structure, allowing
	us to calculate the contributions of all patterns occurring in the
	data in one pass while traversing the tree. {W}hen used with an {SVM},
	the kernels enable fast prediction on test sequences. {W}e report
	experiments on two benchmark {SCOP} datasets, where we show that
	the mismatch kernel used with an {SVM} classifier performs competitively
	with state-of-the-art methods for homology detection, particularly
	when very few training examples are available. {E}xamination of the
	highest-weighted patterns learned by the {SVM} classifier recovers
	biologically important motifs in protein families and superfamilies.
	{A}vailability: {SVM} software is publicly available at http://microarray.cpmc.columbia.edu/gist.
	{M}ismatch kernel software is available upon request.},
  pdf = {../local/Leslie2004Mismatch.pdf},
  file = {Leslie2004Mismatch.pdf:local/Leslie2004Mismatch.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/4/467}
}

@inproceedings{Letouzey2000Learning,
  author = {Letouzey, F. and Denis, F. and Gilleron, R.},
  title = {Learning from Positive and Unlabeled Examples},
  booktitle = {Procs. of the 11th International Conference on Algorithmic Learning
	Theory},
  year = {2000},
  pages = {71--85},
  owner = {fantine},
  timestamp = {2009.06.22},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.9.1122}
}

@article{Lett2004Interaction,
  author = {Lett, D. and Hsing, M. and Pio, F.},
  title = {Interaction profile-based protein classification of death domain},
  journal = {B{MC} {B}ioinformatics},
  year = {2004},
  volume = {5},
  pages = {75},
  abstract = {Background {T}he increasing number of protein sequences and 3{D} structure
	obtained from genomic initiatives is leading many of us to focus
	on proteomics, and to dedicate our experimental and computational
	efforts on the creation and analysis of information derived from
	3{D} structure. {I}n particular, the high-throughput generation of
	protein-protein interaction data from a few organisms makes such
	an approach very important towards understanding the molecular recognition
	that make-up the entire protein-protein interaction network. {S}ince
	the generation of sequences, and experimental protein-protein interactions
	increases faster than the 3{D} structure determination of protein
	complexes, there is tremendous interest in developing in silico methods
	that generate such structure for prediction and classification purposes.
	{I}n this study we focused on classifying protein family members
	based on their protein-protein interaction distinctiveness. {S}tructure-based
	classification of protein-protein interfaces has been described initially
	by {P}onstingl et al. [1] and more recently by {V}aldar et al. [2]
	and {M}intseris et al. [3], from complex structures that have been
	solved experimentally. {H}owever, little has been done on protein
	classification based on the prediction of protein-protein complexes
	obtained from homology modeling and docking simulation. {R}esults
	{W}e have developed an in silico classification system entitled {HODOCO}
	({H}omology modeling, {D}ocking and {C}lassification {O}racle), in
	which protein {R}esidue {P}otential {I}nteraction {P}rofiles ({RPIPS})
	are used to summarize protein-protein interaction characteristics.
	{T}his system applied to a dataset of 64 proteins of the death domain
	superfamily was used to classify each member into its proper subfamily.
	{T}wo classification methods were attempted, heuristic and support
	vector machine learning. {B}oth methods were tested with a 5-fold
	cross-validation. {T}he heuristic approach yielded a 61\% average
	accuracy, while the machine learning approach yielded an 89\% average
	accuracy. {C}onclusion {W}e have confirmed the reliability and potential
	value of classifying proteins via their predicted interactions. {O}ur
	results are in the same range of accuracy as other studies that classify
	protein-protein interactions from 3{D} complex structure obtained
	experimentally. {W}hile our classification scheme does not take directly
	into account sequence information our results are in agreement with
	functional and sequence based classification of death domain family
	members.},
  doi = {10.1186/1471-2105-5-75},
  pdf = {../local/Lett2004Interaction.pdf},
  file = {Lett2004Interaction.pdf:local/Lett2004Interaction.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://www.biomedcentral.com/1471-2105/5/75}
}

@article{Leung2001Representing,
  author    = {Leung, T. and Malik, J.},
  title     = {Representing and Recognizing the Visual Appearance of Materials using
    Three-dimensional Textons},
  journal   = {Int. J. Comput. Vision},
  year      = {2001},
  volume    = {43},
  pages     = {29--44},
  number    = {1},
  abstract  = {We study the recognition of surfaces made from different materials
    such as concrete, rug, marble, or leather on the basis of their textural
    appearance. Such natural textures arise from spatial variation of
    two surface attributes: (1) reflectance and (2) surface normal. In
    this paper, we provide a unified model to address both these aspects
    of natural texture. The main idea is to construct a vocabulary of
    prototype tiny surface patches with associated local geometric and
    photometric properties. We call these 3D textons. Examples might
    be ridges, grooves, spots or stripes or combinations thereof. Associated
    with each texton is an appearance vector, which characterizes the
    local irradiance distribution, represented as a set of linear Gaussian
    derivative filter outputs, under different lighting and viewing conditions.


    Given a large collection of images of different materials, a clustering
    approach is used to acquire a small (on the order of 100) 3D texton
    vocabulary. Given a few (1 to 4) images of any material, it can be
    characterized using these textons. We demonstrate the application
    of this representation for recognition of the material viewed under
    novel lighting and viewing conditions. We also illustrate how the
    3D texton model can be used to predict the appearance of materials
    under novel conditions.},
  doi       = {10.1023/A:1011126920638},
  pdf       = {../local/Leung2001Representing.pdf},
  file      = {Leung2001Representing.pdf:local/Leung2001Representing.pdf:PDF},
  timestamp = {2008.07.29},
  url       = {http://dx.doi.org/10.1023/A:1011126920638}
}

@article{LevBarOr2000PNAS,
  author = {Lev Bar-Or, R. and Maya, R. and Segel, L. A. and Alon, U. and Levine,
	A. J. and Oren, M.},
  title = {Generation of oscillations by the p53-Mdm2 feedback loop: a theoretical
	and experimental study},
  journal = {Proc Natl Acad Sci U S A},
  year = {2000},
  volume = {97},
  pages = {11250--11255},
  number = {21},
  abstract = {The intracellular activity of the p53 tumor suppressor protein is
	regulated through a feedback loop involving its transcriptional target,
	mdm2. We present a simple mathematical model suggesting that, under
	certain circumstances, oscillations in p53 and Mdm2 protein levels
	can emerge in response to a stress signal. A delay in p53-dependent
	induction of Mdm2 is predicted to be required, albeit not sufficient,
	for this oscillatory behavior. In line with the predictions of the
	model, oscillations of both p53 and Mdm2 indeed occur on exposure
	of various cell types to ionizing radiation. Such oscillations may
	allow cells to repair their DNA without risking the irreversible
	consequences of continuous excessive p53 activation.},
  keywords = {csbcbook}
}

@inproceedings{Levin1995Stock,
  author    = {Levin, A. E.},
  title     = {Stock Selection via Nonlinear Multi-Factor Models},
  booktitle = {Adv. Neural. Inform. Process Syst.},
  year      = {1995},
  editor    = {Touretzky, D. S. and Mozer, M. and Hasselmo, M. E.},
  volume    = {8},
  pages     = {966--972},
  publisher = {MIT Press},
  pdf       = {../local/Levin1995Stock.pdf},
  file      = {Levin1995Stock.pdf:Levin1995Stock.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.04.08}
}

@article{Levine1979Review,
  author = {H. A. Levine},
  title = {Review of: Solutions of ill posed problems},
  journal = {Bull. Amer. Math. Soc.},
  year = {1979},
  volume = {1},
  pages = {521--524}
}

@article{Levy2007Diploid,
  author      = {Samuel Levy and Granger Sutton and Pauline C Ng and Lars Feuk and
    Aaron L Halpern and Brian P Walenz and Nelson Axelrod and Jiaqi Huang
    and Ewen F Kirkness and Gennady Denisov and Yuan Lin and Jeffrey
    R MacDonald and Andy Wing Chun Pang and Mary Shago and Timothy B
    Stockwell and Alexia Tsiamouri and Vineet Bafna and Vikas Bansal
    and Saul A Kravitz and Dana A Busam and Karen Y Beeson and Tina C
    McIntosh and Karin A Remington and Josep F Abril and John Gill and
    Jon Borman and Yu-Hui Rogers and Marvin E Frazier and Stephen W Scherer
    and Robert L Strausberg and J. Craig Venter},
  title       = {The diploid genome sequence of an individual human.},
  journal     = {PLoS Biol},
  year        = {2007},
  volume      = {5},
  pages       = {e254},
  number      = {10},
  month       = {Sep},
  abstract    = {Presented here is a genome sequence of an individual human. It was
    produced from approximately 32 million random DNA fragments, sequenced
    by Sanger dideoxy technology and assembled into 4,528 scaffolds,
    comprising 2,810 million bases (Mb) of contiguous sequence with approximately
    7.5-fold coverage for any given region. We developed a modified version
    of the Celera assembler to facilitate the identification and comparison
    of alternate alleles within this individual diploid genome. Comparison
    of this genome and the National Center for Biotechnology Information
    human reference assembly revealed more than 4.1 million DNA variants,
    encompassing 12.3 Mb. These variants (of which 1,288,319 were novel)
    included 3,213,401 single nucleotide polymorphisms (SNPs), 53,823
    block substitutions (2-206 bp), 292,102 heterozygous insertion/deletion
    events (indels)(1-571 bp), 559,473 homozygous indels (1-82,711 bp),
    90 inversions, as well as numerous segmental duplications and copy
    number variation regions. Non-SNP DNA variation accounts for 22\%
    of all events identified in the donor, however they involve 74\%
    of all variant bases. This suggests an important role for non-SNP
    genetic alterations in defining the diploid genome structure. Moreover,
    44\% of genes were heterozygous for one or more variants. Using a
    novel haplotype assembly strategy, we were able to span 1.5 Gb of
    genome sequence in segments >200 kb, providing further precision
    to the diploid nature of the genome. These data depict a definitive
    molecular portrait of a diploid human genome that provides a starting
    point for future genome comparisons and enables an era of individualized
    genomic information.},
  doi         = {10.1371/journal.pbio.0050254},
  institution = {J. Craig Venter Institute, Rockville, Maryland, USA. slevy@jcvi.org},
  keywords    = {Base Sequence; Chromosome Mapping, instrumentation/methods; Chromosomes,
    Human; Chromosomes, Human, Y, genetics; Diploidy; Gene Dosage; Genome,
    Human; Genotype; Haplotypes; Human Genome Project; Humans; INDEL
    Mutation; In Situ Hybridization, Fluorescence; Male; Microarray Analysis;
    Middle Aged; Molecular Sequence Data; Pedigree; Phenotype; Polymorphism,
    Single Nucleotide; Reproducibility of Results; Sequence Analysis,
    DNA, instrumentation/methods},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  pii         = {07-PLBI-RA-1258},
  pmid        = {17803354},
  timestamp   = {2010.07.28},
  url         = {http://dx.doi.org/10.1371/journal.pbio.0050254}
}

@article{Lewis2005Conserved,
  author      = {Benjamin P Lewis and Christopher B Burge and David P Bartel},
  title       = {Conserved seed pairing, often flanked by adenosines, indicates that
    thousands of human genes are microRNA targets.},
  journal     = {Cell},
  year        = {2005},
  volume      = {120},
  pages       = {15--20},
  number      = {1},
  month       = {Jan},
  abstract    = {We predict regulatory targets of vertebrate microRNAs (miRNAs) by
    identifying mRNAs with conserved complementarity to the seed (nucleotides
    2-7) of the miRNA. An overrepresentation of conserved adenosines
    flanking the seed complementary sites in mRNAs indicates that primary
    sequence determinants can supplement base pairing to specify miRNA
    target recognition. In a four-genome analysis of 3' UTRs, approximately
    13,000 regulatory relationships were detected above the estimate
    of false-positive predictions, thereby implicating as miRNA targets
    more than 5300 human genes, which represented 30\% of our gene set.
    Targeting was also detected in open reading frames. In sum, well
    over one third of human genes appear to be conserved miRNA targets.},
  doi         = {10.1016/j.cell.2004.12.035},
  pdf         = {../local/Lewis2005Conserved.pdf},
  file        = {Lewis2005Conserved.pdf:Lewis2005Conserved.pdf:PDF},
  keywords    = {sirna},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {S0092867404012607},
  pmid        = {15652477},
  timestamp   = {2009.10.28},
  url         = {http://dx.doi.org/10.1016/j.cell.2004.12.035}
}

@article{Lewis2003Prediction,
  author      = {Benjamin P Lewis and I-hung Shih and Matthew W Jones-Rhoades and
    David P Bartel and Christopher B Burge},
  title       = {Prediction of mammalian microRNA targets.},
  journal     = {Cell},
  year        = {2003},
  volume      = {115},
  pages       = {787--798},
  number      = {7},
  month       = {Dec},
  abstract    = {MicroRNAs (miRNAs) can play important gene regulatory roles in nematodes,
    insects, and plants by basepairing to mRNAs to specify posttranscriptional
    repression of these messages. However, the mRNAs regulated by vertebrate
    miRNAs are all unknown. Here we predict more than 400 regulatory
    target genes for the conserved vertebrate miRNAs by identifying mRNAs
    with conserved pairing to the 5' region of the miRNA and evaluating
    the number and quality of these complementary sites. Rigorous tests
    using shuffled miRNA controls supported a majority of these predictions,
    with the fraction of false positives estimated at 31\% for targets
    identified in human, mouse, and rat and 22\% for targets identified
    in pufferfish as well as mammals. Eleven predicted targets (out of
    15 tested) were supported experimentally using a HeLa cell reporter
    system. The predicted regulatory targets of mammalian miRNAs were
    enriched for genes involved in transcriptional regulation but also
    encompassed an unexpectedly broad range of other functions.},
  institution = {Department of Biology, Massachusetts Institute of Technology, Cambridge,
    MA 02139, USA.},
  keywords    = {sirna},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {S0092867403010183},
  pmid        = {14697198},
  timestamp   = {2009.10.28}
}

@article{Li2009BriefBioinform,
  author = {Li, C. and Courtot, M. and Le Nov{\`e}re, N. and Laibe, C.},
  title = {BioModels.net Web Services, a free and integrated toolkit for computational
	modelling software},
  journal = {Brief Bioinform},
  year = {2009},
  abstract = {Exchanging and sharing scientific results are essential for researchers
	in the field of computational modelling. BioModels.net defines agreed-upon
	standards for model curation. A fundamental one, MIRIAM (Minimum
	Information Requested in the Annotation of Models), standardises
	the annotation and curation process of quantitative models in biology.
	To support this standard, MIRIAM Resources maintains a set of standard
	data types for annotating models, and provides services for manipulating
	these annotations. Furthermore, BioModels.net creates controlled
	vocabularies, such as SBO (Systems Biology Ontology) which strictly
	indexes, defines and links terms used in Systems Biology. Finally,
	BioModels Database provides a free, centralised, publicly accessible
	database for storing, searching and retrieving curated and annotated
	computational models. Each resource provides a web interface to submit,
	search, retrieve and display its data. In addition, the BioModels.net
	team provides a set of Web Services which allows the community to
	programmatically access the resources. A user is then able to perform
	remote queries, such as retrieving a model and resolving all its
	MIRIAM Annotations, as well as getting the details about the associated
	SBO terms. These web services use established standards. Communications
	rely on SOAP (Simple Object Access Protocol) messages and the available
	queries are described in a WSDL (Web Services Description Language)
	file. Several libraries are provided in order to simplify the development
	of client software. BioModels.net Web Services make one step further
	for the researchers to simulate and understand the entirety of a
	biological system, by allowing them to retrieve biological models
	in their own tool, combine queries in workflows and efficiently analyse
	models.},
  keywords = {csbcbook}
}

@article{Li2007JBiosci,
  author = {Li, C. and Ge, Q. W. and Nakata, M. and Matsuno, H. and Miyano, S.},
  title = {Modelling and simulation of signal transductions in an apoptosis
	pathway by using timed Petri nets},
  journal = {J Biosci},
  year = {2007},
  volume = {32},
  pages = {113--127},
  number = {1},
  abstract = {This paper first presents basic Petri net components representing
	molecular interactions and mechanisms of signalling pathways, and
	introduces a method to construct a Petri net model of a signalling
	pathway with these components. Then a simulation method of determining
	the delay time of transitions, by using timed Petri nets - i.e. the
	time taken in firing of each transition - is proposed based on some
	simple principles that the number of tokens flowed into a place is
	equivalent to the number of tokens flowed out. Finally, the availability
	of proposed method is confirmed by observing signalling transductions
	in biological pathways through simulation experiments of the apoptosis
	signalling pathways as an example.},
  keywords = {csbcbook}
}

@article{Li2008Mapping,
  author      = {Li, H. and Ruan, J. and Durbin, R.},
  title       = {Mapping short {DNA} sequencing reads and calling variants using mapping
    quality scores.},
  journal     = {Genome Res.},
  year        = {2008},
  volume      = {18},
  pages       = {1851--1858},
  number      = {11},
  month       = {Nov},
  abstract    = {New sequencing technologies promise a new era in the use of DNA sequence.
    However, some of these technologies produce very short reads, typically
    of a few tens of base pairs, and to use these reads effectively requires
    new algorithms and software. In particular, there is a major issue
    in efficiently aligning short reads to a reference genome and handling
    ambiguity or lack of accuracy in this alignment. Here we introduce
    the concept of mapping quality, a measure of the confidence that
    a read actually comes from the position it is aligned to by the mapping
    algorithm. We describe the software MAQ that can build assemblies
    by mapping shotgun short reads to a reference genome, using quality
    scores to derive genotype calls of the consensus sequence of a diploid
    genome, e.g., from a human sample. MAQ makes full use of mate-pair
    information and estimates the error probability of each read alignment.
    Error probabilities are also derived for the final genotype calls,
    using a Bayesian statistical model that incorporates the mapping
    qualities, error probabilities from the raw sequence quality scores,
    sampling of the two haplotypes, and an empirical model for correlated
    errors at a site. Both read mapping and genotype calling are evaluated
    on simulated data and real data. MAQ is accurate, efficient, versatile,
    and user-friendly. It is freely available at http://maq.sourceforge.net.},
  doi         = {10.1101/gr.078212.108},
  pdf         = {../local/Li2008Mapping.pdf},
  file        = {Li2008Mapping.pdf:Li2008Mapping.pdf:PDF},
  institution = {The Wellcome Trust Sanger Institute, Hinxton CB10 1SA, United Kingdom.},
  keywords    = {ngs},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {gr.078212.108},
  pmid        = {18714091},
  timestamp   = {2011.10.28},
  url         = {http://dx.doi.org/10.1101/gr.078212.108}
}

@article{Li2005Prediction,
  author = {H. Li and C. Ung and C. Yap and Y. Xue and Z. Li and Z. Cao and Y.
	Chen},
  title = {Prediction of genotoxicity of chemical compounds by statistical learning
	methods.},
  journal = {Chem. {R}es. {T}oxicol.},
  year = {2005},
  volume = {18},
  pages = {1071--1080},
  number = {6},
  month = {Jun},
  abstract = {Various toxicological profiles, such as genotoxic potential, need
	to be studied in drug discovery processes and submitted to the drug
	regulatory authorities for drug safety evaluation. {A}s part of the
	effort for developing low cost and efficient adverse drug reaction
	testing tools, several statistical learning methods have been used
	for developing genotoxicity prediction systems with an accuracy of
	up to 73.8\% for genotoxic ({GT}+) and 92.8\% for nongenotoxic ({GT}-)
	agents. {T}hese systems have been developed and tested by using less
	than 400 known {GT}+ and {GT}- agents, which is significantly less
	in number and diversity than the 860 {GT}+ and {GT}- agents known
	at present. {T}here is a need to examine if a similar level of accuracy
	can be achieved for the more diverse set of molecules and to evaluate
	other statistical learning methods not yet applied to genotoxicity
	prediction. {T}his work is intended for testing several statistical
	learning methods by using 860 {GT}+ and {GT}- agents, which include
	support vector machines ({SVM}), probabilistic neural network ({PNN}),
	k-nearest neighbor (k-{NN}), and {C}4.5 decision tree ({DT}). {A}
	feature selection method, recursive feature elimination, is used
	for selecting molecular descriptors relevant to genotoxicity study.
	{T}he overall accuracies of {SVM}, k-{NN}, and {PNN} are comparable
	to and those of {DT} lower than the results from earlier studies,
	with {SVM} giving the highest accuracies of 77.8\% for {GT}+ and
	92.7\% for {GT}- agents. {O}ur study suggests that statistical learning
	methods, particularly {SVM}, k-{NN}, and {PNN}, are useful for facilitating
	the prediction of genotoxic potential of a diverse set of molecules.},
  doi = {10.1021/tx049652h},
  pdf = {../local/Li2005Prediction.pdf},
  file = {Li2005Prediction.pdf:local/Li2005Prediction.pdf:PDF},
  keywords = {biosvm, chemoinformatics},
  url = {http://dx.doi.org/10.1021/tx049652h}
}

@article{Li2002Involvement,
  author      = {Jiwen Li and Qiushi Lin and Ho-Geun Yoon and Zhi-Qing Huang and Brian
    D Strahl and C. David Allis and Jiemin Wong},
  title       = {Involvement of histone methylation and phosphorylation in regulation
    of transcription by thyroid hormone receptor.},
  journal     = {Mol Cell Biol},
  year        = {2002},
  volume      = {22},
  pages       = {5688--5697},
  number      = {16},
  month       = {Aug},
  abstract    = {Previous studies have established an important role of histone acetylation
    in transcriptional control by nuclear hormone receptors. With chromatin
    immunoprecipitation assays, we have now investigated whether histone
    methylation and phosphorylation are also involved in transcriptional
    regulation by thyroid hormone receptor (TR). We found that repression
    by unliganded TR is associated with a substantial increase in methylation
    of H3 lysine 9 (H3-K9) and a decrease in methylation of H3 lysine
    4 (H3-K4), methylation of H3 arginine 17 (H3-R17), and a dual modification
    of phosphorylation of H3 serine 10 and acetylation of lysine 14 (pS10/acK14).
    On the other hand, transcriptional activation by liganded TR is coupled
    with a substantial decrease in both H3-K4 and H3-K9 methylation and
    a robust increase in H3-R17 methylation and the dual modification
    of pS10/acK14. Trichostatin A treatment results in not only histone
    hyperacetylation but also an increase in methylation of H3-K4, increase
    in dual modification of pS10/acK14, and reduction in methylation
    of H3-K9, revealing an extensive interplay between histone acetylation,
    methylation, and phosphorylation. In an effort to understand the
    underlying mechanism for an increase in H3-K9 methylation during
    repression by unliganded TR, we demonstrated that TR interacts in
    vitro with an H3-K9-specific histone methyltransferase (HMT), SUV39H1.
    Functional analysis indicates that SUV39H1 can facilitate repression
    by unliganded TR and in so doing requires its HMT activity. Together,
    our data uncover a novel role of H3-K9 methylation in repression
    by unliganded TR and provide strong evidence for the involvement
    of multiple distinct histone covalent modifications (acetylation,
    methylation, and phosphorylation) in transcriptional control by nuclear
    hormone receptors.},
  institution = {Department of Molecular and Cellular Biology, Baylor College of Medicine,
    Houston, Texas 77030, USA.},
  keywords    = {Animals; Cell Fractionation; Gene Expression Regulation, drug effects;
    Genes, Reporter; Histone-Lysine N-Methyltransferase; Histones, chemistry/genetics/metabolism;
    Humans; Hydroxamic Acids, pharmacology; Methylation; Methyltransferases,
    metabolism; Oocytes, physiology; Phosphorylation; Protein Methyltransferases;
    Protein Synthesis Inhibitors, pharmacology; Receptors, Thyroid Hormone,
    metabolism; Transcription, Genetic; Xenopus laevis, physiology},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pmid        = {12138181},
  timestamp   = {2010.11.23}
}

@article{Li2003Simple,
  author = {Li, Jinyan and Liu, Huiqing and Downing, James R and Yeoh, Allen Eng-Juh
	and Wong, Limsoon},
  title = {Simple rules underlying gene expression profiles of more than six
	subtypes of acute lymphoblastic leukemia ({ALL}) patients.},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {71--78},
  number = {1},
  month = {Jan},
  abstract = {M{OTIVATIONS} {AND} {RESULTS}: {F}or classifying gene expression profiles
	or other types of medical data, simple rules are preferable to non-linear
	distance or kernel functions. {T}his is because rules may help us
	understand more about the application in addition to performing an
	accurate classification. {I}n this paper, we discover novel rules
	that describe the gene expression profiles of more than six subtypes
	of acute lymphoblastic leukemia ({ALL}) patients. {W}e also introduce
	a new classifier, named {PCL}, to make effective use of the rules.
	{PCL} is accurate and can handle multiple parallel classifications.
	{W}e evaluate this method by classifying 327 heterogeneous {ALL}
	samples. {O}ur test error rate is competitive to that of support
	vector machines, and it is 71\% better than {C}4.5, 50\% better than
	{N}aive {B}ayes, and 43\% better than k-nearest neighbour. {E}xperimental
	results on another independent data sets are also presented to show
	the strength of our method. {AVAILABILITY}: {U}nder http://sdmc.lit.org.sg/{GED}atasets/,
	click on {S}upplementary {I}nformation.},
  keywords = {Acute, Algorithms, Automated, Base Pair Mismatch, Base Pairing, Base
	Sequence, Biological, Biosensing Techniques, Cluster Analysis, Comparative
	Study, Computer-Assisted, DNA, Gene Expression Profiling, Gene Expression
	Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans,
	Leukemia, Lymphocytic, Markov Chains, Messenger, Models, Molecular
	Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm,
	Neoplastic, Neural Networks (Computer), Non-U.S. Gov't, Nucleic Acid
	Conformation, Oligonucleotide Array Sequence Analysis, Pattern Recognition,
	Quality Control, RNA, Research Support, Signal Processing, Statistical,
	Stomach Neoplasms, Tumor Markers},
  pmid = {12499295}
}

@article{Li2011Sparse,
  author = {Li, J. J. and Jiang, C.-R. and Brown, J. B. and Huang, H. and Bickel,
	P. J.},
  title = {Sparse linear modeling of next-generation {mRNA} sequencing ({RNA-Seq})
	data for isoform discovery and abundance estimation},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2011},
  volume = {108},
  pages = {19867--19872},
  number = {50},
  month = dec,
  abstract = {Since the inception of next-generation mRNA sequencing (RNA-Seq)
	technology, various attempts have been made to utilize RNA-Seq data
	in assembling full-length mRNA isoforms de novo and estimating abundance
	of isoforms. However, for genes with more than a few exons, the problem
	tends to be challenging and often involves identifiability issues
	in statistical modeling. We have developed a statistical method called
	``sparse linear modeling of RNA-Seq data for isoform discovery
	and abundance estimation'' (SLIDE) that takes exon boundaries and
	RNA-Seq data as input to discern the set of mRNA isoforms that are
	most likely to present in an RNA-Seq sample. SLIDE is based on a
	linear model with a design matrix that models the sampling probability
	of RNA-Seq reads from different mRNA isoforms. To tackle the model
	unidentifiability issue, SLIDE uses a modified Lasso procedure for
	parameter estimation. Compared with deterministic isoform assembly
	algorithms (e.g., Cufflinks), SLIDE considers the stochastic aspects
	of RNA-Seq reads in exons from different isoforms and thus has increased
	power in detecting more novel isoforms. Another advantage of SLIDE
	is its flexibility of incorporating other transcriptomic data such
	as RACE, CAGE, and EST into its model to further increase isoform
	discovery accuracy. SLIDE can also work downstream of other RNA-Seq
	assembly algorithms to integrate newly discovered genes and exons.
	Besides isoform discovery, SLIDE sequentially uses the same linear
	model to estimate the abundance of discovered isoforms. Simulation
	and real data studies show that SLIDE performs as well as or better
	than major competitors in both isoform discovery and abundance estimation.
	The SLIDE software package is available at https://sites.google.com/site/jingyijli/SLIDE.zip.},
  citeulike-article-id = {10102447},
  citeulike-linkout-0 = {http://dx.doi.org/10.1073/pnas.1113972108},
  citeulike-linkout-1 = {http://www.pnas.org/content/early/2011/11/23/1113972108.abstract},
  citeulike-linkout-2 = {http://www.pnas.org/content/early/2011/11/23/1113972108.full.pdf},
  citeulike-linkout-3 = {http://www.pnas.org/cgi/content/abstract/108/50/19867},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/22135461},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=22135461},
  day = {13},
  doi = {10.1073/pnas.1113972108},
  pdf = {../local/Li2011Sparse.pdf},
  file = {Li2011Sparse.pdf:Li2011Sparse.pdf:PDF},
  issn = {1091-6490},
  keywords = {ngs, rnaseq},
  pmid = {22135461},
  posted-at = {2011-12-16 22:07:32},
  priority = {2},
  publisher = {National Academy of Sciences},
  url = {http://dx.doi.org/10.1073/pnas.1113972108}
}

@article{Li2005robust,
  author = {Li, L. and Jiang, W. and Li, X. and Moser, K. L. and Guo, Z. and Du,
	L. and Wang, Q. and Topol, E. J. and Wang, Q. and Rao, S.},
  title = {A robust hybrid between genetic algorithm and support vector machine
	for extracting an optimal feature gene subset},
  journal = {Genomics},
  year = {2005},
  volume = {85},
  pages = {16--23},
  number = {1},
  abstract = {Development of a robust and efficient approach for extracting useful
	information from microarray data continues to be a significant and
	challenging task. {M}icroarray data are characterized by a high dimension,
	high signal-to-noise ratio, and high correlations between genes,
	but with a relatively small sample size. {C}urrent methods for dimensional
	reduction can further be improved for the scenario of the presence
	of a single (or a few) high influential gene(s) in which its effect
	in the feature subset would prohibit inclusion of other important
	genes. {W}e have formalized a robust gene selection approach based
	on a hybrid between genetic algorithm and support vector machine.
	{T}he major goal of this hybridization was to exploit fully their
	respective merits (e.g., robustness to the size of solution space
	and capability of handling a very large dimension of feature genes)
	for identification of key feature genes (or molecular signatures)
	for a complex biological phenotype. {W}e have applied the approach
	to the microarray data of diffuse large {B} cell lymphoma to demonstrate
	its behaviors and properties for mining the high-dimension data of
	genome-wide gene expression profiles. {T}he resulting classifier(s)
	(the optimal gene subset(s)) has achieved the highest accuracy (99\%)
	for prediction of independent microarray samples in comparisons with
	marginal filters and a hybrid between genetic algorithm and {K} nearest
	neighbors.},
  doi = {10.1016/j.ygeno.2004.09.007},
  pdf = {../local/Li2005robust.pdf},
  file = {Li2005robust.pdf:local/Li2005robust.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.ygeno.2004.09.007}
}

@article{Li2004Data,
  author = {Li, L. and Tang, H. and Wu, Z. and Gong, J. and Gruidl, M. and Zou,
	J. and Tockman, M. and Clark, R. A.},
  title = {Data mining techniques for cancer detection using serum proteomic
	profiling.},
  journal = {Artif. Intell. Med.},
  year = {2004},
  volume = {32},
  pages = {71--83},
  number = {2},
  abstract = {O{BJECTIVE}: {P}athological changes in an organ or tissue may be reflected
	in proteomic patterns in serum. {I}t is possible that unique serum
	proteomic patterns could be used to discriminate cancer samples from
	non-cancer ones. {D}ue to the complexity of proteomic profiling,
	a higher order analysis such as data mining is needed to uncover
	the differences in complex proteomic patterns. {T}he objectives of
	this paper are (1) to briefly review the application of data mining
	techniques in proteomics for cancer detection/diagnosis; (2) to explore
	a novel analytic method with different feature selection methods;
	(3) to compare the results obtained on different datasets and that
	reported by {P}etricoin et al. in terms of detection performance
	and selected proteomic patterns. {METHODS} {AND} {MATERIAL}: {T}hree
	serum {SELDI} {MS} data sets were used in this research to identify
	serum proteomic patterns that distinguish the serum of ovarian cancer
	cases from non-cancer controls. {A} support vector machine-based
	method is applied in this study, in which statistical testing and
	genetic algorithm-based methods are used for feature selection respectively.
	{L}eave-one-out cross validation with receiver operating characteristic
	({ROC}) curve is used for evaluation and comparison of cancer detection
	performance. {RESULTS} {AND} {CONCLUSIONS}: {T}he results showed
	that (1) data mining techniques can be successfully applied to ovarian
	cancer detection with a reasonably high performance; (2) the classification
	using features selected by the genetic algorithm consistently outperformed
	those selected by statistical testing in terms of accuracy and robustness;
	(3) the discriminatory features (proteomic patterns) can be very
	different from one selection method to another. {I}n other words,
	the pattern selection and its classification efficiency are highly
	classifier dependent. {T}herefore, when using data mining techniques,
	the discrimination of cancer from normal does not depend solely upon
	the identity and origination of cancer-related proteins.},
  doi = {10.1016/j.artmed.2004.03.006},
  pdf = {../local/Li2004Data.pdf},
  file = {Li2004Data.pdf:local/Li2004Data.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.artmed.2004.03.006}
}

@article{Li2009SNP,
  author      = {Li, R. and Li, Y. and Fang, X. and Yang, H. and Wang, J. and Kristiansen,
    K. and Wang, J.},
  title       = {{SNP} detection for massively parallel whole-genome resequencing.},
  journal     = {Genome Res.},
  year        = {2009},
  volume      = {19},
  number      = {6},
  pages       = {1124--1132},
  month       = {Jun},
  abstract    = {Next-generation massively parallel sequencing technologies provide
    ultrahigh throughput at two orders of magnitude lower unit cost than
    capillary Sanger sequencing technology. One of the key applications
    of next-generation sequencing is studying genetic variation between
    individuals using whole-genome or target region resequencing. Here,
    we have developed a consensus-calling and SNP-detection method for
    sequencing-by-synthesis Illumina Genome Analyzer technology. We designed
    this method by carefully considering the data quality, alignment,
    and experimental errors common to this technology. All of this information
    was integrated into a single quality score for each base under Bayesian
    theory to measure the accuracy of consensus calling. We tested this
    methodology using a large-scale human resequencing data set of 36x
    coverage and assembled a high-quality nonrepetitive consensus sequence
    for 92.25\% of the diploid autosomes and 88.07\% of the haploid X
    chromosome. Comparison of the consensus sequence with Illumina human
    1M BeadChip genotyped alleles from the same DNA sample showed that
    98.6\% of the 37,933 genotyped alleles on the X chromosome and 98\%
    of 999,981 genotyped alleles on autosomes were covered at 99.97\%
    and 99.84\% consistency, respectively. At a low sequencing depth,
    we used prior probability of dbSNP alleles and were able to improve
    coverage of the dbSNP sites significantly as compared to that obtained
    using a nonimputation model. Our analyses demonstrate that our method
    has a very low false call rate at any sequencing depth and excellent
    genome coverage at a high sequencing depth.},
  doi         = {10.1101/gr.088013.108},
  url         = {http://dx.doi.org/10.1101/gr.088013.108},
  pii         = {gr.088013.108},
  pmid        = {19420381},
  pdf         = {../local/Li2009SNP.pdf},
  file        = {Li2009SNP.pdf:Li2009SNP.pdf:PDF},
  institution = {Beijing Genomics Institute at Shenzhen, Shenzhen 518000, China},
  keywords    = {ngs},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2011.10.28}
}

@article{Li2004Fusing,
  author = {Li, Shutao and Kwok, James Tin-Yau and Tsang, Ivor Wai-Hung and Wang,
	Yaonan},
  title = {Fusing images with different focuses using support vector machines.},
  journal = {{IEEE} Trans Neural Netw},
  year = {2004},
  volume = {15},
  pages = {1555--1561},
  number = {6},
  month = {Nov},
  abstract = {Many vision-related processing tasks, such as edge detection, image
	segmentation and stereo matching, can be performed more easily when
	all objects in the scene are in good focus. {H}owever, in practice,
	this may not be always feasible as optical lenses, especially those
	with long focal lengths, only have a limited depth of field. {O}ne
	common approach to recover an everywhere-in-focus image is to use
	wavelet-based image fusion. {F}irst, several source images with different
	focuses of the same scene are taken and processed with the discrete
	wavelet transform ({DWT}). {A}mong these wavelet decompositions,
	the wavelet coefficient with the largest magnitude is selected at
	each pixel location. {F}inally, the fused image can be recovered
	by performing the inverse {DWT}. {I}n this paper, we improve this
	fusion procedure by applying the discrete wavelet frame transform
	({DWFT}) and the support vector machines ({SVM}). {U}nlike {DWT},
	{DWFT} yields a translation-invariant signal representation. {U}sing
	features extracted from the {DWFT} coefficients, a {SVM} is trained
	to select the source image that has the best focus at each pixel
	location, and the corresponding {DWFT} coefficients are then incorporated
	into the composite wavelet representation. {E}xperimental results
	show that the proposed method outperforms the traditional approach
	both visually and quantitatively.},
  keywords = {Algorithms, Amino Acid, Amino Acids, Artificial Intelligence, Ascomycota,
	Automated, Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms,
	Comparative Study, Computer Simulation, Computer-Assisted, Computing
	Methodologies, Crystallography, DNA, DNA Primers, Databases, Diagnostic
	Imaging, Enzymes, Fixation, Gene Expression Profiling, Genetic, Hordeum,
	Host-Parasite Relations, Humans, Image Enhancement, Image Interpretation,
	Informatics, Information Storage and Retrieval, Kinetics, Magnetic
	Resonance Spectroscopy, Models, Nanotechnology, Neural Networks (Computer),
	Non-P.H.S., Non-U.S. Gov't, Ocular, Oligonucleotide Array Sequence
	Analysis, P.H.S., Pattern Recognition, Plant, Plants, Predictive
	Value of Tests, Protein, Protein Conformation, Research Support,
	Sample Size, Selection (Genetics), Sequence Alignment, Sequence Analysis,
	Sequence Homology, Signal Processing, Skin, Software, Statistical,
	Subtraction Technique, Theoretical, Thermodynamics, U.S. Gov't, Viral
	Proteins, X-Ray},
  pmid = {15565781}
}

@article{Li2004comparative,
  author = {Li, T. and Zhang, C. and Ogihara, M.},
  title = {A comparative study of feature selection and multiclass classification
	methods for tissue classification based on gene expression},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {2429--2437},
  number = {15},
  abstract = {Summary: {T}his paper studies the problem of building multiclass classifiers
	for tissue classification based on gene expression. {T}he recent
	development of microarray technologies has enabled biologists to
	quantify gene expression of tens of thousands of genes in a single
	experiment. {B}iologists have begun collecting gene expression for
	a large number of samples. {O}ne of the urgent issues in the use
	of microarray data is to develop methods for characterizing samples
	based on their gene expression. {T}he most basic step in the research
	direction is binary sample classification, which has been studied
	extensively over the past few years. {T}his paper investigates the
	next step--multiclass classification of samples based on gene expression.
	{T}he characteristics of expression data (e.g. large number of genes
	with small sample size) makes the classification problem more challenging.
	{T}he process of building multiclass classifiers is divided into
	two components: (i) selection of the features (i.e. genes) to be
	used for training and testing and (ii) selection of the classification
	method. {T}his paper compares various feature selection methods as
	well as various state-of-the-art classification methods on various
	multiclass gene expression datasets. {O}ur study indicates that multiclass
	classification problem is much more difficult than the binary one
	for the gene expression datasets. {T}he difficulty lies in the fact
	that the data are of high dimensionality and that the sample size
	is small. {T}he classification accuracy appears to degrade very rapidly
	as the number of classes increases. {I}n particular, the accuracy
	was very low regardless of the choices of the methods for large-class
	datasets (e.g. {NCI}60 and {GCM}). {W}hile increasing the number
	of samples is a plausible solution to the problem of accuracy degradation,
	it is important to develop algorithms that are able to analyze effectively
	multiple-class expression data for these special datasets.},
  pdf = {../local/Li2004comparative.pdf},
  file = {Li2004comparative.pdf:local/Li2004comparative.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/15/2429}
}

@article{Li2011IsoLasso,
  author = {Li, W. and Feng, J. and Jiang, T.},
  title = {{IsoLasso}: a {LASSO} regression approach to {RNA-Seq} based transcriptome
	assembly.},
  journal = {J Comput Biol},
  year = {2011},
  volume = {18},
  pages = {1693--1707},
  number = {11},
  month = {Nov},
  __markedentry = {[jp:6]},
  abstract = {The new second generation sequencing technology revolutionizes many
	biology-related research fields and poses various computational biology
	challenges. One of them is transcriptome assembly based on RNA-Seq
	data, which aims at reconstructing all full-length mRNA transcripts
	simultaneously from millions of short reads. In this article, we
	consider three objectives in transcriptome assembly: the maximization
	of prediction accuracy, minimization of interpretation, and maximization
	of completeness. The first objective, the maximization of prediction
	accuracy, requires that the estimated expression levels based on
	assembled transcripts should be as close as possible to the observed
	ones for every expressed region of the genome. The minimization of
	interpretation follows the parsimony principle to seek as few transcripts
	in the prediction as possible. The third objective, the maximization
	of completeness, requires that the maximum number of mapped reads
	(or ``expressed segments'' in gene models) be explained by (i.e., contained
	in) the predicted transcripts in the solution. Based on the above
	three objectives, we present IsoLasso, a new RNA-Seq based transcriptome
	assembly tool. IsoLasso is based on the well-known LASSO algorithm,
	a multivariate regression method designated to seek a balance between
	the maximization of prediction accuracy and the minimization of interpretation.
	By including some additional constraints in the quadratic program
	involved in LASSO, IsoLasso is able to make the set of assembled
	transcripts as complete as possible. Experiments on simulated and
	real RNA-Seq datasets show that IsoLasso achieves, simultaneously,
	higher sensitivity and precision than the state-of-art transcript
	assembly tools.},
  doi = {10.1089/cmb.2011.0171},
  pdf = {../local/Li2011IsoLasso.pdf},
  file = {Li2011IsoLasso.pdf:Li2011IsoLasso.pdf:PDF},
  institution = {Department of Computer Science and Engineering, University of California,
	Riverside, Riverside, CA 92507, USA. liw@cs.ucr.edu},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {21951053},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1089/cmb.2011.0171}
}

@inproceedings{Li2003Learning,
  author    = {Li, X. and Liu, B.},
  title     = {Learning to classify texts using positive and unlabeled data},
  booktitle = {IJCAI'03: Proceedings of the 18th international joint conference
    on Artificial intelligence},
  editor    = {Gottlob, G. and Walsh, T.},
  year      = {2003},
  pages     = {587--592},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  abstract  = {In traditional text classification, a classifier is built using labeled
    training documents of every class. This paper studies a different
    problem. Given a set P of documents of a particular class (called
    positive class) and a set U of unlabeled documents that contains
    documents from class P and also other types of documents (called
    negative class documents), we want to build a classifier to classify
    the documents in U into documents from P and documents not from P.
    The key feature of this problem is that there is no labeled negative
    document, which makes traditional text classification techniques
    inapplicable. In this paper, we propose an effective technique to
    solve the problem. It combines the Rocchio method and the SVM technique
    for classifier building. Experimental results show that the new method
    outperforms existing methods significantly.},
  owner     = {jp},
  timestamp = {2010.01.31}
}

@article{Liang2001Detection,
  author = {Liang, H. and Lin, Z.},
  title = {Detection of delayed gastric emptying from electrogastrograms with
	support vector machine.},
  journal = {{IEEE} Trans Biomed Eng},
  year = {2001},
  volume = {48},
  pages = {601--604},
  number = {5},
  month = {May},
  abstract = {A recent study reported a conventional neural network ({NN}) approach
	for the noninvasive diagnosis of delayed gastric emptying from the
	cutaneous electrogastrograms. {U}sing support vector machine, we
	show that this relatively new technique can be used for detection
	of delayed gastric emptying and is in fact able to outdo the conventional
	{NN}.},
  keywords = {Algorithms, Amino Acid Sequence, Artificial Intelligence, Biological,
	Cell Compartmentation, Comparative Study, Computer Simulation, Computer-Assisted,
	Decision Trees, Diagnosis, Discriminant Analysis, Electrophysiology,
	Gastric Emptying, Humans, Logistic Models, Melanoma, Models, Neural
	Networks (Computer), Nevus, Non-U.S. Gov't, Organelles, P.H.S., Pigmented,
	Predictive Value of Tests, Proteins, Reproducibility of Results,
	Research Support, Skin Diseases, Skin Neoplasms, Skin Pigmentation,
	Stomach Diseases, U.S. Gov't},
  pmid = {11341535}
}

@article{Liang2008Gene,
  author      = {Liang, K-C. and Wang, X.},
  title       = {Gene regulatory network reconstruction using conditional mutual information.},
  journal     = {EURASIP J Bioinform Syst Biol},
  year        = {2008},
  pages       = {253894},
  abstract    = {The inference of gene regulatory network from expression data is an
    important area of research that provides insight to the inner workings
    of a biological system. The relevance-network-based approaches provide
    a simple and easily-scalable solution to the understanding of interaction
    between genes. Up until now, most works based on relevance network
    focus on the discovery of direct regulation using correlation coefficient
    or mutual information. However, some of the more complicated interactions
    such as interactive regulation and co-regulation are not easily detected.
    In this work, we propose a relevance network model for gene regulatory
    network inference which employs both mutual information and conditional
    mutual information to determine the interactions between genes. For
    this purpose, we propose a conditional mutual information estimator
    based on adaptive partitioning which allows us to condition on both
    discrete and continuous random variables. We provide experimental
    results that demonstrate that the proposed regulatory network inference
    algorithm can provide better performance when the target network
    contains coregulated and interactively regulated genes.},
  doi         = {10.1155/2008/253894},
  url         = {http://dx.doi.org/10.1155/2008/253894},
  pmid        = {18584050},
  institution = {Department of Electrical Engineering, Columbia University, New York,
    NY 10027, USA.},
  owner       = {fantine},
  timestamp   = {2010.10.21}
}

@article{Liang1998Reveal,
  author = {Liang, S. and Fuhrman, S. and Somogyi, R.},
  title = {{REVEAL}, a general reverse engineering algorithm for inference of
	genetic network architectures},
  journal = {Pac. Symp. Biocomput.},
  year = {1998},
  volume = {3},
  pages = {18--29},
  abstract = {Given the immanent gene expression mapping covering whole genomes
	during development, health and disease, we seek computational methods
	to maximize functional inference from such large data sets. Is it
	possible, in principle, to completely infer a complex regulatory
	network architecture from input/output patterns of its variables?
	We investigated this possibility using binary models of genetic networks.
	Trajectories, or state transition tables of Boolean nets, resemble
	time series of gene expression. By systematically analyzing the mutual
	information between input states and output states, one is able to
	infer the sets of input elements controlling each element or gene
	in the network. This process is unequivocal and exact for complete
	state transition tables. We implemented this REVerse Engineering
	ALgorithm (REVEAL) in a C program, and found the problem to be tractable
	within the conditions tested so far. For n = 50 (elements) and k
	= 3 (inputs per element), the analysis of incomplete state transition
	tables (100 state transition pairs out of a possible $10^{15}$) reliably
	produced the original rule and wiring sets. While this study is limited
	to synchronous Boolean networks, the algorithm is generalizable to
	include multi-state models, essentially allowing direct application
	to realistic biological data sets. The ability to adequately solve
	the inverse problem may enable in-depth analysis of complex dynamic
	systems in biology and other fields.},
  pdf = {../local/Liang1998Reveal.pdf},
  file = {Liang1998Reveal.pdf:Liang1998Reveal.pdf:PDF},
  institution = {SETI Institute, NASA Ames Research Center, Moffett Field, CA 94035,
	USA. sliang@mail.arc.nasa.gov},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {9697168},
  timestamp = {2012.04.03}
}

@article{Liao2003Network,
  author    = {Liao, J. C. and Boscolo, R. and Yang, Y.-L. and Tran, L. M. and Sabatti,
    C. and Roychowdhury, V. P.},
  title     = {Network component analysis: Reconstruction of regulatory signals
    in biological systems},
  journal   = {Proc. Natl. Acad. Sci. USA},
  year      = {2003},
  volume    = {100},
  number    = {26},
  pages     = {15522--15527},
  doi       = {10.1073/pnas.2136632100},
  url       = {http://dx.doi.org/10.1073/pnas.2136632100},
  pdf       = {../local/Liao2003Network.pdf},
  file      = {Liao2003Network.pdf:Liao2003Network.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.07.23}
}

@article{Liao2003Combining,
  author = {Liao, L. and Noble, W. S.},
  title = {Combining {P}airwise {S}equence {S}imilarity and {S}upport {V}ector
	{M}achines for {D}etecting {R}emote {P}rotein {E}volutionary and
	{S}tructural {R}elationships},
  journal = {J. Comput. Biol.},
  year = {2003},
  volume = {10},
  pages = {857--868},
  number = {6},
  abstract = {One key element in understanding the molecular machinery of the cell
	is to understand the structure and function of each protein encoded
	in the genome. {A} very successful means of inferring the structure
	or function of a previously unannotated protein is via sequence similarity
	with one or more proteins whose structure or function is already
	known. {T}oward this end, we propose a means of representing proteins
	using pairwise sequence similarity scores. {T}his representation,
	combined with a discriminative classification algorithm known as
	the support vector machine ({SVM}), provides a powerful means of
	detecting subtle structural and evolutionary relationships among
	proteins. {T}he algorithm, called {SVM}-pairwise, when tested on
	its ability to recognize previously unseen families from the {SCOP}
	database, yields significantly better performance than {SVM}-{F}isher,
	profile {HMM}s, and {PSI}-{BLAST}.},
  pdf = {../local/Liao2003Combining.pdf},
  file = {Liao2003Combining.pdf:local/Liao2003Combining.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.liebertonline.com/doi/abs/10.1089/106652703322756113}
}

@inproceedings{Liao2002Combining,
  author    = {Liao, L. and Noble, W. S.},
  title     = {Combining pairwise sequence similarity and support vector machines
    for remote protein homology detection},
  booktitle = {Proceedings of the {S}ixth {I}nternational {C}onference on {C}omputational
    {M}olecular {B}iology},
  year      = {2002},
  url       = {http://www.cs.columbia.edu/~bgrundy/papers/fps-svm.html},
  pdf       = {../local/liao02.pdf},
  file      = {liao02.pdf:local/liao02.pdf:PDF},
  keywords  = {biosvm},
  subject   = {biokernelcasp}
}

@article{Liaw2002Classification,
  author = {Liaw, A. and Wiener, M.},
  title = {Classification and Regression by {randomForest}},
  journal = {R News},
  year = {2002},
  volume = {2},
  pages = {18--22},
  number = {3},
  owner = {jp},
  timestamp = {2012.07.31}
}

@article{Liben-Nowell2007link-prediction,
  author = {Liben-Nowell, D. and Kleinberg, J.},
  title = {The link-prediction problem for social networks},
  journal = {J. Am. Soc. Inf. Sci. Technol.},
  year = {2007},
  volume = {58},
  pages = {1019--1031},
  number = {7},
  month = {May},
  doi = {10.1002/asi.20591},
  owner = {jp},
  timestamp = {2011.09.20},
  url = {http://dx.doi.org/10.1002/asi.20591}
}

@article{Liberles2002use,
  author  = {Liberles, D. A. and Thor{\'e}n, A. and von Heijne, G. and Elofsson,
    A.},
  title   = {The use of phylogenetic profiles for gene predictions},
  journal = {Curr. {G}enom.},
  year    = {2002},
  note    = {To appear},
  url     = {http://www.sbc.su.se/~arne/papers/phylo.pdf},
  pdf     = {../local/libe02.pdf},
  file    = {libe02.pdf:local/libe02.pdf:PDF},
  subject = {bio}
}

@article{Lieberman-Aiden2009Comprehensive,
  author      = {Lieberman-Aiden, Erez and van Berkum, Nynke L and Williams, Louise
    and Imakaev, Maxim and Ragoczy, Tobias and Telling, Agnes and Amit, Ido
    and Lajoie, Bryan R and Sabo, Peter J and Dorschner, Michael O
    and Sandstrom, Richard and Bernstein, Bradley and Bender, M. A.
    and Groudine, Mark and Gnirke, Andreas and Stamatoyannopoulos, John
    and Mirny, Leonid A and Lander, Eric S and Dekker, Job},
  title       = {Comprehensive mapping of long-range interactions reveals folding
    principles of the human genome.},
  journal     = {Science},
  year        = {2009},
  volume      = {326},
  number      = {5950},
  pages       = {289--293},
  month       = {Oct},
  abstract    = {We describe Hi-C, a method that probes the three-dimensional architecture
    of whole genomes by coupling proximity-based ligation with massively
    parallel sequencing. We constructed spatial proximity maps of the
    human genome with Hi-C at a resolution of 1 megabase. These maps
    confirm the presence of chromosome territories and the spatial proximity
    of small, gene-rich chromosomes. We identified an additional level
    of genome organization that is characterized by the spatial segregation
    of open and closed chromatin to form two genome-wide compartments.
    At the megabase scale, the chromatin conformation is consistent with
    a fractal globule, a knot-free, polymer conformation that enables
    maximally dense packing while preserving the ability to easily fold
    and unfold any genomic locus. The fractal globule is distinct from
    the more commonly used globular equilibrium model. Our results demonstrate
    the power of Hi-C to map the dynamic conformations of whole genomes.},
  doi         = {10.1126/science.1181369},
  url         = {http://dx.doi.org/10.1126/science.1181369},
  pii         = {326/5950/289},
  pmid        = {19815776},
  pdf         = {../local/Lieberman-Aiden2009Comprehensive.pdf},
  file        = {Lieberman-Aiden2009Comprehensive.pdf:Lieberman-Aiden2009Comprehensive.pdf:PDF},
  institution = {Broad Institute of Harvard and Massachusetts Institute of Technology
    (MIT), MA 02139, USA.},
  keywords    = {hic, ngs},
  owner       = {phupe},
  timestamp   = {2010.08.26}
}

@article{Liebermeister2002Linear,
  author = {Liebermeister, W.},
  title = {Linear modes of gene expression determined by independent component
	analysis},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {51--60},
  number = {1},
  month = {Jan},
  abstract = {MOTIVATION: The expression of genes is controlled by specific combinations
	of cellular variables. We applied Independent Component Analysis
	(ICA) to gene expression data, deriving a linear model based on hidden
	variables, which we term 'expression modes'. The expression of each
	gene is a linear function of the expression modes, where, according
	to the ICA model, the linear influences of different modes show a
	minimal statistical dependence, and their distributions deviate sharply
	from the normal distribution. RESULTS: Studying cell cycle-related
	gene expression in yeast, we found that the dominant expression modes
	could be related to distinct biological functions, such as phases
	of the cell cycle or the mating response. Analysis of human lymphocytes
	revealed modes that were related to characteristic differences between
	cell types. With both data sets, the linear influences of the dominant
	modes showed distributions with large tails, indicating the existence
	of specifically up- and downregulated target genes. The expression
	modes and their influences can be used to visualize the samples and
	genes in low-dimensional spaces. A projection to expression modes
	helps to highlight particular biological functions, to reduce noise,
	and to compress the data in a biologically sensible way.},
  doi = {10.1093/bioinformatics/18.1.51},
  pdf = {../local/Liebermeister2002Linear.pdf},
  file = {Liebermeister2002Linear.pdf:Liebermeister2002Linear.pdf:PDF},
  institution = {Theoretische Biophysik, Institut f{\"u}r Biologie, Humboldt-Universit{\"a}t
	zu Berlin, Invalidenstrasse 42, 10115 Berlin, Germany. wolfram.liebermeister@rz-hu-berlin.de},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {11836211},
  timestamp = {2011.12.30},
  url = {http://dx.doi.org/10.1093/bioinformatics/18.1.51}
}

@article{Lievens2009Mammalian,
  author = {Lievens, Sam and Lemmens, Irma and Tavernier, Jan},
  title = {Mammalian two-hybrids come of age},
  journal = {Trends Biochem Sci},
  year = {2009},
  volume = {34},
  pages = {579--588},
  number = {11},
  month = {Nov},
  abstract = {A diverse series of mammalian two-hybrid technologies for the detection
	of protein-protein interactions have emerged in the past few years,
	complementing the established yeast two-hybrid approach. Given the
	mammalian background in which they operate, these assays open new
	avenues to study the dynamics of mammalian protein interaction networks,
	i.e. the temporal, spatial and functional modulation of protein-protein
	associations. In addition, novel assay formats are available that
	enable high-throughput mammalian two-hybrid applications, facilitating
	their use in large-scale interactome mapping projects. Finally, as
	they can be applied in drug discovery and development programs, these
	techniques also offer exciting new opportunities for biomedical research.},
  doi = {10.1016/j.tibs.2009.06.009},
  institution = {Department of Medical Protein Research, VIB, A. Baertsoenkaai 3,
	9000 Ghent, Belgium},
  keywords = {Animals; Genes, Reporter; Humans; Models, Biological; Protein Binding;
	Protein Interaction Mapping; Proteins; Recombinant Fusion Proteins;
	Transfection; Two-Hybrid System Techniques},
  owner = {phupe},
  pii = {S0968-0004(09)00158-3},
  pmid = {19786350},
  timestamp = {2010.08.31},
  url = {http://dx.doi.org/10.1016/j.tibs.2009.06.009}
}

@article{Lim2005Microarray,
  author = {Lim, Lee P and Lau, Nelson C and Garrett-Engele, Philip and Grimson,
	Andrew and Schelter, Janell M and Castle, John and Bartel, David P
	and Linsley, Peter S and Johnson, Jason M},
  title = {Microarray analysis shows that some {microRNAs} downregulate large
	numbers of target {mRNAs}},
  journal = {Nature},
  year = {2005},
  volume = {433},
  pages = {769--773},
  number = {7027},
  month = {Feb},
  abstract = {MicroRNAs (miRNAs) are a class of noncoding RNAs that post-transcriptionally
	regulate gene expression in plants and animals. To investigate the
	influence of miRNAs on transcript levels, we transfected miRNAs into
	human cells and used microarrays to examine changes in the messenger
	RNA profile. Here we show that delivering miR-124 causes the expression
	profile to shift towards that of brain, the organ in which miR-124
	is preferentially expressed, whereas delivering miR-1 shifts the
	profile towards that of muscle, where miR-1 is preferentially expressed.
	In each case, about 100 messages were downregulated after 12 h. The
	3' untranslated regions of these messages had a significant propensity
	to pair to the 5' region of the miRNA, as expected if many of these
	messages are the direct targets of the miRNAs. Our results suggest
	that metazoan miRNAs can reduce the levels of many of their target
	transcripts, not just the amount of protein deriving from these transcripts.
	Moreover, miR-1 and miR-124, and presumably other tissue-specific
	miRNAs, seem to downregulate a far greater number of targets than
	previously appreciated, thereby helping to define tissue-specific
	gene expression in humans.},
  doi = {10.1038/nature03315},
  institution = {Rosetta Inpharmatics, Merck and Co, 401 Terry Avenue N, Seattle,
	Washington 98109, USA. lee_lim@merck.com},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature03315},
  pmid = {15685193},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1038/nature03315}
}

@article{Lima-Mendez2009powerful,
  author = {Lima-Mendez, G. and van Helden, J.},
  title = {The powerful law of the power law and other myths in network biology},
  journal = {Mol Biosyst},
  year = {2009},
  volume = {5},
  pages = {1482--1493},
  number = {12},
  month = {Dec},
  abstract = {For almost 10 years, topological analysis of different large-scale
	biological networks (metabolic reactions, protein interactions, transcriptional
	regulation) has been highlighting some recurrent properties: power
	law distribution of degree, scale-freeness, small world, which have
	been proposed to confer functional advantages such as robustness
	to environmental changes and tolerance to random mutations. Stochastic
	generative models inspired different scenarios to explain the growth
	of interaction networks during evolution. The power law and the associated
	properties appeared so ubiquitous in complex networks that they were
	qualified as "universal laws". However, these properties are no longer
	observed when the data are subjected to statistical tests: in most
	cases, the data do not fit the expected theoretical models, and the
	cases of good fitting merely result from sampling artefacts or improper
	data representation. The field of network biology seems to be founded
	on a series of myths, i.e. widely believed but false ideas. The weaknesses
	of these foundations should however not be considered as a failure
	for the entire domain. Network analysis provides a powerful frame
	for understanding the function and evolution of biological processes,
	provided it is brought to an appropriate level of description, by
	focussing on smaller functional modules and establishing the link
	between their topological properties and their dynamical behaviour.},
  doi = {10.1039/b908681a},
  institution = {Bioinformatique des G{\'e}nomes et des R{\'e}seaux-BiGRe, Universit{\'e} Libre
	de Bruxelles, Campus Plaine, CP 263, Boulevard du Triomphe, B-1050
	Bruxelles, Belgium. gipsi@bigre.ulb.ac.be},
  keywords = {Computational Biology, methods; Gene Regulatory Networks; Metabolic
	Networks and Pathways; Models, Biological; Semantics; Signal Transduction},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pmid = {20023717},
  timestamp = {2011.04.07},
  url = {http://dx.doi.org/10.1039/b908681a}
}

@article{Lin2002Conserved,
  author = {Lin, K. and Kuang, Y. and Joseph, J. S. and Kolatkar, P. R.},
  title = {Conserved codon composition of ribosomal protein coding genes in
	{Escherichia} coli, {Mycobacterium} tuberculosis and {Saccharomyces}
	cerevisiae: lessons from supervised machine learning in functional
	genomics},
  journal = {Nucl. {Acids} {Res}.},
  year = {2002},
  volume = {30},
  pages = {2599--2607},
  number = {11},
  abstract = {Genomics projects have resulted in a flood of sequence data. {F}unctional
	annotation currently relies almost exclusively on inter-species sequence
	comparison and is restricted in cases of limited data from related
	species and widely divergent sequences with no known homologs. {H}ere,
	we demonstrate that codon composition, a fusion of codon usage bias
	and amino acid composition signals, can accurately discriminate,
	in the absence of sequence homology information, cytoplasmic ribosomal
	protein genes from all other genes of known function in {S}accharomyces
	cerevisiae, {E}scherichia coli and {M}ycobacterium tuberculosis using
	an implementation of support vector machines, {SVM}light. {A}nalysis
	of these codon composition signals is instructive in determining
	features that confer individuality to ribosomal protein genes. {E}ach
	of the sets of positively charged, negatively charged and small hydrophobic
	residues, as well as codon bias, contribute to their distinctive
	codon composition profile. {T}he representation of all these signals
	is sensitively detected, combined and augmented by the {SVM}s to
	perform an accurate classification. {O}f special mention is an obvious
	outlier, yeast gene {RPL}22{B}, highly homologous to {RPL}22{A} but
	employing very different codon usage, perhaps indicating a non-ribosomal
	function. {F}inally, we propose that codon composition be used in
	combination with other attributes in gene/protein classification
	by supervised machine learning algorithms.},
  pdf = {../local/Lin2002Conserved.pdf},
  file = {Lin2002Conserved.pdf:local/Lin2002Conserved.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://nar.oupjournals.org/cgi/content/abstract/30/11/2599}
}

@article{Lin73Effective,
  author = {Lin, S. and Kernighan, B. W.},
  title = {An Effective Heuristic Algorithm for the Traveling-Salesman Problem},
  journal = {Operations Res.},
  year = {1973},
  volume = {21},
  pages = {498--516}
}

@article{Lin2004Orphan,
  author = {Lin, S. H. S. and Civelli, O.},
  title = {Orphan {G} protein-coupled receptors: targets for new therapeutic
	interventions},
  journal = {Ann. Med.},
  year = {2004},
  volume = {36},
  pages = {204--214},
  number = {3},
  abstract = {With the completion of the human genome, many genes will be uncovered
	with unknown functions. The 'orphan' G protein coupled receptors
	(GPCRs) are examples of genes without known functions. These are
	genes that exhibit the seven helical conformation hallmark of the
	GPCRs but that are called 'orphans' because they are activated by
	none of the primary messengers known to activate GPCRs in vivo. They
	are the targets of undiscovered transmitters and this lack of knowledge
	precludes understanding their function. Yet, because they belong
	to the supergene family that has the widest regulatory role in the
	organism, the orphan GPCRs have generated much excitement in academia
	and industry. They hold much hope for revealing new intercellular
	interactions that will open new areas of basic research which ultimately
	will lead to new therapeutic applications. However, the first step
	in understanding the function of orphan GPCRs is to 'deorphanize'
	them, to identify their natural transmitters. Here we review the
	search for the natural primary messengers of orphan GPCRs and focus
	on two recently deorphanized GPCR systems, the melanin-concentrating
	hormone (MCH) and prolactin-releasing peptide (PrRP) systems, to
	illustrate the strategies applied to solve their function and to
	exemplify the therapeutic potentials that such systems hold.},
  doi = {10.1080/07853890310024668},
  keywords = {chemogenomics},
  owner = {laurent},
  pmid = {15181976},
  timestamp = {2008.01.16},
  url = {http://dx.doi.org/10.1080/07853890310024668}
}

@article{Lin2004Adaptive,
  author = {Lin, Tzu-Chao and Yu, Pao-Ta},
  title = {Adaptive two-pass median filter based on support vector machines
	for image restoration},
  journal = {Neural {Comput}},
  year = {2004},
  volume = {16},
  pages = {332--353},
  number = {2},
  month = {Feb},
  abstract = {In this letter, a novel adaptive filter, the adaptive two-pass median
	({ATM}) filter based on support vector machines ({SVM}s), is proposed
	to preserve more image details while effectively suppressing impulse
	noise for image restoration. {T}he proposed filter is composed of
	a noise decision maker and two-pass median filters. {O}ur new approach
	basically uses an {SVM} impulse detector to judge whether the input
	pixel is noise. {I}f a pixel is detected as a corrupted pixel, the
	noise-free reduction median filter will be triggered to replace it.
	{O}therwise, it remains unchanged. {T}hen, to improve the quality
	of the restored image, a decision impulse filter is put to work in
	the second-pass filtering procedure. {A}s for the noise suppressing
	both fixed-valued and random-valued impulses without degrading the
	quality of the fine details, the results of our extensive experiments
	demonstrate that the proposed filter outperforms earlier median-based
	filters in the literature. {O}ur new filter also provides excellent
	robustness at various percentages of impulse noise.},
  doi = {10.1162/089976604322742056},
  keywords = {Adaptation, Algorithms, Ambergris, Animals, Artifacts, Artificial
	Intelligence, Automated, Cadmium, Candida, Candida albicans, Capillary,
	Cluster Analysis, Combinatorial Chemistry Techniques, Computer-Assisted,
	Electrophoresis, Eye Enucleation, Humans, Image Processing, Magnetic
	Resonance Spectroscopy, Melanoma, Models, Molecular, Molecular Conformation,
	Neural Networks (Computer), Non-U.S. Gov't, Nonlinear Dynamics, Odors,
	P.H.S., Pattern Recognition, Perfume, Physiological, Predictive Value
	of Tests, Prognosis, Prospective Studies, Quantitative Structure-Activity
	Relationship, Rats, Research Support, Signal Processing, U.S. Gov't,
	Uveal Neoplasms, Visual, 15006099},
  url = {http://dx.doi.org/10.1162/089976604322742056}
}

@article{Lin2004Classification,
  author = {Lin, WuMei and Yuan, Xin and Yuen, Powing and Wei, William I and
	Sham, Jonathan and Shi, PengCheng and Qu, Jianan},
  title = {Classification of in vivo autofluorescence spectra using support
	vector machines},
  journal = {J {Biomed} {Opt}},
  year = {2004},
  volume = {9},
  pages = {180--186},
  number = {1},
  abstract = {An algorithm based on support vector machines ({SVM}), the most recent
	advance in pattern recognition, is presented for use in classifying
	light-induced autofluorescence collected from cancerous and normal
	tissues. {T}he in vivo autofluorescence spectra used for development
	and evaluation of {SVM} diagnostic algorithms were measured from
	85 nasopharyngeal carcinoma ({NPC}) lesions and 131 normal tissue
	sites from 59 subjects during routine nasal endoscopy. {L}eave-one-out
	cross-validation was used to evaluate the performance of the algorithms.
	{A}n overall diagnostic accuracy of 96\%, a sensitivity of 94\%,
	and a specificity of 97\% for discriminating nasopharyngeal carcinomas
	from normal tissues were achieved using a linear {SVM} algorithm.
	{A} diagnostic accuracy of 98\%, a sensitivity of 95\%, and a specificity
	of 99\% for detecting {NPC} were achieved with a nonlinear {SVM}
	algorithm. {I}n a comparison with previously developed algorithms
	using the same dataset and the principal component analysis ({PCA})
	technique, the {SVM} algorithms produced better diagnostic accuracy
	in all instances. {I}n addition, we investigated a method combining
	{PCA} and {SVM} techniques for reducing the complexity of the {SVM}
	algorithms.},
  doi = {10.1117/1.1628244},
  pdf = {../local/Lin2004Classification.pdf},
  file = {Lin2004Classification.pdf:local/Lin2004Classification.pdf:PDF},
  url = {http://dx.doi.org/10.1117/1.1628244}
}

@article{Lin2002Support,
  author = {Lin, Y.},
  title = {Support vector machines and the {Bayes} rule in classification},
  journal = {Data {Mining} and {Knowledge} {Discovery}},
  year = {2002},
  volume = {6},
  pages = {259--275},
  number = {3},
  doi = {10.1023/A:1015469627679},
  pdf = {../local/Lin2002Support.pdf},
  file = {Lin2002Support.pdf:local/Lin2002Support.pdf:PDF},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1023/A:1015469627679}
}

@article{Lind2003Support,
  author = {Lind, P. and Maltseva, T.},
  title = {Support vector machines for the estimation of aqueous solubility},
  journal = {J {Chem} {Inf} {Comput} {Sci}},
  year = {2003},
  volume = {43},
  pages = {1855--1859},
  number = {6},
  abstract = {Support {V}ector {M}achines ({SVM}s) are used to estimate aqueous
	solubility of organic compounds. {A} {SVM} equipped with a {T}animoto
	similarity kernel estimates solubility with accuracy comparable to
	results from other reported methods where the same data sets have
	been studied. {C}omplete cross-validation on a diverse data set resulted
	in a root-mean-squared error = 0.62 and {R}(2) = 0.88. {T}he data
	input to the machine is in the form of molecular fingerprints. {N}o
	physical parameters are explicitly involved in calculations.},
  doi = {10.1021/ci034107s},
  pdf = {../local/Lind2003Support.pdf},
  file = {Lind2003Support.pdf:local/Lind2003Support.pdf:PDF},
  keywords = {biosvm, chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci034107s}
}

@article{Linghu2009Genome-wide,
  author = {Linghu, B. and Snitkin, E. S. and Hu, Z. and Xia, Y. and Delisi, C.},
  title = {Genome-wide prioritization of disease genes and identification of
	disease-disease associations from an integrated human functional
	linkage network},
  journal = {Genome Biol.},
  year = {2009},
  volume = {10},
  pages = {R91},
  number = {9},
  abstract = {We integrate 16 genomic features to construct an evidence-weighted
	functional-linkage network comprising 21,657 human genes. The functional-linkage
	network is used to prioritize candidate genes for 110 diseases, and
	to reliably disclose hidden associations between disease pairs having
	dissimilar phenotypes, such as hypercholesterolemia and Alzheimer's
	disease. Many of these disease-disease associations are supported
	by epidemiology, but with no previous genetic basis. Such associations
	can drive novel hypotheses on molecular mechanisms of diseases and
	therapies.},
  doi = {10.1186/gb-2009-10-9-r91},
  institution = {Bioinformatics Program, Boston University, 24 Cummington Street,
	Boston, MA 02215, USA. blinghu@bu.edu},
  owner = {mordelet},
  pii = {gb-2009-10-9-r91},
  pmid = {19728866},
  timestamp = {2010.09.28},
  url = {http://dx.doi.org/10.1186/gb-2009-10-9-r91}
}

@article{Lipinski2001Experimental,
  author = {Lipinski, C. A. and Lombardo, F. and Dominy, B. W. and Feeney, P.
	J.},
  title = {Experimental and computational approaches to estimate solubility
	and permeability in drug discovery and development settings},
  journal = {Adv. {Drug}. {Deliv}. {Rev}},
  year = {2001},
  volume = {46},
  pages = {3--26},
  number = {1--3},
  month = {Mar},
  abstract = {Experimental and computational approaches to estimate solubility and
	permeability in discovery and development settings are described.
	{I}n the discovery setting 'the rule of 5' predicts that poor absorption
	or permeation is more likely when there are more than 5 {H}-bond
	donors, 10 {H}-bond acceptors, the molecular weight ({MWT}) is greater
	than 500 and the calculated {L}og {P} ({CL}og{P}) is greater than
	5 (or {M}log{P} > 4.15). {C}omputational methodology for the rule-based
	{M}origuchi {L}og {P} ({ML}og{P}) calculation is described. {T}urbidimetric
	solubility measurement is described and applied to known drugs. {H}igh
	throughput screening ({HTS}) leads tend to have higher {MWT} and
	{L}og {P} and lower turbidimetric solubility than leads in the pre-{HTS}
	era. {I}n the development setting, solubility calculations focus
	on exact value prediction and are difficult because of polymorphism.
	{R}ecent work on linear free energy relationships and {L}og {P} approaches
	are critically reviewed. {U}seful predictions are possible in closely
	related analog series when coupled with experimental thermodynamic
	solubility measurements.},
  keywords = {chemoinformatics},
  owner = {mahe},
  pii = {S0169409X00001290},
  pmid = {11259830},
  timestamp = {2006.02.03}
}

@article{Listgarten2004Predictive,
  author = {Listgarten, J. and Damaraju, S. and Poulin, B. and Cook, L. and Dufour,
	J. and Driga, A. and Mackey, J. and Wishart, D. and Greiner, R. and
	Zanke, B.},
  title = {Predictive Models for Breast Cancer Susceptibility from Multiple
	Single Nucleotide Polymorphisms},
  journal = {Clin. {Cancer} {Res}.},
  year = {2004},
  volume = {10},
  pages = {2725--2737},
  number = {8},
  abstract = {Hereditary predisposition and causative environmental exposures have
	long been recognized in human malignancies. {I}n most instances,
	cancer cases occur sporadically, suggesting that environmental influences
	are critical in determining cancer risk. {T}o test the influence
	of genetic polymorphisms on breast cancer risk, we have measured
	98 single nucleotide polymorphisms ({SNP}s) distributed over 45 genes
	of potential relevance to breast cancer etiology in 174 patients
	and have compared these with matched normal controls. {U}sing machine
	learning techniques such as support vector machines ({SVM}s), decision
	trees, and naive {B}ayes, we identified a subset of three {SNP}s
	as key discriminators between breast cancer and controls. {T}he {SVM}s
	performed maximally among predictive models, achieving 69\% predictive
	power in distinguishing between the two groups, compared with a 50\%
	baseline predictive power obtained from the data after repeated random
	permutation of class labels (individuals with cancer or controls).
	{H}owever, the simpler naive {B}ayes model as well as the decision
	tree model performed quite similarly to the {SVM}. {T}he three {SNP}
	sites most useful in this model were (a) the +4536{T}/{C} site of
	the aldosterone synthase gene {CYP}11{B}2 at amino acid residue 386
	{V}al/{A}la ({T}/{C}) (rs4541); (b) the +4328{C}/{G} site of the
	aryl hydrocarbon hydroxylase {CYP}1{B}1 at amino acid residue 293
	{L}eu/{V}al ({C}/{G}) (rs5292); and (c) the +4449{C}/{T} site of
	the transcription factor {BCL}6 at amino acid 387 {A}sp/{A}sp (rs1056932).
	{N}o single {SNP} site on its own could achieve more than 60\% in
	predictive accuracy. {W}e have shown that multiple {SNP} sites from
	different genes over distant parts of the genome are better at identifying
	breast cancer patients than any one {SNP} alone. {A}s high-throughput
	technology for {SNP}s improves and as more {SNP}s are identified,
	it is likely that much higher predictive accuracy will be achieved
	and a useful clinical tool developed.},
  eprint = {http://clincancerres.aacrjournals.org/cgi/reprint/10/8/2725.pdf},
  pdf = {../local/Listgarten2004Predictive.pdf},
  file = {Listgarten2004Predictive.pdf:local/Listgarten2004Predictive.pdf:PDF},
  keywords = {biosvm, breastcancer},
  owner = {jeanphilippevert},
  url = {http://clincancerres.aacrjournals.org/cgi/content/abstract/10/8/2725}
}

@inproceedings{Liu2003Building,
  author = {Liu, B. and Dai, Y. and Li, X. and Lee, W. S. and Yu, P. S.},
  title = {Building Text Classifiers Using Positive and Unlabeled Examples},
  booktitle = {Proceedings of the Third IEEE International Conference on Data Mining},
  year = {2003},
  editor = {Wu, X. and Tuzhilin, A. and Shavlik, J.},
  series = {ICDM '03},
  pages = {179--186},
  address = {Washington, DC, USA},
  publisher = {IEEE Computer Society},
  acmid = {952139},
  pdf = {../local/Liu2003Building.pdf},
  file = {Liu2003Building.pdf:Liu2003Building.pdf:PDF},
  isbn = {0-7695-1978-4},
  keywords = {PUlearning},
  owner = {fantine},
  review = {Two-step method
	
	1) identifying a set of reliable negative documents
	
	2) building a set of classifiers by iteratively applying a classification
	algorithm and selecting a good classifier from the set
	
	
	Contributions
	
	-Proposal for step 1+2 : Naive Bayes+SVM
	
	-Evaluation of all possible combinations of step 1 and 2 techniques
	across the existing two-steps methods --> Benchmark system (LPU)
	
	-Proposal of a biased formulation of SVMs to solve the problem
	
	
	Biased SVMs
	
	Label known positive examples (P) +1 and unlabeled examples (U) -1.
	
	Discriminate between P and U, allowing misclassification for unlabeled
	examples.
	
	In a noiseless version, positive misclassifications are not allowed.
	To account for noise, errors on P might be allowed but to a smaller
	extent than errors on U. Namely, positive errors are weighted by
	C+ whereas negative errors are weighted by C-, and C+ >> C-. These
	weights should be learned thanks to a separate validation set.
	
	
	Empirical evaluation
	
	2 datasets : Reuters (10 categories) and Usenet articles (20 newsgroups).
	
	They combine every step 1 technique with every step 2 technique and
	report macroaveraged F scores (like our global average performance
	on TFs).
	
	The data set is divided into a validation set (30\%) and a training
	set. The validation set is used with the criterion described above
	to select parameters C+ and C-. Then F score is computed on the training
	set.
	
	Simulation of different level of knowledge by including in the training
	set a proportion gamma of positives in the unlabeled set (gamma =
	from 10\% to 90\%).
	
	For gamma = 0.3 and 0.7, biased SVM outperforms PEBL, S-EM, Roc-SVM
	and NB.},
  timestamp = {2009.07.01},
  url = {http://dl.acm.org/citation.cfm?id=951949.952139}
}

@inproceedings{Liu2002Partially,
  author = {Liu, B. and Lee, W. S. and Yu, P. S. and Li, X.},
  title = {Partially Supervised Classification of Text Documents},
  editor = {Sammut, C. and Hoffmann, A. G.},
  booktitle = {ICML '02: Proceedings of the Nineteenth International Conference
	on Machine Learning},
  pages = {387--394},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address = {San Francisco, CA, USA},
  year = {2002},
  isbn = {1-55860-873-7},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.11.9010},
  keywords = {PUlearning},
  pdf = {../local/Liu2002Partially.pdf},
  file = {Liu2002Partially.pdf:Liu2002Partially.pdf:PDF},
  owner = {fantine},
  timestamp = {2009.07.02}
}

@article{Liu2004Gabor-based,
  author = {Liu, Chengjun},
  title = {Gabor-based kernel {PCA} with fractional power polynomial models
	for face recognition},
  journal = {{IEEE} {Trans} {Pattern} {Anal} {Mach} {Intell}},
  year = {2004},
  volume = {26},
  pages = {572--581},
  number = {5},
  month = {May},
  abstract = {This paper presents a novel {G}abor-based kernel {P}rincipal {C}omponent
	{A}nalysis ({PCA}) method by integrating the {G}abor wavelet representation
	of face images and the kernel {PCA} method for face recognition.
	{G}abor wavelets first derive desirable facial features characterized
	by spatial frequency, spatial locality, and orientation selectivity
	to cope with the variations due to illumination and facial expression
	changes. {T}he kernel {PCA} method is then extended to include fractional
	power polynomial models for enhanced face recognition performance.
	{A} fractional power polynomial, however, does not necessarily define
	a kernel function, as it might not define a positive semidefinite
	{G}ram matrix. {N}ote that the sigmoid kernels, one of the three
	classes of widely used kernel functions (polynomial kernels, {G}aussian
	kernels, and sigmoid kernels), do not actually define a positive
	semidefinite {G}ram matrix either. {N}evertheless, the sigmoid kernels
	have been successfully used in practice, such as in building support
	vector machines. {I}n order to derive real kernel {PCA} features,
	we apply only those kernel {PCA} eigenvectors that are associated
	with positive eigenvalues. {T}he feasibility of the {G}abor-based
	kernel {PCA} method with fractional power polynomial models has been
	successfully tested on both frontal and pose-angled face recognition,
	using two data sets from the {FERET} database and the {CMU} {PIE}
	database, respectively. {T}he {FERET} data set contains 600 frontal
	face images of 200 subjects, while the {PIE} data set consists of
	680 images across five poses (left and right profiles, left and right
	half profiles, and frontal view) with two different facial expressions
	(neutral and smiling) of 68 subjects. {T}he effectiveness of the
	{G}abor-based kernel {PCA} method with fractional power polynomial
	models is shown in terms of both absolute performance indices and
	comparative performance against the {PCA} method, the kernel {PCA}
	method with polynomial kernels, the kernel {PCA} method with fractional
	power polynomial models, the {G}abor wavelet-based {PCA} method,
	and the {G}abor wavelet-based kernel {PCA} method with polynomial
	kernels.}
}

@article{Liu2004Using,
  author = {Liu, Huiqing and Han, Hao and Li, Jinyan and Wong, Limsoon},
  title = {Using amino acid patterns to accurately predict translation initiation
	sites},
  journal = {In {Silico} {Biol}.},
  year = {2004},
  volume = {4},
  pages = {255--269},
  number = {3},
  abstract = {The translation initiation site ({TIS}) prediction problem is about
	how to correctly identify {TIS} in m{RNA}, c{DNA}, or other types
	of genomic sequences. {H}igh prediction accuracy can be helpful in
	a better understanding of protein coding from nucleotide sequences.
	{T}his is an important step in genomic analysis to determine protein
	coding from nucleotide sequences. {I}n this paper, we present an
	in silico method to predict translation initiation sites in vertebrate
	c{DNA} or m{RNA} sequences. {T}his method consists of three sequential
	steps as follows. {I}n the first step, candidate features are generated
	using k-gram amino acid patterns. {I}n the second step, a small number
	of top-ranked features are selected by an entropy-based algorithm.
	{I}n the third step, a classification model is built to recognize
	true {TIS}s by applying support vector machines or ensembles of decision
	trees to the selected features. {W}e have tested our method on several
	independent data sets, including two public ones and our own extracted
	sequences. {T}he experimental results achieved are better than those
	reported previously using the same data sets. {O}ur high accuracy
	not only demonstrates the feasibility of our method, but also indicates
	that there might be "amino acid" patterns around {TIS} in c{DNA}
	and m{RNA} sequences.},
  keywords = {biosvm},
  pii = {2004040022},
  url = {http://www.bioinfo.de/isb/2004/04/0022/}
}

@article{Liu2003in-silico,
  author = {Liu, Huiqing and Han, Hao and Li, Jinyan and Wong, Limsoon},
  title = {An in-silico method for prediction of polyadenylation signals in
	human sequences},
  journal = {Genome {Inform} {Ser} {Workshop} {Genome} {Inform}},
  year = {2003},
  volume = {14},
  pages = {84--93},
  abstract = {This paper presents a machine learning method to predict polyadenylation
	signals ({PAS}es) in human {DNA} and m{RNA} sequences by analysing
	features around them. {T}his method consists of three sequential
	steps of feature manipulation: generation, selection and integration
	of features. {I}n the first step, new features are generated using
	k-gram nucleotide acid or amino acid patterns. {I}n the second step,
	a number of important features are selected by an entropy-based algorithm.
	{I}n the third step, support vector machines are employed to recognize
	true {PAS}es from a large number of candidates. {O}ur study shows
	that true {PAS}es in {DNA} and m{RNA} sequences can be characterized
	by different features, and also shows that both upstream and downstream
	sequence elements are important for recognizing {PAS}es from {DNA}
	sequences. {W}e tested our method on several public data sets as
	well as our own extracted data sets. {I}n most cases, we achieved
	better validation results than those reported previously on the same
	data sets. {T}he important motifs observed are highly consistent
	with those reported in literature.},
  keywords = {biosvm}
}

@incollection{Liu2009Nonparametric,
  author = {Liu, Han and Lafferty, John and Wasserman, Larry},
  editor = {Koller, D. and Schuurmans, D. and Bengio, Y. and Bottou, L.},
  title = {Nonparametric regression and classification with joint sparsity constraints},
  booktitle = {Advances in Neural Information Processing Systems 21},
  pages = {969--976},
  publisher = {MIT Press},
  year = {2009}
}

@inproceedings{Liu2002Comparative,
  author = {Liu, H. and Li, J. and Wong, L.},
  title = {A Comparative Study on Feature Selection and Classification
	Methods Using Gene Expression Profiles and Proteomic
	Patterns},
  booktitle = {Genome Informatics 2002},
  year = {2002},
  editor = {R. Lathrop and K. Nakai and S. Miyano and T. Takagi and M. Kanehisa},
  volume = {12},
  address = {Tokyo},
  publisher = {Universal Academy Press},
  abstract = {Feature selection plays an important role in classification. {W}e
	present a comparative study on six feature selection heuristics by
	applying them to two sets of data. {T}he first set of data are gene
	expression profiles from {A}cute {L}ymphoblastic {L}eukemia ({ALL})
	patients. {T}he second set of data are proteomic patterns from ovarian
	cancer patients. {B}ased on features chosen by these methods, error
	rates of several classification algorithms were obtained for analysis.
	{O}ur results demonstrate the importance of feature selection in
	accurately classifying new samples.},
  pdf = {../local/Liu2002Comparative.pdf},
  file = {Liu2002Comparative.pdf:local/Liu2002Comparative.pdf:PDF},
  owner = {jeanphilippevert},
  url = {http://www.jsbi.org/journal/GIW02/GIW02F006.html}
}

@article{Liu2005Use,
  author = {Huiqing Liu and Jinyan Li and Limsoon Wong},
  title = {Use of extreme patient samples for outcome prediction from gene expression
	data.},
  journal = {Bioinformatics},
  year = {2005},
  month = jun,
  abstract = {M{OTIVATION}: {P}atient outcome prediction using microarray technologies
	is an important application in bioinformatics. {B}ased on patients'
	genotypic microarray data, predictions are made to estimate patients'
	survival time and their risk of tumor metastasis or recurrence. {S}o,
	accurate prediction can potentially help to provide better treatment
	for patients. {RESULTS}: {W}e present a new computational method
	for patient outcome prediction. {I}n the training phase of this method,
	we make use of two types of extreme patient samples: short-term survivors
	who got an unfavorable outcome within a short period and long-term
	survivors who were maintaining a favorable outcome after a long follow-up
	time. {T}hese extreme training samples yield a clear platform for
	us to identify relevant genes whose expression is closely related
	to the outcome. {T}he selected extreme samples and the relevant genes
	are then integrated by a support vector machine to build a prediction
	model, by which each validation sample is assigned a risk score that
	falls into one of special pre-defined risk groups. {W}e apply this
	method to several public data sets. {I}n most cases, patients in
	high and low risk groups stratified by our method have clearly distinguishable
	outcome status as seen in their {K}aplan-{M}eier curves. {W}e also
	show that the idea of selecting only extreme patient samples for
	training is effective for improving the prediction accuracy when
	different gene selection methods are used. {SUPPLEMENTARY} {INFORMATION}:
	http://research.i2r.a-star.edu.sg/huiqing/supplementaldata/survival/survival.html.},
  doi = {10.1093/bioinformatics/bti544},
  pdf = {../local/Liu2005Use.pdf},
  file = {Liu2005Use.pdf:local/Liu2005Use.pdf:PDF},
  keywords = {biosvm},
  pii = {bti544},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti544}
}

@book{Liu2007Computational,
  author = {Liu, H. and Motoda, H.},
  title = {Computational Methods of Feature Selection},
  publisher = {Chapman \& Hall/CRC},
  year = {2007}
}

@article{Liu2004Quantitative,
  author = {H. X. Liu and C. X. Xue and R. S. Zhang and X. J. Yao and M. C. Liu
	and Z. D. Hu and B. T. Fan},
  title = {Quantitative prediction of logk of peptides in high-performance liquid
	chromatography based on molecular descriptors by using the heuristic
	method and support vector machine.},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2004},
  volume = {44},
  pages = {1979--1986},
  number = {6},
  abstract = {A new method support vector machine ({SVM}) and the heuristic method
	({HM}) were used to develop the nonlinear and linear models between
	the capacity factor (logk) and seven molecular descriptors of 75
	peptides for the first time. {T}he molecular descriptors representing
	the structural features of the compounds only included the constitutional
	and topological descriptors, which can be obtained easily without
	optimizing the structure of the molecule. {T}he seven molecular descriptors
	selected by the heuristic method in {CODESSA} were used as inputs
	for {SVM}. {T}he results obtained by {SVM} were compared with those
	obtained by the heuristic method. {T}he prediction result of the
	{SVM} model is better than that of heuristic method. {F}or the test
	set, a predictive correlation coefficient {R} = 0.9801 and root-mean-square
	error of 0.1523 were obtained. {T}he prediction results are in very
	good agreement with the experimental values. {B}ut the linear model
	of the heuristic method is easier to understand and ready to use
	for a chemist. {T}his paper provided a new and effective method for
	predicting the chromatography retention of peptides and some insight
	into the structural features which are related to the capacity factor
	of peptides.},
  doi = {10.1021/ci049891a},
  pdf = {../local/Liu2004Quantitative.pdf},
  file = {Liu2004Quantitative.pdf:local/Liu2004Quantitative.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci049891a}
}

@article{Liu2003Diagnosing,
  author = {H. X. Liu and R. S. Zhang and F. Luan and X. J. Yao and M. C. Liu
	and Z. D. Hu and B. T. Fan},
  title = {Diagnosing breast cancer based on support vector machines},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2003},
  volume = {43},
  pages = {900--907},
  number = {3},
  abstract = {The {S}upport {V}ector {M}achine ({SVM}) classification algorithm,
	recently developed from the machine learning community, was used
	to diagnose breast cancer. {A}t the same time, the {SVM} was compared
	to several machine learning techniques currently used in this field.
	{T}he classification task involves predicting the state of diseases,
	using data obtained from the {UCI} machine learning repository. {SVM}
	outperformed k-means cluster and two artificial neural networks on
	the whole. {I}t can be concluded that nine samples could be mislabeled
	from the comparison of several machine learning techniques.},
  doi = {10.1021/ci0256438},
  pdf = {../local/Liu2003Diagnosing.pdf},
  file = {Liu2003Diagnosing.pdf:local/Liu2003Diagnosing.pdf:PDF},
  keywords = {breastcancer},
  url = {http://dx.doi.org/10.1021/ci0256438}
}

@article{Liu2004Prediction,
  author = {H. X. Liu and R. S. Zhang and X. J. Yao and M. C. Liu and Z. D. Hu
	and B. T. Fan},
  title = {Prediction of the isoelectric point of an amino acid based on {GA}-{PLS}
	and {SVM}s.},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2004},
  volume = {44},
  pages = {161--167},
  number = {1},
  abstract = {The support vector machine ({SVM}), as a novel type of a learning
	machine, for the first time, was used to develop a {QSPR} model that
	relates the structures of 35 amino acids to their isoelectric point.
	{M}olecular descriptors calculated from the structure alone were
	used to represent molecular structures. {T}he seven descriptors selected
	using {GA}-{PLS}, which is a sophisticated hybrid approach that combines
	{GA} as a powerful optimization method with {PLS} as a robust statistical
	method for variable selection, were used as inputs of {RBFNN}s and
	{SVM} to predict the isoelectric point of an amino acid. {T}he optimal
	{QSPR} model developed was based on support vector machines, which
	showed the following results: the root-mean-square error of 0.2383
	and the prediction correlation coefficient {R}=0.9702 were obtained
	for the whole data set. {S}atisfactory results indicated that the
	{GA}-{PLS} approach is a very effective method for variable selection,
	and the support vector machine is a very promising tool for the nonlinear
	approximation.},
  doi = {10.1021/ci034173u},
  pdf = {../local/Liu2004Prediction.pdf},
  file = {Liu2004Prediction.pdf:local/Liu2004Prediction.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci034173u}
}

@article{Liu2004QSAR,
  author = {H. X. Liu and R. S. Zhang and X. J. Yao and M. C. Liu and Z. D. Hu
	and B. T. Fan},
  title = {{QSAR} and classification models of a novel series of {COX}-2 selective
	inhibitors: 1,5-diarylimidazoles based on support vector machines.},
  journal = {J {C}omput {A}ided {M}ol {D}es},
  year = {2004},
  volume = {18},
  pages = {389--399},
  number = {6},
  month = jun,
  abstract = {The support vector machine, which is a novel algorithm from the machine
	learning community, was used to develop quantitation and classification
	models which can be used as a potential screening mechanism for a
	novel series of {COX}-2 selective inhibitors. {E}ach compound was
	represented by calculated structural descriptors that encode constitutional,
	topological, geometrical, electrostatic, and quantum-chemical features.
	{T}he heuristic method was then used to search the descriptor space
	and select the descriptors responsible for activity. {Q}uantitative
	modelling results in a nonlinear, seven-descriptor model based on
	{SVM}s with root mean-square errors of 0.107 and 0.136 for training
	and prediction sets, respectively. {T}he best classification results
	are found using {SVM}s: the accuracy for training and test sets is
	91.2\% and 88.2\%, respectively. {T}his paper proposes a new and
	effective method for drug design and screening.},
  keywords = {biosvm, chemoinformatics}
}

@article{Liu2003QSAR,
  author = {H. X. Liu and R. S. Zhang and X. J. Yao and M. C. Liu and Z. D. Hu
	and B. T. Fan},
  title = {{QSAR} study of ethyl 2-[(3-methyl-2,5-dioxo(3-pyrrolinyl))amino]-4-(trifluoromethyl)pyrimidine-5-carboxylate:
	an inhibitor of {AP}-1 and {NF}-kappa {B} mediated gene expression
	based on support vector machines.},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2003},
  volume = {43},
  pages = {1288--1296},
  number = {4},
  abstract = {The support vector machine, as a novel type of learning machine, for
	the first time, was used to develop a {QSAR} model of 57 analogues
	of ethyl 2-[(3-methyl-2,5-dioxo(3-pyrrolinyl))amino]-4-(trifluoromethyl)pyrimidine-5-carboxylate
	({EPC}), an inhibitor of {AP}-1 and {NF}-kappa {B} mediated gene
	expression, based on calculated quantum chemical parameters. {T}he
	quantum chemical parameters involved in the model are {K}ier and
	{H}all index (order3) ({KHI}3), {I}nformation content (order 0) ({IC}0),
	{YZ} {S}hadow ({YZS}) and {M}ax partial charge for an {N} atom ({M}ax{PCN}),
	{M}in partial charge for an {N} atom ({M}in{PCN}). {T}he mean relative
	error of the training set, the validation set, and the testing set
	is 1.35\%, 1.52\%, and 2.23\%, respectively, and the maximum relative
	error is less than 5.00\%.},
  doi = {10.1021/ci0340355},
  pdf = {../local/Liu2003QSAR.pdf},
  file = {Liu2003QSAR.pdf:local/Liu2003QSAR.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci0340355}
}

@article{Liu2005[Establishment,
  author = {Jian Liu and Shu Zheng and Jie-kai Yu and Xue-bin Yu and Wei-guo
	Liu and Jian-min Zhang and Xun Hu},
  title = {[{E}stablishment of diagnostic model of cerebrospinal protein fingerprint
	pattern for glioma and its clinical application.]},
  journal = {Zhejiang {D}a {X}ue {X}ue {B}ao {Y}i {X}ue {B}an},
  year = {2005},
  volume = {34},
  pages = {141--147},
  number = {2},
  month = mar,
  abstract = {O{BJECTIVE}: {T}o establish the diagnostic model of cerebrospinal
	protein profile for gliomas by surface-enhanced laser desorption/ionization
	time-of-flight mass spectrometry ({SELDI}-{TOF} {MS}) and bioinformatics.
	{METHODS}: {S}eventy-five samples of cerebrospinal fluid from patients
	with gliomas, benign brain tumors and mild brain traumas were collected.
	{A} total of 50 samples from gliomas and non-brain-tumors were divided
	into training sets (33 cases including 17 gliomas and 16 non-brain-tumors)
	and testing sets (17 cases including 5 gliomas and 12 non-brain-tumors).
	{T}he cerebrospinal proteins bound to {H}4 chip were detected by
	{SELDI}-{TOF} {MS}, the profiles of cerebrospinal protein were gained
	and then analyzed with artificial neural network algorithm ({ANN});
	and the diagnostic model of cerebrospinal protein profiles for differentiating
	gliomas from non-brain-tumors was established. {F}orty-seven of cerebrospinal
	samples of gliomas and benign brain tumors were divided into training
	sets (31 cases including 13 gliomas and 18 benign brain tumors) and
	testing sets (16 cases including 9 gliomas and 7 benign brain tumors),
	the diagnostic model of cerebrospinal protein profiles for differentiating
	gliomas from benign brain tumors was established based on the same
	method. {T}he support vector machine ({SVM}) algorithm was also used
	for evaluation, both results were very similar, but the result derived
	from {ANN} was more stable than that from {SVM}. {RESULT}: {T}he
	diagnostic model of cerebrospinal protein profiles for differentiating
	gliomas from non-brain-tumors was established and was challenged
	with the test set randomly, the sensitivity and specificity were
	100\% and 91.7\%, respectively. {T}he cerebrospinal protein profiling
	model for differentiating gliomas from benign brain tumors was also
	developed and was challenged with the test set randomly, the sensitivity
	and specificity were 88.9\%, and 100\%, respectively. {CONCLUSION}:
	{T}he technology of {SELDI}-{TOF} {MS} which combined with analysis
	tools of bioinformatics is a novel effective method for screening
	and identifying tumor biomarkers of gliomas and it may provide a
	new approach for the clinical diagnosis of glioma.},
  keywords = {Algorithms, Animals, Artificial Intelligence, Computer-Assisted, Diagnosis,
	Electrodes, Electroencephalography, Feedback, Implanted, Male, Motor
	Cortex, Movement, Non-P.H.S., Non-U.S. Gov't, Rats, Research Support,
	Sprague-Dawley, Therapy, U.S. Gov't, User-Computer Interface, 15812888}
}

@article{Liu2005Multiclass,
  author = {Jane Jijun Liu and Gene Cutler and Wuxiong Li and Zheng Pan and Sihua
	Peng and Tim Hoey and Liangbiao Chen and Xuefeng Bruce Ling},
  title = {Multiclass cancer classification and biomarker discovery using {GA}-based
	algorithms.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2691--2697},
  number = {11},
  month = jun,
  abstract = {M{OTIVATION}: {T}he development of microarray-based high-throughput
	gene profiling has led to the hope that this technology could provide
	an efficient and accurate means of diagnosing and classifying tumors,
	as well as predicting prognoses and effective treatments. {H}owever,
	the large amount of data generated by microarrays requires effective
	reduction of discriminant gene features into reliable sets of tumor
	biomarkers for such multiclass tumor discrimination. {T}he availability
	of reliable sets of biomarkers, especially serum biomarkers, should
	have a major impact on our understanding and treatment of cancer.
	{RESULTS}: {W}e have combined genetic algorithm ({GA}) and all paired
	({AP}) support vector machine ({SVM}) methods for multiclass cancer
	categorization. {P}redictive features can be automatically determined
	through iterative {GA}/{SVM}, leading to very compact sets of non-redundant
	cancer-relevant genes with the best classification performance reported
	to date. {I}nterestingly, these different classifier sets harbor
	only modest overlapping gene features but have similar levels of
	accuracy in leave-one-out cross-validations ({LOOCV}). {F}urther
	characterization of these optimal tumor discriminant features, including
	the use of nearest shrunken centroids ({NSC}), analysis of annotations
	and literature text mining, reveals previously unappreciated tumor
	subclasses and a series of genes that could be used as cancer biomarkers.
	{W}ith this approach, we believe that microarray-based multiclass
	molecular analysis can be an effective tool for cancer biomarker
	discovery and subsequent molecular cancer diagnosis.},
  doi = {10.1093/bioinformatics/bti419},
  pdf = {../local/Liu2005Multiclass.pdf},
  file = {Liu2005Multiclass.pdf:local/Liu2005Multiclass.pdf:PDF},
  keywords = {biosvm},
  pii = {bti419},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti419}
}

@article{Liu2007Network-based,
  author = {Liu, M. and Liberzon, A. and Kong, A. W. and Lai, W. R. and Park,
	P. J. and Kohane, I. S. and Kasif, S.},
  title = {Network-based analysis of affected biological processes in type 2
	diabetes models.},
  journal = {PLoS Genet.},
  year = {2007},
  volume = {3},
  pages = {e96},
  number = {6},
  month = jun,
  abstract = {Type 2 diabetes mellitus is a complex disorder associated with multiple
	genetic, epigenetic, developmental, and environmental factors. Animal
	models of type 2 diabetes differ based on diet, drug treatment, and
	gene knockouts, and yet all display the clinical hallmarks of hyperglycemia
	and insulin resistance in peripheral tissue. The recent advances
	in gene-expression microarray technologies present an unprecedented
	opportunity to study type 2 diabetes mellitus at a genome-wide scale
	and across different models. To date, a key challenge has been to
	identify the biological processes or signaling pathways that play
	significant roles in the disorder. Here, using a network-based analysis
	methodology, we identified two sets of genes, associated with insulin
	signaling and a network of nuclear receptors, which are recurrent
	in a statistically significant number of diabetes and insulin resistance
	models and transcriptionally altered across diverse tissue types.
	We additionally identified a network of protein-protein interactions
	between members from the two gene sets that may facilitate signaling
	between them. Taken together, the results illustrate the benefits
	of integrating high-throughput microarray studies, together with
	protein-protein interaction networks, in elucidating the underlying
	biological processes associated with a complex disorder.},
  doi = {10.1371/journal.pgen.0030096},
  pdf = {../local/Liu2007Network-based.pdf},
  file = {Liu2007Network-based.pdf:Liu2007Network-based.pdf:PDF},
  institution = {Department of Biomedical Engineering, Boston University, Boston,
	Massachusetts, United States of America. manwayl@bu.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {06-PLGE-RA-0555},
  pmid = {17571924},
  timestamp = {2011.10.08},
  url = {http://dx.doi.org/10.1371/journal.pgen.0030096}
}

@article{Liu2004Comments,
  author = {Xiaomei Liu and Lawrence O Hall and Kevin W Bowyer},
  title = {Comments on ``a parallel mixture of {SVM}s for very large scale problems''.},
  journal = {Neural {C}omput},
  year = {2004},
  volume = {16},
  pages = {1345--1351},
  number = {7},
  month = jul,
  abstract = {Collobert, {B}engio, and {B}engio (2002) recently introduced a novel
	approach to using a neural network to provide a class prediction
	from an ensemble of support vector machines ({SVM}s). {T}his approach
	has the advantage that the required computation scales well to very
	large data sets. {E}xperiments on the {F}orest {C}over data set show
	that this parallel mixture is more accurate than a single {SVM},
	with 90.72\% accuracy reported on an independent test set. {A}lthough
	this accuracy is impressive, their article does not consider alternative
	types of classifiers. {W}e show that a simple ensemble of decision
	trees results in a higher accuracy, 94.75\%, and is computationally
	efficient. {T}his result is somewhat surprising and illustrates the
	general value of experimental comparisons using different types of
	classifiers.},
  doi = {10.1162/089976604323057416},
  url = {http://dx.doi.org/10.1162/089976604323057416}
}

@article{Liu2004Active,
  author = {Liu, Y.},
  title = {Active learning with support vector machine applied to gene expression
	data for cancer classification},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2004},
  volume = {44},
  pages = {1936--1941},
  number = {6},
  abstract = {There is growing interest in the application of machine learning techniques
	in bioinformatics. {T}he supervised machine learning approach has
	been widely applied to bioinformatics and gained a lot of success
	in this research area. {W}ith this learning approach researchers
	first develop a large training set, which is a time-consuming and
	costly process. {M}oreover, the proportion of the positive examples
	and negative examples in the training set may not represent the real-world
	data distribution, which causes concept drift. {A}ctive learning
	avoids these problems. {U}nlike most conventional learning methods
	where the training set used to derive the model remains static, the
	classifier can actively choose the training data and the size of
	training set increases. {W}e introduced an algorithm for performing
	active learning with support vector machine and applied the algorithm
	to gene expression profiles of colon cancer, lung cancer, and prostate
	cancer samples. {W}e compared the classification performance of active
	learning with that of passive learning. {T}he results showed that
	employing the active learning method can achieve high accuracy and
	significantly reduce the need for labeled training instances. {F}or
	lung cancer classification, to achieve 96\% of the total positives,
	only 31 labeled examples were needed in active learning whereas in
	passive learning 174 labeled examples were required. {T}hat meant
	over 82\% reduction was realized by active learning. {I}n active learning
	the areas under the receiver operating characteristic ({ROC}) curves
	were over 0.81, while in passive learning the areas under the {ROC}
	curves were below 0.50.},
  doi = {10.1021/ci049810a},
  pdf = {../local/Liu2004Active.pdf},
  file = {Liu2004Active.pdf:local/Liu2004Active.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1021/ci049810a}
}

@article{Liu2004comparative,
  author = {Y. Liu},
  title = {A comparative study on feature selection methods for drug discovery.},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2004},
  volume = {44},
  pages = {1823--1828},
  number = {5},
  abstract = {Feature selection is frequently used as a preprocessing step to machine
	learning. {T}he removal of irrelevant and redundant information often
	improves the performance of learning algorithms. {T}his paper is
	a comparative study of feature selection in drug discovery. {T}he
	focus is on aggressive dimensionality reduction. {F}ive methods were
	evaluated, including information gain, mutual information, a chi2-test,
	odds ratio, and {GSS} coefficient. {T}wo well-known classification
	algorithms, {N}a{\"\i}ve {B}ayesian and {S}upport {V}ector {M}achine
	({SVM}), were used to classify the chemical compounds. {T}he results
	showed that {N}a{\"\i}ve {B}ayesian benefited significantly from the
	feature selection, while {SVM} performed better when all features
	were used. {I}n this experiment, information gain and chi2-test were
	most effective feature selection methods. {U}sing information gain
	with a {N}a{\"\i}ve {B}ayesian classifier, removal of up to 96\% of the
	features yielded an improved classification accuracy measured by
	sensitivity. {W}hen information gain was used to select the features,
	{SVM} was much less sensitive to the reduction of feature space.
	{T}he feature set size was reduced by 99\%, while losing only a few
	percent in terms of sensitivity (from 58.7\% to 52.5\%) and specificity
	(from 98.4\% to 97.2\%). {I}n contrast to information gain and chi2-test,
	mutual information had relatively poor performance due to its bias
	toward favoring rare features and its sensitivity to probability
	estimation errors.},
  doi = {10.1021/ci049875d},
  pdf = {../local/Liu2004comparative.pdf},
  file = {Liu2004comparative.pdf:local/Liu2004comparative.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci049875d}
}

@article{Liu2005Gene,
  author = {Zhenqiu Liu and Dechang Chen and Halima Bensmail},
  title = {Gene expression data classification with kernel principal component
	analysis.},
  journal = {J {B}iomed {B}iotechnol},
  year = {2005},
  volume = {2005},
  pages = {155--159},
  number = {2},
  abstract = {One important feature of the gene expression data is that the number
	of genes ${M}$ far exceeds the number of samples ${N}$ . {S}tandard
	statistical methods do not work well when ${N} < {M}$ . {D}evelopment
	of new methodologies or modification of existing methodologies is
	needed for the analysis of the microarray data. {I}n this paper,
	we propose a novel analysis procedure for classifying the gene expression
	data. {T}his procedure involves dimension reduction using kernel
	principal component analysis ({KPCA}) and classification with logistic
	regression (discrimination). {KPCA} is a generalization and nonlinear
	version of principal component analysis. {T}he proposed algorithm
	was applied to five different gene expression datasets involving
	human tumor samples. {C}omparison with other popular classification
	methods such as support vector machines and neural networks shows
	that our algorithm is very promising in classifying gene expression
	data.},
  doi = {10.1155/JBB.2005.155},
  pdf = {../local/Liu2005Gene.pdf},
  file = {Liu2005Gene.pdf:local/Liu2005Gene.pdf:PDF},
  keywords = {biosvm},
  pii = {S1110724304406032_THIS_PII_IS_INCORRECT_},
  url = {http://dx.doi.org/10.1155/JBB.2005.155}
}

@article{Llinas2006Comparative,
  author = {Llin{\'a}s, M. and Bozdech, Z. and Wong, E. D. and Adai, A. T. and DeRisi, J. L.},
  title = {Comparative whole genome transcriptome analysis of three {Plasmodium}
	falciparum strains.},
  journal = {Nucleic Acids Res.},
  volume = {34},
  number = {4},
  pages = {1166--1173},
  year = {2006},
  abstract = {Gene expression patterns have been demonstrated to be highly variable
	between similar cell types, for example lab strains and wild strains
	of Saccharomyces cerevisiae cultured under identical growth conditions
	exhibit a wide range of expression differences. We have used a genome-wide
	approach to characterize transcriptional differences between strains
	of Plasmodium falciparum by characterizing the transcriptome of the
	48 h intraerythrocytic developmental cycle (IDC) for two strains,
	3D7 and Dd2 and compared these results to our prior work using the
	HB3 strain. These three strains originate from geographically diverse
	locations and possess distinct drug sensitivity phenotypes. Our goal
	was to identify transcriptional differences related to phenotypic
	properties of these strains including immune evasion and drug sensitivity.
	We find that the highly streamlined transcriptome is remarkably well
	conserved among all three strains, and differences in gene expression
	occur mainly in genes coding for surface antigens involved in parasite-host
	interactions. Our analysis also detects several transcripts that
	are unique to individual strains as well as identifying large chromosomal
	deletions and highly polymorphic regions across strains. The majority
	of these genes are uncharacterized and have no homology to other
	species. These tractable transcriptional differences provide important
	phenotypes for these otherwise highly related strains of Plasmodium.},
  doi = {10.1093/nar/gkj517},
  url = {http://dx.doi.org/10.1093/nar/gkj517},
  pii = {34/4/1166},
  pmid = {16493140},
  keywords = {plasmodium},
  timestamp = {2007.10.04}
}

@article{Lo2005Effect,
  author = {Siaw Ling Lo and Cong Zhong Cai and Yu Zong Chen and Maxey C M Chung},
  title = {Effect of training datasets on support vector machine prediction
	of protein-protein interactions.},
  journal = {Proteomics},
  year = {2005},
  volume = {5},
  pages = {876--884},
  number = {4},
  month = mar,
  abstract = {Knowledge of protein-protein interaction is useful for elucidating
	protein function via the concept of 'guilt-by-association'. {A} statistical
	learning method, {S}upport {V}ector {M}achine ({SVM}), has recently
	been explored for the prediction of protein-protein interactions
	using artificial shuffled sequences as hypothetical noninteracting
	proteins and it has shown promising results ({B}ock, {J}. {R}., {G}ough,
	{D}. {A}., {B}ioinformatics 2001, 17, 455-460). {I}t remains unclear
	however, how the prediction accuracy is affected if real protein
	sequences are used to represent noninteracting proteins. {I}n this
	work, this effect is assessed by comparison of the results derived
	from the use of real protein sequences with that derived from the
	use of shuffled sequences. {T}he real protein sequences of hypothetical
	noninteracting proteins are generated from an exclusion analysis
	in combination with subcellular localization information of interacting
	proteins found in the {D}atabase of {I}nteracting {P}roteins. {P}rediction
	accuracy using real protein sequences is 76.9\% compared to 94.1\%
	using artificial shuffled sequences. {T}he discrepancy likely arises
	from the expected higher level of difficulty for separating two sets
	of real protein sequences than that for separating a set of real
	protein sequences from a set of artificial sequences. {T}he use of
	real protein sequences for training a {SVM} classification system
	is expected to give better prediction results in practical cases.
	{T}his is tested by using both {SVM} systems for predicting putative
	protein partners of a set of thioredoxin related proteins. {T}he
	prediction results are consistent with observations, suggesting that
	real sequence is more practically useful in development of {SVM}
	classification system for facilitating protein-protein interaction
	prediction.},
  doi = {10.1002/pmic.200401118},
  pdf = {../local/Lo2005Effect.pdf},
  file = {Lo2005Effect.pdf:local/Lo2005Effect.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1002/pmic.200401118}
}

@article{Lobo1998Applications,
  author = {Lobo, Miguel Sousa and Vandenberghe, Lieven and Boyd, Stephen and
	Lebret, Herv{\'e}},
  title = {Applications of Second-Order Cone Programming},
  journal = {Linear Algebra and its Applications},
  year = {1998},
  volume = {284},
  pages = {193--228},
  number = {1--3},
  month = nov
}

@article{Lockhart2000Genomics,
  author = {Lockhart, D. J. and Winzeler, E. A.},
  title = {Genomics, gene expression and {DNA} arrays},
  journal = {Nature},
  year = {2000},
  volume = {405},
  pages = {827--836},
  number = {6788},
  publisher = {Macmillan Magazines Ltd}
}

@article{Lodhi2002Text,
  author = {Lodhi, H. and Saunders, C. and Shawe-Taylor, J. and Cristianini,
	N. and Watkins, C.},
  title = {Text classification using string kernels},
  journal = {J. {M}ach. {L}earn. {R}es.},
  year = {2002},
  volume = {2},
  pages = {419--444},
  pdf = {../local/lodh02.pdf},
  file = {lodh02.pdf:local/lodh02.pdf:PDF},
  keywords = {biosvm},
  subject = {kernel},
  url = {http://www.ai.mit.edu/projects/jmlr/papers/volume2/lodhi02a/abstract.html}
}

@inproceedings{Lodhi2000Text,
  author = {Lodhi, H. and Shawe-Taylor, J. and Cristianini, N. and Watkins, C.
	J. C. H.},
  title = {Text {C}lassification using {S}tring {K}ernels},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2000},
  pages = {563--569},
  pdf = {../local/lodh00.pdf},
  file = {lodh00.pdf:local/lodh00.pdf:PDF},
  keywords = {biosvm},
  subject = {kernel},
  url = {http://www.neurocolt.com/tech_reps/2000/00079.ps.gz}
}

@book{Lodish2000Molecular,
  author = {Lodish, H. and Berk, A. and Zipursky, S. L. and Matsudaira, P. and
	Baltimore, D. and Darnell, J.},
  title = {Molecular cell biology},
  publisher = {W. H. Freeman},
  address = {New York},
  edition = {Fourth},
  year = {2000}
}

@techreport{Logan2001Study,
  author = {Logan, B. and Moreno, P. and Suzek, B. and Weng, Z. and Kasif, S.},
  title = {A {S}tudy of {R}emote {H}omology {D}etection},
  institution = {Compaq Cambridge Research Laboratory},
  year = {2001},
  number = {CRL 2001/05},
  month = jun,
  abstract = {Functional annotation of newly sequenced genomes is an important challenge
	for computational biology systems. {W}hile much progress has been
	made towards scaling up experimental methods for functional assignment
	to putative genes, most current genomic annotation systems rely on
	computational solutions for homology modeling via sequence or structural
	similarity. {W}e present a new method for remote homology detection
	that relies on combining probabilistic modeling and supervised learning
	in high-dimensional features spaces. {O}ur system uses a transformation
	that converts protein domains to fixed-dimension representative feature
	vectors, where each feature records the sensitivity of each protein
	domain to a previously learned set of ``protein motifs'' or ``blocks''.
	{S}ubsequently, the system utilizes {S}upport {V}ector {M}achine
	({SVM}) classifiers to learn the boundaries between structural protein
	classes. {O}ur experiments suggest that this technique performs well
	relative to several other remote homology methods for the majority
	of protein domains in {SCOP} 1.37 {PDB}90.},
  pdf = {../local/Logan2001Study.pdf},
  file = {Logan2001Study.pdf:local/Logan2001Study.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Loi2007Definition,
  author = {Loi, Sherene and Haibe-Kains, Benjamin and Desmedt, Christine and
	Lallemand, Fran{\c{c}}oise and Tutt, Andrew M. and Gillet, Cheryl and Ellis,
	Paul and Harris, Adrian and Bergh, Jonas and Foekens, John A. and
	Klijn, Jan G M. and Larsimont, Denis and Buyse, Marc and Bontempi,
	Gianluca and Delorenzi, Mauro and Piccart, Martine J. and Sotiriou,
	Christos},
  title = {Definition of clinically distinct molecular subtypes in estrogen
	receptor-positive breast carcinomas through genomic grade.},
  journal = {J Clin Oncol},
  year = {2007},
  volume = {25},
  pages = {1239--1246},
  number = {10},
  month = apr,
  abstract = {A number of microarray studies have reported distinct molecular profiles
	of breast cancers (BC), such as basal-like, ErbB2-like, and two to
	three luminal-like subtypes. These were associated with different
	clinical outcomes. However, although the basal and the ErbB2 subtypes
	are repeatedly recognized, identification of estrogen receptor (ER)
	-positive subtypes has been inconsistent. Therefore, refinement of
	their molecular definition is needed. We have previously reported
	a gene expression grade index (GGI), which defines histologic grade
	based on gene expression profiles. Using this algorithm, we assigned
	ER-positive BC to either high-or low-genomic grade subgroups and
	compared these with previously reported ER-positive molecular classifications.
	As further validation, we classified 666 ER-positive samples into
	subtypes and assessed their clinical outcome. Two ER-positive molecular
	subgroups (high and low genomic grade) could be defined using the
	GGI. Despite tracking a single biologic pathway, these were highly
	comparable to the previously described luminal A and B classification
	and significantly correlated to the risk groups produced using the
	21-gene recurrence score. The two subtypes were associated with statistically
	distinct clinical outcome in both systemically untreated and tamoxifen-treated
	populations. The use of genomic grade can identify two clinically
	distinct ER-positive molecular subtypes in a simple and highly reproducible
	manner across multiple data sets. This study emphasizes the important
	role of proliferation-related genes in predicting prognosis in ER-positive
	BC.},
  doi = {10.1200/JCO.2006.07.1522},
  institution = {Jules Bordet Institute; Machine Learning Group, Universit{\'e} Libre
	de Bruxelles, Brussels, Belgium.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {25/10/1239},
  pmid = {17401012},
  timestamp = {2012.03.05},
  url = {http://dx.doi.org/10.1200/JCO.2006.07.1522}
}

@unpublished{Loosli2006SimpleSVM,
  author = {Loosli, G.},
  title = {{SimpleSVM} Toolbox},
  note = {Available at http://asi.insa-rouen.fr/\textasciitilde{}gloosli/simpleSVM.html},
  year = {2006},
  timestamp = {2008.02.10}
}

@article{Lopez08Statistical,
  author = {Lopez, A.},
  title = {Statistical machine translation},
  journal = {ACM Comput. Surv.},
  year = {2008},
  volume = {40},
  pages = {1--49},
  number = {3},
  address = {New York, NY, USA},
  doi = {10.1145/1380584.1380586},
  issn = {0360-0300},
  publisher = {ACM}
}

@article{Lounici2008Sup-norm,
  author = {Lounici, K.},
  title = {Sup-norm convergence rate and sign concentration property of {Lasso}
	and {Dantzig} estimators},
  journal = {Electron. J. Statist.},
  year = {2008},
  volume = {2},
  pages = {90--102},
  doi = {10.1214/08-EJS177},
  pdf = {../local/Lounici2008Sup-norm.pdf},
  file = {Lounici2008Sup-norm.pdf:Lounici2008Sup-norm.pdf:PDF},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2009.05.02},
  url = {http://dx.doi.org/10.1214/08-EJS177}
}

@inproceedings{Lounici2009Taking,
  author    = {Lounici, Karim and Pontil, Massimiliano and Tsybakov, Alexandre B. and
	van de Geer, Sara},
  title     = {Taking Advantage of Sparsity in Multi-Task Learning},
  booktitle = {Proceedings of COLT},
  year      = {2009}
}

@article{Lowery2008MicroRNAs,
  author = {Lowery, A. J. and Miller, N. and McNeill, R. E. and Kerin, M. J.},
  title = {{MicroRNAs} as prognostic indicators and therapeutic targets: potential
	effect on breast cancer management.},
  journal = {Clin. Cancer Res.},
  year = {2008},
  volume = {14},
  pages = {360--365},
  number = {2},
  month = jan,
  abstract = {The discovery of microRNAs (miRNA) as novel modulators of gene expression
	has resulted in a rapidly expanding repertoire of molecules in this
	family, as reflected in the concomitant expansion of scientific literature.
	MiRNAs are a category of naturally occurring RNA molecules that play
	important regulatory roles in plants and animals by targeting mRNAs
	for cleavage or translational repression. Characteristically, miRNAs
	are noncoding, single-stranded short (18-22 nucleotides) RNAs, features
	which possibly explain why they had not been intensively investigated
	until recently. Accumulating experimental evidence indicates that
	miRNAs play a pivotal role in many cellular functions via the regulation
	of gene expression. Furthermore, their dysregulation and/or mutation
	has been shown in carcinogenesis. We provide a brief review of miRNA
	biogenesis and discuss the technical challenges of modifying experimental
	techniques to facilitate the identification and characterization
	of these small RNAs. MiRNA function and their involvement in malignancy,
	particularly their putative role as oncogenes or tumor suppressors
	is also discussed, with a specific emphasis on breast cancer. Finally,
	we comment on the potential role of miRNAs in breast cancer management,
	particularly in improving current prognostic tools and achieving
	the goal of individualized cancer treatment.},
  doi = {10.1158/1078-0432.CCR-07-0992},
  pdf = {../local/Lowery2008MicroRNAs.pdf},
  file = {Lowery2008MicroRNAs.pdf:Lowery2008MicroRNAs.pdf:PDF},
  institution = {Department of Surgery, National University of Ireland, Galway, Ireland.},
  keywords = {csbcbook, csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {14/2/360},
  pmid = {18223209},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1158/1078-0432.CCR-07-0992}
}

@article{Lu2003Preoperative,
  author = {Lu, C. and Van Gestel, T. and Suykens, J. A. and Van Huffel, S. and
	Vergote, I. and Timmerman, D.},
  title = {Preoperative prediction of malignancy of ovarian tumors using least
	squares support vector machines.},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2003},
  volume = {28},
  pages = {281--306},
  number = {3},
  abstract = {In this work, we develop and evaluate several least squares support
	vector machine ({LS}-{SVM}) classifiers within the {B}ayesian evidence
	framework, in order to preoperatively predict malignancy of ovarian
	tumors. {T}he analysis includes exploratory data analysis, optimal
	input variable selection, parameter estimation, and performance evaluation
	via receiver operating characteristic ({ROC}) curve analysis. {LS}-{SVM}
	models with linear and radial basis function ({RBF}) kernels, and
	logistic regression models have been built on 265 training data,
	and tested on 160 newly collected patient data. {T}he {LS}-{SVM}
	model with nonlinear {RBF} kernel achieves the best performance,
	on the test set with the area under the {ROC} curve ({AUC}), sensitivity
	and specificity equal to 0.92, 81.5\% and 84.0\%, respectively. {T}he
	best averaged performance over 30 runs of randomized cross-validation
	is also obtained by an {LS}-{SVM} {RBF} model, with {AUC}, sensitivity
	and specificity equal to 0.94, 90.0\% and 80.6\%, respectively. {T}hese
	results show that the {LS}-{SVM} models have the potential to obtain
	a reliable preoperative distinction between benign and malignant
	ovarian tumors, and to assist the clinicians for making a correct
	diagnosis.},
  doi = {10.1016/S0933-3657(03)00051-4},
  pdf = {../local/Lu2003Preoperative.pdf},
  file = {Lu2003Preoperative.pdf:local/Lu2003Preoperative.pdf:PDF},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0933-3657(03)00051-4}
}

@article{Lu2005MicroRNA,
  author = {Lu, J. and Getz, G. and Miska, E. A. and Alvarez-Saavedra, E. and
	Lamb, J. and Peck, D. and Sweet-Cordero, A. and Ebert, D. L. and
	Mak, R. H. and Ferrando, A. A. and Downing, J. R. and Jacks, T. and
	Horvitz, H. R. and Golub, T. R.},
  title = {{MicroRNA} expression profiles classify human cancers.},
  journal = {Nature},
  year = {2005},
  volume = {435},
  pages = {834--838},
  number = {7043},
  month = jun,
  abstract = {Recent work has revealed the existence of a class of small non-coding
	RNA species, known as microRNAs (miRNAs), which have critical functions
	across various biological processes. Here we use a new, bead-based
	flow cytometric miRNA expression profiling method to present a systematic
	expression analysis of 217 mammalian miRNAs from 334 samples, including
	multiple human cancers. The miRNA profiles are surprisingly informative,
	reflecting the developmental lineage and differentiation state of
	the tumours. We observe a general downregulation of miRNAs in tumours
	compared with normal tissues. Furthermore, we were able to successfully
	classify poorly differentiated tumours using miRNA expression profiles,
	whereas messenger RNA profiles were highly inaccurate when applied
	to the same samples. These findings highlight the potential of miRNA
	profiling in cancer diagnosis.},
  doi = {10.1038/nature03702},
  pdf = {../local/Lu2005MicroRNA.pdf},
  file = {Lu2005MicroRNA.pdf:Lu2005MicroRNA.pdf:PDF},
  institution = {Broad Institute of MIT and Harvard, Cambridge, Massachusetts 02141,
	USA.},
  keywords = {csbcbook, csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature03702},
  pmid = {15944708},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1038/nature03702}
}

@article{Lu2005Potential,
  author = {Wei-Zhen Lu and Wen-Jian Wang},
  title = {Potential assessment of the ``support vector machine'' method in forecasting
	ambient air pollutant trends.},
  journal = {Chemosphere},
  year = {2005},
  volume = {59},
  pages = {693--701},
  number = {5},
  month = apr,
  abstract = {Monitoring and forecasting of air quality parameters are popular and
	important topics of atmospheric and environmental research today
	due to the health impact caused by exposing to air pollutants existing
	in urban air. {T}he accurate models for air pollutant prediction
	are needed because such models would allow forecasting and diagnosing
	potential compliance or non-compliance in both short- and long-term
	aspects. {A}rtificial neural networks ({ANN}) are regarded as reliable
	and cost-effective method to achieve such tasks and have produced
	some promising results to date. {A}lthough {ANN} has addressed more
	attentions to environmental researchers, its inherent drawbacks,
	e.g., local minima, over-fitting training, poor generalization performance,
	determination of the appropriate network architecture, etc., impede
	the practical application of {ANN}. {S}upport vector machine ({SVM}),
	a novel type of learning machine based on statistical learning theory,
	can be used for regression and time series prediction and have been
	reported to perform well by some promising results. {T}he work presented
	in this paper aims to examine the feasibility of applying {SVM} to
	predict air pollutant levels in advancing time series based on the
	monitored air pollutant database in {H}ong {K}ong downtown area.
	{A}t the same time, the functional characteristics of {SVM} are investigated
	in the study. {T}he experimental comparisons between the {SVM} model
	and the classical radial basis function ({RBF}) network demonstrate
	that the {SVM} is superior to the conventional {RBF} network in predicting
	air quality parameters with different time series and of better generalization
	performance than the {RBF} model.},
  doi = {10.1016/j.chemosphere.2004.10.032},
  pdf = {../local/Lu2005Potential.pdf},
  file = {Lu2005Potential.pdf:local/Lu2005Potential.pdf:PDF},
  pii = {S0045-6535(04)00988-9},
  url = {http://dx.doi.org/10.1016/j.chemosphere.2004.10.032}
}

@article{Lu2003Expression,
  author = {Lu, Y. J. and Williamson, D. and Wang, R. and Summersgill, B. and
	Rodriguez, S. and Rogers, S. and Pritchard-Jones, K. and Campbell,
	C. and Shipley, J.},
  title = {Expression profiling targeting chromosomes for tumor classification
	and prediction of clinical behavior.},
  journal = {Genes {C}hromosomes {C}ancer},
  year = {2003},
  volume = {38},
  pages = {207--214},
  number = {3},
  abstract = {Tumors are associated with altered or deregulated gene products that
	affect critical cellular functions. {H}ere we assess the use of a
	global expression profiling technique that identifies chromosome
	regions corresponding to differential gene expression, termed comparative
	expressed sequence hybridization ({CESH}). {CESH} analysis was performed
	on a total of 104 tumors with a diagnosis of rhabdomyosarcoma, leiomyosarcoma,
	prostate cancer, and favorable-histology {W}ilms tumors. {T}hrough
	the use of the chromosome regions identified as variables, support
	vector machine analysis was applied to assess classification potential,
	and feature selection (recursive feature elimination) was used to
	identify the best discriminatory regions. {W}e demonstrate that the
	{CESH} profiles have characteristic patterns in tumor groups and
	were also able to distinguish subgroups of rhabdomyosarcoma. {T}he
	overall {CESH} profiles in favorable-histology {W}ilms tumors were
	found to correlate with subsequent clinical behavior. {C}lassification
	by use of {CESH} profiles was shown to be similar in performance
	to previous microarray expression studies and highlighted regions
	for further investigation. {W}e conclude that analysis of chromosomal
	expression profiles can group, subgroup, and even predict clinical
	behavior of tumors to a level of performance similar to that of microarray
	analysis. {CESH} is independent of selecting sequences for interrogation
	and is a simple, rapid, and widely accessible approach to identify
	clinically useful differential expression.},
  doi = {10.1002/gcc.10276},
  pdf = {../local/Lu2003Expression.pdf},
  file = {Lu2003Expression.pdf:local/Lu2003Expression.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Luan2005Classification,
  author = {Feng Luan and Ruisheng Zhang and Chunyan Zhao and Xiaojun Yao and
	Mancang Liu and Zhide Hu and Botao Fan},
  title = {Classification of the carcinogenicity of {N}-nitroso compounds based
	on support vector machines and linear discriminant analysis.},
  journal = {Chem {R}es {T}oxicol},
  year = {2005},
  volume = {18},
  pages = {198--203},
  number = {2},
  month = feb,
  abstract = {The support vector machine ({SVM}), as a novel type of learning machine,
	was used to develop a classification model of carcinogenic properties
	of 148 {N}-nitroso compounds. {T}he seven descriptors calculated
	solely from the molecular structures of compounds selected by forward
	stepwise linear discriminant analysis ({LDA}) were used as inputs
	of the {SVM} model. {T}he obtained results confirmed the discriminative
	capacity of the calculated descriptors. {T}he result of {SVM} (total
	accuracy of 95.2\%) is better than that of {LDA} (total accuracy
	of 89.8\%).},
  doi = {10.1021/tx049782q},
  pdf = {../local/Luan2005Classification.pdf},
  file = {Luan2005Classification.pdf:local/Luan2005Classification.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/tx049782q}
}

@unpublished{Lugosi2003Concentration-of-measure,
  author = {Lugosi, G.},
  title = {Concentration-of-measure inequalities},
  note = {Lecture notes},
  month = jan,
  year = {2003}
}

@unpublished{Lugosi1998On,
  author  = {Lugosi, G.},
  title   = {On concentration-of-measure inequalities},
  year    = {1998},
  note    = {Seminar notes},
  url     = {http://www.econ.upf.es/~lugosi/concmeas.ps},
  pdf     = {../local/lugo98.pdf},
  file    = {lugo98.pdf:local/lugo98.pdf:PDF},
  subject = {stat}
}

@article{Lugosi1999Adaptive,
  author  = {Lugosi, G. and Nobel, A.},
  title   = {Adaptive {M}odel {S}election {U}sing {E}mpirical {C}omplexities},
  journal = {Ann. {S}tat.},
  volume  = {27},
  number  = {6},
  pages   = {1830--1864},
  month   = dec,
  year    = {1999},
  url     = {http://www.econ.upf.es/~lugosi/amsec.ps},
  pdf     = {../local/lugo99.pdf},
  file    = {lugo99.pdf:local/lugo99.pdf:PDF},
  subject = {stat}
}

@article{Lugosi2004On,
  author = {Lugosi, G. and Vayatis, N.},
  title = {On the {B}ayes-risk consistency of regularized boosting methods},
  journal = {Ann. {S}tat.},
  year = {2004},
  volume = {32},
  pages = {30--55},
  number = {1},
  doi = {10.1214/aos/1079120129},
  pdf = {../local/Lugosi2004On.pdf},
  file = {Lugosi2004On.pdf:local/Lugosi2004On.pdf:PDF},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1214/aos/1079120129}
}

@article{Lugosi1996Concept,
  author = {Lugosi, G. and Zeger, K.},
  title = {Concept learning using complexity regularization},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1996},
  volume = {42},
  pages = {48--54},
  number = {1},
  month = jan,
  abstract = {In pattern recognition or, as it has also been called, concept learning,
	the value of a $\{0,1\}$-valued random variable {Y} is to be predicted
	based upon observing an ${R}^d$-valued random variable {X}. {W}e apply
	the method of complexity regularization to learn concepts from large
	concept classes. {T}he method is shown to automatically find a good
	balance between the approximation error and the estimation error.
	{I}n particular, the error probability of the obtained classifier
	is shown to decrease as ${O}(\sqrt{\log n/n})$ to the achievable optimum,
	for large nonparametric classes of distributions, as the sample size
	$n$ grows. {W}e also show that if the {B}ayes error probability is
	zero and the {B}ayes rule is in a known family of decision rules,
	the error probability is ${O}(\log n/n)$ for many large families, possibly
	with infinite {VC} dimension.},
  pdf = {../local/Lugosi1996Concept.pdf},
  file = {Lugosi1996Concept.pdf:local/Lugosi1996Concept.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Lugosi1995Nonparametric,
  author = {Lugosi, G. and Zeger, K.},
  title = {Nonparametric estimation via empirical risk minimization},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1995},
  volume = {41},
  pages = {677--687},
  number = {3},
  month = may,
  abstract = {A general notion of universal consistency of nonparametric estimators
	is introduced that applies to regression estimation, conditional
	median estimation, curve fitting, pattern recognition, and learning
	concepts. {G}eneral methods for proving consistency of estimators
	based on minimizing the empirical error are shown. {I}n particular,
	distribution-free almost sure consistency of neural network estimates
	and generalized linear estimators is established.},
  pdf = {../local/Lugosi1995Nonparametric.pdf},
  file = {Lugosi1995Nonparametric.pdf:local/Lugosi1995Nonparametric.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Lukas2004Brain,
  author = {Lukas, L. and Devos, A. and Suykens, J. A. K. and Vanhamme, L. and
	Howe, F. A. and Maj{\'o}s, C. and Moreno-Torres, A. and {Van der Graaf}, M. and
	Tate, A. R. and Ar{\'u}s, C. and Van Huffel, S.},
  title = {Brain tumor classification based on long echo proton {MRS} signals.},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2004},
  volume = {31},
  pages = {73--89},
  number = {1},
  month = may,
  abstract = {There has been a growing research interest in brain tumor classification
	based on proton magnetic resonance spectroscopy (1{H} {MRS}) signals.
	{F}our research centers within the {EU} funded {INTERPRET} project
	have acquired a significant number of long echo 1{H} {MRS} signals
	for brain tumor classification. {I}n this paper, we present an objective
	comparison of several classification techniques applied to the discrimination
	of four types of brain tumors: meningiomas, glioblastomas, astrocytomas
	grade {II} and metastases. {L}inear and non-linear classifiers are
	compared: linear discriminant analysis ({LDA}), support vector machines
	({SVM}) and least squares {SVM} ({LS}-{SVM}) with a linear kernel
	as linear techniques and {LS}-{SVM} with a radial basis function
	({RBF}) kernel as a non-linear technique. {K}ernel-based methods
	can perform well in processing high dimensional data. {T}his motivates
	the inclusion of {SVM} and {LS}-{SVM} in this study. {T}he analysis
	includes optimal input variable selection, (hyper-) parameter estimation,
	followed by performance evaluation. {T}he classification performance
	is evaluated over 200 stratified random samplings of the dataset
	into training and test sets. {R}eceiver operating characteristic
	({ROC}) curve analysis measures the performance of binary classification,
	while for multiclass classification, we consider the accuracy as
	performance measure. {B}ased on the complete magnitude spectra, automated
	binary classifiers are able to reach an area under the {ROC} curve
	({AUC}) of more than 0.9 except for the hard case glioblastomas versus
	metastases. {A}lthough, based on the available long echo 1{H} {MRS}
	data, we did not find any statistically significant difference between
	the performances of {LDA} and the kernel-based methods, the latter
	have the strength that no dimensionality reduction is required to
	obtain such a high performance.},
  doi = {10.1016/j.artmed.2004.01.001},
  pdf = {../local/Lukas2004Brain.pdf},
  file = {Lukas2004Brain.pdf:local/Lukas2004Brain.pdf:PDF},
  pii = {S0933365704000120},
  url = {http://dx.doi.org/10.1016/j.artmed.2004.01.001}
}

@article{Lunn1929Isomerism,
  author  = {Lunn, A. C. and Senior, J. K.},
  title   = {Isomerism and configuration},
  journal = {J. Phys. Chem.},
  volume  = {33},
  pages   = {1027--1079},
  year    = {1929}
}

@inproceedings{Luo2000Alignement,
  author    = {Luo, B. and Hancock, E. R.},
  title     = {Alignment and Correspondence Using Singular Value Decomposition},
  booktitle = {Proceedings of the Joint IAPR International Workshops on Advances
	in Pattern Recognition},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  year      = {2000},
  pages     = {226--235},
  isbn      = {3-540-67946-4}
}

@article{Luo2004gene-silencing,
  author = {Luo, K. Q. and Chang, D. C.},
  title = {The gene-silencing efficiency of si{RNA} is strongly dependent on
	the local structure of m{RNA} at the targeted region.},
  journal = {Biochem. {B}iophys. {R}es. {C}ommun.},
  year = {2004},
  volume = {318},
  pages = {303--310},
  number = {1},
  month = may,
  abstract = {The gene-silencing effect of short interfering {RNA} (si{RNA}) is
	known to vary strongly with the targeted position of the m{RNA}.
	{A} number of hypotheses have been suggested to explain this phenomenon.
	{W}e would like to test if this positional effect is mainly due to
	the secondary structure of the m{RNA} at the target site. {W}e proposed
	that this structural factor can be characterized by a single parameter
	called "the hydrogen bond ({H}-b) index," which represents the average
	number of hydrogen bonds formed between nucleotides in the target
	region and the rest of the m{RNA}. {T}his index can be determined
	using a computational approach. {W}e tested the correlation between
	the {H}-b index and the gene-silencing effects on three genes ({B}cl-2,
	h{TF}, and cyclin {B}1) using a variety of si{RNA}s. {W}e found that
	the gene-silencing effect is inversely dependent on the {H}-b index,
	indicating that the local m{RNA} structure at the targeted site is
	the main cause of the positional effect. {B}ased on this finding,
	we suggest that the {H}-b index can be a useful guideline for future
	si{RNA} design.},
  doi = {10.1016/j.bbrc.2004.04.027},
  keywords = {Animals, Apoptosis, Base Composition, Base Pairing, Base Sequence,
	Binding Sites, Cell Cycle, Cell Proliferation, Comparative Study,
	Cultured, Cyclin B, Cyclin D1, DNA-Binding Proteins, Down-Regulation,
	Extramural, Fluorescence, Gene Silencing, Gene Targeting, Genetic
	Vectors, Green Fluorescent Proteins, Hela Cells, Humans, Hydrogen
	Bonding, Luminescent Proteins, Male, Messenger, Mice, Microscopy,
	Models, Molecular, Molecular Sequence Data, N.I.H., Non-U.S. Gov't,
	Nucleic Acid Conformation, Nude, P.H.S., Prostatic Neoplasms, Proto-Oncogene
	Proteins c-bcl-2, Proto-Oncogene Proteins c-myc, RNA, Regression
	Analysis, Research Support, STAT3 Transcription Factor, Small Interfering,
	Thromboplastin, Trans-Activators, Tumor Cells, U.S. Gov't},
  pii = {S0006291X04007284},
  pmid = {15110788},
  url = {http://dx.doi.org/10.1016/j.bbrc.2004.04.027}
}

@article{Luo2004Recognizing,
  author = {Tong Luo and Kurt Kramer and Dmitry B Goldgof and Lawrence O Hall
	and Scott Samson and Andrew Remsen and Thomas Hopkins},
  title = {Recognizing plankton images from the shadow image particle profiling
	evaluation recorder.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2004},
  volume = {34},
  pages = {1753--1762},
  number = {4},
  month = aug,
  abstract = {We present a system to recognize underwater plankton images from the
	shadow image particle profiling evaluation recorder ({SIPPER}). {T}he
	challenge of the {SIPPER} image set is that many images do not have
	clear contours. {T}o address that, shape features that do not heavily
	depend on contour information were developed. {A} soft margin support
	vector machine ({SVM}) was used as the classifier. {W}e developed
	a way to assign probability after multiclass {SVM} classification.
	{O}ur approach achieved approximately 90\% accuracy on a collection
	of plankton images. {O}n another larger image set containing manually
	unidentifiable particles, it also provided 75.6\% overall accuracy.
	{T}he proposed approach was statistically significantly more accurate
	on the two data sets than a {C}4.5 decision tree and a cascade correlation
	neural network. {T}he single {SVM} significantly outperformed ensembles
	of decision trees created by bagging and random forests on the smaller
	data set and was slightly better on the other data set. {T}he 15-feature
	subset produced by our feature selection approach provided slightly
	better accuracy than using all 29 features. {O}ur probability model
	gave us a reasonable rejection curve on the larger data set.}
}

@article{Luo1997Mammalian,
  author = {Y. Luo and A. Batalao and H. Zhou and L. Zhu},
  title = {Mammalian two-hybrid system: a complementary approach to the yeast
	two-hybrid system.},
  journal = {Biotechniques},
  year = {1997},
  volume = {22},
  pages = {350--352},
  number = {2},
  month = feb,
  abstract = {Here we demonstrate the use of a mammalian two-hybrid system to study
	protein-protein interactions. Like the yeast two-hybrid system, this
	is a genetic, in vivo assay based on the reconstitution of the function
	of a transcriptional activator. In this system, one protein of interest
	is expressed as a fusion to the Gal4 DNA-binding domain and another
	protein is expressed as a fusion to the activation domain of the
	VP16 protein of the herpes simplex virus. The vectors that express
	these fusion proteins are cotransfected with a reporter chloramphenicol
	acetyltransferase (CAT) vector into a mammalian cell line. The reporter
	plasmid contains a cat gene under the control of five consensus Gal4
	binding sites. If the two fusion proteins interact, there will be
	a significant increase in expression of the cat reporter gene. Previously,
	it was reported that mouse p53 antitumor protein and simian virus
	40 large T antigen interact in a yeast two-hybrid system. Using a
	mammalian two-hybrid system, we were able to independently confirm
	this interaction. The mammalian two-hybrid system can be used as
	a complementary approach to verify protein-protein interactions detected
	by a yeast two-hybrid system screening. In addition, the mammalian
	two-hybrid system has two main advantages: (i) Assay results can
	be obtained within 48 h of transfection, and (ii) protein interactions
	in mammalian cells may better mimic actual in vivo interactions.},
  institution = {CLONTECH Laboratories, Palo Alto, CA, USA. yluo@clontech.com},
  keywords = {Antigens, Polyomavirus Transforming; Binding Sites; Chloramphenicol
	O-Acetyltransferase; DNA; DNA-Binding Proteins; Fungal Proteins;
	Genes, Reporter; Genetic Vectors; Hela Cells; Herpes Simplex Virus
	Protein Vmw65; Humans; Promoter Regions, Genetic; Recombinant Fusion
	Proteins; Saccharomyces cerevisiae Proteins; Simian virus 40; Transcription
	Factors; Transfection; Tumor Suppressor Protein p53},
  owner = {phupe},
  pmid = {9043710},
  timestamp = {2010.08.31}
}

@article{VonLuxburg2007A,
  author = {von Luxburg, Ulrike},
  title = {A tutorial on spectral clustering},
  journal = {Statistics and Computing},
  year = {2007},
  volume = {17},
  pages = {395--416},
  number = {4},
  month = dec,
  abstract = {In recent years, spectral clustering has become
	one of the most popular modern clustering algorithms. It is simple
	to implement, can be solved efficiently by standard linear algebra
	software, and very often outperforms traditional clustering algorithms
	such as the k-means algorithm. On the first glance spectral clustering
	appears slightly mysterious, and it is not obvious to see why it
	works at all and what it really does. The goal of this tutorial is
	to give some intuition on those questions. We describe different
	graph Laplacians and their basic properties, present the most common
	spectral clustering algorithms, and derive those algorithms from
	scratch by several different approaches. Advantages and disadvantages
	of the different spectral clustering algorithms are discussed.},
  doi = {10.1007/s11222-007-9033-z},
  url = {http://dx.doi.org/10.1007/s11222-007-9033-z}
}

@article{Lytle2007Target,
  author = {Lytle, J. Robin and Yario, Therese A. and Steitz, Joan A.},
  title = {Target {mRNAs} are repressed as efficiently by {microRNA}-binding sites
	in the 5' {UTR} as in the 3' {UTR}},
  journal = {Proc Natl Acad Sci U S A},
  year = {2007},
  volume = {104},
  pages = {9667--9672},
  number = {23},
  month = jun,
  abstract = {In animals, microRNAs (miRNAs) bind to the 3' UTRs of their target
	mRNAs and interfere with translation, although the exact mechanism
	of inhibition of protein synthesis remains unclear. Functional miRNA-binding
	sites in the coding regions or 5' UTRs of endogenous mRNAs have not
	been identified. We studied the effect of introducing miRNA target
	sites into the 5' UTR of luciferase reporter mRNAs containing internal
	ribosome entry sites (IRESs), so that potential steric hindrance
	by a microribonucleoprotein complex would not interfere with the
	initiation of translation. In human HeLa cells, which express endogenous
	let-7a miRNA, the translational efficiency of these IRES-containing
	reporters with 5' let-7 complementary sites from the Caenorhabditis
	elegans lin-41 3' UTR was repressed. Similarly, the IRES-containing
	reporters were translationally repressed when human Ago2 was tethered
	to either the 5' or 3' UTR. Interestingly, the method of DNA transfection
	affected our ability to observe miRNA-mediated repression. Our results
	suggest that association with any position on a target mRNA is mechanistically
	sufficient for a microribonucleoprotein to exert repression of translation
	at some step downstream of initiation.},
  doi = {10.1073/pnas.0703820104},
  institution = {Department of Molecular Biophysics and Biochemistry, Howard Hughes
	Medical Institute, Yale University School of Medicine, New Haven,
	CT 06536, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {0703820104},
  pmid = {17535905},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1073/pnas.0703820104}
}

@article{Levy-Leduc2009Detection,
  author    = {L{\'e}vy-Leduc, C. and Roueff, F.},
  title     = {Detection and localization of change-points in high-dimensional network
	traffic data},
  journal   = {Ann. Appl. Stat.},
  year      = {2009},
  volume    = {3},
  number    = {2},
  pages     = {637--662},
  doi       = {10.1214/08-AOAS232},
  url       = {http://dx.doi.org/10.1214/08-AOAS232},
  pdf       = {../local/Levy-Leduc2009Detection.pdf},
  file      = {Levy-Leduc2009Detection.pdf:Levy-Leduc2009Detection.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.04.13}
}

@article{Lopez-Bigas2004Genome-wide,
  author = {L{\'o}pez-Bigas, N. and Ouzounis, C. A.},
  title = {Genome-wide identification of genes likely to be involved in human
	genetic disease},
  journal = {Nucleic Acids Res.},
  year = {2004},
  volume = {32},
  pages = {3108--3114},
  number = {10},
  abstract = {Sequence analysis of the group of proteins known to be associated
	with hereditary diseases allows the detection of key distinctive
	features shared within this group. The disease proteins are characterized
	by greater length of their amino acid sequence, a broader phylogenetic
	extent, and specific conservation and paralogy profiles compared
	with all human proteins. This unique property pattern provides insights
	into the global nature of hereditary diseases and moreover can be
	used to predict novel disease genes. We have developed a computational
	method that allows the detection of genes likely to be involved in
	hereditary disease in the human genome. The probability score assignments
	for the human genome are accessible at http://maine.ebi.ac.uk:8000/services/dgp.},
  doi = {10.1093/nar/gkh605},
  pdf = {../local/Lopez-Bigas2004Genome-wide.pdf},
  file = {Lopez-Bigas2004Genome-wide.pdf:Lopez-Bigas2004Genome-wide.pdf:PDF},
  institution = {Cambridge Outstation, Cambridge CB10 1SD, UK.},
  owner = {jp},
  pii = {32/10/3108},
  pmid = {15181176},
  timestamp = {2009.03.18},
  url = {http://dx.doi.org/10.1093/nar/gkh605}
}

@article{Loenning2007Breast,
  author = {L{\o}nning, P. E.},
  title = {Breast cancer prognostication and prediction: are we making progress?},
  journal = {Ann. Oncol.},
  year = {2007},
  volume = {18},
  pages = {viii3--viii7},
  number = {Suppl 8},
  month = sep,
  abstract = {Currently, much effort is being invested in the identification of
	new, accurate prognostic and predictive factors in breast cancer.
	Prognostic factors assess the patient's risk of relapse based on
	indicators such as intrinsic tumor biology and disease stage at diagnosis,
	and are traditionally used to identify patients who can be spared
	unnecessary adjuvant therapy based only on the risk of relapse. Lymph
	node status and tumor size are accepted as well-defined prognostic
	factors in breast cancer. Predictive factors, in contrast, determine
	the responsiveness of a particular tumor to a specific treatment.
	Despite recent advances in the understanding of breast cancer biology
	and changing practices in disease management, with the exception
	of hormone receptor status, which predicts responsiveness to endocrine
	treatment, no predictive factor for response to systemic therapy
	in breast cancer is widely accepted. While gene expression studies
	have provided important new information with regard to tumor biology
	and prognostication, attempts to identify predictive factors have
	not been successful so far. This article will focus on recent advances
	in prognostication and prediction, with emphasis on findings from
	gene expression profiling studies.},
  doi = {10.1093/annonc/mdm260},
  pdf = {../local/Loenning2007Breast.pdf},
  file = {Loenning2007Breast.pdf:Loenning2007Breast.pdf:PDF},
  institution = {Institute of Medicine, University of Bergen, Department of Oncology,
	Haukeland University Hospital, Bergen, Norway. per.lonning@helse-bergen.no},
  keywords = {csbcbook, csbcbook-ch3},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {18/suppl_8/viii3},
  pmid = {17890212},
  timestamp = {2011.04.07},
  url = {http://dx.doi.org/10.1093/annonc/mdm260}
}

@article{Ma2001Hormone-dependent,
  author = {Ma, H. and Baumann, C. T. and Li, H. and Strahl, B. D. and Rice, R. and
	Jelinek, M. A. and Aswad, D. W. and Allis, C. D. and Hager, G. L. and
	Stallcup, M. R.},
  title = {Hormone-dependent, {CARM1}-directed, arginine-specific methylation
	of histone {H3} on a steroid-regulated promoter},
  journal = {Curr Biol},
  year = {2001},
  volume = {11},
  pages = {1981--1985},
  number = {24},
  month = dec,
  abstract = {Activation of gene transcription involves chromatin remodeling by
	coactivator proteins that are recruited by DNA-bound transcription
	factors. Local modification of chromatin structure at specific gene
	promoters by ATP-dependent processes and by posttranslational modifications
	of histone N-terminal tails provides access to RNA polymerase II
	and its accompanying transcription initiation complex. While the
	roles of lysine acetylation, serine phosphorylation, and lysine methylation
	of histones in chromatin remodeling are beginning to emerge, low
	levels of arginine methylation of histones have only recently been
	documented, and its physiological role is unknown. The coactivator
	CARM1 methylates histone H3 at Arg17 and Arg26 in vitro and cooperates
	synergistically with p160-type coactivators (e.g., GRIP1, SRC-1,
	ACTR) and coactivators with histone acetyltransferase activity (e.g.,
	p300, CBP) to enhance gene activation by steroid and nuclear hormone
	receptors (NR) in transient transfection assays. In the current study,
	CARM1 cooperated with GRIP1 to enhance steroid hormone-dependent
	activation of stably integrated mouse mammary tumor virus (MMTV)
	promoters, and this coactivator function required the methyltransferase
	activity of CARM1. Chromatin immunoprecipitation assays and immunofluorescence
	studies indicated that CARM1 and the CARM1-methylated form of histone
	H3 specifically associated with a large tandem array of MMTV promoters
	in a hormone-dependent manner. Thus, arginine-specific histone methylation
	by CARM1 is an important part of the transcriptional activation process.},
  institution = {Department of Pathology, HMR 301, University of Southern California,
	2011 Zonal Avenue, Los Angeles, CA 90089, USA.},
  keywords = {Acetylation; Arginine, metabolism; Fluorescent Antibody Technique;
	Histones, chemistry/metabolism; Hormones, physiology; Lysine, metabolism;
	Mammary Tumor Virus, Mouse, genetics; Methylation; Phosphorylation;
	Precipitin Tests; Promoter Regions, Genetic; Protein-Arginine N-Methyltransferases,
	physiology; Serine, metabolism; Steroids, physiology},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0960-9822(01)00600-5},
  pmid = {11747826},
  timestamp = {2010.11.23}
}

@article{Ma2004Structural,
  author = {Ma, J. B. and Ye, K. and Patel, D. J.},
  title = {Structural basis for overhang-specific small interfering {RNA}
	recognition by the {PAZ} domain},
  journal = {Nature},
  year = {2004},
  volume = {429},
  pages = {318--322},
  number = {6989},
  month = may,
  abstract = {Short RNAs mediate gene silencing, a process associated with virus
	resistance, developmental control and heterochromatin formation in
	eukaryotes. RNA silencing is initiated through Dicer-mediated processing
	of double-stranded RNA into small interfering RNA (siRNA). The siRNA
	guide strand associates with the Argonaute protein in silencing effector
	complexes, recognizes complementary sequences and targets them for
	silencing. The PAZ domain is an RNA-binding module found in Argonaute
	and some Dicer proteins and its structure has been determined in
	the free state. Here, we report the 2.6 {\AA} crystal structure of the
	PAZ domain from human Argonaute eIF2c1 bound to both ends of a 9-mer
	siRNA-like duplex. In a sequence-independent manner, PAZ anchors
	the 2-nucleotide 3' overhang of the siRNA-like duplex within a highly
	conserved binding pocket, and secures the duplex by binding the 7-nucleotide
	phosphodiester backbone of the overhang-containing strand and capping
	the 5'-terminal residue of the complementary strand. On the basis
	of the structure and on binding assays, we propose that PAZ might
	serve as an siRNA-end-binding module for siRNA transfer in the RNA
	silencing pathway, and as an anchoring site for the 3' end of guide
	RNA within silencing effector complexes.},
  doi = {10.1038/nature02519},
  keywords = {sirna},
  pii = {nature02519},
  pmid = {15152257},
  timestamp = {2006.07.08},
  url = {http://dx.doi.org/10.1038/nature02519}
}

@article{Ma2005Structural,
  author = {Ma, J.-B. and Yuan, Y.-R. and Meister, G. and Pei, Y. and Tuschl,
	T. and Patel, D. J.},
  title = {Structural basis for 5'-end-specific recognition of guide {RNA} by
	the {A}. fulgidus {PIWI} protein},
  journal = {Nature},
  year = {2005},
  volume = {434},
  pages = {666--670},
  keywords = {sirna},
  owner = {vert},
  timestamp = {2006.04.27}
}

@article{Ma2005PNAS,
  author = {Ma, L. and Wagner, J. and Rice, J. J. and Hu, W. and Levine, A. J.
	and Stolovitzky, G. A.},
  title = {A plausible model for the digital response of p53 to {DNA} damage},
  journal = {Proc Natl Acad Sci U S A},
  year = {2005},
  volume = {102},
  pages = {14266--14271},
  number = {40},
  abstract = {Recent observations show that the single-cell response of p53 to ionizing
	radiation (IR) is "digital" in that it is the number of oscillations
	rather than the amplitude of p53 that shows dependence on the radiation
	dose. We present a model of this phenomenon. In our model, double-strand
	break (DSB) sites induced by IR interact with a limiting pool of
	DNA repair proteins, forming DSB-protein complexes at DNA damage
	foci. The persisting complexes are sensed by ataxia telangiectasia
	mutated (ATM), a protein kinase that activates p53 once it is phosphorylated
	by DNA damage. The ATM-sensing module switches on or off the downstream
	p53 oscillator, consisting of a feedback loop formed by p53 and its
	negative regulator, Mdm2. In agreement with experiments, our simulations
	show that by assuming stochasticity in the initial number of DSBs
	and the DNA repair process, p53 and Mdm2 exhibit a coordinated oscillatory
	dynamics upon IR stimulation in single cells, with a stochastic number
	of oscillations whose mean increases with IR dose. The damped oscillations
	previously observed in cell populations can be explained as the aggregate
	behavior of single cells.},
  keywords = {csbcbook}
}

@article{Ma2008Penalized,
  author = {Ma, S. and Huang, J.},
  title = {Penalized feature selection and classification in bioinformatics},
  journal = {Briefings in Bioinformatics},
  year = {2008},
  volume = {9},
  pages = {392--403},
  number = {5},
  publisher = {Oxford University Press}
}

@article{Ma2006MSB,
  author = {Ma, Wenzhe and Lai, Luhua and Ouyang, Qi and Tang, Chao},
  title = {Robustness and modular design of the {Drosophila} segment polarity
	network},
  journal = {Mol Syst Biol},
  year = {2006},
  volume = {2},
  pages = {70},
  abstract = {Biomolecular networks have to perform their functions robustly. A
	robust function may have preferences in the topological structures
	of the underlying network. We carried out an exhaustive computational
	analysis on network topologies in relation to a patterning function
	in Drosophila embryogenesis. We found that whereas the vast majority
	of topologies can either not perform the required function or only
	do so very fragilely, a small fraction of topologies emerges as particularly
	robust for the function. The topology adopted by Drosophila, that
	of the segment polarity network, is a top ranking one among all topologies
	with no direct autoregulation. Furthermore, we found that all robust
	topologies are modular---each being a combination of three kinds of
	modules. These modules can be traced back to three subfunctions of
	the patterning function, and their combinations provide a combinatorial
	variability for the robust topologies. Our results suggest that the
	requirement of functional robustness drastically reduces the choices
	of viable topology to a limited set of modular combinations among
	which nature optimizes its choice under evolutionary and other biological
	constraints.},
  doi = {10.1038/msb4100111},
  institution = {Center for Theoretical Biology, Peking University, Beijing, China.},
  keywords = {Animals; Biological Evolution; Body Patterning; Computer Simulation;
	Drosophila Proteins, physiology; Drosophila melanogaster, anatomy
	\& histology/physiology; Feedback, Physiological; Gene Expression
	Regulation, Developmental; Genes, Insect; Models, Biological; Signal
	Transduction; Systems Biology, methods; Transcription Factors},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pii = {msb4100111},
  pmid = {17170765},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/msb4100111}
}

@article{Ma2003GadE,
  author = {Ma, Z. and Gong, S. and Richard, H. and Tucker, D. L. and Conway,
	T. and Foster, J. W.},
  title = {{GadE} ({YhiE}) activates glutamate decarboxylase-dependent acid
	resistance in {Escherichia} coli {K-12}},
  journal = {Mol. Microbiol.},
  year = {2003},
  volume = {49},
  pages = {1309--1320},
  number = {5},
  month = sep,
  abstract = {Commensal and pathogenic strains of Escherichia coli possess three
	inducible acid resistance systems that collaboratively protect cells
	against acid stress to pH 2 or below. The most effective system requires
	glutamate in the acid challenge media and relies on two glutamate
	decarboxylases (GadA and B) combined with a putative glutamate:gamma-aminobutyric
	acid antiporter (GadC). A complex network of regulators mediates
	induction of this system in response to various media, pH and growth
	phase signals. We report that the LuxR-like regulator GadE (formerly
	YhiE) is required for expression of gadA and gadBC regardless of
	media or growth conditions. This protein binds directly to the 20
	bp GAD box sequence found in the control regions of both loci. Two
	previously identified AraC-like regulators, GadX and GadW, are only
	needed for gadA/BC expression under some circumstances. Overexpression
	of GadX or GadW will not overcome a need for GadE. However, overexpression
	of GadE can supplant a requirement for GadX and W. Data provided
	also indicate that GadX and GadE can simultaneously bind the area
	around the GAD box region and probably form a complex. The gadA,
	gadBC and gadE genes are all induced by low pH in exponential phase
	cells grown in minimal glucose media. The acid induction of gadA/BC
	results primarily from the acid induction of gadE. Constitutive expression
	of GadE removes most pH control over the glutamate decarboxylase
	and antiporter genes. The small amount of remaining pH control is
	governed by GadX and W. The finding that gadE mutations also diminish
	the effectiveness of the other two acid resistance systems suggests
	that GadE influences the expression of additional acid resistance
	components. The number of regulatory proteins (five), sigma factors
	(two) and regulatory feedback loops focused on gadA/BC expression
	make this one of the most intensively regulated systems in E. coli.},
  pii = {3633},
  pmid = {12940989},
  timestamp = {2008.02.12}
}

@article{Maby2004Analysis,
  author = {Maby, E. and Le Bouquin Jeann{\`e}s, R. and Li{\'e}geois-Chauvel, C. and
	Gourevitch, B. and Faucon, G.},
  title = {Analysis of auditory evoked potential parameters in the presence
	of radiofrequency fields using a support vector machines method},
  journal = {Med Biol Eng Comput},
  year = {2004},
  volume = {42},
  pages = {562--568},
  number = {4},
  month = jul,
  abstract = {The paper presents a study of global system for mobile (GSM) phone
	radiofrequency effects on human cerebral activity. The work was
	based on the study of auditory evoked potentials (AEPs) recorded
	from healthy humans and epileptic patients. The protocol allowed
	the comparison of AEPs recorded with or without exposure to electrical
	fields. Ten variables measured from AEPs were employed in the
	design of a supervised support vector machines classifier. The
	classification performance measured the classifier's ability to discriminate
	features performed with or without radiofrequency exposure. Most
	significant features were chosen by a backward sequential selection
	that ranked the variables according to their pertinence for the discrimination.
	Finally, the most discriminating features were analysed statistically
	by a Wilcoxon signed rank test. For both populations, the N100
	amplitudes were reduced under the influence of GSM radiofrequency
	(mean attenuation of -0.36 microV for healthy subjects and -0.60
	microV for epileptic patients). Healthy subjects showed a N100
	latency decrease (-5.23 ms in mean), which could be consistent with
	mild, localised heating. The auditory cortical activity in humans
	was modified by GSM phone radiofrequencies, but an effect on brain
	functionality has not been proven.}
}

@article{MacBeath2002Protein,
  author = {MacBeath, Gavin},
  title = {Protein microarrays and proteomics},
  journal = {Nat Genet},
  year = {2002},
  volume = {32},
  pages = {526--532},
  number = {Suppl},
  month = dec,
  abstract = {The system-wide study of proteins presents an exciting challenge in
	this information-rich age of whole-genome biology. Although traditional
	investigations have yielded abundant information about individual
	proteins, they have been less successful at providing us with an
	integrated understanding of biological systems. The promise of proteomics
	is that, by studying many components simultaneously, we will learn
	how proteins interact with each other, as well as with non-proteinaceous
	molecules, to control complex processes in cells, tissues and even
	whole organisms. Here, I discuss the role of microarray technology
	in this burgeoning area.},
  doi = {10.1038/ng1037},
  institution = {Department of Chemistry and Chemical Biology, and Bauer Center for
	Genomics Research, Harvard University, 12 Oxford Street, Cambridge,
	Massachusetts 02138, USA. macbeath@chemistry.harvard.edu},
  keywords = {Forecasting; Humans; Immunoassay, methods; Protein Array Analysis,
	methods; Proteomics, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {ng1037},
  pmid = {12454649},
  timestamp = {2010.07.28},
  url = {http://dx.doi.org/10.1038/ng1037}
}

@article{Machado2005Detection,
  author = {Machado, Roberto F. and Laskowski, Daniel and Deffenderfer, Olivia and
	Burch, Timothy and Zheng, Shuo and Mazzone, Peter J. and Mekhail, Tarek
	and Jennings, Constance and Stoller, James K. and Pyle, Jacqueline and
	Duncan, Jennifer and Dweik, Raed A. and Erzurum, Serpil C.},
  title = {Detection of lung cancer by sensor array analyses of exhaled breath},
  journal = {Am J Respir Crit Care Med},
  year = {2005},
  volume = {171},
  pages = {1286--1291},
  number = {11},
  month = jun,
  abstract = {RATIONALE: Electronic noses are successfully used in commercial
	applications, including detection and analysis of volatile organic
	compounds in the food industry. OBJECTIVES: We hypothesized that
	the electronic nose could identify and discriminate between lung
	diseases, especially bronchogenic carcinoma. METHODS: In a discovery
	and training phase, exhaled breath of 14 individuals with bronchogenic
	carcinoma and 45 healthy control subjects or control subjects without
	cancer was analyzed. Principal components and canonic discriminant
	analysis of the sensor data was used to determine whether exhaled
	gases could discriminate between cancer and noncancer. Discrimination
	between classes was performed using Mahalanobis distance. Support
	vector machine analysis was used to create and apply a cancer prediction
	model prospectively in a separate group of 76 individuals, 14 with
	and 62 without cancer. MAIN RESULTS: Principal components and
	canonic discriminant analysis demonstrated discrimination between
	samples from patients with lung cancer and those from other groups.
	In the validation study, the electronic nose had 71.4\% sensitivity
	and 91.9\% specificity for detecting lung cancer; positive and negative
	predictive values were 66.6 and 93.4\%, respectively. In this population
	with a lung cancer prevalence of 18\%, positive and negative predictive
	values were 66.6 and 94.5\%, respectively. CONCLUSION: The exhaled
	breath of patients with lung cancer has distinct characteristics
	that can be identified with an electronic nose. The results provide
	feasibility to the concept of using the electronic nose for managing
	and detecting lung cancer.},
  doi = {10.1164/rccm.200409-1184OC},
  pdf = {../local/Machado2005Detection.pdf},
  file = {Machado2005Detection.pdf:local/Machado2005Detection.pdf:PDF},
  pii = {200409-1184OC},
  url = {http://dx.doi.org/10.1164/rccm.200409-1184OC}
}

@inproceedings{MacQueen1967Some,
  author = {MacQueen, J. B.},
  title = {Some Methods for Classification and Analysis of Multivariate Observations},
  booktitle = {Proceedings of the Fifth {Berkeley} Symposium on Mathematical Statistics
	and Probability},
  year = {1967},
  pages = {281--297},
  publisher = {University of California Press},
  owner = {kb},
  timestamp = {2011.05.05}
}

@article{Madeira2004Biclustering,
  author = {Madeira, S. C. and Oliveira, A. L.},
  title = {Biclustering algorithms for biological data analysis: a survey},
  journal = {IEEE/ACM Trans Comput Biol Bioinform},
  year = {2004},
  volume = {1},
  pages = {24--45},
  number = {1},
  abstract = {A large number of clustering approaches have been proposed for the
	analysis of gene expression data obtained from microarray experiments.
	However, the results from the application of standard clustering
	methods to genes are limited. This limitation is imposed by the existence
	of a number of experimental conditions where the activity of genes
	is uncorrelated. A similar limitation exists when clustering of conditions
	is performed. For this reason, a number of algorithms that perform
	simultaneous clustering on the row and column dimensions of the data
	matrix has been proposed. The goal is to find submatrices, that is,
	subgroups of genes and subgroups of conditions, where the genes exhibit
	highly correlated activities for every condition. In this paper,
	we refer to this class of algorithms as biclustering. Biclustering
	is also referred in the literature as coclustering and direct clustering,
	among others names, and has also been used in fields such as information
	retrieval and data mining. In this comprehensive survey, we analyze
	a large number of existing approaches to biclustering, and classify
	them in accordance with the type of biclusters they can find, the
	patterns of biclusters that are discovered, the methods used to perform
	the search, the approaches used to evaluate the solution, and the
	target applications.},
  doi = {10.1109/TCBB.2004.2},
  institution = {University of Beira Interior, Rua Marqu{\^e}s D'Avila e Bolama, Covilh{\~a},
	Portugal. smadeira@di.ubi.pt},
  keywords = {Algorithms; Cluster Analysis; Computational Biology, methods; Gene
	Expression Profiling, statistics \& numerical data; Gene Expression,
	genetics; Humans; Models, Statistical; Oligonucleotide Array Sequence
	Analysis, methods; Saccharomyces cerevisiae, genetics},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {17048406},
  timestamp = {2012.02.27},
  url = {http://dx.doi.org/10.1109/TCBB.2004.2}
}

@article{Maglogiannis2004Characterization,
  author = {Maglogiannis, I. G. and Zafiropoulos, E. P.},
  title = {Characterization of digital medical images utilizing support vector
	machines},
  journal = {{BMC} Med. Informat. Decis. Making},
  year = {2004},
  volume = {4},
  number = {4},
  abstract = {Background: In this paper we discuss an efficient methodology for
	the image analysis and characterization of digital images containing
	skin lesions using Support Vector Machines and present the
	results of a preliminary study. Methods: The methodology is based
	on the support vector machines algorithm for data classification
	and it has been applied to the problem of the recognition of malignant
	melanoma versus dysplastic naevus. Border and colour based features
	were extracted from digital images of skin lesions acquired under
	reproducible conditions, using basic image processing techniques.
	Two alternative classification methods, the statistical discriminant
	analysis and the application of neural networks were also applied
	to the same problem and the results are compared. Results: The
	SVM (Support Vector Machines) algorithm performed quite well
	achieving 94.1\% correct classification, which is better than the
	performance of the other two classification methodologies. The
	method of discriminant analysis classified correctly 88\% of cases
	(71\% of Malignant Melanoma and 100\% of Dysplastic Naevi),
	while the neural networks performed approximately the same. Conclusion:
	The use of a computer-based system, like the one described in this
	paper, is intended to avoid human subjectivity and to perform specific
	tasks according to a number of criteria. However the presence of
	an expert dermatologist is considered necessary for the overall visual
	assessment of the skin lesion and the final diagnosis.},
  doi = {10.1186/1472-6947-4-4},
  pdf = {../local/Maglogiannis2004Characterization.pdf},
  file = {Maglogiannis2004Characterization.pdf:local/Maglogiannis2004Characterization.pdf:PDF},
  owner = {vert}
}

@techreport{Mahe2006pharmacophorea,
  author = {Mah{\'e}, P. and Ralaivola, L. and Stoven, V. and Vert, J.-P.},
  title = {The pharmacophore kernel for virtual screening with support vector
	machines},
  institution = {Ecole des Mines de Paris},
  year = {2006},
  number = {HAL:ccsd-00020066},
  month = mar,
  keywords = {chemoinformatics kernel-theory},
  owner = {mahe},
  timestamp = {2006.07.31},
  url = {http://hal.ccsd.cnrs.fr/ccsd-00020066}
}

@article{Mahe2009Graph,
  author = {Mah{\'e}, P. and Vert, J.-P.},
  title = {Graph kernels based on tree patterns for molecules},
  journal = {Mach. Learn.},
  year = {2009},
  volume = {75},
  pages = {3--35},
  number = {1},
  doi = {10.1007/s10994-008-5086-2},
  pdf = {../local/Mahe2009Graph.pdf},
  file = {Mahe2009Graph.pdf:Mahe2009Graph.pdf:PDF},
  owner = {jp},
  timestamp = {2009.03.10},
  url = {http://dx.doi.org/10.1007/s10994-008-5086-2}
}

@article{Mahe2006Pharmacophore,
  author = {Mah{\'e}, P. and Ralaivola, L. and Stoven, V. and Vert, J.-P.},
  title = {The Pharmacophore Kernel for Virtual Screening with Support Vector
	Machines},
  journal = {J. Chem. Inf. Model.},
  year = {2006},
  volume = {46},
  pages = {2003--2014},
  number = {5},
  abstract = {We introduce a family of positive definite kernels specifically optimized
	for the manipulation of 3D structures of molecules with kernel methods.
	The kernels are based on the comparison of the three-point pharmacophores
	present in the 3D structures of molecules, a set of molecular features
	known to be particularly relevant for virtual screening applications.
	We present a computationally demanding exact implementation of these
	kernels, as well as fast approximations related to the classical
	fingerprint-based approaches. Experimental results suggest that this
	new approach is competitive with state-of-the-art algorithms based
	on the 2D structure of molecules for the detection of inhibitors
	of several drug targets.},
  doi = {10.1021/ci060138m},
  pdf = {../local/Mahe2006Pharmacophore.pdf},
  file = {Mahe2006Pharmacophore.pdf:Mahe2006Pharmacophore.pdf:PDF},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.09.13},
  url = {http://dx.doi.org/10.1021/ci060138m}
}

@inproceedings{Mahe2004Extensions,
  author = {Mah{\'e}, P. and Ueda, N. and Akutsu, T. and Perret, J.-L. and Vert,
	J.-P.},
  title = {Extensions of marginalized graph kernels},
  booktitle = {Proceedings of the Twenty-First International Conference
	on Machine Learning ({ICML} 2004)},
  year = {2004},
  editor = {Greiner, R. and Schuurmans, D.},
  pages = {552--559},
  publisher = {ACM Press},
  abstract = {Positive definite kernels between labeled graphs have recently been
	proposed. They enable the application of kernel methods, such as
	support vector machines, to the analysis and classification of graphs,
	for example, chemical compounds. These graph kernels are obtained
	by marginalizing a kernel between paths with respect to a random walk
	model on the graph vertices along the edges. We propose two extensions
	of these graph kernels, with the double goal to reduce their computation
	time and increase their relevance as measure of similarity between
	graphs. First, we propose to modify the label of each vertex by
	automatically adding information about its environment with the use of
	the Morgan algorithm. Second, we suggest a modification of the
	random walk model to prevent the walk from coming back to a vertex
	that was just visited. These extensions are then tested on benchmark
	experiments of chemical compounds classification, with promising results.},
  pdf = {../local/icmlMod.pdf},
  file = {icmlMod.pdf:http\://cg.ensmp.fr/~vert/publi/04icml/icmlMod.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  owner = {vert}
}

@article{Mahe2005Graph,
  author = {Mah{\'e}, P. and Ueda, N. and Akutsu, T. and Perret, J.-L. and Vert,
	J.-P.},
  title = {Graph kernels for molecular structure-activity relationship analysis
	with support vector machines},
  journal = {J. Chem. Inf. Model.},
  year = {2005},
  volume = {45},
  pages = {939--951},
  number = {4},
  abstract = {The support vector machine algorithm together with graph kernel functions
	has recently been introduced to model structure-activity relationships
	({SAR}) of molecules from their 2{D} structure, without the need
	for explicit molecular descriptor computation. {W}e propose two extensions
	to this approach with the double goal to reduce the computational
	burden associated with the model and to enhance its predictive accuracy:
	description of the molecules by a {M}organ index process and definition
	of a second-order {M}arkov model for random walks on 2{D} structures.
	{E}xperiments on two mutagenicity data sets validate the proposed
	extensions, making this approach a possible complementary alternative
	to other modeling strategies.},
  doi = {10.1021/ci050039t},
  pdf = {../local/Mahe2005Graph.pdf},
  file = {Mahe2005Graph.pdf:local/Mahe2005Graph.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci050039t}
}

@techreport{Mahe2006Graph,
  author = {Mah{\'e}, P. and Vert, J.-P.},
  title = {Graph kernels based on tree patterns for molecules},
  institution = {HAL},
  number = {ccsd-00095488},
  month = {September},
  year = {2006},
  url = {https://hal.ccsd.cnrs.fr/ccsd-00095488},
  location = {Mines ParisTech},
  keywords = {chemoinformatics kernel-theory},
  owner = {mahe},
  timestamp = {2006.10.10}
}

@phdthesis{Mairal2010Sparse,
  author = {Mairal, J.},
  title = {Sparse coding for machine learning, image processing and computer
	vision},
  school = {{\'E}cole Normale Sup{\'e}rieure de Cachan},
  year = {2010}
}

@article{Mairal2010Online,
  author = {Mairal, J. and Bach, F. and Ponce, J. and Sapiro, G.},
  title = {Online Learning for Matrix Factorization and Sparse Coding},
  journal = {J. Mach. Learn. Res.},
  year = {2010},
  volume = {11},
  pages = {19--60},
  abstract = {Sparse coding---that is, modelling data vectors as sparse linear combinations
	of basis elements---is widely used in machine learning, neuroscience,
	signal processing, and statistics. This paper focuses on the large-scale
	matrix factorization problem that consists of learning the basis
	set in order to adapt it to specific data. Variations of this problem
	include dictionary learning in signal processing, non-negative
	matrix factorization and sparse principal component analysis. In
	this paper, we propose to address these tasks with a new online optimization
	algorithm, based on stochastic approximations, which scales up gracefully
	to large data sets with millions of training samples, and extends
	naturally to various matrix factorization formulations, making it
	suitable for a wide range of learning problems. A proof of convergence
	is presented, along with experiments with natural images and genomic
	data demonstrating that it leads to state-of-the-art performance
	in terms of speed and optimization for both small and large data
	sets.},
  pdf = {../local/Mairal2010Online.pdf},
  file = {Mairal2010Online.pdf:Mairal2010Online.pdf:PDF},
  owner = {jp},
  timestamp = {2010.04.13},
  url = {http://jmlr.csail.mit.edu/papers/v11/mairal10a.html}
}

@unpublished{Mairal2012Path,
  author = {Mairal, J. and Yu, B.},
  title = {Path Coding Penalties for Directed Acyclic Graphs},
  year = {2012},
  owner = {jp},
  timestamp = {2012.03.06}
}

@article{Majoros2005Efficient,
  author = {Majoros, W. H. and Pertea, L. and Salzberg, S. L.},
  title = {Efficient implementation of a generalized pair hidden {M}arkov model
	for comparative gene finding.},
  journal = {Bioinformatics},
  volume = {21},
  number = {9},
  pages = {1782--1788},
  month = {May},
  year = {2005},
  abstract = {M{OTIVATION}: {T}he increased availability of genome sequences of
	closely related organisms has generated much interest in utilizing
	homology to improve the accuracy of gene prediction programs. {G}eneralized
	pair hidden {M}arkov models ({GPHMM}s) have been proposed as one
	means to address this need. {H}owever, all {GPHMM} implementations
	currently available are either closed-source or the details of their
	operation are not fully described in the literature, leaving a significant
	hurdle for others wishing to advance the state of the art in {GPHMM}
	design. {RESULTS}: {W}e have developed an open-source {GPHMM} gene
	finder, {TWAIN}, which performs very well on two related {A}spergillus
	species, {A}.fumigatus and {A}.nidulans, finding 89\% of the exons
	and predicting 74\% of the gene models exactly correctly in a test
	set of 147 conserved gene pairs. {W}e describe the implementation
	of this {GPHMM} and we explicitly address the assumptions and limitations
	of the system. {W}e suggest possible ways of relaxing those assumptions
	to improve the utility of the system without sacrificing efficiency
	beyond what is practical. {AVAILABILITY}: {A}vailable at http://www.tigr.org/software/pirate/twain/twain.html
	under the open-source {A}rtistic {L}icense.},
  keywords = {biogm},
  doi = {10.1093/bioinformatics/bti297},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti297},
  pmid = {15691859},
  pii = {bti297},
  pdf = {../local/Majoros2005Efficient.pdf},
  file = {Majoros2005Efficient.pdf:local/Majoros2005Efficient.pdf:PDF},
  owner = {vert},
  timestamp = {2006.01.18}
}

@article{Majumder2005Relevance,
  author = {Shovan K Majumder and Nirmalya Ghosh and Pradeep K Gupta},
  title = {Relevance vector machine for optical diagnosis of cancer.},
  journal = {Lasers {S}urg {M}ed},
  year = {2005},
  volume = {36},
  pages = {323--333},
  number = {4},
  month = {Apr},
  abstract = {B{ACKGROUND} {AND} {OBJECTIVES}: {A} probability-based, robust diagnostic
	algorithm is an essential requirement for successful clinical use
	of optical spectroscopy for cancer diagnosis. {T}his study reports
	the use of the theory of relevance vector machine ({RVM}), a recent
	{B}ayesian machine-learning framework of statistical pattern recognition,
	for development of a fully probabilistic algorithm for autofluorescence
	diagnosis of early stage cancer of human oral cavity. {I}t also presents
	a comparative evaluation of the diagnostic efficacy of the {RVM}
	algorithm with that based on support vector machine ({SVM}) that
	has recently received considerable attention for this purpose. {STUDY}
	{DESIGN}/{MATERIALS} {AND} {METHODS}: {T}he diagnostic algorithms
	were developed using in vivo autofluorescence spectral data acquired
	from human oral cavity with a {N}(2) laser-based portable fluorimeter.
	{T}he spectral data of both patients as well as normal volunteers,
	enrolled at {O}ut {P}atient department of the {G}ovt. {C}ancer {H}ospital,
	{I}ndore for screening of oral cavity, were used for this purpose.
	{T}he patients selected had no prior confirmed malignancy and were
	diagnosed of squamous cell carcinoma ({SCC}), {G}rade-{I} on the
	basis of histopathology of biopsy taken from abnormal site subsequent
	to acquisition of spectra. {A}utofluorescence spectra were recorded
	from a total of 171 tissue sites from 16 patients and 154 healthy
	squamous tissue sites from 13 normal volunteers. {O}f 171 tissues
	sites from patients, 83 were {SCC} and the rest were contralateral
	uninvolved squamous tissue. {E}ach site was treated separately and
	classified via the diagnostic algorithm developed. {I}nstead of the
	spectral data from uninvolved sites of patients, the data from normal
	volunteers were used as the normal database for the development of
	diagnostic algorithms. {RESULTS}: {T}he diagnostic algorithms based
	on {RVM} were found to provide classification performance comparable
	to the state-of-the-art {SVM}s, while at the same time explicitly
	predicting the probability of class membership. {T}he sensitivity
	and specificity towards cancer were up to 88\% and 95\% for the training
	set data based on leave-one-out cross validation and up to 91\%
	and 96\% for the validation set data. {W}hen implemented on the spectral
	data of the uninvolved oral cavity sites from the patients, it yielded
	a specificity of up to 91\%. {CONCLUSIONS}: {T}he {B}ayesian framework
	of {RVM} formulation makes it possible to predict the posterior probability
	of class membership in discriminating early {SCC} from the normal
	squamous tissue sites of the oral cavity in contrast to dichotomous
	classification provided by the non-{B}ayesian {SVM}. {S}uch classification
	is very helpful in handling asymmetric misclassification costs like
	assigning different weights for having a false negative result for
	identifying cancer compared to false positive. {T}he results further
	demonstrate that for comparable diagnostic performances, the {RVM}-based
	algorithms use significantly fewer kernel functions and do not need
	to estimate any hoc parameters associated with the learning or the
	optimization technique to be used. {T}his implies a considerable
	saving in memory and computation in a practical implementation.},
  doi = {10.1002/lsm.20160},
  pdf = {../local/Majumder2005Relevance.pdf},
  file = {Majumder2005Relevance.pdf:local/Majumder2005Relevance.pdf:PDF},
  pmid = {15825208},
  url = {http://dx.doi.org/10.1002/lsm.20160}
}

@article{Majumder2005Support,
  author = {S. K. Majumder and N. Ghosh and P. K. Gupta},
  title = {Support vector machine for optical diagnosis of cancer.},
  journal = {J {B}iomed {O}pt},
  volume = {10},
  number = {2},
  pages = {024034},
  year = {2005},
  abstract = {We report the application of a support vector machine ({SVM}) for
	the development of diagnostic algorithms for optical diagnosis of
	cancer. {B}oth linear and nonlinear {SVM}s have been investigated
	for this purpose. {W}e develop a methodology that makes use of {SVM}
	for both feature extraction and classification jointly by integrating
	the newly developed recursive feature elimination ({RFE}) in the
	framework of {SVM}. {T}his leads to significantly improved classification
	results compared to those obtained when an independent feature extractor
	such as principal component analysis ({PCA}) is used. {T}he integrated
	{SVM}-{RFE} approach is also found to outperform the classification
	results yielded by traditional {F}isher's linear discriminant ({FLD})-based
	algorithms. {A}ll the algorithms are developed using spectral data
	acquired in a clinical in vivo laser-induced fluorescence ({LIF})
	spectroscopic study conducted on patients being screened for cancer
	of the oral cavity and normal volunteers. {T}he best sensitivity
	and specificity values provided by the nonlinear {SVM}-{RFE} algorithm
	over the data sets investigated are 95 and 96\% toward cancer for
	the training set data based on leave-one-out cross validation and
	93 and 97\% toward cancer for the independent validation set data.
	{W}hen tested on the spectral data of the uninvolved oral cavity
	sites from the patients it yielded a specificity of 85\%.},
  doi = {10.1117/1.1897396},
  url = {http://dx.doi.org/10.1117/1.1897396},
  pdf = {../local/Majumder2005Support.pdf},
  file = {Majumder2005Support.pdf:local/Majumder2005Support.pdf:PDF}
}

@article{Mallat1989theory,
  author = {Mallat, S. G.},
  title = {A theory for multiresolution signal decomposition: the wavelet representation},
  journal = {IEEE T. Pattern. Anal.},
  year = {1989},
  volume = {11},
  pages = {674--693},
  number = {7},
  doi = {10.1109/34.192463},
  pdf = {../local/Mallat1989theory.pdf},
  file = {Mallat1989theory.pdf:Mallat1989theory.pdf:PDF},
  owner = {jp},
  timestamp = {2012.12.13}
}

@article{Mallat1993Matching,
  author = {Mallat, S. G. and Zhang, Zhifeng},
  title = {Matching pursuits with time-frequency dictionaries},
  journal = {IEEE Transactions on Signal Processing},
  year = {1993},
  volume = {41},
  pages = {3397--3415},
  number = {12},
  abstract = {The authors introduce an algorithm, called matching pursuit, that
	decomposes any signal into a linear expansion of waveforms that are
	selected from a redundant dictionary of functions. These waveforms
	are chosen in order to best match the signal structures. Matching
	pursuits are general procedures to compute adaptive signal representations.
	With a dictionary of Gabor functions a matching pursuit defines an
	adaptive time-frequency transform. They derive a signal energy distribution
	in the time-frequency plane, which does not include interference
	terms, unlike Wigner and Cohen class distributions. A matching pursuit
	isolates the signal structures that are coherent with respect to
	a given dictionary. An application to pattern extraction from noisy
	signals is described. They compare a matching pursuit decomposition
	with a signal expansion over an optimized wavepacket orthonormal
	basis, selected with the algorithm of Coifman and Wickerhauser see
	(IEEE Trans. Informat. Theory, vol. 38, Mar. 1992)},
  doi = {10.1109/78.258082},
  keywords = {pursuit},
  url = {http://dx.doi.org/10.1109/78.258082}
}

@article{Mallows1973Some,
  author = {Mallows, C. L.},
  title = {Some comments on {$C_p$}},
  journal = {Technometrics},
  year = {1973},
  volume = {15},
  pages = {661--675},
  number = {4},
  keywords = {criteria, model, selection},
  url = {http://www.math.tau.ac.il/~yekutiel/MA%20seminar/Malows%202000.pdf}
}

@inproceedings{Malyutov2010Recovery,
  author = {Malyutov, M.},
  title = {Recovery of sparse active inputs in general systems: A review},
  booktitle = {Proc. IEEE Region 8 Int Computational Technologies in Electrical
	and Electronics Engineering (SIBIRCON) Conf},
  pages = {15--22},
  year = {2010},
  doi = {10.1109/SIBIRCON.2010.5555301},
  owner = {jp},
  timestamp = {2011.09.19}
}

@article{Mamanova2010Target-enrichment,
  author = {Lira Mamanova and Alison J Coffey and Carol E Scott and Iwanka Kozarewa
	and Emily H Turner and Akash Kumar and Eleanor Howard and Jay Shendure
	and Daniel J Turner},
  title = {Target-enrichment strategies for next-generation sequencing.},
  journal = {Nat Methods},
  volume = {7},
  number = {2},
  pages = {111--118},
  month = {Feb},
  year = {2010},
  abstract = {We have not yet reached a point at which routine sequencing of large
	numbers of whole eukaryotic genomes is feasible, and so it is often
	necessary to select genomic regions of interest and to enrich these
	regions before sequencing. There are several enrichment approaches,
	each with unique advantages and disadvantages. Here we describe our
	experiences with the leading target-enrichment technologies, the
	optimizations that we have performed and typical results that can
	be obtained using each. We also provide detailed protocols for each
	technology so that end users can find the best compromise between
	sensitivity, specificity and uniformity for their particular project.},
  keywords = {Chromosome Mapping; Forecasting; Gene Targeting; In Situ Hybridization;
	Molecular Probe Techniques; Polymerase Chain Reaction; Sequence Analysis,
	DNA},
  institution = {The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus,
	Hinxton, Cambridge, UK.},
  doi = {10.1038/nmeth.1419},
  url = {http://dx.doi.org/10.1038/nmeth.1419},
  pmid = {20111037},
  pii = {nmeth.1419},
  owner = {phupe},
  timestamp = {2010.08.30}
}

@article{Mamitsuka1998Predicting,
  author = {Mamitsuka, H.},
  title = {{P}redicting peptides that bind to {MHC} molecules using supervised
	learning of hidden {M}arkov models.},
  journal = {Proteins},
  volume = {33},
  number = {4},
  pages = {460--474},
  month = {Dec},
  year = {1998},
  abstract = {The binding of a major histocompatibility complex (MHC) molecule to
	a peptide originating in an antigen is essential to recognizing antigens
	in immune systems, and it has proved to be important to use computers
	to predict the peptides that will bind to an MHC molecule. The purpose
	of this paper is twofold: First, we propose to apply supervised learning
	of hidden Markov models (HMMs) to this problem, which can surpass
	existing methods for the problem of predicting MHC-binding peptides.
	Second, we generate peptides that have high probabilities to bind
	to a certain MHC molecule, based on our proposed method using peptides
	binding to MHC molecules as a set of training data. From our experiments,
	in a type of cross-validation test, the discrimination accuracy of
	our supervised learning method is usually approximately 2-15\% better
	than those of other methods, including backpropagation neural networks,
	which have been regarded as the most effective approach to this problem.
	Furthermore, using an HMM trained for HLA-A2, we present new peptide
	sequences that are provided with high binding probabilities by the
	HMM and that are thus expected to bind to HLA-A2 proteins. Peptide
	sequences not shown in this paper but with rather high binding probabilities
	can be obtained from the author.},
  keywords = {immunoinformatics},
  pmid = {9849933},
  pii = {3.0.CO;2-M},
  timestamp = {2007.01.25}
}

@article{Mammen1999Smooth,
  author = {Mammen, E. and Tsybakov, A.},
  title = {Smooth discrimination analysis},
  journal = {Ann. {S}tat.},
  year = {1999},
  volume = {27},
  pages = {1808--1829},
  number = {6},
  doi = {10.1214/aos/1017939240},
  pdf = {../local/Mammen1999Smooth.pdf},
  file = {Mammen1999Smooth.pdf:local/Mammen1999Smooth.pdf:PDF},
  url = {http://dx.doi.org/10.1214/aos/1017939240}
}

@article{Man2004Evaluating,
  author = {Man, M. Z. and Dyson, G. and Johnson, K. and Liao, B.},
  title = {Evaluating methods for classifying expression data.},
  journal = {J. {B}iopharm. {S}tat.},
  year = {2004},
  volume = {14},
  pages = {1065--1084},
  number = {4},
  abstract = {An attractive application of expression technologies is to predict
	drug efficacy or safety using expression data of biomarkers. {T}o
	evaluate the performance of various classification methods for building
	predictive models, we applied these methods on six expression datasets.
	{T}hese datasets were from studies using microarray technologies
	and had either two or more classes. {F}rom each of the original datasets,
	two subsets were generated to simulate two scenarios in biomarker
	applications. {F}irst, a 50-gene subset was used to simulate a candidate
	gene approach when it might not be practical to measure a large number
	of genes/biomarkers. {N}ext, a 2000-gene subset was used to simulate
	a whole genome approach. {W}e evaluated the relative performance
	of several classification methods by using leave-one-out cross-validation
	and bootstrap cross-validation. {A}lthough all methods perform well
	in both subsets for a relative easy dataset with two classes, differences
	in performance do exist among methods for other datasets. {O}verall,
	partial least squares discriminant analysis ({PLS}-{DA}) and support
	vector machines ({SVM}) outperform all other methods. {W}e suggest
	a practical approach to take advantage of multiple methods in biomarker
	applications.},
  doi = {10.1081/BIP-200035491},
  pdf = {../local/Man2004Evaluating.pdf},
  file = {Man2004Evaluating.pdf:local/Man2004Evaluating.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Manallack1999Neural,
  author = {D. T. Manallack and D. J. Livingstone},
  title = {Neural networks in drug-discovery: have they lived up to their
	promise?},
  journal = {Eur. J. Med. Chem.},
  year = {1999},
  volume = {34},
  pages = {195--208},
  owner = {mahe},
  timestamp = {2006.09.06}
}

@article{Manevitz2001One-Class,
  author = {Manevitz, L. M. and Yousef, M.},
  title = {One-Class {SVM}s for Document Classification},
  journal = {J. Mach. Learn. Res.},
  year = {2001},
  volume = {2},
  pages = {139--154},
  pdf = {../local/Manevitz2001One-Class.pdf},
  file = {Manevitz2001One-Class.pdf:Manevitz2001One-Class.pdf:PDF},
  owner = {fantine},
  timestamp = {2009.06.09},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.6.6083}
}

@article{Manly2001impact,
  author = {C. Manly and S. Louise-May and J. Hammer},
  title = {The impact of informatics and computational chemistry on synthesis
	and screening.},
  journal = {Drug {D}iscov. {T}oday},
  volume = {6},
  number = {21},
  pages = {1101--1110},
  month = {Nov},
  year = {2001},
  abstract = {High-throughput synthesis and screening technologies have enhanced
	the impact of computational chemistry on the drug discovery process.
	{F}rom the design of targeted, drug-like libraries to 'virtual' optimization
	of potency, selectivity and {ADME}/{T}ox properties, computational
	chemists are able to efficiently manage costly resources and dramatically
	shorten drug discovery cycle times. {T}his review will describe some
	of the successful strategies and applications of state-of-the-art
	algorithms to enhance drug discovery, as well as key points in the
	drug discovery process where computational methods can have, and
	have had, greatest impact.},
  keywords = {chemoinformatics},
  pmid = {11677167},
  pii = {S1359644601019900},
  owner = {mahe},
  timestamp = {2006.02.03}
}

@article{Mann1947test,
  author = {Mann, H. B. and Whitney, D. R.},
  title = {On a test of whether one of two random variables is stochastically
	larger than the other},
  journal = {The Annals of Mathematical Statistics},
  year = {1947},
  volume = {18},
  pages = {50--60},
  number = {1},
  publisher = {Institute of Mathematical Statistics}
}

@article{Mao2004Feature,
  author = {K. Z. Mao},
  title = {Feature subset selection for support vector machines through discriminative
	function pruning analysis.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2004},
  volume = {34},
  pages = {60--67},
  number = {1},
  month = {Feb},
  abstract = {In many pattern classification applications, data are represented
	by high dimensional feature vectors, which induce high computational
	cost and reduce classification speed in the context of support vector
	machines ({SVM}s). {T}o reduce the dimensionality of pattern representation,
	we develop a discriminative function pruning analysis ({DFPA}) feature
	subset selection method in the present study. {T}he basic idea of
	the {DFPA} method is to learn the {SVM} discriminative function from
	training data using all input variables available first, and then
	to select feature subset through pruning analysis. {I}n the present
	study, the pruning is implement using a forward selection procedure
	combined with a linear least square estimation algorithm, taking
	advantage of linear-in-the-parameter structure of the {SVM} discriminative
	function. {T}he strength of the {DFPA} method is that it combines
	good characters of both filter and wrapper methods. {F}irstly, it
	retains the simplicity of the filter method avoiding training of
	a large number of {SVM} classifier. {S}econdly, it inherits the good
	performance of the wrapper method by taking the {SVM} classification
	algorithm into account.}
}

@article{Mao2005Multiclass,
  author = {Yong Mao and Xiaobo Zhou and Daoying Pi and Youxian Sun and Stephen
	T C Wong},
  title = {Multiclass cancer classification by using fuzzy support vector machine
	and binary decision tree with gene selection.},
  journal = {J {B}iomed {B}iotechnol},
  year = {2005},
  volume = {2005},
  pages = {160--171},
  number = {2},
  abstract = {We investigate the problems of multiclass cancer classification with
	gene selection from gene expression data. {T}wo different constructed
	multiclass classifiers with gene selection are proposed, which are
	fuzzy support vector machine ({FSVM}) with gene selection and binary
	classification tree based on {SVM} with gene selection. {U}sing {F}
	test and recursive feature elimination based on {SVM} as gene selection
	methods, binary classification tree based on {SVM} with {F} test,
	binary classification tree based on {SVM} with recursive feature
	elimination based on {SVM}, and {FSVM} with recursive feature elimination
	based on {SVM} are tested in our experiments. {T}o accelerate computation,
	preselecting the strongest genes is also used. {T}he proposed techniques
	are applied to analyze breast cancer data, small round blue-cell
	tumors, and acute leukemia data. {C}ompared to existing multiclass
	cancer classifiers and binary classification tree based on {SVM}
	with {F} test or binary classification tree based on {SVM} with recursive
	feature elimination based on {SVM} mentioned in this paper, {FSVM}
	based on recursive feature elimination based on {SVM} can find most
	important genes that affect certain types of cancer with high recognition
	accuracy.},
  doi = {10.1155/JBB.2005.160},
  pdf = {../local/Mao2005Multiclass.pdf},
  file = {Mao2005Multiclass.pdf:local/Mao2005Multiclass.pdf:PDF},
  keywords = {biosvm},
  pii = {S1110724304406044_THIS_PII_IS_INCORRECT_},
  url = {http://dx.doi.org/10.1155/JBB.2005.160}
}

@article{Marbach2012Wisdom,
  author = {Marbach, D. and Costello, J. C. and K{\"u}ffner, R. and Vega, N. and
	Prill, R. J. and Camacho, D. M. and Allison, K. R. and {the DREAM5 Consortium}
	and Kellis, M. and Collins, J. J. and Stolovitzky, G.},
  title = {Wisdom of crowds for robust gene network inference},
  journal = {Nat. Methods},
  year = {2012},
  volume = {9},
  pages = {796--804},
  number = {8},
  doi = {10.1038/nmeth.2016},
  pdf = {../local/Marbach2012Wisdom.pdf},
  file = {Marbach2012Wisdom.pdf:Marbach2012Wisdom.pdf:PDF},
  owner = {anne-clairehaury},
  timestamp = {2012.03.23},
  url = {http://dx.doi.org/10.1038/nmeth.2016}
}

@article{Marbach2009Replaying,
  author = {Marbach, D. and Mattiussi, C. and Floreano, D.},
  title = {Replaying the evolutionary tape: biomimetic reverse engineering of
	gene networks.},
  journal = {Ann N Y Acad Sci},
  volume = {1158},
  pages = {234--245},
  month = {Mar},
  year = {2009},
  abstract = {In this paper, we suggest a new approach for reverse engineering gene
	regulatory networks, which consists of using a reconstruction process
	that is similar to the evolutionary process that created these networks.
	The aim is to integrate prior knowledge into the reverse-engineering
	procedure, thus biasing the search toward biologically plausible
	solutions. To this end, we propose an evolutionary method that abstracts
	and mimics the natural evolution of gene regulatory networks. Our
	method can be used with a wide range of nonlinear dynamical models.
	This allows us to explore novel model types such as the log-sigmoid
	model introduced here. We apply the biomimetic method to a gold-standard
	dataset from an in vivo gene network. The obtained results won a
	reverse engineering competition of the second DREAM conference (Dialogue
	on Reverse Engineering Assessments and Methods 2007, New York, NY).},
  keywords = {Algorithms; Biomimetics; Computational Biology; Databases, Genetic;
	Evolution; Gene Regulatory Networks; Models, Biological; Nonlinear
	Dynamics},
  institution = {Laboratory of Intelligent Systems, Ecole Polytechnique Fédérale de
	Lausanne, Lausanne, Switzerland.},
  doi = {10.1111/j.1749-6632.2008.03944.x},
  url = {http://dx.doi.org/10.1111/j.1749-6632.2008.03944.x},
  pmid = {19348645},
  pii = {NYAS03944},
  owner = {fantine},
  timestamp = {2010.10.19}
}

@article{Marbach2010Revealing,
  author = {Marbach, D. and Prill, R. J. and Schaffter, T. and Mattiussi, C.
	and Floreano, D. and Stolovitzky, G.},
  title = {Revealing strengths and weaknesses of methods for gene network inference},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2010},
  volume = {107},
  pages = {6286--6291},
  number = {14},
  abstract = {Numerous methods have been developed for inferring gene regulatory
	networks from expression data, however, both their absolute and comparative
	performance remain poorly understood. In this paper, we introduce
	a framework for critical performance assessment of methods for gene
	network inference. We present an in silico benchmark suite that we
	provided as a blinded, community-wide challenge within the context
	of the DREAM (Dialogue on Reverse Engineering Assessment and Methods)
	project. We assess the performance of 29 gene-network-inference methods,
	which have been applied independently by participating teams. Performance
	profiling reveals that current inference methods are affected, to
	various degrees, by different types of systematic prediction errors.
	In particular, all but the best-performing method failed to accurately
	infer multiple regulatory inputs (combinatorial regulation) of genes.
	The results of this community-wide experiment show that reliable
	network inference from gene expression data remains an unsolved problem,
	and they indicate potential ways of network reconstruction improvements.},
  doi = {10.1073/pnas.0913357107},
  eprint = {http://www.pnas.org/content/107/14/6286.full.pdf+html},
  url = {http://www.pnas.org/content/107/14/6286.abstract}
}

@article{Marbach2009Generating,
  author = {Marbach, D. and Schaffter, T. and Mattiussi, C. and Floreano, D.},
  title = {Generating realistic in silico gene networks for performance assessment
	of reverse engineering methods},
  journal = {J. Comput. Biol.},
  year = {2009},
  volume = {16},
  pages = {229--239},
  number = {2},
  doi = {10.1089/cmb.2008.09TT},
  publisher = {Mary Ann Liebert, Inc.},
  address = {Larchmont, NY, USA},
  url = {http://online.liebertpub.com/doi/abs/10.1089/cmb.2008.09TT}
}

@article{Marchal2003Bioinformatics,
  author = {Marchal, I. and Golfier, G. and Dugas, O. and Majed, M.},
  title = {Bioinformatics in glycobiology.},
  journal = {Biochimie},
  year = {2003},
  volume = {85},
  pages = {75--81},
  number = {1--2},
  abstract = {In comparison with genes and proteins, attention paid to oligosaccharides
	that modify proteins is still marginal. {A}ccordingly, bioinformatics
	is so far poorly involved in glycobiology. {S}ome initiatives have
	been taken, however, to collect in databases all glycobiology-relevant
	information or to design specific data mining algorithms to infer
	predictions or identify oligosaccharide structures. {I}n this review,
	we make a non-exhaustive survey of the available glycobiology-related
	bioinformatic resources, focussing mainly on those resources that
	are available through the {W}orld {W}ide {W}eb. {S}ome well-curated
	databases are identified, but the development of specialised algorithms
	appears to be limited.},
  keywords = {glycans},
  pii = {S0300908403000683}
}

@article{Marcotte1999Detecting,
  author = {Marcotte, E. M. and Pellegrini, M. and Ng, H.-L. and Rice, D. W. and
	Yeates, T. O. and Eisenberg, D.},
  title = {Detecting {P}rotein {F}unction and {P}rotein-{P}rotein {I}nteractions
	from {G}enome {S}equences},
  journal = {Science},
  year = {1999},
  volume = {285},
  pages = {751--753},
  number = {5428},
  pdf = {../local/marc99b.pdf},
  file = {marc99b.pdf:local/marc99b.pdf:PDF},
  subject = {bio},
  url = {http://www.sciencemag.org/cgi/reprint/285/5428/751.pdf}
}

@article{Marcotte1999combined,
  author = {Marcotte, E. M. and Pellegrini, M. and Thompson, M. J. and Yeates,
	T. O. and Eisenberg, D.},
  title = {A combined algorithm for genome-wide prediction of protein function},
  journal = {Nature},
  year = {1999},
  volume = {402},
  pages = {83--86},
  number = {6757},
  month = {November},
  pdf = {../local/marc99.pdf},
  file = {marc99.pdf:local/marc99.pdf:PDF},
  subject = {bio},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v402/n6757/full/402083a0_fs.html&content_filetype=PDF}
}

@article{Mardis2008Impact,
  author = {Elaine R. Mardis},
  title = {The impact of next-generation sequencing technology on genetics},
  journal = {Trends Genet.},
  year = {2008},
  volume = {24},
  pages = {133--141},
  number = {3}
}

@article{Mardis2008Next,
  author = {Elaine R Mardis},
  title = {Next-generation {DNA} sequencing methods.},
  journal = {Annu. Rev. Genomics Hum. Genet.},
  year = {2008},
  volume = {9},
  pages = {387--402},
  abstract = {Recent scientific discoveries that resulted from the application of
	next-generation DNA sequencing technologies highlight the striking
	impact of these massively parallel platforms on genetics. These new
	methods have expanded previously focused readouts from a variety
	of DNA preparation protocols to a genome-wide scale and have fine-tuned
	their resolution to single base precision. The sequencing of RNA
	also has transitioned and now includes full-length cDNA analyses,
	serial analysis of gene expression (SAGE)-based methods, and noncoding
	RNA discovery. Next-generation sequencing has also enabled novel
	applications such as the sequencing of ancient DNA samples, and has
	substantially widened the scope of metagenomic analysis of environmentally
	derived samples. Taken together, an astounding potential exists for
	these technologies to bring enormous change in genetic and biological
	research and to enhance our fundamental biological knowledge.},
  doi = {10.1146/annurev.genom.9.081307.164359},
  institution = {Department of Genetics and Molecular Microbiology and Genome Sequencing
	Center, Washington University School of Medicine, St. Louis MO 63108,
	USA. emardis@wustl.edu},
  keywords = {Chromatin Immunoprecipitation; Fossils; Gene Expression Profiling;
	Genome, Human; Genomics; Humans; RNA, Untranslated; Sequence Analysis,
	DNA},
  owner = {ljacob},
  pmid = {18576944},
  timestamp = {2009.09.14},
  url = {http://dx.doi.org/10.1146/annurev.genom.9.081307.164359}
}

@article{Margolin2006ARACNE,
  author = {Margolin, A. A. and Nemenman, I. and Basso, K. and Wiggins, C. and
	Stolovitzky, G. and Dalla Favera, R. and Califano, A.},
  title = {{ARACNE}: an algorithm for the reconstruction of gene regulatory
	networks in a mammalian cellular context},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {S7},
  number = {Suppl 1},
  abstract = {BACKGROUND: Elucidating gene regulatory networks is crucial for understanding
	normal cell physiology and complex pathologic phenotypes. Existing
	computational methods for the genome-wide "reverse engineering" of
	such networks have been successful only for lower eukaryotes with
	simple genomes. Here we present ARACNE, a novel algorithm, using
	microarray expression profiles, specifically designed to scale up
	to the complexity of regulatory networks in mammalian cells, yet
	general enough to address a wider range of network deconvolution
	problems. This method uses an information theoretic approach to eliminate
	the majority of indirect interactions inferred by co-expression methods.
	RESULTS: We prove that ARACNE reconstructs the network exactly (asymptotically)
	if the effect of loops in the network topology is negligible, and
	we show that the algorithm works well in practice, even in the presence
	of numerous loops and complex topologies. We assess ARACNE's ability
	to reconstruct transcriptional regulatory networks using both a realistic
	synthetic dataset and a microarray dataset from human B cells. On
	synthetic datasets ARACNE achieves very low error rates and outperforms
	established methods, such as Relevance Networks and Bayesian Networks.
	Application to the deconvolution of genetic networks in human B cells
	demonstrates ARACNE's ability to infer validated transcriptional
	targets of the cMYC proto-oncogene. We also study the effects of
	misestimation of mutual information on network reconstruction, and
	show that algorithms based on mutual information ranking are more
	resilient to estimation errors. CONCLUSION: ARACNE shows promise
	in identifying direct transcriptional interactions in mammalian cellular
	networks, a problem that has challenged existing reverse engineering
	algorithms. This approach should enhance our ability to use microarray
	data to elucidate functional mechanisms that underlie cellular processes
	and to identify molecular targets of pharmacological compounds in
	mammalian cellular networks.},
  doi = {10.1186/1471-2105-7-S1-S7},
  pdf = {../local/Margolin2006ARACNE.pdf},
  file = {Margolin2006ARACNE.pdf:Margolin2006ARACNE.pdf:PDF},
  pii = {1471-2105-7-S1-S7},
  pmid = {16723010},
  timestamp = {2008.02.04},
  url = {http://dx.doi.org/10.1186/1471-2105-7-S1-S7}
}

@article{Markowetz2010How,
  author      = {Florian Markowetz},
  title       = {How to understand the cell by breaking it: network analysis of gene
	perturbation screens.},
  journal     = {PLoS Comput Biol},
  year        = {2010},
  volume      = {6},
  number      = {2},
  pages       = {e1000655},
  doi         = {10.1371/journal.pcbi.1000655},
  url         = {http://dx.doi.org/10.1371/journal.pcbi.1000655},
  pmid        = {20195495},
  institution = {Cancer Research UK Cambridge Research Institute, Cambridge, United
	Kingdom.},
  keywords    = {Animals; Cell Physiological Processes; Cluster Analysis; Gene Regulatory
	Networks; Genomics; Humans; Models, Genetic; Models, Statistical;
	Phenotype; Signal Transduction; Systems Biology},
  owner       = {phupe},
  timestamp   = {2010.08.30}
}

@article{Markowetz2003Support,
  author = {F. Markowetz and L. Edler and M. Vingron},
  title = {Support {V}ector {M}achines for {P}rotein {F}old {C}lass {P}rediction},
  journal = {Biometrical {J}ournal},
  year = {2003},
  volume = {45},
  pages = {377--389},
  number = {3},
  abstract = {Knowledge of the three-dimensional structure of a protein is essential
	for describing and understanding its function. {T}oday, a large number
	of known protein sequences faces a small number of identified structures.
	{T}hus, the need arises to predict structure from sequence without
	using time-consuming experimental identification. {I}n this paper
	the performance of {S}upport {V}ector {M}achines ({SVM}s) is compared
	to {N}eural {N}etworks and to standard statistical classification
	methods as {D}iscriminant {A}nalysis and {N}earest {N}eighbor {C}lassification.
	{W}e show that {SVM}s can beat the competing methods on a dataset
	of 268 protein sequences to be classified into a set of 42 fold classes.
	{W}e discuss misclassification with respect to biological function
	and similarity. {I}n a second step we examine the performance of
	{SVM}s if the embedding is varied from frequencies of single amino
	acids to frequencies of tripletts of amino acids. {T}his work shows
	that {SVM} provide a promising alternative to standard statistical
	classification and prediction methods in functional genomics.},
  doi = {10.1002/bimj.200390019},
  pdf = {../local/Markowetz2003Support.pdf},
  file = {Markowetz2003Support.pdf:local/Markowetz2003Support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www3.interscience.wiley.com/cgi-bin/abstract/104525729/START}
}

@article{Markowetz2007Inferring,
  author = {Markowetz, F. and Spang, R.},
  title = {Inferring cellular networks -- a review},
  journal = {BMC Bioinformatics},
  year = {2007},
  volume = {8},
  pages = {S5},
  number = {Suppl 6},
  abstract = {In this review we give an overview of computational and statistical
	methods to reconstruct cellular networks. Although this area of research
	is vast and fast developing, we show that most currently used methods
	can be organized by a few key concepts. The first part of the review
	deals with conditional independence models including Gaussian graphical
	models and Bayesian networks. The second part discusses probabilistic
	and graph-based methods for data from experimental interventions
	and perturbations.},
  doi = {10.1186/1471-2105-8-S6-S5},
  issn = {1471-2105},
  pmid = {17903286},
  url = {http://www.biomedcentral.com/1471-2105/8/S6/S5}
}

@article{Markowitz1952Portfolio,
  author = {H. Markowitz},
  title = {Portfolio Selection},
  journal = {The {J}ournal of {F}inance},
  year = {1952},
  volume = {7},
  pages = {77--91},
  number = {1},
  month = mar
}

@article{Marsland2002self-organising,
  author = {Stephen Marsland and Jonathan Shapiro and Ulrich Nehmzow},
  title = {A self-organising network that grows when required.},
  journal = {Neural {N}etw},
  year = {2002},
  volume = {15},
  pages = {1041--1058},
  number = {8--9},
  abstract = {The ability to grow extra nodes is a potentially useful facility for
	a self-organising neural network. {A} network that can add nodes
	into its map space can approximate the input space more accurately,
	and often more parsimoniously, than a network with predefined structure
	and size, such as the {S}elf-{O}rganising {M}ap. {I}n addition, a
	growing network can deal with dynamic input distributions. {M}ost
	of the growing networks that have been proposed in the literature
	add new nodes to support the node that has accumulated the highest
	error during previous iterations or to support topological structures.
	{T}his usually means that new nodes are added only when the number
	of iterations is an integer multiple of some pre-defined constant,
	{A}. {T}his paper suggests a way in which the learning algorithm
	can add nodes whenever the network in its current state does not
	sufficiently match the input. {I}n this way the network grows very
	quickly when new data is presented, but stops growing once the network
	has matched the data. {T}his is particularly important when we consider
	dynamic data sets, where the distribution of inputs can change to
	a new regime after some time. {W}e also demonstrate the preservation
	of neighbourhood relations in the data by the network. {T}he new
	network is compared to an existing growing network, the {G}rowing
	{N}eural {G}as ({GNG}), on a artificial dataset, showing how the
	network deals with a change in input distribution after some time.
	{F}inally, the new network is applied to several novelty detection
	tasks and is compared with both the {GNG} and an unsupervised form
	of the {R}educed {C}oulomb {E}nergy network on a robotic inspection
	task and with a {S}upport {V}ector {M}achine on two benchmark novelty
	detection tasks.},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric
	Acid Cycle, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases,
	Decision Making, Diagnosis, Differential, Drug, Drug Design, Electrostatics,
	Eukaryotic Cells, Factual, Feasibility Studies, Female, Gene Expression,
	Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic,
	Genetic Heterogeneity, Genetic Markers, Hemolysins, Humans, Internet,
	Ion Exchange, Leukemia, Ligands, Likelihood Functions, Logistic Models,
	Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics,
	Messenger, Models, Molecular, Molecular Probe Techniques, Molecular
	Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural
	Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't,
	Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S.,
	Pattern Recognition, Probability, Probability Learning, Protein Binding,
	Protein Conformation, Proteins, Quality Control, Quantum Theory,
	RNA, RNA Splicing, Receptors, Reference Values, Regression Analysis,
	Reproducibility of Results, Research Support, Robotics, Saccharomyces
	cerevisiae Proteins, Sensitivity and Specificity, Sequence Analysis,
	Signal Processing, Software, Statistical, Stomach Neoplasms, Structural,
	Structure-Activity Relationship, Thermodynamics, Transcription, Tumor
	Markers, U.S. Gov't},
  pmid = {12416693}
}

@article{Martin2005Predicting,
  author = {Martin, S. and Roe, D. and Faulon, J.-L.},
  title = {Predicting protein-protein interactions using signature products},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {218--226},
  number = {2},
  month = {Jan},
  abstract = {Motivation: {P}roteome-wide prediction of protein-protein interaction
	is a difficult and important problem in biology. {A}lthough there
	have been recent advances in both experimental and computational
	methods for predicting protein-protein interactions, we are only
	beginning to see a confluence of these techniques. {I}n this paper,
	we describe a very general, high-throughput method for predicting
	protein-protein interactions. {O}ur method combines a sequence-based
	description of proteins with experimental information that can be
	gathered from any type of protein-protein interaction screen. {T}he
	method uses a novel description of interacting proteins by extending
	the signature descriptor, which has demonstrated success in predicting
	peptide/protein binding interactions for individual proteins. {T}his
	descriptor is extended to protein pairs by taking signature products.
	{T}he signature product is implemented within a support vector machine
	classifier as a kernel function. {R}esults: {W}e have applied our
	method to publicly available yeast, {H}elicobacter pylori, human
	and mouse datasets. {W}e used the yeast and {H}.pylori datasets to
	verify the predictive ability of our method, achieving from 70 to
	80\% accuracy rates using 10-fold cross-validation. {W}e used the
	human and mouse datasets to demonstrate that our method is capable
	of cross-species prediction. {F}inally, we reused the yeast dataset
	to explore the ability of our algorithm to predict domains. {C}ontact:
	smartin@sandia.gov.},
  doi = {10.1093/bioinformatics/bth483},
  pdf = {../local/Martin2005Predicting.pdf},
  file = {Martin2005Predicting.pdf:local/Martin2005Predicting.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/21/2/218}
}

@article{Martin2004Classification,
  author = {T. C. Martin and J. Moecks and A. Belooussov and S. Cawthraw and
	B. Dolenko and M. Eiden and J. von Frese and W. Kohler and J. Schmitt
	and R. Somorjai and T. Udelhoven and S. Verzakov and W. Petrich},
  title = {Classification of signatures of {B}ovine {S}pongiform {E}ncephalopathy
	in serum using infrared spectroscopy.},
  journal = {Analyst},
  year = {2004},
  volume = {129},
  pages = {897--901},
  number = {10},
  month = {Oct},
  abstract = {Signatures of {B}ovine {S}pongiform {E}ncephalopathy ({BSE}) have
	been identified in serum by means of "{D}iagnostic {P}attern {R}ecognition
	({DPR})". {F}or {DPR}-analysis, mid-infrared spectroscopy of dried
	films of 641 serum samples was performed using disposable silicon
	sample carriers and a semi-automated {DPR} research system operating
	at room temperature. {T}he combination of four mathematical classification
	approaches (principal component analysis plus linear discriminant
	analysis, robust linear discriminant analysis, artificial neural
	network, support vector machine) allowed for a reliable assignment
	of spectra to the class "{BSE}-positive" or "{BSE}-negative". {A}n
	independent, blinded validation study was carried out on a second
	{DPR} research system at the {V}eterinary {L}aboratory {A}gency,
	{W}eybridge, {UK}. {O}ut of 84 serum samples originating from terminally-ill,
	{BSE}-positive cattle, 78 were classified correctly. {S}imilarly,
	73 out of 76 {BSE}-negative samples were correctly identified by
	{DPR} such that, numerically, an accuracy of 94.4 \% can be calculated.
	{A}t a confidence level of 0.95 (alpha = 0.05) these results correspond
	to a sensitivity > 85\% and a specificity > 90\%. {I}dentical class
	assignment by all four classifiers occurred in 75\% of the cases
	while ambiguous results were obtained in only 8 of the 160 cases.
	{W}ith an area under the {ROC} (receiver operating charateristics)
	curve of 0.991, {DPR} may potentially supply a valuable surrogate
	marker for {BSE} even in cases in which a deliberate bias towards
	improved sensitivity or specificity is desired. {T}o the best of
	our knowledge, {DPR} is the first and--up to now--only method which
	has demonstrated its capability of detecting {BSE}-related signatures
	in serum.},
  doi = {10.1039/b408950m},
  pdf = {../local/Martin2004Classification.pdf},
  file = {Martin2004Classification.pdf:local/Martin2004Classification.pdf:PDF},
  url = {http://dx.doi.org/10.1039/b408950m}
}

@article{Martin2005bioavailability,
  author    = {Martin, Y. C.},
  title     = {A bioavailability score},
  journal   = {J. Med. Chem.},
  year      = {2005},
  month     = {May},
  volume    = {48},
  number    = {9},
  pages     = {3164--3170},
  doi       = {10.1021/jm0492002},
  url       = {http://dx.doi.org/10.1021/jm0492002},
  pmid      = {15857122},
  keywords  = {chemogenomics},
  abstract  = {Responding to a demonstrated need for scientists to forecast the permeability
	and bioavailability (F) properties of compounds before their purchase,
	synthesis, or advanced testing, we have developed a score that assigns
	the probability that a compound will have F > 10\% in the rat. Neither
	the rule-of-five, log P, log D, nor the combination of the number
	of rotatable bonds and polar surface area successfully categorized
	compounds. Instead, different properties govern the bioavailability
	of compounds depending on their predominant charge at biological
	pH. The fraction of anions with >10\% F falls from 85\% if the polar
	surface area (PSA) is < or = 75 A(2), to 56\% if 75 < PSA < 150 A(2),
	to 11\% if PSA is > or = 150 A(2). On the other hand, whereas 55\%
	of the neutral, zwitterionic, or cationic compounds that pass the
	rule-of-five have >10\% F, only 17\% of those that fail have > 10\%
	F. This same categorization distinguishes compounds that are poorly
	permeable from those that are permeable in Caco-2 cells. Further
	validation is provided with human bioavailability values from the
	literature.},
  owner     = {laurent},
  timestamp = {2008.07.16}
}

@article{Martin1974Discriminant,
  author = {Y. C. Martin and J. B. Holland and C. H. Jarboe and N. Plotnikoff},
  title = {{D}iscriminant analysis of the relationship between physical properties
	and the inhibition of monoamine oxidase by aminotetralins and aminoindans.},
  journal = {J. Med. Chem.},
  year = {1974},
  volume = {17},
  pages = {409--413},
  number = {4},
  month = {Apr},
  keywords = {Animals, Brain, Dihydroxyphenylalanine, Dose-Response Relationship,
	Drug, Drug Synergism, Indenes, Mathematics, Mice, Monoamine Oxidase
	Inhibitors, Naphthalenes, Oxidation-Reduction, Oxotremorine, Reserpine,
	Structure-Activity Relationship, Tryptamines},
  owner = {mahe},
  pmid = {4830537},
  timestamp = {2006.09.06}
}

@article{Martoglio2002decomposition,
  author = {Ann-Marie Martoglio and James W Miskin and Stephen K Smith and David
	J C MacKay},
  title = {A decomposition model to track gene expression signatures: preview
	on observer-independent classification of ovarian cancer.},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {1617--1624},
  number = {12},
  month = {Dec},
  abstract = {{MOTIVATION}: {A} number of algorithms and analytical models have
	been employed to reduce the multidimensional complexity of {DNA}
	array data and attempt to extract some meaningful interpretation
	of the results. {T}hese include clustering, principal components
	analysis, self-organizing maps, and support vector machine analysis.
	{E}ach method assumes an implicit model for the data, many of which
	separate genes into distinct clusters defined by similar expression
	profiles in the samples tested. {A} point of concern is that many
	genes may be involved in a number of distinct behaviours, and should
	therefore be modelled to fit into as many separate clusters as detected
	in the multidimensional gene expression space. {T}he analysis of
	gene expression data using a decomposition model that is independent
	of the observer involved would be highly beneficial to improve standard
	and reproducible classification of clinical and research samples.
	{RESULTS}: {W}e present a variational independent component analysis
	({ICA}) method for reducing high dimensional {DNA} array data to
	a smaller set of latent variables, each associated with a gene signature.
	{W}e present the results of applying the method to data from an ovarian
	cancer study, revealing a number of tissue type-specific and tissue
	type-independent gene signatures present in varying amounts among
	the samples surveyed. {T}he observer independent results of such
	molecular analysis of biological samples could help identify patients
	who would benefit from different treatment strategies. {W}e further
	explore the application of the model to similar high-throughput studies.},
  keywords = {Acute, Algorithms, Automated, Base Pair Mismatch, Base Pairing, Base
	Sequence, Biological, Biosensing Techniques, Cluster Analysis, Comparative
	Study, Computer-Assisted, Cystadenoma, DNA, Female, Gene Expression,
	Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic,
	Genetic Markers, Hemolysins, Humans, Leukemia, Lymphocytic, Markov
	Chains, Messenger, Models, Molecular Probe Techniques, Molecular
	Sequence Data, Nanotechnology, Neoplasm, Neoplastic, Neural Networks
	(Computer), Non-U.S. Gov't, Nucleic Acid Conformation, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, Pattern
	Recognition, Quality Control, RNA, Reference Values, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Signal
	Processing, Statistical, Stomach Neoplasms, Transcription, Tumor
	Markers},
  pmid = {12490446}
}

@article{Maslov2002Specificity,
  author = {Maslov, S. and Sneppen, K.},
  title = {Specificity and stability in topology of protein networks},
  journal = {Science},
  year = {2002},
  volume = {296},
  pages = {910--913},
  number = {5569},
  pdf = {../local/masl02.pdf},
  file = {masl02.pdf:local/masl02.pdf:PDF},
  subject = {bionet},
  url = {http://www.sciencemag.org/cgi/reprint/296/5569/910.pdf}
}

@article{Mason1999New,
  author    = {J. S. Mason and I. Morize and P. R. Menard and D. L. Cheney and C.
	Hulme and R. F. Labaudiniere},
  title     = {{N}ew 4-point pharmacophore method for molecular similarity and diversity
	applications: overview of the method and applications, including
	a novel approach to the design of combinatorial libraries containing
	privileged substructures.},
  journal   = {J. Med. Chem.},
  year      = {1999},
  month     = {Aug},
  volume    = {42},
  number    = {17},
  pages     = {3251--3264},
  doi       = {10.1021/jm9806998},
  url       = {http://dx.doi.org/10.1021/jm9806998},
  pii       = {jm9806998},
  pmid      = {10464012},
  abstract  = {A new 4-point pharmacophore method for molecular similarity and diversity
	that rapidly calculates all potential pharmacophores/pharmacophoric
	shapes for a molecule or a protein site is described. The method,
	an extension to the ChemDiverse/Chem-X software (Oxford Molecular,
	Oxford, England), has also been customized to enable a new internally
	referenced measure of pharmacophore diversity. The "privileged" substructure
	concept for the design of high-affinity ligands is presented, and
	an example of this new method is described for the design of combinatorial
	libraries for 7-transmembrane G-protein-coupled receptor targets,
	where "privileged" substructures are used as special features to
	internally reference the pharmacophoric shapes. Up to 7 features
	and 15 distance ranges are considered, giving up to 350 million potential
	4-point 3D pharmacophores/molecule. The resultant pharmacophore "key"
	("fingerprint") serves as a powerful measure for diversity or similarity,
	calculable for both a ligand and a protein site, and provides a consistent
	frame of reference for comparing molecules, sets of molecules, and
	protein sites. Explicit "on-the-fly" conformational sampling is performed
	for a molecule to enable the calculation of all geometries accessible
	for all combinations of four features (i.e., 4-point pharmacophores)
	at any desired sampling resolution. For a protein site, complementary
	site points to groups displayed in the site are generated and all
	combinations of four site points are considered. In this paper we
	report (i) the details of our customized implementation of the method
	and its modification to systematically measure 4-point pharmacophores
	relative to a "special" substructure of interest present in the molecules
	under study; (ii) comparisons of 3- and 4-point pharmacophore methods,
	highlighting the much increased resolution of the 4-point method;
	(iii) applications of the 4-point potential pharmacophore descriptors
	as a new measure of molecular similarity and diversity and for the
	design of focused/biased combinatorial libraries.},
  owner     = {mahe},
  timestamp = {2006.08.22}
}

@article{Massart2000Some,
  author = {Massart, P.},
  title = {Some applications of concentration inequalities to statistics},
  journal = {Ann. {F}ac. {S}c. {T}oulouse},
  year = {2000},
  volume = {IX},
  pages = {245--303},
  number = {2}
}

@incollection{Matache2002Hilbert,
  author = {Matache, M. T. and Matache, V.},
  title = {Hilbert spaces induced by {T}oeplitz covariance kernels},
  booktitle = {Lecture {N}otes in {C}ontrol and {I}nformation {S}ciences},
  publisher = {Springer},
  year = {2002},
  volume = {280},
  pages = {319--334},
  month = {Jan}
}

@article{Mateos2002Systematic,
  author = {Alvaro Mateos and Joaqu{\'\i}n Dopazo and Ronald Jansen and Yuhai Tu
	and Mark Gerstein and Gustavo Stolovitzky},
  title = {Systematic learning of gene functional classes from {DNA} array expression
	data by using multilayer perceptrons.},
  journal = {Genome {R}es.},
  year = {2002},
  volume = {12},
  pages = {1703--1715},
  number = {11},
  month = {Nov},
  abstract = {Recent advances in microarray technology have opened new ways for
	functional annotation of previously uncharacterised genes on a genomic
	scale. {T}his has been demonstrated by unsupervised clustering of
	co-expressed genes and, more importantly, by supervised learning
	algorithms. {U}sing prior knowledge, these algorithms can assign
	functional annotations based on more complex expression signatures
	found in existing functional classes. {P}reviously, support vector
	machines ({SVM}s) and other machine-learning methods have been applied
	to a limited number of functional classes for this purpose. {H}ere
	we present, for the first time, the comprehensive application of
	supervised neural networks ({SNN}s) for functional annotation. {O}ur
	study is novel in that we report systematic results for $\sim$100 classes
	in the {M}unich {I}nformation {C}enter for {P}rotein {S}equences
	({MIPS}) functional catalog. {W}e found that only $\sim$10\% of these
	are learnable (based on the rate of false negatives). {A} closer
	analysis reveals that false positives (and negatives) in a machine-learning
	context are not necessarily "false" in a biological sense. {W}e show
	that the high degree of interconnections among functional classes
	confounds the signatures that ought to be learned for a unique class.
	{W}e term this the "{B}orges effect" and introduce two new numerical
	indices for its quantification. {O}ur analysis indicates that classification
	systems with a lower {B}orges effect are better suitable for machine
	learning. {F}urthermore, we introduce a learning procedure for combining
	false positives with the original class. {W}e show that in a few
	iterations this process converges to a gene set that is learnable
	with considerably low rates of false positives and negatives and
	contains genes that are biologically related to the original class,
	allowing for a coarse reconstruction of the interactions between
	associated biological pathways. {W}e exemplify this methodology using
	the well-studied tricarboxylic acid cycle.},
  doi = {10.1101/gr.192502},
  pdf = {../local/Mateos2002Systematic.pdf},
  file = {Mateos2002Systematic.pdf:local/Mateos2002Systematic.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric
	Acid Cycle, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases,
	Decision Making, Diagnosis, Differential, Drug, Drug Design, Electrostatics,
	Eukaryotic Cells, Factual, Feasibility Studies, Female, Gene Expression,
	Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic,
	Genetic Heterogeneity, Genetic Markers, Hemolysins, Humans, Internet,
	Ion Exchange, Leukemia, Ligands, Likelihood Functions, Logistic Models,
	Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics,
	Messenger, Models, Molecular, Molecular Probe Techniques, Molecular
	Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural
	Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't,
	Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S.,
	Pattern Recognition, Probability, Protein Binding, Protein Conformation,
	Proteins, Quality Control, Quantum Theory, RNA, RNA Splicing, Receptors,
	Reference Values, Regression Analysis, Reproducibility of Results,
	Research Support, Saccharomyces cerevisiae Proteins, Sensitivity
	and Specificity, Sequence Analysis, Signal Processing, Software,
	Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship,
	Thermodynamics, Transcription, Tumor Markers, U.S. Gov't},
  pmid = {12421757},
  url = {http://dx.doi.org/10.1101/gr.192502}
}

@article{Mathews1999Expanded,
  author = {Mathews, D. H. and Sabina, J. and Zuker, M. and Turner, D. H.},
  title = {Expanded sequence dependence of thermodynamic parameters improves
	prediction of {RNA} secondary structure.},
  journal = {J. {M}ol. {B}iol.},
  year = {1999},
  volume = {288},
  pages = {911--940},
  number = {5},
  month = {May},
  abstract = {An improved dynamic programming algorithm is reported for {RNA} secondary
	structure prediction by free energy minimization. {T}hermodynamic
	parameters for the stabilities of secondary structure motifs are
	revised to include expanded sequence dependence as revealed by recent
	experiments. {A}dditional algorithmic improvements include reduced
	search time and storage for multibranch loop free energies and improved
	imposition of folding constraints. {A}n extended database of 151,503
	nt in 955 structures? determined by comparative sequence analysis
	was assembled to allow optimization of parameters not based on experiments
	and to test the accuracy of the algorithm. {O}n average, the predicted
	lowest free energy structure contains 73 \% of known base-pairs when
	domains of fewer than 700 nt are folded; this compares with 64 \%
	accuracy for previous versions of the algorithm and parameters. {F}or
	a given sequence, a set of 750 generated structures contains one
	structure that, on average, has 86 \% of known base-pairs. {E}xperimental
	constraints, derived from enzymatic and flavin mononucleotide cleavage,
	improve the accuracy of structure predictions.},
  doi = {10.1006/jmbi.1999.2700},
  keywords = {sirna},
  pii = {S0022-2836(99)92700-6},
  url = {http://dx.doi.org/10.1006/jmbi.1999.2700}
}

@article{Mathews1999Expandeda,
  author = {Mathews, D. H. and Sabina, J. and Zuker, M. and Turner, D. H.},
  title = {Expanded sequence dependence of thermodynamic parameters improves
	prediction of {RNA} secondary structure.},
  journal = {J. Mol. Biol.},
  year = {1999},
  volume = {288},
  pages = {911--940},
  number = {5},
  month = {May},
  abstract = {An improved dynamic programming algorithm is reported for RNA secondary
	structure prediction by free energy minimization. Thermodynamic parameters
	for the stabilities of secondary structure motifs are revised to
	include expanded sequence dependence as revealed by recent experiments.
	Additional algorithmic improvements include reduced search time and
	storage for multibranch loop free energies and improved imposition
	of folding constraints. An extended database of 151,503 nt in 955
	structures? determined by comparative sequence analysis was assembled
	to allow optimization of parameters not based on experiments and
	to test the accuracy of the algorithm. On average, the predicted
	lowest free energy structure contains 73 \% of known base-pairs when
	domains of fewer than 700 nt are folded; this compares with 64 \%
	accuracy for previous versions of the algorithm and parameters. For
	a given sequence, a set of 750 generated structures contains one
	structure that, on average, has 86 \% of known base-pairs. Experimental
	constraints, derived from enzymatic and flavin mononucleotide cleavage,
	improve the accuracy of structure predictions.},
  doi = {10.1006/jmbi.1999.2700},
  keywords = {16S, 23S, 5S, Affinity, Algorithms, Aluminum Silicates, Amino Acid,
	Amino Acid Sequence, Amyloidosis, Archaeal, Bacillus, Bacterial,
	Bacterial Proteins, Bacteriophage T4, Base Sequence, Chloroplast,
	Chromatography, Circular Dichroism, Comparative Study, Computational
	Biology, Databases, Electrophoresis, Entropy, Enzyme Stability, Escherichia
	coli, Factual, Fibroblast Growth Factor 2, Flavin Mononucleotide,
	Fluorescence, Genetic, Guanidine, Humans, Huntington Disease, Kinetics,
	Light, Models, Molecular Sequence Data, Non-P.H.S., Non-U.S. Gov't,
	Nucleic Acid Conformation, P.H.S., Peptides, Phylogeny, Polyacrylamide
	Gel, Predictive Value of Tests, Protein Binding, Protein Denaturation,
	Protein Folding, Protein Structure, RNA, Radiation, Recombinant Proteins,
	Research Support, Ribosomal, Scattering, Secondary, Sequence Homology,
	Solutions, Spectrometry, Statistical, Temperature, Thermodynamics,
	Time Factors, Trinucleotide Repeat Expansion, U.S. Gov't, alpha-Amylase,
	10329189},
  owner = {vert},
  pii = {S0022-2836(99)92700-6},
  pmid = {10329189},
  timestamp = {2006.04.27},
  url = {http://dx.doi.org/10.1006/jmbi.1999.2700}
}

@article{Matsuda2005novel,
  author = {Matsuda, A. and Vert, J.-P. and Saigo, H. and Ueda, N. and Toh, H.
	and Akutsu, T.},
  title = {A novel representation of protein sequences for prediction of subcellular
	location using support vector machines},
  journal = {Protein {S}ci.},
  year = {2005},
  volume = {14},
  pages = {2804--2813},
  number = {11},
  abstract = {As the number of complete genomes rapidly increases, accurate methods
	to automatically predict the subcellular location of proteins are
	increasingly useful to help their functional annotation. {I}n order
	to improve the predictive accuracy of the many prediction methods
	developed to date, a novel representation of protein sequences is
	proposed. {T}his representation involves local compositions of amino
	acids and twin amino acids, and local frequencies of distance between
	successive (basic, hydrophobic, and other) amino acids. {F}or calculating
	the local features, each sequence is split into three parts: {N}-terminal,
	middle, and {C}-terminal. {T}he {N}-terminal part is further divided
	into four regions to consider ambiguity in the length and position
	of signal sequences. {W}e tested this representation with support
	vector machines on two data sets extracted from the {SWISS}-{PROT}
	database. {T}hrough fivefold cross-validation tests, overall accuracies
	of more than 87\% and 91\% were obtained for eukaryotic and prokaryotic
	proteins, respectively. {I}t is concluded that considering the respective
	features in the {N}-terminal, middle, and {C}-terminal parts is helpful
	to predict the subcellular location.},
  doi = {10.1110/ps.051597405},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1110/ps.051597405}
}

@article{Matter1999Comparing,
  author = {Matter, H. and P{\"o}tter, T.},
  title = {Comparing 3{D} pharmacophore triplets and 2{D} fingerprints for selecting
	diverse compound subsets},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {1999},
  volume = {39},
  pages = {1211--1225},
  number = {6},
  abstract = {The performance of two important 2D and 3D molecular descriptors for
	rational design to maximize the structural diversity of databases
	is investigated in this publication. Those methods are based either
	on a 2D description using a binary fingerprint, which accounts for
	the absence or presence of molecular fragments, or a 3D description
	based on the geometry of pharmacophoric features encoded in a fingerprint
	(pharmacophoric definition triplets, PDTs). Both descriptors in combination
	with maximum dissimilarity selections, complete linkage hierarchical
	cluster analysis, or sequential dissimilarity selections were compared
	to random subsets as reference. This comparison is based on their
	ability to cover representative biological classes from parent databases
	(coverage analysis) and the degree of separation between active and
	inactive compounds for a biological target from hierarchical clustering
	(cluster separation analysis). While the similarity coefficients
	(Tanimoto, cosine) show only a minor influence, the number of conformations
	to generate the 3D PDT fingerprint lead to remarkably different results.
	PDT fingerprints derived from a lower number of conformers perform
	significantly better, but they are not comparable to a 2D fingerprint-based
	design. When 2D and 3D descriptors are combined with weighting factors
	> 0.5 for 2D fingerprints, a significant improvement of coverage
	and cluster separation results is observed for a small number of
	PDT conformers and medium sized subsets. Some combined descriptors
	outperform 2D fingerprints, but not for all subset populations. Applying
	sequential dissimilarity selection to PDT descriptors reveals that
	its performance is dependent on the initial ordering of compounds,
	while presorting according to 2D fingerprint diversity does not improve
	results. Finally the relationship between biological activity and
	similarity was investigated, showing that PDTs quantify smaller structural
	differences due to the large number of bits in the fingerprint.},
  doi = {10.1021/ci980185h},
  pdf = {../local/Matter1999Comparing.pdf},
  file = {Matter1999Comparing.pdf:Matter1999Comparing.pdf:PDF},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.02.03},
  url = {http://dx.doi.org/10.1021/ci980185h}
}

@article{Mattfeldt2003Classification,
  author = {Mattfeldt, T. and Gottfried, H. W. and Wolter, H. and Schmidt, V.
	and Kestler, H. A. and Mayer, J.},
  title = {Classification of prostatic carcinoma with artificial neural networks
	using comparative genomic hybridization and quantitative stereological
	data},
  journal = {Pathol. {R}es. {P}ract.},
  year = {2003},
  volume = {199},
  pages = {773--784},
  number = {12},
  abstract = {Staging of prostate cancer is a mainstay of treatment decisions and
	prognostication. {I}n the present study, 50 p{T}2{N}0 and 28 p{T}3{N}0
	prostatic adenocarcinomas were characterized by {G}leason grading,
	comparative genomic hybridization ({CGH}), and histological texture
	analysis based on principles of stereology and stochastic geometry.
	{T}he cases were classified by learning vector quantization and support
	vector machines. {T}he quality of classification was tested by cross-validation.
	{C}orrect prediction of stage from primary tumor data was possible
	with an accuracy of 74-80\% from different data sets. {T}he accuracy
	of prediction was similar when the {G}leason score was used as input
	variable, when stereological data were used, or when a combination
	of {CGH} data and stereological data was used. {T}he results of classification
	by learning vector quantization were slightly better than those by
	support vector machines. {A} method is briefly sketched by which
	training of neural networks can be adapted to unequal sample sizes
	per class. {P}rogression from p{T}2 to p{T}3 prostate cancer is correlated
	with complex changes of the epithelial cells in terms of volume fraction,
	of surface area, and of second-order stereological properties. {G}enetically,
	this progression is accompanied by a significant global increase
	in losses and gains of {DNA}, and specifically by increased numerical
	aberrations on chromosome arms 1q, 7p, and 8p.},
  doi = {10.1078/0344-0338-00496},
  keywords = {biosvm, cgh},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1078/0344-0338-00496}
}

@article{Mattfeldt2004Prediction,
  author = {Mattfeldt, T. and Kestler, H. A. and Sinn, H. P.},
  title = {Prediction of the axillary lymph node status in mammary cancer on
	the basis of clinicopathological data and flow cytometry.},
  journal = {Med {B}iol {E}ng {C}omput},
  year = {2004},
  volume = {42},
  pages = {733--739},
  number = {6},
  month = {Nov},
  abstract = {Axillary lymph node status is a major prognostic factor in mammary
	carcinoma. {I}t is clinically desirable to predict the axillary lymph
	node status from data from the mammary cancer specimen. {I}n the
	study, the axillary lymph node status, routine histological parameters
	and flow-cytometric data were retrospectively obtained from 1139
	specimens of invasive mammary cancer. {T}he ten variables: age, tumour
	type, tumour grade, tumour size, skin infiltration, lymphangiosis
	carcinomatosa, p{T}4 category, percentage of tumour cells in {G}2/{M}-
	and {S}-phases of the cell cycle, and ploidy index were considered
	as predictor variables, and the single variable lymph node metastasis
	p{N} (0 for p{N}0, or 1 for p{N}1 or p{N}2) was used as an output
	variable. {A} stepwise logistic regression analysis, with the axillary
	lymph node as a dependent variable, was used for feature selection.
	{O}nly lymphangiosis carcinomatosa and tumour size proved to be significant
	as independent predictor variables; the other variables were non-contributory.
	{T}hree paradigms with supervised learning rules (multilayer perceptron,
	learning vector quantisation and support vector machines) were used
	for the purpose of prediction. {I}f any of these paradigms was used
	with the information from all ten input variables, 73\% of cases
	could be correctly predicted, with specificity ranging from 82 to
	84\% and sensitivity ranging from 60 to 63\%. {I}f only the two significant
	input variables were used, lymphangiosis carcinomatosa and tumour
	diameter, the prediction accuracy was no worse. {N}early identical
	results were obtained by two different techniques of cross-validation
	(leave-one-out against ten-fold cross validation). {I}t was concluded
	that: artificial neural networks can be used for risk stratification
	on the basis of routine data in individual cases of mammary cancer;
	and lymphangiosis carcinomatosa and tumour size are independent predictors
	of axillary lymph node metastasis in mammary cancer.},
  keywords = {breastcancer}
}

@article{Mattfeldt2004Classification,
  author = {Mattfeldt, Torsten and Trijic, Danilo and Gottfried, Hans-Werner
	and Kestler, Hans A.},
  title = {Classification of incidental carcinoma of the prostate using learning
	vector quantization and support vector machines.},
  journal = {Cell {O}ncol},
  year = {2004},
  volume = {26},
  pages = {45--55},
  number = {1--2},
  abstract = {The subclassification of incidental prostatic carcinoma into the categories
	{T}1a and {T}1b is of major prognostic and therapeutic relevance.
	{I}n this paper an attempt was made to find out which properties
	mainly predispose to these two tumor categories, and whether it is
	possible to predict the category from a battery of clinical and histopathological
	variables using newer methods of multivariate data analysis. {T}he
	incidental prostatic carcinomas of the decade 1990-99 diagnosed at
	our department were reexamined. {B}esides acquisition of routine
	clinical and pathological data, the tumours were scored by immunohistochemistry
	for proliferative activity and p53-overexpression. {T}umour vascularization
	(angiogenesis) and epithelial texture were investigated by quantitative
	stereology. {L}earning vector quantization ({LVQ}) and support vector
	machines ({SVM}) were used for the purpose of prediction of tumour
	category from a set of 10 input variables (age, {G}leason score,
	preoperative {PSA} value, immunohistochemical scores for proliferation
	and p53-overexpression, 3 stereological parameters of angiogenesis,
	2 stereological parameters of epithelial texture). {I}n a stepwise
	logistic regression analysis with the tumour categories {T}1a and
	{T}1b as dependent variables, only the {G}leason score and the volume
	fraction of epithelial cells proved to be significant as independent
	predictor variables of the tumour category. {U}sing {LVQ} and {SVM}
	with the information from all 10 input variables, more than 80\% of
	the cases could be correctly predicted as {T}1a or {T}1b category
	with specificity, sensitivity, negative and positive predictive value
	from 74-92\%. {U}sing only the two significant input variables {G}leason
	score and epithelial volume fraction, the accuracy of prediction
	was not worse. {T}hus, descriptive and quantitative texture parameters
	of tumour cells are of major importance for the extent of propagation
	in the prostate gland in incidental prostatic adenocarcinomas. {C}lassical
	statistical tools and neuronal approaches led to consistent conclusions.}
}

@article{Mattick2003Challenging,
  author = {Mattick, John S.},
  title = {Challenging the dogma: the hidden layer of non-protein-coding {RNAs}
	in complex organisms},
  journal = {BioEssays},
  year = {2003},
  volume = {25},
  pages = {930--939},
  keywords = {csbcbook}
}

@article{Mattick2006Non,
  author = {Mattick, John S. and Makunin, Igor V.},
  title = {Non-coding {RNA}},
  journal = {Hum. Mol. Genet.},
  year = {2006},
  volume = {15},
  pages = {R17--R29},
  keywords = {csbcbook}
}

@article{Mavroforakis2005Significance,
  author = {Mavroforakis, Michael and Georgiou, Harris and Dimitropoulos, Nikos
	and Cavouras, Dionisis and Theodoridis, Sergios},
  title = {Significance analysis of qualitative mammographic features, using
	linear classifiers, neural networks and support vector machines.},
  journal = {Eur {J} {R}adiol},
  year = {2005},
  volume = {54},
  pages = {80--89},
  number = {1},
  month = {Apr},
  abstract = {Advances in modern technologies and computers have enabled digital
	image processing to become a vital tool in conventional clinical
	practice, including mammography. {H}owever, the core problem of the
	clinical evaluation of mammographic tumors remains a highly demanding
	cognitive task. {I}n order for these automated diagnostic systems
	to perform in levels of sensitivity and specificity similar to that
	of human experts, it is essential that a robust framework on problem-specific
	design parameters is formulated. {T}his study is focused on identifying
	a robust set of clinical features that can be used as the base for
	designing the input of any computer-aided diagnosis system for automatic
	mammographic tumor evaluation. {A} thorough list of clinical features
	was constructed and the diagnostic value of each feature was verified
	against current clinical practices by an expert physician. {T}hese
	features were directly or indirectly related to the overall morphological
	properties of the mammographic tumor or the texture of the fine-scale
	tissue structures as they appear in the digitized image, while others
	contained external clinical data of outmost importance, like the
	patient's age. {T}he entire feature set was used as an annotation
	list for describing the clinical properties of mammographic tumor
	cases in a quantitative way, such that subsequent objective analyses
	were possible. {F}or the purposes of this study, a mammographic image
	database was created, with complete clinical evaluation descriptions
	and positive histological verification for each case. {A}ll tumors
	contained in the database were characterized according to the identified
	clinical features' set and the resulting dataset was used as input
	for discrimination and diagnostic value analysis for each one of
	these features. {S}pecifically, several standard methodologies of
	statistical significance analysis were employed to create feature
	rankings according to their discriminating power. {M}oreover, three
	different classification models, namely linear classifiers, neural
	networks and support vector machines, were employed to investigate
	the true efficiency of each one of them, as well as the overall complexity
	of the diagnostic task of mammographic tumor characterization. {B}oth
	the statistical and the classification results have proven the explicit
	correlation of all the selected features with the final diagnosis,
	qualifying them as an adequate input base for any type of similar
	automated diagnosis system. {T}he underlying complexity of the diagnostic
	task has justified the high value of sophisticated pattern recognition
	architectures.},
  doi = {10.1016/j.ejrad.2004.12.015},
  pdf = {../local/Mavroforakis2005Significance.pdf},
  file = {Mavroforakis2005Significance.pdf:local/Mavroforakis2005Significance.pdf:PDF},
  keywords = {Algorithms, Animals, Antibiotics, Antineoplastic, Artificial Intelligence,
	Butadienes, Chloroplasts, Comparative Study, Computer Simulation,
	Computer-Assisted, Diagnosis, Disinfectants, Dose-Response Relationship,
	Drug, Drug Toxicity, Electrodes, Electroencephalography, Ethylamines,
	Expert Systems, Feedback, Fungicides, Gene Expression Profiling,
	Genes, Genetic Markers, Humans, Implanted, Industrial, Information
	Storage and Retrieval, Kidney, Kidney Tubules, MEDLINE, Male, Mercuric
	Chloride, Microarray Analysis, Molecular Biology, Motor Cortex, Movement,
	Natural Language Processing, Neural Networks (Computer), Non-P.H.S.,
	Non-U.S. Gov't, Plant Proteins, Predictive Value of Tests, Proteins,
	Proteome, Proximal, Puromycin Aminonucleoside, Rats, Reproducibility
	of Results, Research Support, Sprague-Dawley, Subcellular Fractions,
	Terminology, Therapy, Time Factors, Toxicogenetics, U.S. Gov't, User-Computer
	Interface, 15797296},
  pii = {S0720-048X(05)00023-9},
  url = {http://dx.doi.org/10.1016/j.ejrad.2004.12.015}
}

@article{Mayr2009Novel,
  author = {Mayr, Lorenz M. and Bojanic, Dejan},
  title = {Novel trends in high-throughput screening.},
  journal = {Curr Opin Pharmacol},
  year = {2009},
  volume = {9},
  pages = {580--588},
  number = {5},
  month = {Oct},
  abstract = {High-throughput screening (HTS) is a well-established process for
	lead discovery in Pharma and Biotech companies and is now also being
	used for basic and applied research in academia. It comprises the
	screening of large chemical libraries for activity against biological
	targets via the use of automation, miniaturized assays and large-scale
	data analysis. Since its first advent in the early to mid 1990s,
	the field of HTS has seen not only a continuous change in technology
	and processes, but also an adaptation to various needs in lead discovery.
	HTS has now evolved into a mature discipline that is a crucial source
	of chemical starting points for drug discovery. Whereas in previous
	years much emphasis has been put on a steady increase in screening
	capacity ('quantitative increase') via automation and miniaturization,
	the past years have seen a much greater emphasis on content and quality
	('qualitative increase'). Today, many experts in the field see HTS
	at a crossroad with the need to decide on either higher throughput/more
	experimentation or a greater focus on assays of greater physiological
	relevance, both of which may lead to higher productivity in pharmaceutical
	R\&D. In this paper, we describe the development of HTS over the past
	decade and point out our own ideas for future directions of HTS in
	biomedical research. We predict that the trend toward further miniaturization
	will slow down with the balanced implementation of 384 well, 1536
	well, and 384 low volume well plates. Furthermore, we envisage that
	there will be much more emphasis on rigorous assay and chemical characterization,
	particularly considering that novel and more difficult target classes
	will be pursued. In recent years we have witnessed a clear trend
	in the drug discovery community toward rigorous hit validation by
	the use of orthogonal readout technologies, label free and biophysical
	methodologies. We also see a trend toward a more flexible use of
	the various screening approaches in lead discovery, that is, the
	use of both full deck compound screening as well as the use of focused
	screening and iterative screening approaches. Moreover, we expect
	greater usage of target identification strategies downstream of phenotypic
	screening and the more effective implementation of affinity selection
	technologies as a result of advances in chemical diversity methodologies.
	We predict that, ultimately, each hit finding strategy will be much
	more project-related, tailor-made, and better integrated into the
	broader drug discovery efforts.},
  doi = {10.1016/j.coph.2009.08.004},
  institution = {Novartis Institutes for BioMedical Research, Center of Proteomic
	Chemistry, Protease Platform, Novartis Campus, CH-4002 Basel, Switzerland.
	Lorenz.Mayr@novartis.com},
  keywords = {Animals; Automation, Laboratory; Computer Simulation; Computer-Aided
	Design, trends; Cost-Benefit Analysis; Drug Discovery, economics/standards/trends;
	High-Throughput Screening Assays, economics/standards/trends; Humans;
	Miniaturization; Models, Molecular; Quality Control; Small Molecule
	Libraries; Structure-Activity Relationship; Systems Integration;
	Time Factors},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {S1471-4892(09)00128-3},
  pmid = {19775937},
  timestamp = {2010.07.26},
  url = {http://dx.doi.org/10.1016/j.coph.2009.08.004}
}

@article{Mayr2003Cross-reactive,
  author = {Mayr, Torsten and Igel, Christian and Liebsch, Gregor and Klimant, Ingo
	and Wolfbeis, Otto S.},
  title = {Cross-reactive metal ion sensor array in a micro titer plate format.},
  journal = {Anal {C}hem},
  year = {2003},
  volume = {75},
  pages = {4389--4396},
  number = {17},
  month = {Sep},
  abstract = {A cross-reactive array in a micro titer plate ({MTP}) format is described
	that is based on a versatile and highly flexible scheme. {I}t makes
	use of rather unspecific metal ions probes having almost identical
	fluorescence spectra, thus enabling (a) interrogation at identical
	analytical wavelengths, and (b) imaging of the probes contained in
	the wells of the {MTP} using a {CCD} camera and an array of blue-light-emitting
	diodes as a light source. {T}he unselective response of the indicators
	in the presence of mixtures of five divalent cations generates a
	characteristic pattern that was analyzed by chemometric tools. {T}he
	fluorescence intensity of the indicators was transferred into a time-dependent
	parameter applying a scheme called dual lifetime referencing. {I}n
	this method, the fluorescence decay profile of the indicator is referenced
	against the phosphorescence of an inert reference dye added to the
	system. {T}he intrinsically referenced measurements also were performed
	using blue {LED}s as light sources and a {CCD} camera without intensifiers
	as the detector. {T}he best performance was observed if each well
	was excited by a single {LED}. {T}he assembly allows the detection
	of dye concentrations in the nanomoles-per-liter range without amplification
	and the acquisition of 96 wells simultaneously. {T}he pictures obtained
	form the basis for evaluation by pattern recognition algorithms.
	{S}upport vector machines are capable of predicting the presence
	of significant concentrations of metal ions with high accuracy.},
  keywords = {Agrochemicals, Air Pollutants, Aircraft, Algorithms, Artificial Intelligence,
	Automated, Base Composition, Base Sequence, Bayes Theorem, Carbonic
	Anhydrase Inhibitors, Cluster Analysis, Colonic Neoplasms, Comparative
	Study, Computational Biology, Computer Simulation, Computer Systems,
	Computer-Assisted, Computing Methodologies, Confidence Intervals,
	Cytosine, DNA, Data Interpretation, Databases, Diagnosis, Drug Design,
	Enhancer Elements (Genetics), Environmental Monitoring, Enzyme Inhibitors,
	Ethanol, Exons, Forecasting, Fourier Transform Infrared, Gene Expression
	Profiling, Gene Expression Regulation, Genetic, Genetic Screening,
	Glucuronosyltransferase, Guanine, Humans, Image Interpretation, Isoenzymes,
	Least-Squares Analysis, Leukemia, Linear Models, Lymphoma, Models,
	Molecular, Molecular Conformation, Molecular Sequence Data, Natural
	Disasters, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Oligonucleotide Array Sequence
	Analysis, Online Systems, P.H.S., Pattern Recognition, Pharmaceutical
	Preparations, Phenotype, Photography, Probability, Pyrimidines, Quantitative
	Structure-Activity Relationship, RNA Precursors, RNA Splice Sites,
	RNA Splicing, Radiation, Reproducibility of Results, Research Support,
	Sensitivity and Specificity, Sequence Alignment, Sequence Analysis,
	Signal Processing, Software, Spectroscopy, Statistical, Subtraction
	Technique, Terminology, Thermodynamics, Time Factors, U.S. Gov't,
	Untranslated Regions, Video Recording, Walking, 14632041}
}

@article{Mazumder2012Exact,
  author = {Mazumder, R. and Hastie, T.},
  title = {Exact Covariance Thresholding into Connected Components for Large-Scale Graphical Lasso},
  journal = {J. Mach. Learn. Res.},
  year = {2012},
  volume = {13},
  pages = {781--794},
  month = {Mar},
  pdf = {../local/Mazumder2012Exact.pdf},
  file = {Mazumder2012Exact.pdf:Mazumder2012Exact.pdf:PDF},
  owner = {jp},
  timestamp = {2012.04.14},
  url = {http://www.jmlr.org/papers/volume13/mazumder12a/mazumder12a.pdf}
}

@article{McAuliffe2004Multiple-sequence,
  author = {McAuliffe, J. D. and Pachter, L. and Jordan, M. I.},
  title = {Multiple-sequence functional annotation and the generalized hidden
	{Markov} phylogeny.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1850--1860},
  number = {12},
  month = {Aug},
  abstract = {MOTIVATION: Phylogenetic shadowing is a comparative genomics principle
	that allows for the discovery of conserved regions in sequences from
	multiple closely related organisms. We develop a formal probabilistic
	framework for combining phylogenetic shadowing with feature-based
	functional annotation methods. The resulting model, a generalized
	hidden Markov phylogeny (GHMP), applies to a variety of situations
	where functional regions are to be inferred from evolutionary constraints.
	RESULTS: We show how GHMPs can be used to predict complete
	shared gene structures in multiple primate sequences. We also describe
	shadower, our implementation of such a prediction system. We find
	that shadower outperforms previously reported ab initio gene finders,
	including comparative human-mouse approaches, on a small sample of
	diverse exonic regions. Finally, we report on an empirical analysis
	of shadower's performance which reveals that as few as five well-chosen
	species may suffice to attain maximal sensitivity and specificity
	in exon demarcation. AVAILABILITY: A Web server is available
	at http://bonaire.lbl.gov/shadower},
  doi = {10.1093/bioinformatics/bth153},
  pdf = {../local/McAuliffe2004Multiple-sequence.pdf},
  file = {McAuliffe2004Multiple-sequence.pdf:local/McAuliffe2004Multiple-sequence.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  pii = {bth153},
  pmid = {14988105},
  timestamp = {2006.01.18},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth153}
}

@article{McCarthy2010Third,
  author = {McCarthy, Alice},
  title = {Third generation {DNA} sequencing: {Pacific Biosciences}' single molecule
	real time technology.},
  journal = {Chem Biol},
  year = {2010},
  volume = {17},
  pages = {675--676},
  number = {7},
  month = {Jul},
  doi = {10.1016/j.chembiol.2010.07.004},
  owner = {phupe},
  pii = {S1074-5521(10)00247-4},
  pmid = {20659677},
  timestamp = {2010.08.20},
  url = {http://dx.doi.org/10.1016/j.chembiol.2010.07.004}
}

@article{Mcgann2003Shape,
  author = {Mcgann, M. R. and Almond, H. R. and Nicholls, A. and Grant, A. J.
	and Brown, F. K.},
  title = {{Gaussian} docking functions},
  journal = {Biopolymers},
  year = {2003},
  volume = {68},
  pages = {76--90},
  number = {1},
  abstract = {A shape-based Gaussian docking function is constructed which uses
	Gaussian functions to represent the shapes of individual atoms. A
	set of 20 trypsin ligand-protein complexes are drawn from the Protein
	Data Bank (PDB), the ligands are separated from the proteins, and
	then are docked back into the active sites using numerical optimization
	of this function. It is found that by employing this docking function,
	quasi-Newton optimization is capable of moving ligands great distances
	[on average 7 \r{A} root mean square distance (RMSD)] to locate the
	correctly docked structure. It is also found that a ligand drawn
	from one PDB file can be docked into a trypsin structure drawn from
	any of the trypsin PDB files. This implies that this scoring function
	is not limited to more accurate x-ray structures, as is the case
	for many of the conventional docking methods, but could be extended
	to homology models. {\copyright} 2002 Wiley Periodicals, Inc. Biopolymers
	68: 76-90, 2003},
  address = {Open Eye Scientific Software, Santa Fe, NM 87501, USA; Johnson \&
	Johnson Pharmaceutical Research and Development LLC, Springhouse,
	PA 19477, USA; Astra Zeneca Pharmaceuticals, EST(Chem) 26F17, Mereside,
	Macclesfield, Cheshire, SK10 4TG UK; Johnson \& Johnson Pharmaceutical
	Research and Development, LCC, 1000 Route 202, Raritan, NJ 08869},
  citeulike-article-id = {1112505},
  doi = {10.1002/bip.10207},
  keywords = {docking, energyfunctions, openeye},
  posted-at = {2007-02-19 11:10:26},
  priority = {2},
  url = {http://dx.doi.org/10.1002/bip.10207}
}

@article{McGinnis1983Implementation,
  author = {McGinnis, L. F.},
  title = {Implementation and Testing of a Primal-Dual Algorithm for the Assignment Problem},
  journal = {Operations Research},
  year = {1983},
  volume = {31},
  pages = {277--291},
  number = {2}
}

@article{McGregor1997Clustering,
  author = {McGregor, M. J. and Pallai, V.},
  title = {Clustering of Large Databases of Compounds: Using the {MDL}
	``{Keys}'' as Structural Descriptors},
  journal = {J Chem Inf Comput Sci},
  year = {1997},
  volume = {37},
  pages = {443--448},
  owner = {mahe},
  timestamp = {2006.08.22}
}

@article{McGregor1999Pharmacophore,
  author = {M. J. McGregor and S. M. Muskal},
  title = {{P}harmacophore fingerprinting. 1. {A}pplication to {QSAR} and focused
	library design.},
  journal = {J Chem Inf Comput Sci},
  year = {1999},
  volume = {39},
  pages = {569--574},
  number = {3},
  abstract = {A new method of rapid pharmacophore fingerprinting (PharmPrint method)
	has been developed. A basis set of 10,549 three-point pharmacophores
	has been constructed by enumerating several distance ranges and pharmacophoric
	features. Software has been developed to assign pharmacophoric types
	to atoms in chemical structures, generate multiple conformations,
	and construct the binary fingerprint according to the pharmacophores
	that result. The fingerprint is used as a descriptor for developing
	a quantitative structure-activity relationship (QSAR) model using
	partial least squares. An example is given using sets of ligands
	for the estrogen receptor (ER). The result is compared with previously
	published results on the same data to show the superiority of a full
	3D, conformationally flexible approach. The QSAR model can be readily
	interpreted in structural/chemical terms. Further examples are given
	using binary activity data and some of our novel in-house compounds,
	which show the value of the model when crossing compound classes.},
  keywords = {Chemistry, Combinatorial Chemistry Techniques, Drug Design, Drug Evaluation,
	Estradiol Congeners, Estrogen, Least-Squares Analysis, Ligands, Models,
	Molecular, Pharmaceutical, Preclinical, Receptors, Software, Structure-Activity
	Relationship},
  owner = {mahe},
  pmid = {10361729},
  timestamp = {2006.08.22}
}

@article{McKernan2009Sequence,
  author = {Kevin Judd McKernan and Heather E Peckham and Gina L Costa and Stephen
	F McLaughlin and Yutao Fu and Eric F Tsung and Christopher R Clouser
	and Cisyla Duncan and Jeffrey K Ichikawa and Clarence C Lee and Zheng
	Zhang and Swati S Ranade and Eileen T Dimalanta and Fiona C Hyland
	and Tanya D Sokolsky and Lei Zhang and Andrew Sheridan and Haoning
	Fu and Cynthia L Hendrickson and Bin Li and Lev Kotler and Jeremy
	R Stuart and Joel A Malek and Jonathan M Manning and Alena A Antipova
	and Damon S Perez and Michael P Moore and Kathleen C Hayashibara
	and Michael R Lyons and Robert E Beaudoin and Brittany E Coleman
	and Michael W Laptewicz and Adam E Sannicandro and Michael D Rhodes
	and Rajesh K Gottimukkala and Shan Yang and Vineet Bafna and Ali
	Bashir and Andrew MacBride and Can Alkan and Jeffrey M Kidd and Evan
	E Eichler and Martin G Reese and Francisco M De La Vega and Alan
	P Blanchard},
  title = {Sequence and structural variation in a human genome uncovered by
	short-read, massively parallel ligation sequencing using two-base
	encoding.},
  journal = {Genome Res.},
  year = {2009},
  volume = {19},
  pages = {1527--1541},
  number = {9},
  month = {Sep},
  abstract = {We describe the genome sequencing of an anonymous individual of African
	origin using a novel ligation-based sequencing assay that enables
	a unique form of error correction that improves the raw accuracy
	of the aligned reads to >99.9\%, allowing us to accurately call SNPs
	with as few as two reads per allele. We collected several billion
	mate-paired reads yielding approximately 18x haploid coverage of
	aligned sequence and close to 300x clone coverage. Over 98\% of the
	reference genome is covered with at least one uniquely placed read,
	and 99.65\% is spanned by at least one uniquely placed mate-paired
	clone. We identify over 3.8 million SNPs, 19\% of which are novel.
	Mate-paired data are used to physically resolve haplotype phases
	of nearly two-thirds of the genotypes obtained and produce phased
	segments of up to 215 kb. We detect 226,529 intra-read indels, 5590
	indels between mate-paired reads, 91 inversions, and four gene fusions.
	We use a novel approach for detecting indels between mate-paired
	reads that are smaller than the standard deviation of the insert
	size of the library and discover deletions in common with those detected
	with our intra-read approach. Dozens of mutations previously described
	in OMIM and hundreds of nonsynonymous single-nucleotide and structural
	variants in genes previously implicated in disease are identified
	in this individual. There is more genetic variation in the human
	genome still to be uncovered, and we provide guidance for future
	surveys in populations and cancer biopsies.},
  doi = {10.1101/gr.091868.109},
  pdf = {../local/McKernan2009Sequence.pdf},
  file = {McKernan2009Sequence.pdf:McKernan2009Sequence.pdf:PDF},
  institution = {Life Technologies, Beverly, Massachusetts 01915, USA. Kevin.McKernan@appliedbiosystems.com},
  keywords = {ngs},
  owner = {jp},
  pii = {gr.091868.109},
  pmid = {19546169},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1101/gr.091868.109}
}

@article{McKnight2003Categorization,
  author = {Larry McKnight and Padmini Srinivasan},
  title = {Categorization of sentence types in medical abstracts.},
  journal = {A{MIA} {A}nnu {S}ymp {P}roc},
  year = {2003},
  pages = {440--444},
  abstract = {This study evaluated the use of machine learning techniques in the
	classification of sentence type. 7253 structured abstracts and 204
	unstructured abstracts of {R}andomized {C}ontrolled {T}rials from
	{M}ed{LINE} were parsed into sentences and each sentence was labeled
	as one of four types ({I}ntroduction, {M}ethod, {R}esult, or {C}onclusion).
	{S}upport {V}ector {M}achine ({SVM}) and {L}inear {C}lassifier models
	were generated and evaluated on cross-validated data. {T}reating
	sentences as a simple "bag of words", the {SVM} model had an average
	{ROC} area of 0.92. {A}dding a feature of relative sentence location
	improved performance markedly for some models and overall increasing
	the average {ROC} to 0.95. {L}inear classifier performance was significantly
	worse than the {SVM} in all datasets. {U}sing the {SVM} model trained
	on structured abstracts to predict unstructured abstracts yielded
	performance similar to that of models trained with unstructured abstracts
	in 3 of the 4 types. {W}e conclude that classification of sentence
	type seems feasible within the domain of {RCT}'s. {I}dentification
	of sentence types may be helpful for providing context to end users
	or other text summarization techniques.},
  keywords = {biosvm},
  pii = {D030003164}
}

@article{McKusick2007Mendelian,
  author = {McKusick, V. A.},
  title = {Mendelian Inheritance in Man and its online version, OMIM.},
  journal = {Am. J. Hum. Genet.},
  year = {2007},
  volume = {80},
  pages = {588--604},
  number = {4},
  month = {Apr},
  doi = {10.1086/514346},
  institution = {McKusick-Nathans Institute of Genetic Medicine, Johns Hopkins University
	School of Medicine, Baltimore, MD, USA. mckusick@peas.welch.jhu.edu},
  keywords = {Databases, Genetic; Epigenesis, Genetic; Genetic Predisposition to
	Disease; Genetic Variation; History, 20th Century; History, 21st
	Century; Internet; Phenotype; Terminology as Topic},
  owner = {mordelet},
  pii = {S0002-9297(07)61121-5},
  pmid = {17357067},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1086/514346}
}

@article{McManus2002Gene,
  author = {McManus, M. T. and Sharp, P. A.},
  title = {{G}ene silencing in mammals by small interfering {RNA}s.},
  journal = {Nat. Rev. Genet.},
  year = {2002},
  volume = {3},
  pages = {737--747},
  number = {10},
  month = {Oct},
  abstract = {Among the 3 billion base pairs of the human genome, there are approximately
	30,000-40,000 protein-coding genes, but the function of at least
	half of them remains unknown. A new tool - short interfering RNAs
	(siRNAs) - has now been developed for systematically deciphering
	the functions and interactions of these thousands of genes. siRNAs
	are an intermediate of RNA interference, the process by which double-stranded
	RNA silences homologous genes. Although the use of siRNAs to silence
	genes in vertebrate cells was only reported a year ago, the emerging
	literature indicates that most vertebrate genes can be studied with
	this technology.},
  doi = {10.1038/nrg908},
  pdf = {../local/McManus2002Gene.pdf},
  file = {McManus2002Gene.pdf:McManus2002Gene.pdf:PDF},
  keywords = {sirna},
  owner = {vert},
  pii = {nrg908},
  pmid = {12360232},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1038/nrg908}
}

@article{McMichael2002quest,
  author = {McMichael, A. and Hanke, T.},
  title = {{T}he quest for an {AIDS} vaccine: is the {CD}8+ {T}-cell approach
	feasible?},
  journal = {Nat. Rev. Immunol.},
  year = {2002},
  volume = {2},
  pages = {283--291},
  number = {4},
  month = {Apr},
  abstract = {The rationale for developing anti-HIV vaccines that stimulate cytotoxic
	T-lymphocyte responses is given. We argue that such vaccines will
	work, provided that attention is paid to the development of memory
	T-cell responses that are strong and preferably activated. Furthermore,
	the vaccine should match the prevailing virus clade as closely as
	possible. Vaccines will have to stimulate a wide range of responses,
	but it is not clear how this can be achieved.},
  doi = {10.1038/nri779},
  keywords = {immunoinformatics},
  pmid = {12001999},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1038/nri779}
}

@article{Meier2008group,
  author = {Meier, L. and van de Geer, S. and B{\"u}hlmann, P.},
  title = {The group lasso for logistic regression},
  journal = {J. R. Stat. Soc. Ser. B},
  year = {2008},
  volume = {70},
  pages = {53--71},
  number = {1},
  doi = {10.1111/j.1467-9868.2007.00627.x},
  pdf = {../local/Meier2008The.pdf},
  file = {Meier2008The.pdf:Meier2008The.pdf:PDF},
  keywords = {lasso},
  url = {http://dx.doi.org/10.1111/j.1467-9868.2007.00627.x}
}

@article{Meinicke2004Oligo,
  author = {Meinicke, P. and Tech, M. and Morgenstern, B. and Merkl, R.},
  title = {Oligo kernels for datamining on biological sequences: a case study
	on prokaryotic translation initiation sites.},
  journal = {B{MC} {B}ioinformatics},
  year = {2004},
  volume = {5},
  pages = {169},
  abstract = {Background {K}ernel-based learning algorithms are among the most advanced
	machine learning methods and have been successfully applied to a
	variety of sequence classification tasks within the field of bioinformatics.
	{C}onventional kernels utilized so far do not provide an easy interpretation
	of the learnt representations in terms of positional and compositional
	variability of the underlying biological signals. {R}esults {W}e
	propose a kernel-based approach to datamining on biological sequences.
	{W}ith our method it is possible to model and analyze positional
	variability of oligomers of any length in a natural way. {O}n one
	hand this is achieved by mapping the sequences to an intuitive but
	high-dimensional feature space, well-suited for interpretation of
	the learnt models. {O}n the other hand, by means of the kernel trick
	we can provide a general learning algorithm for that high-dimensional
	representation because all required statistics can be computed without
	performing an explicit feature space mapping of the sequences. {B}y
	introducing a kernel parameter that controls the degree of position-dependency,
	our feature space representation can be tailored to the characteristics
	of the biological problem at hand. {A} regularized learning scheme
	enables application even to biological problems for which only small
	sets of example sequences are available. {O}ur approach includes
	a visualization method for transparent representation of characteristic
	sequence features. {T}hereby importance of features can be measured
	in terms of discriminative strength with respect to classification
	of the underlying sequences. {T}o demonstrate and validate our concept
	on a biochemically well-defined case, we analyze {E}. coli translation
	initiation sites in order to show that we can find biologically relevant
	signals. {F}or that case, our results clearly show that the {S}hine-{D}algarno
	sequence is the most important signal upstream a start codon. {T}he
	variability in position and composition we found for that signal
	is in accordance with previous biological knowledge. {W}e also find
	evidence for signals downstream of the start codon, previously introduced
	as transcriptional enhancers. {T}hese signals are mainly characterized
	by occurrences of adenine in a region of about 4 nucleotides next
	to the start codon. {C}onclusions {W}e showed that the oligo kernel
	can provide a valuable tool for the analysis of relevant signals
	in biological sequences. {I}n the case of translation initiation
	sites we could clearly deduce the most discriminative motifs and
	their positional variation from example sequences. {A}ttractive features
	of our approach are its flexibility with respect to oligomer length
	and position conservation. {B}y means of these two parameters oligo
	kernels can easily be adapted to different biological problems.},
  doi = {10.1186/1471-2105-5-169},
  pdf = {../local/Meinicke2004Oligo.pdf},
  file = {Meinicke2004Oligo.pdf:local/Meinicke2004Oligo.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.biomedcentral.com/1471-2105/5/169}
}

@misc{Meinshausen2009Stability,
  author = {Meinshausen, Nicolai and B{\"u}hlmann, Peter},
  title = {Stability Selection},
  month = {May},
  year = {2009},
  abstract = {Estimation of structure, such as in variable selection, graphical
	modelling or cluster analysis is notoriously difficult, especially
	for high-dimensional data. We introduce stability selection. It is
	based on subsampling in combination with (high-dimensional) selection
	algorithms. As such, the method is extremely general and has a very
	wide range of applicability. Stability selection provides finite
	sample control for some error rates of false discoveries and hence
	a transparent principle to choose a proper amount of regularisation
	for structure estimation. Variable selection and structure estimation
	improve markedly for a range of selection methods if stability selection
	is applied. We prove for randomised Lasso that stability selection
	will be variable selection consistent even if the necessary conditions
	needed for consistency of the original Lasso method are violated.
	We demonstrate stability selection for variable selection and Gaussian
	graphical modelling, using real and simulated data.},
  comment = {How to choose the right amount of regularization.},
  eprint = {0809.2932},
  archiveprefix = {arXiv},
  keywords = {model\_selection, regularization, resampling, sensitivity},
  url = {http://arxiv.org/abs/0809.2932}
}

@article{Meinshausen2010Stability,
  author = {Meinshausen, N. and B{\"u}hlmann, P.},
  title = {Stability selection},
  journal = {J. R. Stat. Soc. Ser. B},
  year = {2010},
  volume = {72},
  pages = {417--473},
  number = {4},
  doi = {10.1111/j.1467-9868.2010.00740.x},
  pdf = {../local/Meinshausen2010Stability.pdf},
  file = {Meinshausen2010Stability.pdf:Meinshausen2010Stability.pdf:PDF},
  owner = {jp},
  timestamp = {2010.11.15},
  url = {http://dx.doi.org/10.1111/j.1467-9868.2010.00740.x}
}

@article{Meinshausen2006High,
  author = {Meinshausen, N. and B{\"u}hlmann, P.},
  title = {High dimensional graphs and variable selection with the Lasso},
  journal = {Ann. Stat.},
  year = {2006},
  volume = {34},
  pages = {1436--1462},
  abstract = {The pattern of zero entries in the inverse covariance matrix of a
	multivariate normal distribution corresponds to conditional independence
	restrictions between variables. Covariance selection aims at estimating
	those structural zeros from data. We show that neighborhood selection
	with the Lasso is a computationally attractive alternative to standard
	covariance selection for sparse high-dimensional graphs. Neighborhood
	selection estimates the conditional independence restrictions separately
	for each node in the graph and is hence equivalent to variable selection
	for Gaussian linear models. We show that the proposed neighborhood
	selection scheme is consistent for sparse high-dimensional graphs.
	Consistency hinges on the choice of the penalty parameter. The oracle
	value for optimal prediction does not lead to a consistent neighborhood
	estimate. Controlling instead the probability of falsely joining
	some distinct connectivity components of the graph, consistent estimation
	for sparse graphs is achieved (with exponential rates), even when
	the number of variables grows as the number of observations raised
	to an arbitrary power.},
  doi = {10.1214/009053606000000281},
  pdf = {../local/Meinshausen2006High.pdf},
  file = {Meinshausen2006High.pdf:Meinshausen2006High.pdf:PDF},
  owner = {jp},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1214/009053606000000281}
}

@article{Meinshausen2007Discussion,
  author = {N. Meinshausen and G. Rocha and B. Yu},
  title = {Discussion: A tale of three cousins: Lasso, L2Boosting and Dantzig},
  journal = {Ann. Stat.},
  year = {2007},
  volume = {35},
  pages = {2373},
  doi = {10.1214/009053607000000460},
  url = {http://dx.doi.org/10.1214/009053607000000460}
}

@article{Meireles2003Differentially,
  author = {Meireles, S. I. and Carvalho, A. F. and Hirata, R. and Montagnini,
	A. L. and Martins, W. K. and Runza, F. B. and Stolf, B. S. and Termini,
	L. and Neto, C. E. and Silva, R. L. and Soares, F. A. and Neves, E. J.
	and Reis, L. F.},
  title = {Differentially expressed genes in gastric tumors identified by c{DNA}
	array.},
  journal = {Cancer {L}ett.},
  year = {2003},
  volume = {190},
  pages = {199--211},
  number = {2},
  month = {Feb},
  abstract = {Using c{DNA} fragments from the {FAPESP}/l{ICR} {C}ancer {G}enome
	{P}roject, we constructed a c{DNA} array having 4512 elements and
	determined gene expression in six normal and six tumor gastric tissues.
	{U}sing t-statistics, we identified 80 c{DNA}s whose expression in
	normal and tumor samples differed more than 3.5 sample standard deviations.
	{U}sing {S}elf-{O}rganizing {M}ap, the expression profile of these
	c{DNA}s allowed perfect separation of malignant and non-malignant
	samples. {U}sing the supervised learning procedure {S}upport {V}ector
	{M}achine, we identified trios of c{DNA}s that could be used to classify
	samples as normal or tumor, based on single-array analysis. {F}inally,
	we identified genes with altered linear correlation when their expression
	in normal and tumor samples were compared. {F}urther investigation
	concerning the function of these genes could contribute to the understanding
	of gastric carcinogenesis and may prove useful in molecular diagnostics.},
  doi = {10.1016/S0304-3835(02)00587-6},
  pdf = {../local/Meireles2003Differentially.pdf},
  file = {Meireles2003Differentially.pdf:local/Meireles2003Differentially.pdf:PDF},
  keywords = {biosvm microarray},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0304-3835(02)00587-6}
}

@article{Meister2004Mechanisms,
  author = {Meister, G. and Tuschl, T.},
  title = {Mechanisms of gene silencing by double-stranded {RNA}.},
  journal = {Nature},
  year = {2004},
  volume = {431},
  pages = {343--349},
  number = {7006},
  month = {Sep},
  abstract = {Double-stranded {RNA} (ds{RNA}) is an important regulator of gene
	expression in many eukaryotes. {I}t triggers different types of gene
	silencing that are collectively referred to as {RNA} silencing or
	{RNA} interference. {A} key step in known silencing pathways is the
	processing of ds{RNA}s into short {RNA} duplexes of characteristic
	size and structure. {T}hese short ds{RNA}s guide {RNA} silencing
	by specific and distinct mechanisms. {M}any components of the {RNA}
	silencing machinery still need to be identified and characterized,
	but a more complete understanding of the process is imminent.},
  doi = {10.1038/nature02873},
  pdf = {../local/Meister2004Mechanisms.pdf},
  file = {Meister2004Mechanisms.pdf:local/Meister2004Mechanisms.pdf:PDF},
  keywords = {sirna},
  pii = {nature02873},
  url = {http://dx.doi.org/10.1038/nature02873}
}

@article{Mello2004Revealing,
  author = {Mello, Craig C. and Conte, Jr., Darryl},
  title = {Revealing the world of {RNA} interference},
  journal = {Nature},
  year = {2004},
  volume = {431},
  pages = {338--342},
  keywords = {csbcbook}
}

@book{Menard2001Applied,
  title = {Applied logistic regression analysis},
  publisher = {Sage Publications, Incorporated},
  year = {2001},
  author = {Menard, S.},
  volume = {106}
}

@inproceedings{Menchetti2005Weighted,
  author = {S. Menchetti and F. Costa and P. Frasconi},
  title = {Weighted decomposition kernels},
  booktitle = {Proceedings of the {T}wenty-{S}econd {I}nternational {C}onference
	on {M}achine {L}earning ({ICML} 2005)},
  year = {2005},
  editor = {De Raedt, L. and Wrobel, S.},
  pages = {585--592},
  publisher = {ACM Press},
  owner = {pmahe},
  timestamp = {2007.11.21}
}

@unpublished{Mendelson2003Geometric,
  author = {S. Mendelson},
  title = {Geometric parameters in {L}earning {T}heory},
  note = {Lecture notes},
  year = {2003}
}

@article{Puijalon2004Malaria,
  author = {O. Mercereau-Puijalon},
  title = {Malaria research in the post-genomic era},
  journal = {J. Soc. Biol.},
  year = {2004},
  volume = {198},
  pages = {193--197},
  number = {3},
  abstract = {Genomic sequence determination of Plasmodium falciparum and other
	species of the genus, as well as that of Anopheles gambiae, and human,
	rat and mouse genome sequencing have completely changed the landscape
	of fundamental research about malaria. These data should urgently
	be exploited, in order to develop new tools to combat the disease:
	new drugs, fine dissection of the cascade of events following infection
	of the various vector species and vertebrate host, analysis of the
	complex interaction leading to the pathology or, inversely, contributing
	to sustained protection. Powerful population biology tools are now
	available, allowing to investigate genetic exchanges within natural
	population and to identify factors structuring parasitic and vector
	populations. Nevertheless, important impediments persist, including
	the complexity of experimental systems and the unclear relevance
	of animals models. Numerous challenges are to be faced; they call
	upon a more efficient organisation of research efforts in the systematic
	explorations using the powerful novel post-genomic technologies,
	as well as the development of new tools and experimental models required
	by functional genomics and integrative biology.},
  keywords = {plasmodium},
  pmid = {15662935},
  timestamp = {2006.04.13}
}

@article{Mercier2004Biological,
  author = {Mercier, G. and Berthault, N. and Mary, J. and Peyre, J. and Antoniadis,
	A. and Comet, J.-P. and Cornuejols, A. and Froidevaux, C. and Dutreix,
	M.},
  title = {Biological detection of low radiation doses by combining results
	of two microarray analysis methods.},
  journal = {Nucleic {A}cids {R}es.},
  year = {2004},
  volume = {32},
  pages = {e12},
  number = {1},
  abstract = {The accurate determination of the biological effects of low doses
	of pollutants is a major public health challenge. {DNA} microarrays
	are a powerful tool for investigating small intracellular changes.
	{H}owever, the inherent low reliability of this technique, the small
	number of replicates and the lack of suitable statistical methods
	for the analysis of such a large number of attributes (genes) impair
	accurate data interpretation. {T}o overcome this problem, we combined
	results of two independent analysis methods ({ANOVA} and {RELIEF}).
	{W}e applied this analysis protocol to compare gene expression patterns
	in {S}accharomyces cerevisiae growing in the absence and continuous
	presence of varying low doses of radiation. {G}lobal distribution
	analysis highlights the importance of mitochondrial membrane functions
	in the response. {W}e demonstrate that microarrays detect cellular
	changes induced by irradiation at doses that are 1000-fold lower
	than the minimal dose associated with mutagenic effects.},
  doi = {10.1093/nar/gnh002},
  owner = {vert},
  pii = {32/1/e12},
  pmid = {14722227},
  timestamp = {2006.02.27},
  url = {http://dx.doi.org/10.1093/nar/gnh002}
}

@article{Mercier2001Transcriptional,
  author = {Mercier, G. and Denis, Y. and Marc, P. and Picard, L. and Dutreix,
	M.},
  title = {Transcriptional induction of repair genes during slowing of replication
	in irradiated {S}accharomyces cerevisiae.},
  journal = {Mutat. {R}es.},
  year = {2001},
  volume = {487},
  pages = {157--172},
  number = {3-4},
  month = {Dec},
  abstract = {We investigated the inhibition of cell-cycle progression and replication
	and the induction of the transcriptional response in diploid budding
	yeast populations exposed to two different doses of gamma-rays resulting
	in 15 and 85\% survival respectively. {W}e studied the kinetics of
	the cellular response to ionizing treatment during the period required
	for all of the surviving cells to achieve at least one cell division.
	{T}he length of these periods increased with the dose. {I}rradiated
	populations arrested as large-budded cells containing partially replicated
	chromosomes. {T}he extent of the {S}-phase was proportional to the
	amount of damage and lasted 3 or 7h depending on the irradiation
	dose. {I}n parallel to the division study, we carried out a kinetic
	analysis of the expression of 126 selected genes by use of dedicated
	microarrays. {A}bout 26 genes were induced by irradiation and displayed
	various pattern of expression. {I}nterestingly, 10 repair genes ({RAD}51,
	{RAD}54, {CDC}8, {MSH}2, {RFA}2, {RFA}3, {UBC}5, {SRS}2, {SPO}12
	and {TOP}1), involved in recombination and {DNA} synthesis, display
	similar regulation of expression in the two irradiated populations.
	{T}heir pattern of expression were confirmed by {N}orthern analysis.
	{A}t the two doses, the expression of this group of genes closely
	followed the extended replication period, and their expression resumed
	when replication restarted. {T}hese results suggest that the damage-induced
	response and {DNA} synthesis are closely regulated during repair.
	{T}he analysis of the promoter regions indicates a high occurrence
	of the three {MCB}, {HAP} and {UASH} regulatory boxes in the promoters
	of this group of genes. {T}he association of the three boxes could
	confer an irradiation-replication specific regulation.},
  owner = {vert},
  pii = {S0921877701001161},
  pmid = {11738942},
  timestamp = {2006.02.27}
}

@article{Merhav1998Universal,
  author = {Merhav, N. and Feder, M. },
  title = {Universal prediction},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1998},
  volume = {44},
  pages = {2124--2147},
  number = {6},
  month = {Oct},
  abstract = {This paper consists of an overview on universal prediction from an
	information-theoretic perspective. {S}pecial attention is given to
	the notion of probability assignment under the self-information loss
	function, which is directly related to the theory of universal data
	compression. {B}oth the probabilistic setting and the deterministic
	setting of the universal prediction problem are described with emphasis
	on the analogy and the differences between results in the two settings},
  pdf = {../local/Merhav1998Universal.pdf},
  file = {Merhav1998Universal.pdf:local/Merhav1998Universal.pdf:PDF},
  owner = {vert}
}

@article{Merhav1995strong,
  author = {Merhav, N. and Feder, M.},
  title = {A strong version of the redundancy-capacity theorem of universal coding},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1995},
  volume = {41},
  pages = {714--722},
  number = {3},
  month = {May},
  abstract = {The capacity of the channel induced by a given class of sources is
	well known to be an attainable lower bound on the redundancy of universal
	codes with respect to this class, both in the minimax sense and in
	the {B}ayesian (maximin) sense. {W}e show that this capacity is essentially
	a lower bound also in a stronger sense, that is, for ``most'' sources
	in the class. {T}his result extends {R}issanen's (1984, 1986) lower
	bound for parametric families. {W}e demonstrate the applicability
	of this result in several examples, e.g., parametric families with
	growing dimensionality, piecewise-fixed sources, arbitrarily varying
	sources, and noisy samples of learnable functions. {F}inally, we
	discuss implications of our results to statistical inference },
  pdf = {../local/Merhav1995strong.pdf},
  file = {Merhav1995strong.pdf:local/Merhav1995strong.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Merhav1993Universal,
  author = {Merhav, N. and Feder, M. },
  title = {Universal schemes for sequential decision from individual data sequences},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1993},
  volume = {39},
  pages = {1280--1292},
  number = {4},
  month = {Jul},
  abstract = {Sequential decision algorithms are investigated in relation to a family
	of additive performance criteria for individual data sequences. {S}imple
	universal sequential schemes are known, under certain conditions,
	to approach optimality uniformly as fast as $n^{-1} \log n$, where $n$ is
	the sample size. {F}or the case of finite-alphabet observations,
	the class of schemes that can be implemented by finite-state machines
	({FSM}s) is studied. {I}t is shown that {M}arkovian machines with
	sufficiently long memory exist, which are asymptotically nearly as
	good as any given deterministic or randomized {FSM} for the purpose
	of sequential decision. {F}or the continuous-valued observation case,
	a useful class of parametric schemes is discussed with special attention
	to the recursive least squares algorithm },
  pdf = {../local/Merhav1993Universal.pdf},
  file = {Merhav1993Universal.pdf:local/Merhav1993Universal.pdf:PDF},
  keywords = {information-theory source-coding},
  owner = {vert}
}

@article{Merhav1993Some,
  author = {Merhav, N. and Feder, M. and Gutman, M. },
  title = {Some properties of sequential predictors for binary {M}arkov sources},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1993},
  volume = {39},
  pages = {887--892},
  number = {3},
  month = {May},
  abstract = {Universal predictions of the next outcome of a binary sequence drawn
	from a {M}arkov source with unknown parameters is considered. {F}or
	a given source, the predictability is defined as the least attainable
	expected fraction of prediction errors. {A} lower bound is derived
	on the maximum rate at which the predictability is asymptotically
	approached uniformly over all sources in the {M}arkov class. {T}his
	bound is achieved by a simple majority predictor. {F}or {B}ernoulli
	sources, bounds on the large deviations performance are investigated.
	{A} lower bound is derived for the probability that the fraction
	of errors will exceed the predictability by a prescribed amount $\epsilon>0$.
	{T}his bound is achieved by the same predictor if $\epsilon$ is sufficiently
	small },
  pdf = {../local/Merhav1993Some.pdf},
  file = {Merhav1993Some.pdf:local/Merhav1993Some.pdf:PDF},
  keywords = {information-theory source-coding},
  owner = {vert}
}

@article{Mering2002Comparative,
  author = {von Mering, C. and Krause, R. and Snel, B. and Cornell, M. and Oliver,
	S. G. and Fields, S. and Bork, P.},
  title = {{C}omparative assessment of large-scale data sets of protein-protein
	interactions.},
  journal = {Nature},
  year = {2002},
  volume = {417},
  pages = {399--403},
  number = {6887},
  month = {May},
  abstract = {Comprehensive protein-protein interaction maps promise to reveal many
	aspects of the complex regulatory network underlying cellular function.
	Recently, large-scale approaches have predicted many new protein
	interactions in yeast. To measure their accuracy and potential as
	well as to identify biases, strengths and weaknesses, we compare
	the methods with each other and with a reference set of previously
	reported protein interactions.},
  doi = {10.1038/nature750},
  pii = {nature750},
  pmid = {12000970},
  timestamp = {2007.02.01},
  url = {http://dx.doi.org/10.1038/nature750}
}

@article{Merkwirth2004Ensemble,
  author = {Merkwirth, Christian and Mauser, Harald and Schulz-Gasch, Tanja and
	Roche, Olivier and Stahl, Martin and Lengauer, Thomas},
  title = {Ensemble methods for classification in cheminformatics.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {1971--1978},
  number = {6},
  abstract = {We describe the application of ensemble methods to binary classification
	problems on two pharmaceutical compound data sets. {S}everal variants
	of single and ensembles models of k-nearest neighbors classifiers,
	support vector machines ({SVM}s), and single ridge regression models
	are compared. {A}ll methods exhibit robust classification even when
	more features are given than observations. {O}n two data sets dealing
	with specific properties of drug-like substances (cytochrome {P}450
	inhibition and "{F}requent {H}itters", i.e., unspecific protein inhibition),
	we achieve classification rates above 90\%. {W}e are able to reduce
	the cross-validated misclassification rate for the {F}requent {H}itters
	problem by a factor of 2 compared to previous results obtained for
	the same data set with different modeling techniques.},
  doi = {10.1021/ci049850e},
  pdf = {../local/Merkwirth2004Ensemble.pdf},
  file = {Merkwirth2004Ensemble.pdf:local/Merkwirth2004Ensemble.pdf:PDF},
  keywords = {chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci049850e}
}

@article{Meron2004Finite-memory,
  author = {Meron, E. and Feder, M.},
  title = {Finite-memory universal prediction of individual sequences},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {2004},
  volume = {50},
  pages = {1506--1523},
  number = {7},
  month = {Jul},
  abstract = {The problem of predicting the next outcome of an individual binary
	sequence under the constraint that the universal predictor has a
	finite memory, is explored. {I}n this analysis, the finite-memory
	universal predictors are either deterministic or random time-invariant
	finite-state ({FS}) machines with {K} states ({K}-state machines).
	{T}he paper provides bounds on the asymptotic achievable regret of
	these constrained universal predictors as a function of {K}, the
	number of their states, for long enough sequences. {T}he specific
	results are as follows. {W}hen the universal predictors are deterministic
	machines, the comparison class consists of constant predictors, and
	prediction is with respect to the 0-1 loss function ({H}amming distance),
	we get tight bounds indicating that the optimal asymptotic regret
	is 1/(2{K}). {I}n that case of {K}-state deterministic universal
	predictors, the constant predictors comparison class, but prediction
	is with respect to the self-information (code length) and the square-error
	loss functions, we show an upper bound on the regret (coding redundancy)
	of $O(K^{-2/3})$ and a lower bound of $\Theta(K^{-4/5})$. {F}or
	these loss functions, if the predictor is allowed to
	be a random {K}-state machine, i.e., a machine with random state
	transitions, we get a lower bound of $\Theta(1/K)$ on the
	regret, with a matching upper bound of $O(1/K)$ for the square-error
	loss, and an upper bound of $O(\log K/K)$
	for the self-information loss. {I}n addition, we provide results
	for all these loss functions in the case where the comparison class
	consists of all predictors that are order-{L} {M}arkov machines.},
  pdf = {../local/Meron2004Finite-memory.pdf},
  file = {Meron2004Finite-memory.pdf:local/Meron2004Finite-memory.pdf:PDF},
  keywords = {information-theory source-coding},
  owner = {vert}
}

@article{Mestres2004Computational,
  author    = {Jordi Mestres},
  title     = {Computational chemogenomics approaches to systematic knowledge-based
	drug discovery.},
  journal   = {Curr Opin Drug Discov Devel},
  volume    = {7},
  number    = {3},
  pages     = {304--313},
  month     = {May},
  year      = {2004},
  pmid      = {15216933},
  keywords  = {Chemistry, Pharmaceutical; Combinatorial Chemistry Techniques; Computational
	Biology; Drug Design; Genomics; Ligands; Proteins; Receptors, G-Protein-Coupled},
  owner     = {vert},
  timestamp = {2007.08.02},
  abstract  = {Chemogenomics, the identification of all possible drugs for all possible
	targets, has recently emerged as a new paradigm in drug discovery
	in which efficiency in the compound design and optimization process
	is achieved through the gain and reuse of targeted knowledge. As
	targeted knowledge resides at the interface between chemistry and
	biology, computational tools aimed at integrating the chemical and
	biological spaces play a central role in chemogenomics. This review
	covers the recent progress made in integrative computational approaches
	to data annotation and knowledge generation for the systematic knowledge-based
	design and screening of chemical libraries.}
}

@article{Metzker2010Sequencing,
  author      = {Metzker, M. L.},
  title       = {Sequencing technologies - the next generation.},
  journal     = {Nat. Rev. Genet.},
  volume      = {11},
  number      = {1},
  pages       = {31--46},
  month       = {Jan},
  year        = {2010},
  doi         = {10.1038/nrg2626},
  url         = {http://dx.doi.org/10.1038/nrg2626},
  pii         = {nrg2626},
  pmid        = {19997069},
  institution = { Human Genetics, Baylor College of Medicine, Houston, Texas 77030,
	USA. mmetzker@bcm.edu},
  keywords    = {ngs},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  timestamp   = {2010.07.27},
  abstract    = {Demand has never been greater for revolutionary technologies that
	deliver fast, inexpensive and accurate genome information. This challenge
	has catalysed the development of next-generation sequencing (NGS)
	technologies. The inexpensive production of large volumes of sequence
	data is the primary advantage over conventional methods. Here, I
	present a technical review of template preparation, sequencing and
	imaging, genome alignment and assembly approaches, and recent advances
	in current and near-term commercially available NGS instruments.
	I also outline the broad range of applications for NGS technologies,
	in addition to providing guidelines for platform selection to address
	biological questions of interest.}
}

@article{Mewes2002MIPS:,
  author = {Mewes, H. W. and Frishman, D. and G{\"u}ldener, U. and Mannhaupt, G. and
	Mayer, K. and Mokrejs, M. and Morgenstern, B. and M{\"u}nsterkoetter, M.
	and Rudd, S. and Weil, B.},
  title = {{MIPS}: a database for genomes and protein sequences},
  journal = {Nucleic {A}cids {R}es.},
  year = {2002},
  volume = {30},
  pages = {31--34},
  number = {1},
  pdf = {../local/Mewes2002MIPS.pdf},
  file = {Mewes2002MIPS.pdf:local/Mewes2002MIPS.pdf:PDF},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/30/1/31}
}

@article{Mezlini2013iReckon,
  author      = {Mezlini, A. M. and Smith, E. J. M. and Fiume, M. and Buske, O. and
	Savich, G. L. and Shah, S. and Aparicio, S. and Chiang, D. Y. and
	Goldenberg, A. and Brudno, M.},
  title       = {{iReckon}: Simultaneous isoform discovery and abundance estimation
	from {RNA}-seq data.},
  journal     = {Genome Res},
  volume      = {23},
  number      = {3},
  pages       = {519--529},
  month       = {Mar},
  year        = {2013},
  doi         = {10.1101/gr.142232.112},
  url         = {http://dx.doi.org/10.1101/gr.142232.112},
  pii         = {gr.142232.112},
  pmid        = {23204306},
  pdf         = {../local/Mezlini2013iReckon.pdf},
  file        = {Mezlini2013iReckon.pdf:Mezlini2013iReckon.pdf:PDF},
  institution = {Department of Computer Science, University of Toronto, Ontario M5S
	2E4, Canada;},
  keywords    = {ngs, rnaseq},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2013.03.29},
  abstract    = {High-throughput RNA sequencing (RNA-seq) promises to revolutionize
	our understanding of genes and their role in human disease by characterizing
	the RNA content of tissues and cells. The realization of this promise,
	however, is conditional on the development of effective computational
	methods for the identification and quantification of transcripts
	from incomplete and noisy data. In this article, we introduce iReckon,
	a method for simultaneous determination of the isoforms and estimation
	of their abundances. Our probabilistic approach incorporates multiple
	biological and technical phenomena, including novel isoforms, intron
	retention, unspliced pre-mRNA, PCR amplification biases, and multimapped
	reads. iReckon utilizes regularized expectation-maximization to accurately
	estimate the abundances of known and novel isoforms. Our results
	on simulated and real data demonstrate a superior ability to discover
	novel isoforms with a significantly reduced number of false-positive
	predictions, and our abundance accuracy prediction outmatches that
	of other state-of-the-art tools. Furthermore, we have applied iReckon
	to two cancer transcriptome data sets, a triple-negative breast cancer
	patient sample and the MCF7 breast cancer cell line, and show that
	iReckon is able to reconstruct the complex splicing changes that
	were not previously identified. QT-PCR validations of the isoforms
	detected in the MCF7 cell line confirmed all of iReckon's predictions
	and also showed strong agreement (r = 0.94) with the predicted abundances.}
}

@article{Micchelli2005On,
  author = {Micchelli, Charles A. and Pontil, Massimiliano},
  title = {On learning vector-valued functions.},
  journal = {Neural {C}omput},
  year = {2005},
  volume = {17},
  pages = {177--204},
  number = {1},
  month = {Jan},
  abstract = {In this letter, we provide a study of learning in a {H}ilbert space
	of vector-valued functions. {W}e motivate the need for extending learning
	theory of scalar-valued functions by practical considerations and
	establish some basic results for learning vector-valued functions
	that should prove useful in applications. {S}pecifically, we allow
	an output space {Y} to be a {H}ilbert space, and we consider a reproducing
	kernel {H}ilbert space of functions whose values lie in {Y}. {I}n
	this setting, we derive the form of the minimal norm interpolant
	to a finite set of data and apply it to study some regularization
	functionals that are important in learning theory. {W}e consider
	specific examples of such functionals corresponding to multiple-output
	regularization networks and support vector machines, for both regression
	and classification. {F}inally, we provide classes of operator-valued
	kernels of the dot product and translation-invariant type.},
  doi = {10.1162/0899766052530802},
  keywords = {Algorithms, Amino Acid, Amino Acids, Artificial Intelligence, Ascomycota,
	Automated, Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms,
	Comparative Study, Computer Simulation, Computer-Assisted, Computing
	Methodologies, Crystallography, DNA, DNA Primers, Databases, Decision
	Support Techniques, Diagnostic Imaging, Enzymes, Feedback, Fixation,
	Gene Expression Profiling, Genetic, Hordeum, Host-Parasite Relations,
	Humans, Image Enhancement, Image Interpretation, Informatics, Information
	Storage and Retrieval, Kinetics, Logistic Models, Magnetic Resonance
	Spectroscopy, Mathematical Computing, Models, Nanotechnology, Neural
	Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics,
	Ocular, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern
	Recognition, Plant, Plants, Predictive Value of Tests, Protein, Protein
	Conformation, Regression Analysis, Research Support, Sample Size,
	Selection (Genetics), Sequence Alignment, Sequence Analysis, Sequence
	Homology, Signal Processing, Skin, Software, Statistical, Subtraction
	Technique, Theoretical, Thermodynamics, U.S. Gov't, Viral Proteins,
	X-Ray, 15563752},
  url = {http://dx.doi.org/10.1162/0899766052530802}
}

@article{Michiels2005Prediction,
  author      = {Michiels, S. and Koscielny, S. and Hill, C.},
  title       = {Prediction of cancer outcome with microarrays: a multiple random
	validation strategy},
  journal     = {Lancet},
  volume      = {365},
  number      = {9458},
  pages       = {488--492},
  year        = {2005},
  doi         = {10.1016/S0140-6736(05)17866-0},
  url         = {http://dx.doi.org/10.1016/S0140-6736(05)17866-0},
  pii         = {S0140-6736(05)17866-0},
  pmid        = {15705458},
  institution = {Biostatistics and Epidemiology Unit, Institut Gustave Roussy, Villejuif,
	France.},
  keywords    = {featureselection, breastcancer, microarray},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2010.10.12},
  abstract    = {BACKGROUND: General studies of microarray gene-expression profiling
	have been undertaken to predict cancer outcome. Knowledge of this
	gene-expression profile or molecular signature should improve treatment
	of patients by allowing treatment to be tailored to the severity
	of the disease. We reanalysed data from the seven largest published
	studies that have attempted to predict prognosis of cancer patients
	on the basis of DNA microarray analysis. METHODS: The standard strategy
	is to identify a molecular signature (ie, the subset of genes most
	differentially expressed in patients with different outcomes) in
	a training set of patients and to estimate the proportion of misclassifications
	with this signature on an independent validation set of patients.
	We expanded this strategy (based on unique training and validation
	sets) by using multiple random sets, to study the stability of the
	molecular signature and the proportion of misclassifications. FINDINGS:
	The list of genes identified as predictors of prognosis was highly
	unstable; molecular signatures strongly depended on the selection
	of patients in the training sets. For all but one study, the proportion
	misclassified decreased as the number of patients in the training
	set increased. Because of inadequate validation, our chosen studies
	published overoptimistic results compared with those from our own
	analyses. Five of the seven studies did not classify patients better
	than chance. INTERPRETATION: The prognostic value of published microarray
	results in cancer studies should be considered with caution. We advocate
	the use of validation by repeated random sampling.}
}

@article{Middendorf2004Discriminative,
  author = {Middendorf, M. and Ziv, E. and Adams, C. and Hom, J. and Koytcheff,
	R. and Levovitz, C. and Woods, G. and Chen, L. and Wiggins, C.},
  title = {Discriminative topological features reveal biological network mechanisms.},
  journal = {B{MC} {B}ioinformatics},
  year = {2004},
  volume = {5},
  pages = {181},
  abstract = {{BACKGROUND}: {R}ecent genomic and bioinformatic advances have motivated
	the development of numerous network models intending to describe
	graphs of biological, technological, and sociological origin. {I}n
	most cases the success of a model has been evaluated by how well
	it reproduces a few key features of the real-world data, such as
	degree distributions, mean geodesic lengths, and clustering coefficients.
	{O}ften pairs of models can reproduce these features with indistinguishable
	fidelity despite being generated by vastly different mechanisms.
	{I}n such cases, these few target features are insufficient to distinguish
	which of the different models best describes real world networks
	of interest; moreover, it is not clear a priori that any of the presently-existing
	algorithms for network generation offers a predictive description
	of the networks inspiring them. {RESULTS}: {W}e present a method
	to assess systematically which of a set of proposed network generation
	algorithms gives the most accurate description of a given biological
	network. {T}o derive discriminative classifiers, we construct a mapping
	from the set of all graphs to a high-dimensional (in principle infinite-dimensional)
	"word space". {T}his map defines an input space for classification
	schemes which allow us to state unambiguously which models are most
	descriptive of a given network of interest. {O}ur training sets include
	networks generated from 17 models either drawn from the literature
	or introduced in this work. {W}e show that different duplication-mutation
	schemes best describe the {E}. coli genetic network, the {S}. cerevisiae
	protein interaction network, and the {C}. elegans neuronal network,
	out of a set of network models including a linear preferential attachment
	model and a small-world model. {CONCLUSIONS}: {O}ur method is a first
	step towards systematizing network models and assessing their predictability,
	and we anticipate its usefulness for a number of communities.},
  doi = {10.1186/1471-2105-5-181},
  pdf = {../local/Middendorf2004Discriminative.pdf},
  file = {Middendorf2004Discriminative.pdf:local/Middendorf2004Discriminative.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.biomedcentral.com/1471-2105/5/181}
}

@article{Miertus2000Concepts,
  author = {S. Miertus and G. Fassina and P. F. Seneci},
  title = {Concepts of {C}ombinatorial {C}hemistry and {C}ombinatorial {T}echnologies},
  journal = {Chemick{\'e} {L}isty},
  year = {2000},
  volume = {94},
  pages = {1104--1110},
  file = {stat.php?id=pdf10:http\://www.combichemistry.com/statdir/stat.php?id=pdf10:PDF},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.22},
  url = {http://www.combichemistry.com/statdir/stat.php?id=pdf10}
}

@article{Mika2004NLProt,
  author = {Mika, Sven and Rost, Burkhard},
  title = {{NLProt}: extracting protein names and sequences from papers.},
  journal = {Nucleic {A}cids {R}es},
  year = {2004},
  volume = {32},
  pages = {W634--W637},
  number = {Web Server issue},
  month = {Jul},
  abstract = {Automatically extracting protein names from the literature and linking
	these names to the associated entries in sequence databases is becoming
	increasingly important for annotating biological databases. {NLP}rot
	is a novel system that combines dictionary- and rule-based filtering
	with several support vector machines ({SVM}s) to tag protein names
	in {P}ub{M}ed abstracts. {W}hen considering partially tagged names
	as errors, {NLP}rot still reached a precision of 75\% at a recall
	of 76\%. {B}y many criteria our system outperformed other tagging
	methods significantly; in particular, it proved very reliable even
	for novel names. {N}ames encountered particularly frequently in {D}rosophila,
	such as white, wing and bizarre, constitute an obvious limitation
	of {NLP}rot. {O}ur method is available both as an {I}nternet server
	and as a program for download (http://cubic.bioc.columbia.edu/services/{NLP}rot/).
	{I}nput can be {P}ub{M}ed/{MEDLINE} identifiers, authors, titles
	and journals, as well as collections of abstracts, or entire papers.},
  doi = {10.1093/nar/gkh427},
  keywords = {biosvm nlp},
  pii = {32/suppl_2/W634},
  url = {http://dx.doi.org/10.1093/nar/gkh427}
}

@article{Mika2004Protein,
  author = {Mika, Sven and Rost, Burkhard},
  title = {Protein names precisely peeled off free text},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {i241--i247},
  number = {Suppl. 1},
  abstract = {Motivation: {A}utomatically identifying protein names from the scientific
	literature is a pre-requisite for the increasing demand in data-mining
	this wealth of information. {E}xisting approaches are based on dictionaries,
	rules and machine-learning. {H}ere, we introduced a novel system
	that combines a pre-processing dictionary- and rule-based filtering
	step with several separately trained support vector machines ({SVM}s)
	to identify protein names in the {MEDLINE} abstracts. {R}esults:
	{O}ur new tagging-system {NLP}rot is capable of extracting protein
	names with a precision (accuracy) of 75\% at a recall (coverage) of
	76\% after training on a corpus, which was used before by other groups
	and contains 200 annotated abstracts. {F}or our estimate of sustained
	performance, we considered partially identified names as false positives.
	{O}ne important issue frequently ignored in the literature is the
	redundancy in evaluation sets. {W}e suggested some guidelines for
	removing overly inadequate overlaps between training and testing
	sets. {A}pplying these new guidelines, our program appeared to significantly
	out-perform other methods tagging protein names. {NLP}rot was so
	successful due to the {SVM}-building blocks that succeeded in utilizing
	the local context of protein names in the scientific literature.
	{W}e challenge that our system may constitute the most general and
	precise method for tagging protein names. {A}vailability: http://cubic.bioc.columbia.edu/services/nlprot/},
  pdf = {../local/Mika2004Protein.pdf},
  file = {Mika2004Protein.pdf:Mika2004Protein.pdf:PDF},
  keywords = {biosvm nlp},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/suppl_1/i241}
}

@inproceedings{Mika1999Fisher,
  author    = {S. Mika and G. R{\"a}tsch and J. Weston and B. Sch{\"o}lkopf and
	K.R. M{\"u}ller},
  title     = {Fisher discriminant analysis with kernels},
  booktitle = {Neural {N}etworks for {S}ignal {P}rocessing {IX}},
  editor    = {Y.-H. Hu and J. Larsen and E. Wilson and S. Douglas},
  publisher = {IEEE},
  pages     = {41--48},
  year      = {1999},
  url       = {http://ida.first.gmd.de/~mika/PS/MikRaeWesSchMue99.ps},
  pdf       = {../local/mika99.pdf},
  file      = {mika99.pdf:local/mika99.pdf:PDF},
  subject   = {kernel}
}

@article{Milik1998Application,
  author = {Milik, M. and Sauer, D. and Brunmark, A. P. and Yuan, L. and Vitiello,
	A. and Jackson, M. R. and Peterson, P. A. and Skolnick, J. and Glass,
	C. A.},
  title = {Application of an artificial neural network to predict specific
	class {I} {MHC} binding peptide sequences.},
  journal = {Nat. Biotechnol.},
  year = {1998},
  volume = {16},
  pages = {753--756},
  number = {8},
  month = {Aug},
  abstract = {Computational methods were used to predict the sequences of peptides
	that bind to the MHC class I molecule, K(b). The rules for predicting
	binding sequences, which are limited, are based on preferences for
	certain amino acids in certain positions of the peptide. It is apparent
	though, that binding can be influenced by the amino acids in all
	of the positions of the peptide. An artificial neural network (ANN)
	has the ability to simultaneously analyze the influence of all of
	the amino acids of the peptide and thus may improve binding predictions.
	ANNs were compared to statistically analyzed peptides for their abilities
	to predict the sequences of K(b) binding peptides. ANN systems were
	trained on a library of binding and nonbinding peptide sequences
	from a phage display library. Statistical and ANN methods identified
	strong binding peptides with preferred amino acids. ANNs detected
	more subtle binding preferences, enabling them to predict medium
	binding peptides. The ability to predict class I MHC molecule binding
	peptides is useful for immunological therapies involving cytotoxic-T
	cells.},
  doi = {10.1038/nbt0898-753},
  keywords = {immunoinformatics},
  pmid = {9702766},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1038/nbt0898-753}
}

@article{Miller1993On,
  author = {Miller, J.W. and Goodman, R. and Smyth, P.},
  title = {On loss functions which minimize to conditional expected values and
	posterior probabilities},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1993},
  volume = {39},
  pages = {1404--1408},
  number = {4},
  month = {Jul},
  abstract = {A loss function, or objective function, is a function used to compare
	parameters when fitting a model to data. {T}he loss function gives
	a distance between the model output and the desired output. {T}wo
	common examples are the squared-error loss function and the cross
	entropy loss function. {M}inimizing the mean-square error loss function
	is equivalent to minimizing the mean square difference between the
	model output and the expected value of the output given a particular
	input. {T}his property of minimization to the expected value is formalized
	as {P}-admissibility. {T}he necessary and sufficient conditions for
	{P}-admissibility, leading to a parametric description of all {P}-admissible
	loss functions, are found. {I}n particular, it is shown that two
	of the simplest members of this class of functions are the squared
	error and the cross entropy loss functions. {O}ne application of
	this work is in the choice of a loss function for training neural
	networks to provide probability estimates.},
  pdf = {../local/Miller1993On.pdf},
  file = {Miller1993On.pdf:local/Miller1993On.pdf:PDF},
  owner = {vert}
}

@article{Miller2007Expression,
  author   = {Miller, L.D. and Liu, E.T.},
  title    = {Expression genomics in breast cancer research: microarrays at the
	crossroads of biology and medicine},
  journal  = {Breast Cancer Res.},
  volume   = {9},
  pages    = {206},
  year     = {2007},
  doi      = {10.1186/bcr1662},
  url      = {http://dx.doi.org/10.1186/bcr1662},
  pdf      = {../local/Miller2007Expression.pdf},
  file     = {Miller2007Expression.pdf:Miller2007Expression.pdf:PDF},
  keywords = {csbcbook, csbcbook-ch3},
  abstract = {Genome-wide expression microarray studies have revealed that the biological
	and clinical heterogeneity of breast cancer can be partly explained
	by information embedded within a complex but ordered transcriptional
	architecture. Comprising this architecture are gene expression networks,
	or signatures, reflecting biochemical and behavioral properties of
	tumors that might be harnessed to improve disease subtyping, patient
	prognosis and prediction of therapeutic response. Emerging 'hypothesis-driven'
	strategies that incorporate knowledge of pathways and other biological
	phenomena in the signature discovery process are linking prognosis
	and therapy prediction with transcriptional readouts of tumorigenic
	mechanisms that better inform therapeutic options.}
}

@book{Milnor1969Topology,
  author    = {J.W. Milnor},
  title     = {Topology from the Differentiable Viewpoint},
  publisher = {Univ. Press of Virginia},
  year      = {1969},
  isbn      = {978-0-691-04833-8}
}

@article{Miranda2006pattern-based,
  author = {Miranda, Kevin C. and Huynh, Tien and Tay, Yvonne and Ang, Yen-Sin and
	Tam, Wai-Leong and Thomson, Andrew M. and Lim, Bing and Rigoutsos, Isidore},
  title = {A pattern-based method for the identification of {MicroRNA} binding
	sites and their corresponding heteroduplexes.},
  journal = {Cell},
  year = {2006},
  volume = {126},
  pages = {1203--1217},
  number = {6},
  month = {Sep},
  abstract = {We present rna22, a method for identifying microRNA binding sites
	and their corresponding heteroduplexes. Rna22 does not rely upon
	cross-species conservation, is resilient to noise, and, unlike previous
	methods, it first finds putative microRNA binding sites in the sequence
	of interest, then identifies the targeting microRNA. Computationally,
	we show that rna22 identifies most of the currently known heteroduplexes.
	Experimentally, with luciferase assays, we demonstrate average repressions
	of 30\% or more for 168 of 226 tested targets. The analysis suggests
	that some microRNAs may have as many as a few thousand targets, and
	that between 74\% and 92\% of the gene transcripts in four model
	genomes are likely under microRNA control through their untranslated
	and amino acid coding regions. We also extended the method's key
	idea to a low-error microRNA-precursor-discovery scheme; our studies
	suggest that the number of microRNA precursors in mammalian genomes
	likely ranges in the tens of thousands.},
  doi = {10.1016/j.cell.2006.07.031},
  pdf = {../local/Miranda2006pattern-based.pdf},
  file = {Miranda2006pattern-based.pdf:Miranda2006pattern-based.pdf:PDF},
  institution = {Bioinformatics and Pattern Discovery Group, IBM Thomas J. Watson
	Research Center, Yorktown Heights, P.O. Box 218, NY 10598, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0092-8674(06)01099-3},
  pmid = {16990141},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1016/j.cell.2006.07.031}
}

@article{Mirzadegan2003Sequence,
  author    = {Mirzadegan, T. and Benk{\"o}, G. and Filipek, S. and Palczewski,
	K.},
  title     = {Sequence analyses of {G}-protein-coupled receptors: similarities
	to rhodopsin},
  journal   = {Biochemistry},
  volume    = {42},
  number    = {10},
  pages     = {2759--2767},
  month     = {Mar},
  year      = {2003},
  doi       = {10.1021/bi027224+},
  pmid      = {12627940},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.07.16}
}

@article{Mishra2006Human,
  author = {Mishra, G.R. and Suresh, M. and Kumaran, K. and Kannabiran, N. and
	Suresh, S. and Bala, P. and Shivakumar, K. and Anuradha, N. and Reddy,
	R. and Raghavan, T.M. and Menon, S. and Hanumanthu, G. and Gupta,
	M. and Upendran, S. and Gupta, S. and Mahesh, M. and Jacob, B. and
	Mathew, P. and Chatterjee, P. and Arun, K.S. and Sharma, S. and Chandrika,
	K.N. and Deshpande, N. and Palvankar, K. and Raghavnath, R. and Krishnakanth,
	R. and Karathia, H. and Rekha, B. and Nayak, R. and Vishnupriya,
	G. and Kumar, H.G.M. and Nagini, M. and Kumar, G.S.S. and Jose, R.
	and Deepthi, P. and Mohan, S.S. and Gandhi, T.K.B. and Harsha, H.C.
	and Deshpande, K.S. and Sarker, M. and Prasad, T.S.K. and Pandey,
	A.},
  title = {Human protein reference database--2006 update.},
  journal = {Nucleic Acids Res},
  year = {2006},
  volume = {34},
  pages = {D411--D414},
  number = {Database issue},
  month = {Jan},
  abstract = {Human Protein Reference Database (HPRD) (http://www.hprd.org) was
	developed to serve as a comprehensive collection of protein features,
	post-translational modifications (PTMs) and protein-protein interactions.
	Since the original report, this database has increased to >20 000
	proteins entries and has become the largest database for literature-derived
	protein-protein interactions (>30 000) and PTMs (>8000) for human
	proteins. We have also introduced several new features in HPRD including:
	(i) protein isoforms, (ii) enhanced search options, (iii) linking
	of pathway annotations and (iv) integration of a novel browser, GenProt
	Viewer (http://www.genprot.org), developed by us that allows integration
	of genomic and proteomic information. With the continued support
	and active participation by the biomedical community, we expect HPRD
	to become a unique source of curated information for the human proteome
	and spur biomedical discoveries based on integration of genomic,
	transcriptomic and proteomic data.},
  doi = {10.1093/nar/gkj141},
  institution = {Institute of Bioinformatics, International Tech Park, Bangalore 560
	066, India.},
  keywords = {Databases, Protein; Genomics; Humans; Internet; Protein Interaction
	Mapping; Protein Isoforms; Protein Processing, Post-Translational;
	Proteins; Proteome; Proteomics; Signal Transduction; Systems Integration;
	User-Computer Interface},
  owner = {fantine},
  pii = {34/suppl_1/D411},
  pmid = {16381900},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/nar/gkj141}
}

@article{Mishra2008Review,
  author      = {K. P. Mishra and L. Ganju and M. Sairam and P. K. Banerjee and R.
	C. Sawhney},
  title       = {A review of high throughput technology for the screening of natural
	products.},
  journal     = {Biomed Pharmacother},
  volume      = {62},
  number      = {2},
  pages       = {94--98},
  month       = {Feb},
  year        = {2008},
  doi         = {10.1016/j.biopha.2007.06.012},
  url         = {http://dx.doi.org/10.1016/j.biopha.2007.06.012},
  pii         = {S0753-3322(07)00127-8},
  pmid        = {17692498},
  institution = {Defence Institute of Physiology and Allied Sciences, Lucknow Road,
	Timarpur, Delhi 110054, India.},
  keywords    = {Automation; Biological Products, pharmacology; Combinatorial Chemistry
	Techniques; Drug Design; Drug Evaluation, Preclinical; Technology,
	Pharmaceutical, methods},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  timestamp   = {2010.07.26},
  abstract    = {High throughput screening is commonly defined as automatic testing
	of potential drug candidates at a rate in excess of 10,000 compounds
	per week. The aim of high throughput drug discovery is to test large
	compound collections for potentially active compounds ('hits') in
	order to allow further development of compounds for pre-clinical
	testing ('leads'). High throughput technology has emerged over the
	last few years as an important tool for drug discovery and lead optimisation.
	In this approach, the molecular diversity and range of biological
	properties displayed by secondary metabolites constitutes a challenge
	to combinatorial strategies for natural products synthesis and derivatization.
	This article reviews the approach of High throughput technique for
	the screening of natural products for drug discovery.}
}

@article{Misra2002Interactive,
  author      = {Misra, J. and Schmitt, W. and Hwang, D. and Hsiao, L.-L. and Gullans,
	S. and Stephanopoulos, G. and Stephanopoulos, G.},
  title       = {Interactive exploration of microarray gene expression patterns in
	a reduced dimensional space.},
  journal     = {Genome Res.},
  year        = {2002},
  volume      = {12},
  pages       = {1112--1120},
  number      = {7},
  month       = {Jul},
  abstract    = {The very high dimensional space of gene expression measurements obtained
	by DNA microarrays impedes the detection of underlying patterns in
	gene expression data and the identification of discriminatory genes.
	In this paper we show the use of projection methods such as principal
	components analysis (PCA) to obtain a direct link between patterns
	in the genes and patterns in samples. This feature is useful in the
	initial interactive pattern exploration of gene expression data and
	data-driven learning of the nature and types of samples. Using oligonucleotide
	microarray measurements of 40 samples from different normal human
	tissues, we show that distinct patterns are obtained when the genes
	are projected on a two-dimensional plane spanned by the loadings
	of the two major principal components. These patterns define the
	particular genes associated with a sample class (i.e., tissue). When
	used separately from the other genes, these class-specific (i.e.,
	tissue-specific) genes in turn define distinct tissue patterns in
	the projection space spanned by the scores of the two major principal
	components. In this study, PCA projection facilitated discriminatory
	gene selection for different tissues and identified tissue-specific
	gene expression signatures for liver, skeletal muscle, and brain
	samples. Furthermore, it allowed the classification of nine new samples
	belonging to these three types using the linear combination of the
	expression levels of the tissue-specific genes determined from the
	first set of samples. The application of the technique to other published
	data sets is also discussed.},
  doi         = {10.1101/gr.225302},
  pdf         = {../local/Misra2002Interactive.pdf},
  file        = {Misra2002Interactive.pdf:Misra2002Interactive.pdf:PDF},
  institution = {Department of Chemical Engineering, Massachusetts Institute of Technology,
	Cambridge, Massachusetts 02139, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pmid        = {12097349},
  timestamp   = {2011.12.30},
  url         = {http://dx.doi.org/10.1101/gr.225302}
}

@article{Mitelman2007Impact,
  author = {Mitelman, F. and Johansson, B. and Mertens, F.},
  title = {The impact of translocations and gene fusions on cancer causation},
  journal = {Nat. Rev. Cancer},
  year = {2007},
  volume = {7},
  pages = {233--245},
  keywords = {csbcbook}
}

@article{Miteva2005Fast,
  author = {M. A. Miteva and W. H. Lee and M. O. Montes and B. O. Villoutreix},
  title = {Fast structure-based virtual ligand screening combining {FRED},
	{DOCK}, and {Surflex}.},
  journal = {J. Med. Chem.},
  year = {2005},
  volume = {48},
  pages = {6012--6022},
  number = {19},
  month = {Sep},
  abstract = {A protocol was devised in which FRED, DOCK, and Surflex were combined
	in a multistep virtual ligand screening (VLS) procedure to screen
	the pocket of four different proteins. One goal was to evaluate the
	impact of chaining "freely available packages to academic users"
	on docking/scoring accuracy and CPU time consumption. A bank of 65
	660 compounds including 49 known actives was generated. Our procedure
	is successful because docking/scoring parameters are tuned according
	to the nature of the binding pocket and because a shape-based filtering
	tool is applied prior to flexible docking. The obtained enrichment
	factors are in line with those reported in recent studies. We suggest
	that consensus docking/scoring could be valuable to some drug discovery
	projects. The present protocol could process the entire bank for
	one receptor in less than a week on one processor, suggesting that
	VLS experiments could be performed even without large computer resources.},
  doi = {10.1021/jm050262h},
  keywords = {Binding Sites, Databases, Estrogen, Factor VIIa, Factual, Ligands,
	Molecular Structure, Neuraminidase, Non-U.S. Gov't, Protein Binding,
	Quantitative Structure-Activity Relationship, Receptors, Research
	Support, Thymidine Kinase},
  owner = {mahe},
  pmid = {16162004},
  timestamp = {2006.09.07},
  url = {http://dx.doi.org/10.1021/jm050262h}
}

@article{Mitra2007p53,
  author = {Mitra, A. P. and Birkhahn, M. and Cote, R. J.},
  title = {{p53} and retinoblastoma pathways in bladder cancer.},
  journal = {World J Urol.},
  year = {2007},
  volume = {25},
  pages = {563--571},
  owner = {lcalzone},
  timestamp = {2010.04.27}
}

@article{Mitra2004probabilistic,
  author = {Pabitra Mitra and C. A. Murthy and Sankar K Pal},
  title = {A probabilistic active support vector learning algorithm.},
  journal = {{IEEE} Trans Pattern Anal Mach Intell},
  year = {2004},
  volume = {26},
  pages = {413--418},
  number = {3},
  month = {Mar},
  abstract = {The paper describes a probabilistic active learning strategy for support
	vector machine ({SVM}) design in large data applications. {T}he learning
	strategy is motivated by the statistical query model. {W}hile most
	existing methods of active {SVM} learning query for points based
	on their proximity to the current separating hyperplane, the proposed
	method queries for a set of points according to a distribution as
	determined by the current separating hyperplane and a newly defined
	concept of an adaptive confidence factor. {T}his enables the algorithm
	to have more robust and efficient learning capabilities. {T}he confidence
	factor is estimated from local information using the k nearest neighbor
	principle. {T}he effectiveness of the method is demonstrated on real-life
	data sets both in terms of generalization performance, query complexity,
	and training time.}
}

@article{Mitsumori2005Gene,
  author   = {Mitsumori, Tomohiro and Fation, Sevrani and Murata, Masaki and
	Doi, Kouichi and Doi, Hirohumi},
  title    = {Gene/protein name recognition based on support vector machine using
	dictionary as features.},
  journal  = {BMC Bioinformatics},
  year     = {2005},
  volume   = {6 Suppl 1},
  pages    = {S8},
  abstract = {B{ACKGROUND}: {A}utomated information extraction from biomedical literature
	is important because a vast amount of biomedical literature has been
	published. {R}ecognition of the biomedical named entities is the
	first step in information extraction. {W}e developed an automated
	recognition system based on the {SVM} algorithm and evaluated it
	in {T}ask 1.{A} of {B}io{C}re{A}t{I}v{E}, a competition for automated
	gene/protein name recognition. {RESULTS}: {I}n the work presented
	here, our recognition system uses the feature set of the word, the
	part-of-speech ({POS}), the orthography, the prefix, the suffix,
	and the preceding class. {W}e call these features "internal resource
	features", i.e., features that can be found in the training data.
	{A}dditionally, we consider the features of matching against dictionaries
	to be external resource features. {W}e investigated and evaluated
	the effect of these features as well as the effect of tuning the
	parameters of the {SVM} algorithm. {W}e found that the dictionary
	matching features contributed slightly to the improvement in the
	performance of the f-score. {W}e attribute this to the possibility
	that the dictionary matching features might overlap with other features
	in the current multiple feature setting. {CONCLUSION}: {D}uring {SVM}
	learning, each feature alone had a marginally positive effect on
	system performance. {T}his supports the fact that the {SVM} algorithm
	is robust on the high dimensionality of the feature vector space
	and means that feature selection is not required.},
  doi      = {10.1186/1471-2105-6-S1-S8},
  pdf      = {../local/Mitsumori2005Gene.pdf},
  file     = {Mitsumori2005Gene.pdf:local/Mitsumori2005Gene.pdf:PDF},
  keywords = {biosvm nlp},
  pii      = {1471-2105-6-S1-S8},
  url      = {http://dx.doi.org/10.1186/1471-2105-6-S1-S8}
}

@article{Mittal2004Improving,
  author = {Mittal, V.},
  title = {Improving the efficiency of {RNA} interference in mammals.},
  journal = {Nat. Rev. Genet.},
  year = {2004},
  volume = {5},
  pages = {355--365},
  number = {5},
  month = {May},
  doi = {10.1038/nrg1323},
  keywords = {sirna},
  pii = {nrg1323},
  url = {http://dx.doi.org/10.1038/nrg1323}
}

@article{Miwakeichi2001comparison,
  author = {F. Miwakeichi and R. Ramirez-Padron and P. A. Valdes-Sosa and T.
	Ozaki},
  title = {A comparison of non-linear non-parametric models for epilepsy data.},
  journal = {Comput. Biol. Med.},
  year = {2001},
  volume = {31},
  pages = {41--57},
  number = {1},
  month = {Jan},
  abstract = {E{EG} spike and wave ({SW}) activity has been described through a
	non-parametric stochastic model estimated by the {N}adaraya-{W}atson
	({NW}) method. {I}n this paper the performance of the {NW}, the local
	linear polynomial regression and support vector machines ({SVM})
	methods were compared. {T}he noise-free realizations obtained by
	the {NW} and {SVM} methods reproduced {SW} better than as reported
	in previous works. {T}he tuning parameters had to be estimated manually.
	{A}dding dynamical noise, only the {NW} method was capable of generating
	{SW} similar to training data. {T}he standard deviation of the dynamical
	noise was estimated by means of the correlation dimension.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial
	Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding
	Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation,
	Chemistry, Child, Chromosome Aberrations, Classification, Codon,
	Colonic Neoplasms, Comparative Study, Computational Biology, Computer
	Simulation, Computer-Assisted, DNA, Data Interpretation, Databases,
	Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis,
	Discrimination Learning, Electric Conductivity, Electroencephalography,
	Electrophysiology, Epilepsy, Escherichia coli Proteins, Factual,
	Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling,
	Gene Expression Regulation, Genes, Genetic, Genetic Markers, Genetic
	Predisposition to Disease, Genomics, Hemolysins, Humans, Indians,
	Information Storage and Retrieval, Initiator, Ion Channels, Kinetics,
	Leukemia, Likelihood Functions, Linear Models, Lipid Bilayers, Logistic
	Models, Lymphocytic, MEDLINE, Male, Markov Chains, Melanoma, Models,
	Molecular, Myeloid, Neoplasm, Neoplasms, Neoplastic, Neural Networks
	(Computer), Neurological, Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear
	Dynamics, Normal Distribution, North American, Nucleic Acid Conformation,
	Oligonucleotide Array Sequence Analysis, Organ Specificity, Organelles,
	Ovarian Neoplasms, Ovary, P.H.S., Pattern Recognition, Physical,
	Pigmented, Predictive Value of Tests, Promoter Regions (Genetics),
	Protein Biosynthesis, Protein Folding, Protein Structure, Proteins,
	Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment,
	Sequence Analysis, Sex Characteristics, Skin Diseases, Skin Neoplasms,
	Skin Pigmentation, Software, Sound Spectrography, Statistical, Stochastic
	Processes, Stomach Diseases, T-Lymphocytes, Thermodynamics, Transcription,
	Transcription Factors, Tumor Markers, Type 2, U.S. Gov't, Vertebrates},
  pii = {S0010482500000214},
  pmid = {11058693}
}

@article{Model2001Feature,
  author = {Model, F. and Adorjan, P. and Olek, A. and Piepenbrock, C.},
  title = {Feature selection for {DNA} methylation based cancer classification},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {S157--S164},
  number = {Supp. 1},
  abstract = {Molecular portraits, such as m{RNA} expression or {DNA} methylation
	patterns, have been shown to be strongly correlated with phenotypical
	parameters. {T}hese molecular patterns can be revealed routinely
	on a genomic scale. {H}owever, class prediction based on these patterns
	is an under-determined problem, due to the extreme high dimensionality
	of the data compared to the usually small number of available samples.
	{T}his makes a reduction of the data dimensionality necessary. {H}ere
	we demonstrate how phenotypic classes can be predicted by combining
	feature selection and discriminant analysis. {B}y comparing several
	feature selection methods we show that the right dimension reduction
	strategy is of crucial importance for the classification performance.
	{T}he techniques are demonstrated by methylation pattern based discrimination
	between acute lymphoblastic leukemia and acute myeloid leukemia.
	{C}ontact: {F}abian.{M}odel@epigenomics.com},
  pdf = {../local/Model2001Feature.pdf},
  file = {Model2001Feature.pdf:local/Model2001Feature.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/17/suppl_1/S157}
}

@article{Modha1998Memory-universal,
  author = {Modha, D. S. and Masry, E.},
  title = {Memory-universal prediction of stationary random processes},
  journal = {{IEEE} Trans. Inform. Theory},
  year = {1998},
  volume = {44},
  pages = {117--133},
  number = {1},
  month = {Jan},
  abstract = {We consider the problem of one-step-ahead prediction of a real-valued,
	stationary, strongly mixing random process $(X_i)_{i=-\infty}^{\infty}$. {T}he best
	mean-square predictor of $X_0$ is its conditional mean given the entire
	infinite past $(X_i)_{i=-\infty}^{-1}$. {G}iven a sequence of observations $X_1,
	X_2, \ldots, X_N$, we propose estimators for the conditional mean based
	on sequences of parametric models of increasing memory and of increasing
	dimension, for example, neural networks and {L}egendre polynomials.
	{T}he proposed estimators select both the model memory and the model
	dimension, in a data-driven fashion, by minimizing certain complexity
	regularized least squares criteria. {W}hen the underlying predictor
	function has a finite memory, we establish that the proposed estimators
	are memory-universal: the proposed estimators, which do not know
	the true memory, deliver the same statistical performance (rates
	of integrated mean-squared error) as that delivered by estimators
	that know the true memory. {F}urthermore, when the underlying predictor
	function does not have a finite memory, we establish that the estimator
	based on {L}egendre polynomials is consistent.},
  pdf = {../local/Modha1998Memory-universal.pdf},
  file = {Modha1998Memory-universal.pdf:local/Modha1998Memory-universal.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Modha1996Minimum,
  author = {Modha, D. S. and Masry, E.},
  title = {Minimum complexity regression estimation with weakly dependent observations},
  journal = {{IEEE} Trans. Inform. Theory},
  year = {1996},
  volume = {42},
  pages = {2133--2145},
  number = {6},
  month = {Nov},
  abstract = {The minimum complexity regression estimation framework ({B}arron,
	1991; {B}arron and {C}over, 1991 and {R}issanen, 1989) is a general
	data-driven methodology for estimating a regression function from
	a given list of parametric models using independent and identically
	distributed (i.i.d.) observations. {W}e extend {B}arron's regression
	estimation framework to m-dependent observations and to strongly
	mixing observations. {I}n particular, we propose abstract minimum
	complexity regression estimators for dependent observations, which
	may be adapted to a particular list of parametric models, and establish
	upper bounds on the statistical risks of the proposed estimators
	in terms of certain deterministic indices of resolvability. {A}ssuming
	that the regression function satisfies a certain {F}ourier-transform-type
	representation, we examine minimum complexity regression estimators
	adapted to a list of parametric models based on neural networks and
	by using the upper bounds for the abstract estimators, we establish
	rates of convergence for the statistical risks of these estimators.
	{A}lso, as a key tool, we extend the classical {B}ernstein inequality
	from i.i.d. random variables to m-dependent processes and to strongly
	mixing processes.},
  doi = {10.1109/WITS.1994.513898},
  pdf = {../local/Modha1996Minimum.pdf},
  file = {Modha1996Minimum.pdf:local/Modha1996Minimum.pdf:PDF},
  keywords = {information-theory},
  owner = {vert},
  url = {http://dx.doi.org/10.1109/WITS.1994.513898}
}

@article{Mohamed2005Prostate,
  author = {S. S. Mohamed and M. M. A. Salama and M. Kamel and E. F. El-Saadany
	and K. Rizkalla and J. Chin},
  title = {Prostate cancer multi-feature analysis using trans-rectal ultrasound
	images.},
  journal = {Phys Med Biol},
  year = {2005},
  volume = {50},
  pages = {N175--N185},
  number = {15},
  month = {Aug},
  abstract = {This note focuses on extracting and analysing prostate texture features
	from trans-rectal ultrasound ({TRUS}) images for tissue characterization.
	{O}ne of the principal contributions of this investigation is the
	use of the information of the images' frequency domain features and
	spatial domain features to attain a more accurate diagnosis. {E}ach
	image is divided into regions of interest ({ROI}s) by the {G}abor
	multi-resolution analysis, a crucial stage, in which segmentation
	is achieved according to the frequency response of the image pixels.
	{T}he pixels with a similar response to the same filter are grouped
	to form one {ROI}. {N}ext, from each {ROI} two different statistical
	feature sets are constructed; the first set includes four grey level
	dependence matrix ({GLDM}) features and the second set consists of
	five grey level difference vector ({GLDV}) features. {T}hese constructed
	feature sets are then ranked by the mutual information feature selection
	({MIFS}) algorithm. {H}ere, the features that provide the maximum
	mutual information of each feature and class (cancerous and non-cancerous)
	and the minimum mutual information of the selected features are chosen,
	yeilding a reduced feature subset. {T}he two constructed feature
	sets, {GLDM} and {GLDV}, as well as the reduced feature subset, are
	examined in terms of three different classifiers: the condensed k-nearest
	neighbour ({CNN}), the decision tree ({DT}) and the support vector
	machine ({SVM}). {T}he accuracy classification results range from
	87.5\% to 93.75\%, where the performance of the {SVM} and that of
	the {DT} are significantly better than the performance of the {CNN}.},
  doi = {10.1088/0031-9155/50/15/N02},
  pdf = {../local/Mohamed2005Prostate.pdf},
  file = {Mohamed2005Prostate.pdf:local/Mohamed2005Prostate.pdf:PDF},
  pii = {S0031-9155(05)89652-6},
  pmid = {16030375},
  url = {http://dx.doi.org/10.1088/0031-9155/50/15/N02}
}

@inproceedings{Mohar1991Laplacian,
  author = {B. Mohar},
  title = {The {L}aplacian spectrum of graphs},
  booktitle = {Graph theory, combinatorics, and applications},
  year = {1991},
  editor = {Y. Alavi and G. Chartrand and O. R. Oellermann and A. J. Schwenk},
  pages = {871--898},
  address = {New York},
  publisher = {John Wiley and Sons, Inc.},
  pdf = {../local/moha91.pdf},
  file = {moha91.pdf:local/moha91.pdf:PDF},
  subject = {net},
  url = {http://www.fmf.uni-lj.si/~mohar/Papers/Spec.pdf}
}

@incollection{Mohar1997Some,
  author    = {Mohar, B.},
  title     = {Some applications of {L}aplace eigenvalues of graphs},
  booktitle = {Graph {S}ymmetry: {A}lgebraic {M}ethods and {A}pplications},
  publisher = {Kluwer},
  year      = {1997},
  editor    = {Hahn, G. and Sabidussi, G.},
  volume    = {497},
  series    = {NATO ASI Series C},
  pages     = {227--275},
  address   = {Dordrecht},
  pdf       = {../local/moha97.pdf},
  file      = {moha97.pdf:local/moha97.pdf:PDF},
  subject   = {net},
  url       = {http://citeseer.nj.nec.com/mohar97some.html}
}

@article{Moitessier2008Towards,
  author = {N. Moitessier and P. Englebienne and D. Lee and J. Lawandi and C.
	R. Corbeil},
  title = {Towards the development of universal, fast and highly accurate docking/scoring
	methods: a long way to go.},
  journal = {Br. J. Pharmacol.},
  year = {2008},
  volume = {153 Suppl 1},
  pages = {S7--S26},
  month = {Mar},
  abstract = {Accelerating the drug discovery process requires predictive computational
	protocols capable of reducing or simplifying the synthetic and/or
	combinatorial challenge. Docking-based virtual screening methods
	have been developed and successfully applied to a number of pharmaceutical
	targets. In this review, we first present the current status of docking
	and scoring methods, with exhaustive lists of these. We next discuss
	reported comparative studies, outlining criteria for their interpretation.
	In the final section, we describe some of the remaining developments
	that would potentially lead to a universally applicable docking/scoring
	method.},
  doi = {10.1038/sj.bjp.0707515},
  institution = {Department of Chemistry, McGill University, Montr{\'e}al, Qu{\'e}bec, Canada.
	nicolas.moitessier@mcgill.ca},
  keywords = {Algorithms; Animals; Artificial Intelligence; Computer Simulation;
	Drug Evaluation, Preclinical, methods; Humans; Metals, chemistry;
	Models, Molecular; Molecular Conformation; Nucleic Acids, chemistry/drug
	effects; Proteins, chemistry/drug effects; Reproducibility of Results;
	Stochastic Processes},
  owner = {bricehoffmann},
  pii = {0707515},
  pmid = {18037925},
  timestamp = {2009.02.13},
  url = {http://dx.doi.org/10.1038/sj.bjp.0707515}
}

@article{Moler2000Analysis,
  author = {Moler, E. J. and Chow, M. L. and Mian, I. S.},
  title = {Analysis of molecular profile data using generative and discriminative
	methods},
  journal = {Physiol. Genomics},
  year = {2000},
  volume = {4},
  pages = {109--126},
  number = {2},
  month = {Dec},
  abstract = {A modular framework is proposed for modeling and understanding the
	relationships between molecular profile data and other domain knowledge
	using a combination of generative (here, graphical models) and discriminative
	[{S}upport {V}ector {M}achines ({SVM}s)] methods. {A}s illustration,
	naive {B}ayes models, simple graphical models, and {SVM}s were applied
	to published transcription profile data for 1,988 genes in 62 colon
	adenocarcinoma tissue specimens labeled as tumor or nontumor. {T}hese
	unsupervised and supervised learning methods identified three classes
	or subtypes of specimens, assigned tumor or nontumor labels to new
	specimens and detected six potentially mislabeled specimens. {T}he
	probability parameters of the three classes were utilized to develop
	a novel gene relevance, ranking, and selection method. {SVM}s trained
	to discriminate nontumor from tumor specimens using only the 50-200
	top-ranked genes had the same or better generalization performance
	than the full repertoire of 1,988 genes. {A}pproximately 90 marker
	genes were pinpointed for use in understanding the basic biology
	of colon adenocarcinoma, defining targets for therapeutic intervention
	and developing diagnostic tools. {T}hese potential markers highlight
	the importance of tissue biology in the etiology of cancer. {C}omparative
	analysis of molecular profile data is proposed as a mechanism for
	predicting the physiological function of genes in instances when
	comparative sequence analysis proves uninformative, such as with
	human and yeast translationally controlled tumour protein. {G}raphical
	models and {SVM}s hold promise as the foundations for developing
	decision support systems for diagnosis, prognosis, and monitoring
	as well as inferring biological networks.},
  pdf = {../local/Moler2000Analysis.pdf},
  file = {Moler2000Analysis.pdf:local/Moler2000Analysis.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://physiolgenomics.physiology.org/cgi/content/abstract/4/2/109}
}

@article{Molloy1998size,
  author  = {Molloy, M. and Reed, B.},
  title   = {The size of the giant component of a random graph with a given degree
	sequence},
  journal = {Combinator. Probab. Comput.},
  year    = {1998},
  volume  = {7},
  pages   = {295--305},
  pdf     = {../local/moll98.pdf},
  file    = {moll98.pdf:local/moll98.pdf:PDF},
  subject = {compnet},
  url     = {http://www.cs.toronto.edu/~molloy/webpapers/size.ps}
}

@article{Molloy1995critical,
  author  = {Molloy, M. and Reed, B.},
  title   = {A critical point for random graphs with a given degree sequence},
  journal = {Random Struct. Algorithm.},
  year    = {1995},
  volume  = {6},
  pages   = {161--179},
  pdf     = {../local/moll95.pdf},
  file    = {moll95.pdf:local/moll95.pdf:PDF},
  subject = {compnet},
  url     = {http://www.cs.toronto.edu/~molloy/webpapers/gc2.ps}
}

@article{Mootha2003PGC,
  author = {Mootha, V. K. and Lindgren, C. M. and Eriksson, K.-F. and Subramanian,
	A. and Sihag, S. and Lehar, J. and Puigserver, P. and Carlsson, E.
	and Ridderstr{\aa}le, M. and Laurila, E. and Houstis, N. and Daly,
	M. J. and Patterson, N. and Mesirov, J. P. and Golub, T. R. and Tamayo,
	P. and Spiegelman, B. and Lander, E. S. and Hirschhorn, J. N. and
	Altshuler, D. and Groop, L. C.},
  title = {{PGC-1$\alpha$}-responsive genes involved in oxidative phosphorylation
	are coordinately downregulated in human diabetes.},
  journal = {Nat. Genet.},
  year = {2003},
  volume = {34},
  pages = {267--273},
  number = {3},
  month = {Jul},
  abstract = {DNA microarrays can be used to identify gene expression changes characteristic
	of human disease. This is challenging, however, when relevant differences
	are subtle at the level of individual genes. We introduce an analytical
	strategy, Gene Set Enrichment Analysis, designed to detect modest
	but coordinate changes in the expression of groups of functionally
	related genes. Using this approach, we identify a set of genes involved
	in oxidative phosphorylation whose expression is coordinately decreased
	in human diabetic muscle. Expression of these genes is high at sites
	of insulin-mediated glucose disposal, activated by PGC-1alpha and
	correlated with total-body aerobic capacity. Our results associate
	this gene set with clinically important variation in human metabolism
	and illustrate the value of pathway relationships in the analysis
	of genomic profiling experiments.},
  doi = {10.1038/ng1180},
  pdf = {../local/Mootha2003PGC.pdf},
  file = {Mootha2003PGC.pdf:Mootha2003PGC.pdf:PDF},
  institution = {Whitehead Institute/MIT Center for Genome Research, Cambridge, Massachusetts,
	USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {ng1180},
  pmid = {12808457},
  timestamp = {2011.09.21},
  url = {http://dx.doi.org/10.1038/ng1180}
}

@phdthesis{Mordelet2010Learning,
  author    = {Mordelet, F.},
  title     = {Learning from positive and unlabeled examples in biology},
  school    = {Mines ParisTech},
  year      = {2010},
  owner     = {jp},
  timestamp = {2011.01.24}
}

@article{Mordelet2011ProDiGe,
  author = {Fantine Mordelet and Jean-Philippe Vert},
  title = {{ProDiGe}: Prioritization Of Disease Genes with multitask machine
	learning from positive and unlabeled examples.},
  journal = {BMC Bioinformatics},
  year = {2011},
  volume = {12},
  pages = {389},
  __markedentry = {[jp]},
  abstract = {Elucidating the genetic basis of human diseases is a central goal
	of genetics and molecular biology. While traditional linkage analysis
	and modern high-throughput techniques often provide long lists of
	tens or hundreds of disease gene candidates, the identification of
	disease genes among the candidates remains time-consuming and expensive.
	Efficient computational methods are therefore needed to prioritize
	genes within the list of candidates, by exploiting the wealth of
	information available about the genes in various databases. We propose
	ProDiGe, a novel algorithm for Prioritization of Disease Genes. ProDiGe
	implements a novel machine learning strategy based on learning from
	positive and unlabeled examples, which allows to integrate various
	sources of information about the genes, to share information about
	known disease genes across diseases, and to perform genome-wide searches
	for new disease genes. Experiments on real data show that ProDiGe
	outperforms state-of-the-art methods for the prioritization of genes
	in human diseases. ProDiGe implements a new machine learning paradigm
	for gene prioritization, which could help the identification of new
	disease genes. It is freely available at http://cbio.ensmp.fr/prodige.},
  doi = {10.1186/1471-2105-12-389},
  pdf = {../local/Mordelet2011ProDiGe.pdf},
  file = {Mordelet2011ProDiGe.pdf:Mordelet2011ProDiGe.pdf:PDF},
  institution = {Centre for Computational Biology, Mines ParisTech, Fontainebleau,
	F-77300 France.},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-12-389},
  pmid = {21977986},
  timestamp = {2012.03.12},
  url = {http://dx.doi.org/10.1186/1471-2105-12-389}
}

@techreport{Mordelet2010bagging,
  author = {Mordelet, F. and Vert, J.-P.},
  title = {A bagging {SVM} to learn from positive and unlabeled examples},
  institution = {HAL},
  year = {2010},
  number = {00523336},
  month = {Oct},
  abstract = {We consider the problem of learning a binary classifier from a training
	set of positive and unlabeled examples, both in the inductive and
	in the transductive setting. This problem, often referred to as \emph{PU
	learning}, differs from the standard supervised classification problem
	by the lack of negative examples in the training set. It corresponds
	to an ubiquitous situation in many applications such as information
	retrieval or gene ranking, when we have identified a set of data
	of interest sharing a particular property, and we wish to automatically
	retrieve additional data sharing the same property among a large
	and easily available pool of unlabeled data. We propose a conceptually
	simple method, akin to bagging, to approach both inductive and transductive
	PU learning problems, by converting them into series of supervised
	binary classification problems discriminating the known positive
	examples from random subsamples of the unlabeled set. We empirically
	demonstrate the relevance of the method on simulated and real data,
	where it performs at least as well as existing methods while being
	faster.},
  owner = {jp},
  timestamp = {2010.11.01},
  url = {http://hal.archives-ouvertes.fr/hal-00523336}
}

@article{Mordelet2008SIRENE,
  author    = {Mordelet, F. and Vert, J.-P.},
  title     = {{SIRENE}: Supervised inference of regulatory networks},
  journal   = {Bioinformatics},
  year      = {2008},
  volume    = {24},
  pages     = {i76--i82},
  number    = {16},
  doi       = {10.1093/bioinformatics/btn273},
  pdf       = {../local/Mordelet2008SIRENE.pdf},
  file      = {Mordelet2008SIRENE.pdf:Mordelet2008SIRENE.pdf:PDF},
  timestamp = {2008.05.26},
  url       = {http://dx.doi.org/10.1093/bioinformatics/btn273}
}

@article{Moreau1980Autocorrelation,
  author = {G. Moreau and P. Broto},
  title = {Autocorrelation of molecular structures: Application
	to {SAR} studies},
  journal = {Nouv. J. Chim.},
  year = {1980},
  volume = {4},
  pages = {757--764},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.09.08}
}

@article{moreau1963inf,
  author = {Moreau, J.-J.},
  title = {Inf-convolution des fonctions num{\'e}riques sur un espace vectoriel
	proximit{\'e} et dualit{\'e} dans un espace hilbertien},
  journal = {Comptes Rendus de l'Acad{\'e}mie des Sciences de Paris},
  year = {1963},
  volume = {256},
  pages = {125--129}
}

@article{Moreau1965Proximite,
  author = {Moreau, J.-J.},
  title = {Proximit{\'e} et dualit{\'e} dans un espace hilbertien},
  journal = {Bulletin de la S.M.F.},
  year = {1965},
  volume = {93},
  pages = {273--299},
  owner = {anne-clairehaury},
  timestamp = {2012.10.14}
}

@article{Morgan1965Generation,
  author = {Morgan, H. L.},
  title = {The {Generation} of {Unique} {Machine} {Description} for {Chemical}
	{Structures} - {A} {Technique} {Developed} at {Chemical} {Abstracts}
	{Service}},
  journal = {J Chem Doc},
  year = {1965},
  volume = {5},
  pages = {107--113},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@article{Morin2008Application,
  author = {Ryan D Morin and Michael D O'Connor and Malachi Griffith and Florian
	Kuchenbauer and Allen Delaney and Anna-Liisa Prabhu and Yongjun Zhao
	and Helen McDonald and Thomas Zeng and Martin Hirst and Connie J
	Eaves and Marco A Marra},
  title = {Application of massively parallel sequencing to {microRNA} profiling
	and discovery in human embryonic stem cells.},
  journal = {Genome Res},
  year = {2008},
  volume = {18},
  pages = {610--621},
  number = {4},
  month = apr,
  abstract = {MicroRNAs (miRNAs) are emerging as important, albeit poorly characterized,
	regulators of biological processes. Key to further elucidation of
	their roles is the generation of more complete lists of their numbers
	and expression changes in different cell states. Here, we report
	a new method for surveying the expression of small RNAs, including
	microRNAs, using Illumina sequencing technology. We also present
	a set of methods for annotating sequences deriving from known miRNAs,
	identifying variability in mature miRNA sequences, and identifying
	sequences belonging to previously unidentified miRNA genes. Application
	of this approach to RNA from human embryonic stem cells obtained
	before and after their differentiation into embryoid bodies revealed
	the sequences and expression levels of 334 known plus 104 novel miRNA
	genes. One hundred seventy-one known and 23 novel microRNA sequences
	exhibited significant expression differences between these two developmental
	states. Owing to the increased number of sequence reads, these libraries
	represent the deepest miRNA sampling to date, spanning nearly six
	orders of magnitude of expression. The predicted targets of those
	miRNAs enriched in either sample shared common features. Included
	among the high-ranked predicted gene targets are those implicated
	in differentiation, cell cycle control, programmed cell death, and
	transcriptional regulation.},
  doi = {10.1101/gr.7179508},
  pdf = {../local/Morin2008Application.pdf},
  file = {Morin2008Application.pdf:Morin2008Application.pdf:PDF},
  institution = {Genome Sciences Centre, BC Cancer Agency, Vancouver, British Columbia
	V5Z 1L3, Canada.},
  keywords = {ngs, sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gr.7179508},
  pmid = {18285502},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1101/gr.7179508}
}

@article{Morley2004Genetic,
  author = {Morley, Michael and Molony, Cliona M. and Weber, Teresa M. and Devlin,
	James L. and Ewens, Kathryn G. and Spielman, Richard S. and Cheung,
	Vivian G.},
  title = {Genetic analysis of genome-wide variation in human gene expression.},
  journal = {Nature},
  year = {2004},
  volume = {430},
  pages = {743--747},
  number = {7001},
  month = aug,
  abstract = {Natural variation in gene expression is extensive in humans and other
	organisms, and variation in the baseline expression level of many
	genes has a heritable component. To localize the genetic determinants
	of these quantitative traits (expression phenotypes) in humans, we
	used microarrays to measure gene expression levels and performed
	genome-wide linkage analysis for expression levels of 3,554 genes
	in 14 large families. For approximately 1,000 expression phenotypes,
	there was significant evidence of linkage to specific chromosomal
	regions. Both cis- and trans-acting loci regulate variation in the
	expression levels of genes, although most act in trans. Many gene
	expression phenotypes are influenced by several genetic determinants.
	Furthermore, we found hotspots of transcriptional regulation where
	significant evidence of linkage for several expression phenotypes
	(up to 31) coincides, and expression levels of many genes that share
	the same regulatory region are significantly correlated. The combination
	of microarray techniques for phenotyping and linkage analysis for
	quantitative traits allows the genetic mapping of determinants that
	contribute to variation in human gene expression.},
  doi = {10.1038/nature02797},
  institution = {Department of Pediatrics, University of Pennsylvania, The Children's
	Hospital of Philadelphia, Philadelphia, Pennsylvania 19104, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature02797},
  pmid = {15269782},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1038/nature02797}
}

@article{Morris2005Real,
  author = {Morris, R. J. and Najmanovich, R. J. and Kahraman, A. and Thornton,
	J. M.},
  title = {Real spherical harmonic expansion coefficients as {3D} shape descriptors
	for protein binding pocket and ligand comparisons.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2347--2355},
  number = {10},
  month = may,
  abstract = {MOTIVATION: An increasing number of protein structures are being determined
	for which no biochemical characterization is available. The analysis
	of protein structure and function assignment is becoming an unexpected
	challenge and a major bottleneck towards the goal of well-annotated
	genomes. As shape plays a crucial role in biomolecular recognition
	and function, the examination and development of shape description
	and comparison techniques is likely to be of prime importance for
	understanding protein structure-function relationships. RESULTS:
	A novel technique is presented for the comparison of protein binding
	pockets. The method uses the coefficients of a real spherical harmonics
	expansion to describe the shape of a protein's binding pocket. Shape
	similarity is computed as the L2 distance in coefficient space. Such
	comparisons in several thousands per second can be carried out on
	a standard linux PC. Other properties such as the electrostatic potential
	fit seamlessly into the same framework. The method can also be used
	directly for describing the shape of proteins and other molecules.
	AVAILABILITY: A limited version of the software for the real spherical
	harmonics expansion of a set of points in PDB format is freely available
	upon request from the authors. Binding pocket comparisons and ligand
	prediction will be made available through the protein structure annotation
	pipeline Profunc (written by Roman Laskowski) which will be accessible
	from the EBI website shortly.},
  owner = {vero},
  pmid = {15728116},
  timestamp = {2009.02.04}
}

@article{Morris2007Identification,
  author = {Stephanie A Morris and Bhargavi Rao and Benjamin A Garcia and Sandra
	B Hake and Robert L Diaz and Jeffrey Shabanowitz and Donald F Hunt
	and C. David Allis and Jason D Lieb and Brian D Strahl},
  title = {Identification of histone {H3} lysine 36 acetylation as a highly conserved
	histone modification.},
  journal = {J Biol Chem},
  year = {2007},
  volume = {282},
  pages = {7632--7640},
  number = {10},
  month = mar,
  abstract = {Histone lysine acetylation is a major mechanism by which cells regulate
	the structure and function of chromatin, and new sites of acetylation
	continue to be discovered. Here we identify and characterize histone
	H3K36 acetylation (H3K36ac). By mass spectrometric analyses of H3
	purified from Tetrahymena thermophila and Saccharomyces cerevisiae
	(yeast), we find that H3K36 can be acetylated or methylated. Using
	an antibody specific to H3K36ac, we show that this modification is
	conserved in mammals. In yeast, genome-wide ChIP-chip experiments
	show that H3K36ac is localized predominantly to the promoters of
	RNA polymerase II-transcribed genes, a pattern inversely related
	to that of H3K36 methylation. The pattern of H3K36ac localization
	is similar to that of other sites of H3 acetylation, including H3K9ac
	and H3K14ac. Using histone acetyltransferase complexes purified from
	yeast, we show that the Gcn5-containing SAGA complex that regulates
	transcription specifically acetylates H3K36 in vitro. Deletion of
	GCN5 completely abolishes H3K36ac in vivo. These data expand our
	knowledge of the genomic targets of Gcn5, show H3K36ac is highly
	conserved, and raise the intriguing possibility that the transition
	between H3K36ac and H3K36me acts as an "acetyl/methyl switch" governing
	chromatin function along transcription units.},
  doi = {10.1074/jbc.M607909200},
  institution = {Department of Biochemistry and Biophysics, University of North Carolina
	School of Medicine, Chapel Hill, North Carolina 27599, USA.},
  keywords = {Acetylation; Amino Acid Sequence; Animals; Chromatin Immunoprecipitation;
	Conserved Sequence; Histone Acetyltransferases, physiology; Histones,
	chemistry; Humans; Lysine; Methylation; Mice; Molecular Sequence
	Data; Promoter Regions, Genetic; Saccharomyces cerevisiae Proteins,
	physiology; Saccharomyces cerevisiae, chemistry; Tetrahymena, chemistry},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {M607909200},
  pmid = {17189264},
  timestamp = {2010.11.23},
  url = {http://dx.doi.org/10.1074/jbc.M607909200}
}

@article{Mortazavi2008Mapping,
  author = {Mortazavi, A. and Williams, B. A. and McCue, K. and Schaeffer, L.
	and Wold, B.},
  title = {Mapping and quantifying mammalian transcriptomes by {RNA-Seq}},
  journal = {Nat. Methods},
  year = {2008},
  volume = {5},
  pages = {621--628},
  number = {7},
  month = jul,
  abstract = {We have mapped and quantified mouse transcriptomes by deeply sequencing
	them and recording how frequently each gene is represented in the
	sequence sample (RNA-Seq). This provides a digital measure of the
	presence and prevalence of transcripts from known and previously
	unknown genes. We report reference measurements composed of 41-52
	million mapped 25-base-pair reads for poly(A)-selected RNA from adult
	mouse brain, liver and skeletal muscle tissues. We used RNA standards
	to quantify transcript prevalence and to test the linear range of
	transcript detection, which spanned five orders of magnitude. Although
	>90\% of uniquely mapped reads fell within known exons, the remaining
	data suggest new and revised gene models, including changed or additional
	promoters, exons and 3' untranscribed regions, as well as new candidate
	microRNA precursors. RNA splice events, which are not readily measured
	by standard gene expression microarray or serial analysis of gene
	expression methods, were detected directly by mapping splice-crossing
	sequence reads. We observed 1.45 x 10(5) distinct splices, and alternative
	splices were prominent, with 3,500 different genes expressing one
	or more alternate internal splices.},
  doi = {10.1038/nmeth.1226},
  pdf = {../local/Mortazavi2008Mapping.pdf},
  file = {Mortazavi2008Mapping.pdf:Mortazavi2008Mapping.pdf:PDF},
  institution = {Division of Biology, MC 156-29, California Institute of Technology,
	Pasadena, California 91125, USA.},
  owner = {jp},
  pii = {nmeth.1226},
  pmid = {18516045},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1038/nmeth.1226}
}

@article{Morvai1997Weakly,
  author = {Morvai, G. and Yakowitz, S. J. and Algoet, P.},
  title = {Weakly convergent nonparametric forecasting of stationary time series},
  journal = {{IEEE} Trans. Inform. Theory},
  year = {1997},
  volume = {43},
  pages = {483--498},
  number = {2},
  month = mar,
  abstract = {The conditional distribution of the next outcome given the infinite
	past of a stationary process can be inferred from finite but growing
	segments of the past. {S}everal schemes are known for constructing
	pointwise consistent estimates, but they all demand prohibitive amounts
	of input data. {W}e consider real-valued time series and construct
	conditional distribution estimates that make much more efficient
	use of the input data. {T}he estimates are consistent in a weak sense,
	and the question whether they are pointwise-consistent is still open.
	{F}or finite-alphabet processes one may rely on a universal data
	compression scheme like the {L}empel-{Z}iv (1978) algorithm to construct
	conditional probability mass function estimates that are consistent
	in expected information divergence. {C}onsistency in this strong
	sense cannot be attained in a universal sense for all stationary
	processes with values in an infinite alphabet, but weak consistency
	can. {S}ome applications of the estimates to on-line forecasting,
	regression, and classification are discussed },
  pdf = {../local/Morvai1997Weakly.pdf},
  file = {Morvai1997Weakly.pdf:local/Morvai1997Weakly.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Mosley2008Cell,
  author = {Mosley, J. D. and Keri, R. A.},
  title = {Cell cycle correlated genes dictate the prognostic power of breast
	cancer gene lists},
  journal = {BMC Medical Genomics},
  year = {2008},
  volume = {1},
  pages = {11},
  number = {1},
  publisher = {BioMed Central Ltd}
}

@techreport{Mukherjee1998Support,
  author = {S. Mukherjee and P. Tamayo and J. P. Mesirov and D. Slonim and A.
	Verri and T. Poggio},
  title = {Support vector machine classification of microarray data},
  institution = {Center for Biological and Computational Learning (C.B.C.L.),
	Massachusetts Institute of Technology},
  year = {1998},
  number = {182},
  note = {A.I. Memo 1677},
  pdf = {../local/Mukherjee1998Support.pdf},
  file = {Mukherjee1998Support.pdf:local/Mukherjee1998Support.pdf:PDF},
  keywords = {biosvm microarray},
  subject = {biokernel},
  url = {http://citeseer.nj.nec.com/437379.html}
}

@techreport{Murphy1999Modelling,
  author = {Murphy, K. and Mian, S.},
  title = {Modelling gene expression data using dynamic {Bayesian} networks},
  institution = {Computer Science Division, University of California, Berkeley, CA},
  year = {1999},
  pdf = {../local/Murphy1999Modelling.pdf},
  file = {Murphy1999Modelling.pdf:local/Murphy1999Modelling.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  timestamp = {2006.01.18}
}

@inproceedings{Murphy2003Using,
  author = {Murphy, Kevin and Torralba, Antonio and Freeman, William T.},
  title = {Using the forest to see the trees: a graphical model relating features,
	objects and scenes},
  booktitle = {Adv. Neural Inform. Process. Syst.},
  year = {2003},
  address = {Vancouver, BC},
  publisher = {MIT Press},
  pdf = {../local/Murphy2003Using.pdf},
  file = {Murphy2003Using.pdf:local/Murphy2003Using.pdf:PDF},
  keywords = {conditional-random-field},
  owner = {vert}
}

@article{Murzin1995SCOP,
  author = {Murzin, A. G. and Brenner, S. E. and Hubbard, T. and Chothia, C.},
  title = {{SCOP}: A structural classification of proteins database for the
	investigation of sequences and structures},
  journal = {J. Mol. Biol.},
  year = {1995},
  volume = {247},
  pages = {536--540}
}

@article{Mustafi2009Topology,
  author = {Debarshi Mustafi and Krzysztof Palczewski},
  title = {Topology of class {A} {G} protein-coupled receptors: insights gained
	from crystal structures of rhodopsins, adrenergic and adenosine receptors.},
  journal = {Mol Pharmacol},
  year = {2009},
  volume = {75},
  pages = {1--12},
  number = {1},
  month = jan,
  abstract = {Biological membranes are densely packed with membrane proteins that
	occupy approximately half of their volume. In almost all cases, membrane
	proteins in the native state lack the higher-order symmetry required
	for their direct study by diffraction methods. Despite many technical
	difficulties, numerous crystal structures of detergent solubilized
	membrane proteins have been determined that illustrate their internal
	organization. Among such proteins, class A G protein-coupled receptors
	have become amenable to crystallization and high resolution X-ray
	diffraction analyses. The derived structures of native and engineered
	receptors not only provide insights into their molecular arrangements
	but also furnish a framework for designing and testing potential
	models of transformation from inactive to active receptor signaling
	states and for initiating rational drug design.},
  doi = {10.1124/mol.108.051938},
  institution = {Department of Pharmacology, School of Medicine, Case Western Reserve
	University, Cleveland, Ohio 44106-4965, USA.},
  keywords = {Animals; Crystallography, X-Ray; Humans; Models, Molecular; Protein
	Structure, Secondary; Receptors, Adrenergic; Receptors, G-Protein-Coupled;
	Receptors, Purinergic P1; Rhodopsin},
  owner = {ljacob},
  pii = {mol.108.051938},
  pmid = {18945819},
  timestamp = {2009.11.09},
  url = {http://dx.doi.org/10.1124/mol.108.051938}
}

@article{Myasnikova2002Support,
  author = {Myasnikova, E. and Samsonova, A. and Samsonova, M. and Reinitz, J.},
  title = {Support vector regression applied to the determination of the developmental
	age of a {Drosophila} embryo from its segmentation gene expression
	patterns},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {S87--S95},
  number = {Suppl. 1},
  abstract = {Motivation: {I}n this paper we address the problem of the determination
	of developmental age of an embryo from its segmentation gene expression
	patterns in {D}rosophila. {R}esults: {B}y applying support vector
	regression we have developed a fast method for automated staging
	of an embryo on the basis of its gene expression pattern. {S}upport
	vector regression is a statistical method for creating regression
	functions of arbitrary type from a set of training data. {T}he training
	set is composed of embryos for which the precise developmental age
	was determined by measuring the degree of membrane invagination.
	{T}esting the quality of regression on the training set showed good
	prediction accuracy. {T}he optimal regression function was then used
	for the prediction of the gene expression based age of embryos in
	which the precise age has not been measured by membrane morphology.
	{M}oreover, we show that the same accuracy of prediction can be achieved
	when the dimensionality of the feature vector was reduced by applying
	factor analysis. {T}he data reduction allowed us to avoid over-fitting
	and to increase the efficiency of the algorithm. {A}vailability:
	{T}his software may be obtained from the authors. {C}ontact: samson@fn.csa.ru
	{K}eywords: gene expression patterns; development; embryo staging;
	support vector regression; segmentation genes; {D}rosophila.},
  pdf = {../local/Myasnikova2002Support.pdf},
  file = {Myasnikova2002Support.pdf:local/Myasnikova2002Support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/18/suppl_1/S87}
}

@article{Mueller2005Classifying,
  author = {K.-R. M{\"u}ller and G. R{\"a}tsch and S. Sonnenburg and S. Mika
	and M. Grimm and N. Heinrich},
  title = {Classifying `drug-likeness' with {K}ernel-based learning methods.},
  journal = {J Chem Inf Model},
  year = {2005},
  volume = {45},
  pages = {249--253},
  number = {2},
  abstract = {In this article we report about a successful application of modern
	machine learning technology, namely {S}upport {V}ector {M}achines,
	to the problem of assessing the 'drug-likeness' of a chemical from
	a given set of descriptors of the substance. {W}e were able to drastically
	improve the recent result by {B}yvatov et al. (2003) on this task
	and achieved an error rate of about 7\% on unseen compounds using
	{S}upport {V}ector {M}achines. {W}e see a very high potential of
	such machine learning techniques for a variety of computational chemistry
	problems that occur in the drug discovery and drug design process.},
  doi = {10.1021/ci049737o},
  pdf = {../local/Mueller2005Classifying.pdf},
  file = {Mueller2005Classifying.pdf:local/Mueller2005Classifying.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci049737o}
}

@article{Nabieva2005Whole-proteome,
  author = {Elena Nabieva and Kam Jim and Amit Agarwal and Bernard Chazelle and
	Mona Singh},
  title = {Whole-proteome prediction of protein function via graph-theoretic
	analysis of interaction maps.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {i302--i310},
  number = {Suppl. 1},
  month = jun,
  abstract = {MOTIVATION: Determining protein function is one of the most important
	problems in the post-genomic era. For the typical proteome, there
	are no functional annotations for one-third or more of its proteins.
	Recent high-throughput experiments have determined proteome-scale
	protein physical interaction maps for several organisms. These physical
	interactions are complemented by an abundance of data about other
	types of functional relationships between proteins, including genetic
	interactions, knowledge about co-expression and shared evolutionary
	history. Taken together, these pairwise linkages can be used to build
	whole-proteome protein interaction maps. RESULTS: We develop a network-flow
	based algorithm, FunctionalFlow, that exploits the underlying structure
	of protein interaction maps in order to predict protein function.
	In cross-validation testing on the yeast proteome, we show that FunctionalFlow
	has improved performance over previous methods in predicting the
	function of proteins with few (or no) annotated protein neighbors.
	By comparing several methods that use protein interaction maps to
	predict protein function, we demonstrate that FunctionalFlow performs
	well because it takes advantage of both network topology and some
	measure of locality. Finally, we show that performance can be improved
	substantially as we consider multiple data sources and use them to
	create weighted interaction networks. AVAILABILITY: http://compbio.cs.princeton.edu/function},
  doi = {10.1093/bioinformatics/bti1054},
  institution = {Computer Science Department, Princeton University Princeton, NJ 08544,
	USA.},
  keywords = {Algorithms; Computational Biology, methods; Evolution, Molecular;
	Fungal Proteins, chemistry; Genomics; Models, Statistical; Models,
	Theoretical; Protein Interaction Mapping, methods; Proteins, chemistry;
	Proteomics, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {21/suppl_1/i302},
  pmid = {15961472},
  timestamp = {2010.04.03},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1054}
}

@article{Nacu2007Gene,
  author = {Nacu, S. and Critchley-Thorne, R. and Lee, P. and Holmes, S.},
  title = {Gene expression network analysis and applications to immunology},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {850--858},
  number = {7},
  month = apr,
  abstract = {We address the problem of using expression data and prior biological
	knowledge to identify differentially expressed pathways or groups
	of genes. Following an idea of Ideker et al. (2002), we construct
	a gene interaction network and search for high-scoring subnetworks.
	We make several improvements in terms of scoring functions and algorithms,
	resulting in higher speed and accuracy and easier biological interpretation.
	We also assign significance levels to our results, adjusted for multiple
	testing. Our methods are successfully applied to three human microarray
	data sets, related to cancer and the immune system, retrieving several
	known and potential pathways. The method, denoted by the acronym
	GXNA (Gene eXpression Network Analysis) is implemented in software
	that is publicly available and can be used on virtually any microarray
	data set. SUPPLEMENTARY INFORMATION: The source code and executable
	for the software, as well as certain supplemental materials, can
	be downloaded from http://stat.stanford.edu/~serban/gxna.},
  doi = {10.1093/bioinformatics/btm019},
  pdf = {../local/Nacu2007Gene.pdf},
  file = {Nacu2007Gene.pdf:Nacu2007Gene.pdf:PDF},
  institution = {Department of Statistics, Stanford University, Stanford, CA 94305,
	USA. serban@stat.stanford.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btm019},
  pmid = {17267429},
  timestamp = {2011.10.04},
  url = {http://dx.doi.org/10.1093/bioinformatics/btm019}
}

@article{Naderi2006gene-expression,
  author = {Naderi, A. and Teschendorff, A. E. and Barbosa-Morais, N. L. and
	Pinder, S. E. and Green, A. R. and Powe, D. G. and Robertson, J.
	F. R. and Aparicio, S. and Ellis, I. O. and Brenton, J. D. and Caldas,
	C.},
  title = {A gene-expression signature to predict survival in breast cancer
	across independent data sets},
  journal = {Oncogene},
  year = {2007},
  volume = {26},
  pages = {1507--1516},
  number = {10},
  doi = {10.1038/sj.onc.1209920},
  owner = {jp},
  timestamp = {2011.01.14},
  url = {http://dx.doi.org/10.1038/sj.onc.1209920}
}

@article{Nair2005Mimicking,
  author = {Rajesh Nair and Burkhard Rost},
  title = {Mimicking cellular sorting improves prediction of subcellular localization.},
  journal = {J Mol Biol},
  year = {2005},
  volume = {348},
  pages = {85--100},
  number = {1},
  month = apr,
  abstract = {Predicting the native subcellular compartment of a protein is an important
	step toward elucidating its function. {H}ere we introduce {LOC}tree,
	a hierarchical system combining support vector machines ({SVM}s)
	and other prediction methods. {LOC}tree predicts the subcellular
	compartment of a protein by mimicking the mechanism of cellular sorting
	and exploiting a variety of sequence and predicted structural features
	in its input. {C}urrently {LOC}tree does not predict localization
	for membrane proteins, since the compositional properties of membrane
	proteins significantly differ from those of non-membrane proteins.
	{W}hile any information about function can be used by the system,
	we present estimates of performance that are valid when only the
	amino acid sequence of a protein is known. {W}hen evaluated on a
	non-redundant test set, {LOC}tree achieved sustained levels of 74\%
	accuracy for non-plant eukaryotes, 70\% for plants, and 84\% for
	prokaryotes. {W}e rigorously benchmarked {LOC}tree in comparison
	to the best alternative methods for localization prediction. {LOC}tree
	outperformed all other methods in nearly all benchmarks. {L}ocalization
	assignments using {LOC}tree agreed quite well with data from recent
	large-scale experiments. {O}ur preliminary analysis of a few entirely
	sequenced organisms, namely human ({H}omo sapiens), yeast ({S}accharomyces
	cerevisiae), and weed ({A}rabidopsis thaliana) suggested that over
	35\% of all non-membrane proteins are nuclear, about 20\% are retained
	in the cytosol, and that every fifth protein in the weed resides
	in the chloroplast.},
  doi = {10.1016/j.jmb.2005.02.025},
  pdf = {../local/Nair2005Mimicking.pdf},
  file = {Nair2005Mimicking.pdf:local/Nair2005Mimicking.pdf:PDF},
  keywords = {biosvm},
  pii = {S0022-2836(05)00177-4},
  url = {http://dx.doi.org/10.1016/j.jmb.2005.02.025}
}

@article{Najmanovich2008Detection,
  author = {Najmanovich, R. and Kurbatova, N. and Thornton, J.},
  title = {Detection of {3D} atomic similarities and their use in the discrimination
	of small molecule protein-binding sites.},
  journal = {Bioinformatics},
  year = {2008},
  volume = {24},
  pages = {i105--i111},
  number = {16},
  month = aug,
  abstract = {MOTIVATION: Current computational methods for the prediction of function
	from structure are restricted to the detection of similarities and
	subsequent transfer of functional annotation. In a significant minority
	of cases, global sequence or structural (fold) similarities do not
	provide clues about protein function. In these cases, one alternative
	is to detect local binding site similarities. These may still reflect
	more distant evolutionary relationships as well as unique physico-chemical
	constraints necessary for binding similar ligands, thus helping pinpoint
	the function. In the present work, we ask the following question:
	is it possible to discriminate within a dataset of non-homologous
	proteins those that bind similar ligands based on their binding site
	similarities? METHODS: We implement a graph-matching-based method
	for the detection of 3D atomic similarities introducing some simplifications
	that allow us to extend its applicability to the analysis of large
	allatom binding site models. This method, called IsoCleft, does not
	require atoms to be connected either in sequence or space. We apply
	the method to a cognate-ligand bound dataset of non-homologous proteins.
	We define a family of binding site models with decreasing knowledge
	about the identity of the ligand-interacting atoms to uncouple the
	questions of predicting the location of the binding site and detecting
	binding site similarities. Furthermore, we calculate the individual
	contributions of binding site size, chemical composition and geometry
	to prediction performance. RESULTS: We find that it is possible to
	discriminate between different ligand-binding sites. In other words,
	there is a certain uniqueness in the set of atoms that are in contact
	to specific ligand scaffolds. This uniqueness is restricted to the
	atoms in close proximity of the ligand in which case, size and chemical
	composition alone are sufficient to discriminate binding sites. Discrimination
	ability decreases with decreasing knowledge about the identity of
	the ligand-interacting binding site atoms. The decrease is quite
	abrupt when considering size and chemical composition alone, but
	much slower when including geometry. We also observe that certain
	ligands are easier to discriminate. Interestingly, the subset of
	binding site atoms belonging to highly conserved residues is not
	sufficient to discriminate binding sites, implying that convergently
	evolved binding sites arrived at dissimilar solutions. AVAILABILITY:
	IsoCleft can be obtained from the authors.},
  owner = {vero},
  pmid = {18689810},
  timestamp = {2009.02.04}
}

@article{Nakabayashi2006JTB,
  author = {Nakabayashi, J. and Sasaki, A.},
  title = {A mathematical model for apoptosome assembly: The optimal cytochrome
	c/{Apaf-1} ratio},
  journal = {Journal of Theoretical Biology},
  year = {2006},
  volume = {242},
  pages = {280--287},
  number = {2},
  abstract = {Apoptosis, a highly conserved form of cell suicide, is regulated by
	apoptotic signals and their transduction with caspases, a family
	of cystein proteases. Caspases are constantly expressed in the normal
	cells as inactive pro-enzymes. The activity of caspase is regulated
	by the proteolysis. Sequential proteolytic reactions of caspases
	are needed to execute apoptosis. Mitochondrial pathway is one of
	these apoptotic signal pathways, in which caspases are oligomerized
	into characteristic heptamer structure, called apoptosome, with caspase-9
	that activate the effector caspases for apoptosis. To investigate
	the dynamics of signal transduction pathway regulated by oligomerization,
	we construct a mathematical model for Apaf-1 heptamer assembly process.
	The model first reveals that intermediate products can remain unconverted
	even after all assemble reactions are completed. The second result
	of the model is that the conversion efficiency of Apaf-1 heptamer
	assembly is maximized when the initial concentration of cytochrome
	c is equal to that of Apaf-1. When the concentration of cytochrome
	c is sufficiently larger or smaller than that of Apaf-1, the final
	Apaf-1 heptamer production is decreased, because intermediate Apaf-1
	oligomers (tetramers and bigger oligomers), which themselves are
	unable to form active heptamer, accumulate too fast in the cells,
	choking a smooth production of Apaf-1 heptamer. Slow activation of
	Apaf-1 monomers and small oligomers increase the conversion efficiency.
	We also study the optimal number of subunits comprising an active
	oligomer that maximize the conversion efficiency in assembly process,
	and found that the tetramer is the optimum.},
  doi = {10.1016/j.jtbi.2006.02.022},
  issn = {0022-5193},
  keywords = {csbcbook},
  url = {http://www.sciencedirect.com/science/article/B6WMD-4JVT1Y9-1/2/88df2dfdc8df3b8d6a17c602e71aeb74}
}

@article{Nakabayashi2006Mathematical,
  author = {Nakabayashi, J. and Sasaki, A.},
  title = {A mathematical model for apoptosome assembly: The optimal cytochrome
	c/{Apaf-1} ratio},
  journal = {J. Theor. Biol.},
  year = {2006},
  volume = {242},
  pages = {280--287},
  number = {2},
  abstract = {Apoptosis, a highly conserved form of cell suicide, is regulated by
	apoptotic signals and their transduction with caspases, a family
	of cystein proteases. Caspases are constantly expressed in the normal
	cells as inactive pro-enzymes. The activity of caspase is regulated
	by the proteolysis. Sequential proteolytic reactions of caspases
	are needed to execute apoptosis. Mitochondrial pathway is one of
	these apoptotic signal pathways, in which caspases are oligomerized
	into characteristic heptamer structure, called apoptosome, with caspase-9
	that activate the effector caspases for apoptosis. To investigate
	the dynamics of signal transduction pathway regulated by oligomerization,
	we construct a mathematical model for Apaf-1 heptamer assembly process.
	The model first reveals that intermediate products can remain unconverted
	even after all assemble reactions are completed. The second result
	of the model is that the conversion efficiency of Apaf-1 heptamer
	assembly is maximized when the initial concentration of cytochrome
	c is equal to that of Apaf-1. When the concentration of cytochrome
	c is sufficiently larger or smaller than that of Apaf-1, the final
	Apaf-1 heptamer production is decreased, because intermediate Apaf-1
	oligomers (tetramers and bigger oligomers), which themselves are
	unable to form active heptamer, accumulate too fast in the cells,
	choking a smooth production of Apaf-1 heptamer. Slow activation of
	Apaf-1 monomers and small oligomers increase the conversion efficiency.
	We also study the optimal number of subunits comprising an active
	oligomer that maximize the conversion efficiency in assembly process,
	and found that the tetramer is the optimum.},
  doi = {10.1016/j.jtbi.2006.02.022},
  issn = {0022-5193},
  keywords = {csbcbook},
  owner = {jp},
  timestamp = {2012.05.11},
  url = {http://www.sciencedirect.com/science/article/B6WMD-4JVT1Y9-1/2/88df2dfdc8df3b8d6a17c602e71aeb74}
}

@article{Nakamura1998ATM,
  author = {Yusuke Nakamura},
  title = {{ATM}: the p53 booster},
  journal = {Nature Medicine},
  year = {1998},
  volume = {4},
  pages = {1231--1232},
  keywords = {csbcbook}
}

@incollection{Nakaya2001Extraction,
  author    = {A. Nakaya and S. Goto and M. Kanehisa},
  title     = {Extraction of correlated gene clusters by multiple graph comparison},
  booktitle = {Genome {I}nformatics 2001},
  pages     = {44--53},
  year      = {2001},
  publisher = {Universal Academy Press},
  address   = {Tokyo, Japan},
  url       = {http://www.jsbi.org/journal/GIW01/GIW01F05.html},
  pdf       = {../local/Nakaya2001Extraction.pdf},
  file      = {Nakaya2001Extraction.pdf:local/Nakaya2001Extraction.pdf:PDF}
}

@article{Nakayama2001Role,
  author = {J. Nakayama and J. C. Rice and B. D. Strahl and C. D. Allis and S.
	I. Grewal},
  title = {Role of histone {H3} lysine 9 methylation in epigenetic control of
	heterochromatin assembly},
  journal = {Science},
  year = {2001},
  volume = {292},
  pages = {110--113},
  number = {5514},
  month = apr,
  abstract = {The assembly of higher order chromatin structures has been linked
	to the covalent modifications of histone tails. We provide in vivo
	evidence that lysine 9 of histone H3 (H3 Lys9) is preferentially
	methylated by the Clr4 protein at heterochromatin-associated regions
	in fission yeast. Both the conserved chromo- and SET domains of Clr4
	are required for H3 Lys9 methylation in vivo. Localization of Swi6,
	a homolog of Drosophila HP1, to heterochromatic regions is dependent
	on H3 Lys9 methylation. Moreover, an H3-specific deacetylase Clr3
	and a beta-propeller domain protein Rik1 are required for H3 Lys9
	methylation by Clr4 and Swi6 localization. These data define a conserved
	pathway wherein sequential histone modifications establish a "histone
	code" essential for the epigenetic inheritance of heterochromatin
	assembly.},
  doi = {10.1126/science.1060118},
  institution = {Cold Spring Harbor Laboratory, Post Office Box 100, Cold Spring Harbor,
	NY 11724, USA.},
  keywords = {Acetylation; Cell Cycle Proteins, chemistry/genetics/metabolism; Centromere,
	metabolism; Chromosomes, Fungal, metabolism; Fungal Proteins, genetics/metabolism;
	Gene Silencing; Genes, Fungal; Heterochromatin, metabolism; Histone
	Deacetylases, genetics/metabolism; Histone-Lysine N-Methyltransferase;
	Histones, chemistry/metabolism; Lysine, metabolism; Methylation;
	Methyltransferases, chemistry/genetics/metabolism; Mutation; Protein
	Methyltransferases; Protein Structure, Tertiary; Recombinant Proteins,
	chemistry/metabolism; Saccharomyces cerevisiae Proteins; Schizosaccharomyces
	pombe Proteins; Schizosaccharomyces, genetics/metabolism; Transcription
	Factors, metabolism},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {1060118},
  pmid = {11283354},
  timestamp = {2010.11.23},
  url = {http://dx.doi.org/10.1126/science.1060118}
}

@article{Narendra1977branch,
  author = {Narendra, P. M. and Fukunaga, K.},
  title = {A branch and bound algorithm for feature subset selection},
  journal = {IEEE Transactions on Computers},
  year = {1977},
  volume = {C-26},
  pages = {917--922},
  number = {9},
  publisher = {IEEE}
}

@article{Natarajan1995Sparse,
  author = {Natarajan, B. K.},
  title = {Sparse Approximate Solutions to Linear Systems},
  journal = {SIAM J. Comput.},
  year = {1995},
  volume = {24},
  pages = {227--234},
  number = {2},
  address = {Philadelphia, PA, USA},
  doi = {10.1137/S0097539792240406},
  issn = {0097-5397},
  publisher = {Society for Industrial and Applied Mathematics}
}

@article{Natsoulis2005Classification,
  author = {Georges Natsoulis and Laurent El Ghaoui and Gert R G Lanckriet and
	Alexander M Tolley and Fabrice Leroy and Shane Dunlea and Barrett
	P Eynon and Cecelia I Pearson and Stuart Tugendreich and Kurt Jarnagin},
  title = {Classification of a large microarray data set: algorithm comparison
	and analysis of drug signatures},
  journal = {Genome {R}es.},
  year = {2005},
  volume = {15},
  pages = {724--736},
  number = {5},
  month = may,
  abstract = {A large gene expression database has been produced that characterizes
	the gene expression and physiological effects of hundreds of approved
	and withdrawn drugs, toxicants, and biochemical standards in various
	organs of live rats. {I}n order to derive useful biological knowledge
	from this large database, a variety of supervised classification
	algorithms were compared using a 597-microarray subset of the data.
	{O}ur studies show that several types of linear classifiers based
	on {S}upport {V}ector {M}achines ({SVM}s) and {L}ogistic {R}egression
	can be used to derive readily interpretable drug signatures with
	high classification performance. {B}oth methods can be tuned to produce
	classifiers of drug treatments in the form of short, weighted gene
	lists which upon analysis reveal that some of the signature genes
	have a positive contribution (act as "rewards" for the class-of-interest)
	while others have a negative contribution (act as "penalties") to
	the classification decision. {T}he combination of reward and penalty
	genes enhances performance by keeping the number of false positive
	treatments low. {T}he results of these algorithms are combined with
	feature selection techniques that further reduce the length of the
	drug signatures, an important step towards the development of useful
	diagnostic biomarkers and low-cost assays. {M}ultiple signatures
	with no genes in common can be generated for the same classification
	end-point. {C}omparison of these gene lists identifies biological
	processes characteristic of a given class.},
  doi = {10.1101/gr.2807605},
  pdf = {../local/Natsoulis2005Classification.pdf},
  file = {Natsoulis2005Classification.pdf:local/Natsoulis2005Classification.pdf:PDF},
  pii = {15/5/724},
  url = {http://dx.doi.org/10.1101/gr.2807605}
}

@article{Natt2004Prediction,
  author = {Natt, N. K. and Kaur, H. and Raghava, G. P.},
  title = {Prediction of transmembrane regions of beta-barrel proteins using
	{ANN}- and {SVM}-based methods},
  journal = {Proteins},
  year = {2004},
  volume = {56},
  pages = {11--18},
  number = {1},
  abstract = {This article describes a method developed for predicting transmembrane
	beta-barrel regions in membrane proteins using machine learning techniques:
	artificial neural network ({ANN}) and support vector machine ({SVM}).
	{T}he {ANN} used in this study is a feed-forward neural network with
	a standard back-propagation training algorithm. {T}he accuracy of
	the {ANN}-based method improved significantly, from 70.4\% to 80.5\%,
	when evolutionary information was added to a single sequence as a
	multiple sequence alignment obtained from {PSI}-{BLAST}. {W}e have
	also developed an {SVM}-based method using a primary sequence as
	input and achieved an accuracy of 77.4\%. {T}he {SVM} model was modified
	by adding 36 physicochemical parameters to the amino acid sequence
	information. {F}inally, {ANN}- and {SVM}-based methods were combined
	to utilize the full potential of both techniques. {T}he accuracy
	and {M}atthews correlation coefficient ({MCC}) value of {SVM}, {ANN},
	and combined method are 78.5\%, 80.5\%, and 81.8\%, and 0.55, 0.63,
	and 0.64, respectively. {T}hese methods were trained and tested on
	a nonredundant data set of 16 proteins, and performance was evaluated
	using "leave one out cross-validation" ({LOOCV}). {B}ased on this
	study, we have developed a {W}eb server, {TBBP}red, for predicting
	transmembrane beta-barrel regions in proteins (available at http://www.imtech.res.in/raghava/tbbpred).},
  doi = {10.1002/prot.20092},
  pdf = {../local/Natt2004Prediction.pdf},
  file = {Natt2004Prediction.pdf:local/Natt2004Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/prot.20092}
}

@article{Nattkemper2005Evaluation,
  author = {Tim W Nattkemper and Bert Arnrich and Oliver Lichte and Wiebke Timm
	and Andreas Degenhard and Linda Pointon and Carmel Hayes and Martin
	O Leach and {The UK MARIBS Breast Screening Study}},
  title = {Evaluation of radiological features for breast tumour classification
	in clinical screening with machine learning methods},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2005},
  volume = {34},
  pages = {129--139},
  number = {2},
  month = jun,
  abstract = {O{BJECTIVE}: {I}n this work, methods utilizing supervised and unsupervised
	machine learning are applied to analyze radiologically derived morphological
	and calculated kinetic tumour features. {T}he features are extracted
	from dynamic contrast enhanced magnetic resonance imaging ({DCE}-{MRI})
	time-course data. {MATERIAL}: {T}he {DCE}-{MRI} data of the female
	breast are obtained within the {UK} {M}ulticenter {B}reast {S}creening
	{S}tudy. {T}he group of patients imaged in this study is selected
	on the basis of an increased genetic risk for developing breast cancer.
	{METHODS}: {T}he k-means clustering and self-organizing maps ({SOM})
	are applied to analyze the signal structure in terms of visualization.
	{W}e employ k-nearest neighbor classifiers (k-nn), support vector
	machines ({SVM}) and decision trees ({DT}) to classify features using
	a computer aided diagnosis ({CAD}) approach. {RESULTS}: {R}egarding
	the unsupervised techniques, clustering according to features indicating
	benign and malignant characteristics is observed to a limited extend.
	{T}he supervised approaches classified the data with 74\% accuracy
	({DT}) and providing an area under the receiver-operator-characteristics
	({ROC}) curve ({AUC}) of 0.88 ({SVM}). {CONCLUSION}: {I}t was found
	that contour and wash-out type ({WOT}) features determined by the
	radiologists lead to the best {SVM} classification results. {A}lthough
	a fast signal uptake in early time-point measurements is an important
	feature for malignant/benign classification of tumours, our results
	indicate that the wash-out characteristics might be considered as
	important.},
  keywords = {breastcancer}
}

@article{Npg2007DNA,
  author = {{Nature Publishing Group}},
  title = {{DNA} {Technologies} - {Milestones} timeline},
  journal = {Nature Milestones},
  year = {2007},
  keywords = {csbcbook, csbcbook-ch2},
  url = {http://www.nature.com/milestones/miledna/timeline.html}
}

@article{Needleman1970general,
  author = {S. B. Needleman and C. D. Wunsch},
  title = {A general method applicable to the search for similarities in the
	amino acid sequences of two proteins},
  journal = {J. {M}ol. {B}iol.},
  year = {1970},
  volume = {48},
  pages = {443--453},
  number = {3}
}

@book{Nesterov1994Interior,
  author = {Y. Nesterov and A. Nemirovsky},
  title = {Interior Point Polynomial Methods in Convex Programming: Theory and
	Applications},
  publisher = {Society for Industrial and Applied Mathematics},
  year = {1994},
  address = {Philadelphia, PA, USA}
}

@article{Netterwald20101000,
  author = {Netterwald, J.},
  title = {The \$1000 Genome: Coming Soon?},
  journal = {Drug Discovery \& Development},
  year = {2010},
  volume = {13},
  pages = {14--15},
  owner = {philippe},
  timestamp = {2010.07.28}
}

@article{Network2008Comprehensive,
  author = {{Cancer Genome Atlas Research Network}},
  title = {Comprehensive genomic characterization defines human glioblastoma
	genes and core pathways},
  journal = {Nature},
  year = {2008},
  volume = {455},
  pages = {1061--1068},
  number = {7216},
  month = oct,
  abstract = {Human cancer cells typically harbour multiple chromosomal aberrations,
	nucleotide substitutions and epigenetic modifications that drive
	malignant transformation. The Cancer Genome Atlas (TCGA) pilot project
	aims to assess the value of large-scale multi-dimensional analysis
	of these molecular characteristics in human cancer and to provide
	the data rapidly to the research community. Here we report the interim
	integrative analysis of DNA copy number, gene expression and DNA
	methylation aberrations in 206 glioblastomas--the most common type
	of adult brain cancer--and nucleotide sequence aberrations in 91
	of the 206 glioblastomas. This analysis provides new insights into
	the roles of ERBB2, NF1 and TP53, uncovers frequent mutations of
	the phosphatidylinositol-3-OH kinase regulatory subunit gene PIK3R1,
	and provides a network view of the pathways altered in the development
	of glioblastoma. Furthermore, integration of mutation, DNA methylation
	and clinical treatment data reveals a link between MGMT promoter
	methylation and a hypermutator phenotype consequent to mismatch repair
	deficiency in treated glioblastomas, an observation with potential
	clinical implications. Together, these findings establish the feasibility
	and power of TCGA, demonstrating that it can rapidly expand knowledge
	of the molecular basis of cancer.},
  doi = {10.1038/nature07385},
  pdf = {../local/Network2008Comprehensive.pdf},
  file = {Network2008Comprehensive.pdf:Network2008Comprehensive.pdf:PDF},
  owner = {jp},
  pii = {nature07385},
  pmid = {18772890},
  timestamp = {2008.11.26},
  url = {http://dx.doi.org/10.1038/nature07385}
}

@book{Neuhaus2007Bridging,
  title = {Bridging the Gap Between Graph Edit Distance and Kernel Machines},
  publisher = {World Scientific},
  year = {2007},
  author = {Neuhaus, M. and Bunke, H.},
  month = sep,
  bibsource = {http://www.visionbib.com/bibliography/match575.html#TT47912}
}

@article{Neuhaus2006Edit,
  author = {Neuhaus, M. and Bunke, H.},
  title = {Edit distance-based kernel functions for structural pattern classification},
  journal = {Pattern Recognition},
  year = {2006},
  volume = {39},
  pages = {1852--1863},
  number = {10},
  month = oct,
  abstract = {A common approach in structural pattern classification is to define
	a dissimilarity measure on patterns and apply a distance-based nearest-neighbor
	classifier. In this paper, we introduce an alternative method for
	classification using kernel functions based on edit distance. The
	proposed approach is applicable to both string and graph representations
	of patterns. By means of the kernel functions introduced in this
	paper, string and graph classification can be performed in an implicit
	vector space using powerful statistical algorithms. The validity
	of the kernel method cannot be established for edit distance in general.
	However, by evaluating theoretical criteria we show that the kernel
	functions are nevertheless suitable for classification, and experiments
	on various string and graph datasets clearly demonstrate that nearest-neighbor
	classifiers can be outperformed by support vector machines using
	the proposed kernel functions.},
  doi = {10.1016/j.patcog.2006.04.012},
  pdf = {../local/Neuhaus2006Edit.pdf},
  file = {Neuhaus2006Edit.pdf:local/Neuhaus2006Edit.pdf:PDF},
  keywords = {image},
  timestamp = {2008.07.29},
  url = {http://dx.doi.org/10.1016/j.patcog.2006.04.012}
}

@inproceedings{Neuhaus06Fast,
  author = {M. Neuhaus and K. Riesen and H. Bunke},
  title = {Fast Suboptimal Algorithms for the Computation of Graph Edit Distance},
  booktitle = {Structural, Syntactic, and Statistical Pattern Recognition ({SSPR}/{SPR} 2006)},
  year = {2006},
  editor = {Dit-Yan Yeung and James T. Kwok and Ana L. N. Fred and Fabio Roli
	and Dick de Ridder},
  volume = {4109},
  series = {Lecture Notes in Computer Science},
  pages = {163--172},
  publisher = {Springer},
  date = {2006-08-22},
  doi = {10.1007/11815921_17},
  ee = {http://dx.doi.org/10.1007/11815921_17},
  interhash = {e36316a00d394d5ad024646e7a6d53f0},
  intrahash = {b05f82717fe7c777540d3510f641f9cb},
  isbn = {3-540-37236-9},
  url = {http://dblp.uni-trier.de/db/conf/sspr/sspr2006.html#NeuhausRB06}
}

@article{Neuvial2006Spatial,
  author = {Pierre Neuvial and Philippe Hup{\'e} and Isabel Brito and St{\'e}phane Liva
	and Elodie Mani{\'e} and Caroline Brennetot and Fran{\c{c}}ois Radvanyi and
	Alain Aurias and Emmanuel Barillot},
  title = {Spatial normalization of array-{CGH} data},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {264},
  abstract = {BACKGROUND: Array-based comparative genomic hybridization (array-CGH)
	is a recently developed technique for analyzing changes in DNA copy
	number. As in all microarray analyses, normalization is required
	to correct for experimental artifacts while preserving the true biological
	signal. We investigated various sources of systematic variation in
	array-CGH data and identified two distinct types of spatial effect
	of no biological relevance as the predominant experimental artifacts:
	continuous spatial gradients and local spatial bias. Local spatial
	bias affects a large proportion of arrays, and has not previously
	been considered in array-CGH experiments. RESULTS: We show that existing
	normalization techniques do not correct these spatial effects properly.
	We therefore developed an automatic method for the spatial normalization
	of array-CGH data. This method makes it possible to delineate and
	to eliminate and/or correct areas affected by spatial bias. It is
	based on the combination of a spatial segmentation algorithm called
	NEM (Neighborhood Expectation Maximization) and spatial trend estimation.
	We defined quality criteria for array-CGH data, demonstrating significant
	improvements in data quality with our method for three data sets
	coming from two different platforms (198, 175 and 26 BAC-arrays).
	CONCLUSION: We have designed an automatic algorithm for the spatial
	normalization of BAC CGH-array data, preventing the misinterpretation
	of experimental artifacts as biologically relevant outliers in the
	genomic profile. This algorithm is implemented in the R package MANOR
	(Micro-Array NORmalization), which is described at http://bioinfo.curie.fr/projects/manor
	and available from the Bioconductor site http://www.bioconductor.org.
	It can also be tested on the CAPweb bioinformatics platform at http://bioinfo.curie.fr/CAPweb.},
  doi = {10.1186/1471-2105-7-264},
  institution = {Institut Curie, Service de Bioinformatique, 26, rue d'Ulm, Paris,
	75248 cedex 05, France. pierre.neuvial@curie.fr},
  keywords = {Algorithms; Artifacts; Base Sequence; Chromosome Mapping, methods;
	Computer Simulation; Data Interpretation, Statistical; Gene Dosage;
	In Situ Hybridization, methods; Models, Genetic; Models, Statistical;
	Molecular Sequence Data; Oligonucleotide Array Sequence Analysis,
	methods; Sequence Analysis, DNA, methods},
  language = {eng},
  medline-pst = {epublish},
  owner = {philippe},
  pii = {1471-2105-7-264},
  pmid = {16716215},
  timestamp = {2010.08.04},
  url = {http://dx.doi.org/10.1186/1471-2105-7-264}
}

@incollection{Newman2002Random,
  author    = {M. E. J. Newman},
  title     = {Random graphs as models of networks},
  booktitle = {Handbook of {G}raphs and {N}etworks},
  editor    = {S. Bornholdt and H. G. Schuster},
  year      = {2002},
  publisher = {Wiley-VCH},
  address   = {Berlin},
  note      = {To appear},
  url       = {http://arxiv.org/abs/cond-mat/0202208/},
  pdf       = {../local/newm02.pdf},
  file      = {newm02.pdf:local/newm02.pdf:PDF},
  subject   = {compnet}
}

@article{Newman2001Random,
  author = {M. E. J. Newman and S. H. Strogatz and D. J. Watts},
  title = {Random graphs with arbitrary degree distributions and their applications},
  journal = {Phys. {R}ev. {E}},
  year = {2001},
  volume = {64},
  pages = {026118},
  doi = {10.1103/PhysRevE.64.026118},
  pdf = {../local/newm01.pdf},
  file = {newm01.pdf:local/newm01.pdf:PDF},
  subject = {compnet},
  url = {http://xxx.lanl.gov/abs/cond-mat/0007235}
}

@inproceedings{Ng2001On,
  author    = {Andrew Y. Ng and Michael I. Jordan and Yair Weiss},
  title     = {On Spectral Clustering: Analysis and an algorithm},
  booktitle = {Advances in Neural Information Processing Systems 14},
  pages     = {849--856},
  publisher = {MIT Press},
  year      = {2001}
}

@article{Nguyen2005Prediction,
  author = {Minh N Nguyen and Jagath C Rajapakse},
  title = {Prediction of protein relative solvent accessibility with a two-stage
	{SVM} approach},
  journal = {Proteins},
  year = {2005},
  volume = {59},
  pages = {30--37},
  number = {1},
  month = apr,
  abstract = {Information on relative solvent accessibility ({RSA}) of amino acid
	residues in proteins provides valuable clues to the prediction of
	protein structure and function. {A} two-stage approach with support
	vector machines ({SVM}s) is proposed, where an {SVM} predictor is
	introduced to the output of the single-stage {SVM} approach to take
	into account the contextual relationships among solvent accessibilities
	for the prediction. {B}y using the position-specific scoring matrices
	({PSSM}s) generated by {PSI}-{BLAST}, the two-stage {SVM} approach
	achieves accuracies up to 90.4\% and 90.2\% on the {M}anesh data
	set of 215 protein structures and the {RS}126 data set of 126 nonhomologous
	globular proteins, respectively, which are better than the highest
	published scores on both data sets to date. {A} {W}eb server for
	protein {RSA} prediction using a two-stage {SVM} method has been
	developed and is available (http://birc.ntu.edu.sg/~pas0186457/rsa.html).},
  doi = {10.1002/prot.20404},
  pdf = {../local/Nguyen2005Prediction.pdf},
  file = {Nguyen2005Prediction.pdf:local/Nguyen2005Prediction.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1002/prot.20404}
}

@article{Nguyen2005Two-stage,
  author = {M. N. Nguyen and J. C. Rajapakse},
  title = {Two-stage multi-class support vector machines to protein secondary
	structure prediction},
  journal = {Pac {S}ymp {B}iocomput},
  year = {2005},
  pages = {346--357},
  abstract = {Bioinformatics techniques to protein secondary structure ({PSS}) prediction
	are mostly single-stage approaches in the sense that they predict
	secondary structures of proteins by taking into account only the
	contextual information in amino acid sequences. {I}n this paper,
	we propose two-stage {M}ulti-class {S}upport {V}ector {M}achine ({MSVM})
	approach where a {MSVM} predictor is introduced to the output of
	the first stage {MSVM} to capture the sequential relationship among
	secondary structure elements for the prediction. {B}y using position
	specific scoring matrices, generated by {PSI}-{BLAST}, the two-stage
	{MSVM} approach achieves {Q}3 accuracies of 78.0\% and 76.3\% on
	the {RS}126 dataset of 126 nonhomologous globular proteins and the
	{CB}396 dataset of 396 nonhomologous proteins, respectively, which
	are better than the highest scores published on both datasets to
	date.},
  keywords = {biosvm}
}

@article{Nguyen2003Multi-class,
  author = {Minh N Nguyen and Jagath C Rajapakse},
  title = {Multi-class support vector machines for protein secondary structure
	prediction},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2003},
  volume = {14},
  pages = {218--227},
  abstract = {The solution of binary classification problems using the {S}upport
	{V}ector {M}achine ({SVM}) method has been well developed. {T}hough
	multi-class classification is typically solved by combining several
	binary classifiers, recently, several multi-class methods that consider
	all classes at once have been proposed. {H}owever, these methods
	require resolving a much larger optimization problem and are applicable
	to small datasets. {T}hree methods based on binary classifications:
	one-against-all ({OAA}), one-against-one ({OAO}), and directed acyclic
	graph ({DAG}), and two approaches for multi-class problem by solving
	one single optimization problem, are implemented to predict protein
	secondary structure. {O}ur experiments indicate that multi-class
	{SVM} methods are more suitable for protein secondary structure ({PSS})
	prediction than the other methods, including binary {SVM}s, because
	their capacity to solve an optimization problem in one step. {F}urthermore,
	in this paper, we argue that it is feasible to extend the prediction
	accuracy by adding a second-stage multi-class {SVM} to capture the
	contextual information among secondary structural elements and thereby
	further improving the accuracies. {W}e demonstrate that two-stage
	{SVM}s perform better than single-stage {SVM} techniques for {PSS}
	prediction using two datasets and report a maximum accuracy of 79.5\%.},
  keywords = {biosvm}
}

@article{Nguyen2006Experimental,
  author = {Nam-Ky Nguyen and E. R. Williams},
  title = {Experimental designs for 2-colour {cDNA} microarray experiments},
  journal = {Applied Stochastic Models in Business and Industry},
  year = {2006},
  volume = {22},
  pages = {631--638},
  owner = {phupe},
  timestamp = {2011.04.08}
}

@misc{Nicholls2005Openeye,
  author = {A. Nicholls},
  title = {{OEChem}, version 1.3.4, {OpenEye Scientific Software}},
  howpublished = {website},
  year = {2005},
  owner = {bricehoffmann},
  timestamp = {2009.02.13}
}

@article{Niedringhaus2011Landscape,
  author = {Thomas P Niedringhaus and Denitsa Milanova and Matthew B Kerby and
	Michael P Snyder and Annelise E Barron},
  title = {Landscape of Next-Generation Sequencing Technologies},
  journal = {Anal Chem},
  year = {2011},
  volume = {83},
  pages = {4327--4341},
  number = {12},
  month = may,
  doi = {10.1021/ac2010857},
  institution = {Department of Chemical Engineering, Stanford University, Palo Alto,
	California, United States.},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {phupe},
  pmid = {21612267},
  timestamp = {2011.06.01},
  url = {http://dx.doi.org/10.1021/ac2010857}
}

@article{Nielsen2010Visualizing,
  author = {Cydney B Nielsen and Michael Cantor and Inna Dubchak and David Gordon
	and Ting Wang},
  title = {Visualizing genomes: techniques and challenges},
  journal = {Nat Methods},
  year = {2010},
  volume = {7},
  pages = {S5--S15},
  number = {3 Suppl},
  month = mar,
  abstract = {As our ability to generate sequencing data continues to increase,
	data analysis is replacing data generation as the rate-limiting step
	in genomics studies. Here we provide a guide to genomic data visualization
	tools that facilitate analysis tasks by enabling researchers to explore,
	interpret and manipulate their data, and in some cases perform on-the-fly
	computations. We will discuss graphical methods designed for the
	analysis of de novo sequencing assemblies and read alignments, genome
	browsing, and comparative genomics, highlighting the strengths and
	limitations of these approaches and the challenges ahead.},
  doi = {10.1038/nmeth.1422},
  institution = {British Columbia Cancer Agency, Genome Sciences Centre, Vancouver,
	Canada. cydneyn@bcgsc.ca},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nmeth.1422},
  pmid = {20195257},
  timestamp = {2010.07.27},
  url = {http://dx.doi.org/10.1038/nmeth.1422}
}

@article{Nielsen1997Identification,
  author  = {Nielsen, H. and Engelbrecht, J. and Brunak, S. and von Heijne, G.},
  title   = {Identification of prokaryotic and eukaryotic signal peptides and
	prediction of their cleavage sites},
  journal = {Protein {E}ng.},
  volume  = {10},
  number  = {1},
  pages   = {1--6},
  year    = {1997},
  url     = {http://protein.oupjournals.org/cgi/content/abstract/10/1/1},
  pdf     = {../local/niel97.pdf},
  file    = {niel97.pdf:local/niel97.pdf:PDF},
  subject = {bioprot}
}

@article{Nielsen2004Improved,
  author = {Nielsen, M. and Lundegaard, C. and Worning, P. and Hvid, C. S. and
	Lamberth, K. and Buus, S. and Brunak, S. and Lund, O.},
  title = {Improved prediction of {MHC} class {I} and class {II} epitopes
	using a novel {Gibbs} sampling approach},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1388--1397},
  number = {9},
  month = jun,
  abstract = {MOTIVATION: Prediction of which peptides will bind a specific major
	histocompatibility complex (MHC) constitutes an important step in
	identifying potential T-cell epitopes suitable as vaccine candidates.
	MHC class II binding peptides have a broad length distribution complicating
	such predictions. Thus, identifying the correct alignment is a crucial
	part of identifying the core of an MHC class II binding motif. In
	this context, we wish to describe a novel Gibbs motif sampler method
	ideally suited for recognizing such weak sequence motifs. The method
	is based on the Gibbs sampling method, and it incorporates novel
	features optimized for the task of recognizing the binding motif
	of MHC classes I and II. The method locates the binding motif in
	a set of sequences and characterizes the motif in terms of a weight-matrix.
	Subsequently, the weight-matrix can be applied to identifying effectively
	potential MHC binding peptides and to guiding the process of rational
	vaccine design. RESULTS: We apply the motif sampler method to the
	complex problem of MHC class II binding. The input to the method
	is amino acid peptide sequences extracted from the public databases
	of SYFPEITHI and MHCPEP and known to bind to the MHC class II complex
	HLA-DR4(B1*0401). Prior identification of information-rich (anchor)
	positions in the binding motif is shown to improve the predictive
	performance of the Gibbs sampler. Similarly, a consensus solution
	obtained from an ensemble average over suboptimal solutions is shown
	to outperform the use of a single optimal solution. In a large-scale
	benchmark calculation, the performance is quantified using relative
	operating characteristics curve (ROC) plots and we make a detailed
	comparison of the performance with that of both the TEPITOPE method
	and a weight-matrix derived using the conventional alignment algorithm
	of ClustalW. The calculation demonstrates that the predictive performance
	of the Gibbs sampler is higher than that of ClustalW and in most
	cases also higher than that of the TEPITOPE method.},
  doi = {10.1093/bioinformatics/bth100},
  keywords = {immunoinformatics},
  pii = {bth100},
  pmid = {14962912},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth100}
}

@article{Nielsen2003Reliable,
  author    = {Nielsen, M. and Lundegaard, C. and Worning, P. and Lauem{\o}ller,
	S. L. and Lamberth, K. and Buus, S. and Brunak, S. and Lund, O.},
  title     = {{R}eliable prediction of {T}-cell epitopes using neural networks
	with novel sequence representations.},
  journal   = {Protein Sci.},
  year      = {2003},
  month     = {May},
  volume    = {12},
  number    = {5},
  pages     = {1007--1017},
  pmid      = {12717023},
  keywords  = {immunoinformatics},
  timestamp = {2007.01.25},
  abstract  = {In this paper we describe an improved neural network method to predict
	T-cell class I epitopes. A novel input representation has been developed
	consisting of a combination of sparse encoding, Blosum encoding,
	and input derived from hidden Markov models. We demonstrate that
	the combination of several neural networks derived using different
	sequence-encoding schemes has a performance superior to neural networks
	derived using a single sequence-encoding scheme. The new method is
	shown to have a performance that is substantially higher than that
	of other methods. By use of mutual information calculations we show
	that peptides that bind to the HLA A*0204 complex display signal
	of higher order sequence correlations. Neural networks are ideally
	suited to integrate such higher order correlations when predicting
	the binding affinity. It is this feature combined with the use of
	several neural networks derived from different and novel sequence-encoding
	schemes and the ability of the neural network to be trained on data
	consisting of continuous binding affinities that gives the new method
	an improved performance. The difference in predictive performance
	between the neural network methods and that of the matrix-driven
	methods is found to be most significant for peptides that bind strongly
	to the HLA molecule, confirming that the signal of higher order sequence
	correlation is most strongly present in high-binding peptides. Finally,
	we use the method to predict T-cell epitopes for the genome of hepatitis
	C virus and discuss possible applications of the prediction method
	to guide the process of rational vaccine design.}
}

@article{Nielsen2011Genotype,
  author      = {Nielsen, R. and Paul, J. S. and Albrechtsen, A. and Song, Y. S.},
  title       = {Genotype and {SNP} calling from next-generation sequencing data.},
  journal     = {Nat. Rev. Genet.},
  year        = {2011},
  month       = {Jun},
  volume      = {12},
  number      = {6},
  pages       = {443--451},
  doi         = {10.1038/nrg2986},
  url         = {http://dx.doi.org/10.1038/nrg2986},
  pii         = {nrg2986},
  pmid        = {21587300},
  keywords    = {ngs},
  institution = {Department of Integrative Biology, University of California, Berkeley,
	CA 94720, USA. rasmus_nielsen@berkeley.edu},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Nielsen2011Genotype.pdf},
  file        = {Nielsen2011Genotype.pdf:Nielsen2011Genotype.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.10.27},
  abstract    = {Meaningful analysis of next-generation sequencing (NGS) data, which
	are produced extensively by genetics and genomics studies, relies
	crucially on the accurate calling of SNPs and genotypes. Recently
	developed statistical methods both improve and quantify the considerable
	uncertainty associated with genotype calling, and will especially
	benefit the growing number of studies using low- to medium-coverage
	data. We review these methods and provide a guide for their use in
	NGS studies.}
}

@article{Nigam2000Text,
  author = {Nigam, K. and McCallum, A. K. and Thrun, S. and Mitchell, T. M.},
  title = {Text Classification from Labeled and Unlabeled Documents using {EM}},
  journal = {Mach. Learn.},
  year = {2000},
  volume = {39},
  pages = {103--134},
  number = {2/3},
  citeulike-article-id = {224914},
  citeulike-linkout-0 = {http://citeseer.ist.psu.edu/nigam99text.html},
  citeulike-linkout-1 = {http://citeseer.lcs.mit.edu/nigam99text.html},
  citeulike-linkout-2 = {http://citeseer.ifi.unizh.ch/nigam99text.html},
  citeulike-linkout-3 = {http://citeseer.comp.nus.edu.sg/nigam99text.html},
  keywords = {em, semi-supervised-learning, text-classification, unlabeled-data},
  owner = {fantine},
  timestamp = {2009.07.09},
  url = {http://citeseer.ist.psu.edu/nigam99text.html}
}

@article{Nilsson2004Approximate,
  author = {Nilsson, J. and Fioretos, T. and H{\"o}glund, M. and Fontes, M.},
  title = {Approximate geodesic distances reveal biologically relevant structures
	in microarray data.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {874--880},
  number = {6},
  month = {Apr},
  abstract = {M{OTIVATION}: {G}enome-wide gene expression measurements, as currently
	determined by the microarray technology, can be represented mathematically
	as points in a high-dimensional gene expression space. {G}enes interact
	with each other in regulatory networks, restricting the cellular
	gene expression profiles to a certain manifold, or surface, in gene
	expression space. {T}o obtain knowledge about this manifold, various
	dimensionality reduction methods and distance metrics are used. {F}or
	data points distributed on curved manifolds, a sensible distance
	measure would be the geodesic distance along the manifold. {I}n this
	work, we examine whether an approximate geodesic distance measure
	captures biological similarities better than the traditionally used
	{E}uclidean distance. {RESULTS}: {W}e computed approximate geodesic
	distances, determined by the {I}somap algorithm, for one set of lymphoma
	and one set of lung cancer microarray samples. {C}ompared with the
	ordinary {E}uclidean distance metric, this distance measure produced
	more instructive, biologically relevant, visualizations when applying
	multidimensional scaling. {T}his suggests the {I}somap algorithm
	as a promising tool for the interpretation of microarray data. {F}urthermore,
	the results demonstrate the benefit and importance of taking nonlinearities
	in gene expression data into account.},
  doi = {10.1093/bioinformatics/btg496},
  pdf = {../local/Nilsson2004Approximate.pdf},
  file = {Nilsson2004Approximate.pdf:local/Nilsson2004Approximate.pdf:PDF},
  keywords = {dimred},
  pii = {btg496},
  url = {http://dx.doi.org/10.1093/bioinformatics/btg496}
}

@article{Nilsson2009reliable,
  author = {Nilsson, R. and Bj{\"o}rkegren, J. and Tegn{\'e}r, J.},
  title = {On reliable discovery of molecular signatures},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {38},
  abstract = {Background: Molecular signatures are sets of genes, proteins, genetic
	variants or other variables that can be used as markers for a particular
	phenotype. Reliable signature discovery methods could yield valuable
	insight into cell biology and mechanisms of human disease. However,
	it is currently not clear how to control error rates such as the
	false discovery rate (FDR) in signature discovery. Moreover, signatures
	for cancer gene expression have been shown to be unstable, that is,
	difficult to replicate in independent studies, casting doubts on
	their reliability.
	
	Results: We demonstrate that with modern prediction methods, signatures
	that yield accurate predictions may still have a high FDR. Further,
	we show that even signatures with low FDR may fail to replicate in
	independent studies due to limited statistical power. Thus, neither
	stability nor predictive accuracy are relevant when FDR control is
	the primary goal. We therefore develop a general statistical hypothesis
	testing framework that for the first time provides FDR control for
	signature discovery. Our method is demonstrated to be correct in
	simulation studies. When applied to five cancer data sets, the method
	was able to discover molecular signatures with 5\% FDR in three cases,
	while two data sets yielded no significant findings.
	
	Conclusion: Our approach enables reliable discovery of molecular signatures
	from genome-wide data with current sample sizes. The statistical
	framework developed herein is potentially applicable to a wide range
	of prediction problems in bioinformatics.},
  doi = {10.1186/1471-2105-10-38},
  pdf = {../local/Nilsson2009reliable.pdf},
  file = {Nilsson2009reliable.pdf:Nilsson2009reliable.pdf:PDF},
  owner = {jp},
  timestamp = {2011.01.13},
  url = {http://dx.doi.org/10.1186/1471-2105-10-38}
}

@incollection{Noble2004Support,
  author = {Noble, W. S.},
  title = {Support vector machine applications in computational biology},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  pages = {71--92},
  abstract = {During the past three years, the support vector machine learning algorithm
	has been extensively applied within the field of computational biology.
	{T}he algorithm has been used to detect patterns within and among
	biological sequences, to classify genes and patients based upon gene
	expression profiles, and has recently been applied to several new
	biological problems. {T}his chapter reviews the state of the art
	with respect to {SVM} applications in computational biology.},
  pdf = {../local/Noble2004Support.pdf},
  file = {Noble2004Support.pdf:local/Noble2004Support.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@book{Nocedal2006Numerical,
  author    = {Nocedal, J. and Wright, S.},
  title     = {Numerical optimization},
  year      = {2006},
  publisher = {Springer},
  timestamp = {2011.05.02},
  owner     = {jp}
}

@article{Noon93Efficient,
  author = {Noon, C. and Bean, J. C.},
  title = {An efficient transformation of the generalized traveling salesman
	problem},
  journal = {INFOR},
  year = {1993},
  pages = {39--44}
}

@article{OConnor2006BiotechnolLett,
  author = {O'Connor, K. C. and Muhitch, J. W. and Lacks, D. J. and Al-Rubeai,
	M.},
  title = {Modeling suppression of cell death by {Bcl-2} over-expression in myeloma
	{NS0} {6A1} cells},
  journal = {Biotechnol Lett},
  year = {2006},
  volume = {28},
  pages = {1919--1924},
  number = {23},
  abstract = {A novel population-balance model was employed to evaluate the suppression
	of cell death in myeloma NS0 6A1 cells metabolically engineered to
	over-express the apoptotic suppressor Bcl-2. The model is robust
	in its ability to simulate cell population dynamics in batch suspension
	culture and in response to thymidine-induced growth inhibition: 89
	percent of simulated cell concentrations are within two standard
	deviations of experimental data. Kinetic rate constants in model
	equations suggest that Bcl-2 over-expression extends culture longevity
	from 6 days to at least 15 days by suppressing the specific rate
	of early apoptotic cell formation by more than 6-fold and necrotic
	cell formation by at least 3-fold, despite nearly a 3-fold decrease
	in initial cell growth rate and no significant change in the specific
	rate of late apoptotic cell formation. This computational analysis
	supports a mechanism in which Bcl-2 is a common mediator of early
	apoptotic and necrotic events occurring at rates that are dependent
	on cellular factors accumulating over time. The model has current
	application to the rational design of cell cultures through metabolic
	engineering for the industrial production of biopharmaceuticals.},
  keywords = {csbcbook}
}

@article{ODonnell2005Gene,
  author = {Rebekah K O'Donnell and Michael Kupferman and S. Jack Wei and Sunil
	Singhal and Randal Weber and Bert O'Malley and Yi Cheng and Mary
	Putt and Michael Feldman and Barry Ziober and Ruth J Muschel},
  title = {Gene expression signature predicts lymphatic metastasis in squamous
	cell carcinoma of the oral cavity.},
  journal = {Oncogene},
  year = {2005},
  volume = {24},
  pages = {1244--1251},
  number = {7},
  month = {Feb},
  abstract = {Metastasis via the lymphatics is a major risk factor in squamous cell
	carcinoma of the oral cavity ({OSCC}). {W}e sought to determine whether
	the presence of metastasis in the regional lymph node could be predicted
	by a gene expression signature of the primary tumor. {A} total of
	18 {OSCC}s were characterized for gene expression by hybridizing
	{RNA} to {A}ffymetrix {U}133{A} gene chips. {G}enes with differential
	expression were identified using a permutation technique and verified
	by quantitative {RT}-{PCR} and immunohistochemistry. {A} predictive
	rule was built using a support vector machine, and the accuracy of
	the rule was evaluated using crossvalidation on the original data
	set and prediction of an independent set of four patients. {M}etastatic
	primary tumors could be differentiated from nonmetastatic primary
	tumors by a signature gene set of 116 genes. {T}his signature gene
	set correctly predicted the four independent patients as well as
	associating five lymph node metastases from the original patient
	set with the metastatic primary tumor group. {W}e concluded that
	lymph node metastasis could be predicted by gene expression profiles
	of primary oral cavity squamous cell carcinomas. {T}he presence of
	a gene expression signature for lymph node metastasis indicates that
	clinical testing to assess risk for lymph node metastasis should
	be possible.},
  doi = {10.1038/sj.onc.1208285},
  pdf = {../local/O'Donnell2005Gene.pdf},
  file = {O'Donnell2005Gene.pdf:local/O'Donnell2005Gene.pdf:PDF},
  keywords = {biosvm microarray},
  pii = {1208285},
  url = {http://dx.doi.org/10.1038/sj.onc.1208285}
}

@misc{ODriscoll2004Virtual,
  author       = {Cath O'Driscoll},
  title        = {A {V}irtual {S}pace {O}dyssey},
  year         = {2004},
  howpublished = {Nature Horizon : Charting Chemical Space},
  timestamp    = {2006.09.04},
  owner        = {mahe}
}

@article{OFlanagan2005Non,
  author = {R. A. O'Flanagan and G. Paillard and R. Lavery and A. M. Sengupta},
  title = {Non-additivity in protein-{DNA} binding.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2254--2263},
  number = {10},
  month = {May},
  abstract = {M{OTIVATION}: {L}ocalizing protein binding sites within genomic {DNA}
	is of considerable importance, but remains difficult for protein
	families, such as transcription factors, which have loosely defined
	target sequences. {I}t is generally assumed that protein affinity
	for {DNA} involves additive contributions from successive nucleotide
	pairs within the target sequence. {T}his is not necessarily true,
	and non-additive effects have already been experimentally demonstrated
	in a small number of cases. {T}he principal origin of non-additivity
	involves the so-called indirect component of protein-{DNA} recognition
	which is related to the sequence dependence of {DNA} deformation
	induced during complex formation. {N}on-additive effects are difficult
	to study because they require the identification of many more binding
	sequences than are normally necessary for describing additive specificity
	(typically via the construction of weight matrices). {RESULTS}: {I}n
	the present work we will use theoretically estimated binding energies
	as a basis for overcoming this problem. {O}ur approach enables us
	to study the full combinatorial set of sequences for a variety of
	{DNA}-binding proteins, make a detailed analysis of non-additive
	effects and exploit this information to improve binding site predictions
	using either weight matrices or support vector machines. {T}he results
	underline the fact that, even in the presence of significant deformation,
	non-additive effects may involve only a limited number of dinucleotide
	steps. {T}his information helps to reduce the number of binding sites
	which need to be identified for successful predictions and to avoid
	problems of over-fitting. {AVAILABILITY}: {T}he {SVM} software is
	available upon request from the authors.},
  doi = {10.1093/bioinformatics/bti361},
  pdf = {../local/OFlanagan2005Non.pdf},
  file = {OFlanagan2005Non.pdf:local/OFlanagan2005Non.pdf:PDF},
  keywords = {biosvm},
  pii = {bti361},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti361}
}

@article{OHagan2003Array,
  author    = {O'Hagan, R. C. and Brennan, C. W. and Strahs, A. and Zhang, X. and
	Kannan, K. and Donovan, M. and Cauwels, C. and Sharpless, N. E. and
	Wong, W. H. and Chin, L.},
  title     = {Array comparative genome hybridization for tumor classification and
	gene discovery in mouse models of malignant melanoma},
  journal   = {Cancer Res.},
  year      = {2003},
  month     = {Sep},
  volume    = {63},
  number    = {17},
  pages     = {5352--5356},
  pmid      = {14500367},
  keywords  = {cgh},
  pdf       = {../local/OHagan2003Array.pdf},
  file      = {OHagan2003Array.pdf:OHagan2003Array.pdf:PDF},
  owner     = {franck},
  timestamp = {2007.10.31},
  abstract  = {Chromosomal numerical aberrations (CNAs), particularly regional amplifications
	and deletions, are a hallmark of solid tumor genomes. These genomic
	alterations carry the potential to convey etiologic and clinical
	significance by virtue of their clonality within a tumor cell population,
	their distinctive patterns in relation to tumor staging, and their
	recurrence across different tumor types. In this study, we showed
	that array-based comparative genomic hybridization (CGH) analysis
	of genome-wide CNAs can classify tumors on the basis of differing
	etiologies and provide mechanistic insights to specific biological
	processes. In a RAS-induced p19(Arf-/-) mouse model that experienced
	accelerated melanoma formation after UV exposure, array-CGH analysis
	was effective in distinguishing phenotypically identical melanomas
	that differed solely by previous UV exposure. Moreover, classification
	by array-CGH identified key CNAs unique to each class, including
	amplification of cyclin-dependent kinase 6 in UV-treated cohort,
	a finding consistent with our recent report that UVB targets components
	of the p16(INK4a)-cyclin-dependent kinase-RB pathway in melanoma
	genesis (K. Kannan, et al., Proc. Natl. Acad. Sci. USA, 21: 2003).
	These results are the first to establish the utility of array-CGH
	as a means of etiology-based tumor classification in genetically
	defined cancer-prone models.}
}

@techreport{Obozinski2011Group,
  author      = {G. Obozinski and L. Jacob and J.-P. Vert},
  title       = {Group Lasso with Overlaps: the Latent Group Lasso approach},
  year        = {2011},
  institution = {arXiv}
}

@article{Obozinski2010Joint,
  author = {Obozinski, G. and Taskar, B. and Jordan, M. I.},
  title = {Joint covariate selection and joint subspace selection for multiple
	classification problems},
  journal = {Statistics and Computing},
  year = {2010},
  volume = {20},
  pages = {231--252},
  number = {2},
  publisher = {Springer}
}

@techreport{Obozinski2007Multi-task,
  author      = {G. Obozinski and B. Taskar and M. I. Jordan},
  title       = {Multi-task Feature Selection},
  year        = {2007},
  institution = {arXiv}
}

@article{Obozinski2011Support,
  author    = {Obozinski, G. and Wainwright, M. J. and Jordan, M. I.},
  title     = {Support union recovery in high-dimensional multivariate regression},
  journal   = {Ann. Stat.},
  year      = {2011},
  volume    = {39},
  number    = {1},
  pages     = {1--47},
  pdf       = {../local/Obozinski2011Support.pdf},
  file      = {Obozinski2011Support.pdf:Obozinski2011Support.pdf:PDF},
  timestamp = {2013.01.01},
  owner     = {jp}
}

@techreport{Obozinski2008Union,
  author      = {Obozinski, G. and Wainwright, M. J. and Jordan, M. I.},
  title       = {Union support recovery in high-dimensional multivariate regression},
  year        = {2008},
  month       = {August},
  institution = {arXiv},
  number      = {0808.0711v1},
  keywords    = {lasso},
  pdf         = {../local/Obozinski2008Union.pdf},
  file        = {Obozinski2008Union.pdf:Obozinski2008Union.pdf:PDF},
  timestamp   = {2009.05.02},
  owner       = {jp}
}

@article{Ogawa2000New,
  author = {Nobuo Ogawa and Joseph DeRisi and Patrick O. Brown},
  title = {New {C}omponents of a {S}ystem for {P}hosphate {A}ccumulation and
	{P}olyphosphate {M}etabolism in {S}accharomyces cerevisiae {R}evealed
	by {G}enomic {E}xpression {A}nalysis},
  journal = {Mol. {B}iol. {C}ell},
  year = {2000},
  volume = {11},
  pages = {4309--4321},
  number = {12},
  month = {Dec},
  pdf = {../local/ogaw00.pdf},
  file = {ogaw00.pdf:local/ogaw00.pdf:PDF},
  subject = {microarray},
  url = {http://www.molbiolcell.org/cgi/reprint/11/12/4309.pdf}
}

@article{Okada2004retinal,
  author = {Okada, T. and Sugihara, M. and Bondar, A.-N. and Elstner, M. and
	Entel, P. and Buss, V.},
  title = {The retinal conformation and its environment in rhodopsin in light
	of a new 2.2 {\AA} crystal structure.},
  journal = {J. Mol. Biol.},
  year = {2004},
  volume = {342},
  pages = {571--583},
  number = {2},
  month = {Sep},
  abstract = {A new high-resolution structure is reported for bovine rhodopsin,
	the visual pigment in rod photoreceptor cells. Substantial improvement
	of the resolution limit to 2.2 {\AA} has been achieved by new crystallization
	conditions, which also reduce significantly the probability of merohedral
	twinning in the crystals. The new structure completely resolves the
	polypeptide chain and provides further details of the chromophore
	binding site including the configuration about the C6-C7 single bond
	of the 11-cis-retinal Schiff base. Based on both an earlier structure
	and the new improved model of the protein, a theoretical study of
	the chromophore geometry has been carried out using combined quantum
	mechanics/force field molecular dynamics. The consistency between
	the experimental and calculated chromophore structures is found to
	be significantly improved for the 2.2 {\AA} model, including the angle
	of the negatively twisted 6-s-cis-bond. Importantly, the new crystal
	structure refinement reveals significant negative pre-twist of the
	C11-C12 double bond and this is also supported by the theoretical
	calculation although the latter converges to a smaller value. Bond
	alternation along the unsaturated chain is significant, but weaker
	in the calculated structure than the one obtained from the X-ray
	data. Other differences between the experimental and theoretical
	structures in the chromophore binding site are discussed with respect
	to the unique spectral properties and excited state reactivity of
	the chromophore.},
  doi = {10.1016/j.jmb.2004.07.044},
  keywords = {chemogenomics},
  owner = {laurent},
  pii = {S0022-2836(04)00873-3},
  pmid = {15327956},
  timestamp = {2008.04.01},
  url = {http://dx.doi.org/10.1016/j.jmb.2004.07.044}
}

@article{Okamoto2007Prediction,
  author = {Okamoto, S. and Yamanishi, Y. and Ehira, S. and Kawashima, S. and
	Tonomura, K. and Kanehisa, M.},
  title = {Prediction of nitrogen metabolism-related genes in {Anabaena} by kernel-based
	network analysis.},
  journal = {Proteomics},
  year = {2007},
  volume = {7},
  pages = {900--909},
  number = {6},
  month = {Mar},
  abstract = {Prediction of molecular interaction networks from large-scale datasets
	in genomics and other omics experiments is an important task in terms
	of both developing bioinformatics methods and solving biological
	problems. We have applied a kernel-based network inference method
	for extracting functionally related genes to the response of nitrogen
	deprivation in cyanobacteria Anabaena sp. PCC 7120 integrating three
	heterogeneous datasets: microarray data, phylogenetic profiles, and
	gene orders on the chromosome. We obtained 1348 predicted genes that
	are somehow related to known genes in the Kyoto Encyclopedia of Genes
	and Genomes (KEGG) pathways. While this dataset contained previously
	known genes related to the nitrogen deprivation condition, it also
	contained additional genes. Thus, we attempted to select any relevant
	genes using the constraints of Pfam domains and NtcA-binding sites.
	We found candidates of nitrogen metabolism-related genes, which are
	depicted as extensions of existing KEGG pathways. The prediction
	of functional relationships between proteins rather than functions
	of individual proteins will thus assist the discovery from the large-scale
	datasets.},
  doi = {10.1002/pmic.200600862},
  institution = {Bioinformatics Center, Institute for Chemical Research, Kyoto University,
	Uji, Japan. so@kazusa.or.jp},
  owner = {jp},
  pmid = {17370268},
  timestamp = {2008.06.01},
  url = {http://dx.doi.org/10.1002/pmic.200600862}
}

@article{Okuno2007GLIDA,
  author    = {Okuno, Y. and Tamon, A. and Yabuuchi, H. and Niijima, S. and Minowa,
	Y. and Tonomura, K. and Kunimoto, R. and Feng, C.},
  title     = {{GLIDA}: {GPCR} ligand database for chemical genomics drug discovery
	database and tools update.},
  journal   = {Nucleic Acids Res.},
  year      = {2007},
  month     = {Nov},
  volume    = {36},
  number    = {Database issue},
  pages     = {D907--D912},
  doi       = {10.1093/nar/gkm948},
  url       = {http://dx.doi.org/10.1093/nar/gkm948},
  pii       = {gkm948},
  pmid      = {17986454},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.01.15},
  abstract  = {G-protein coupled receptors (GPCRs) represent one of the most important
	families of drug targets in pharmaceutical development. GLIDA is
	a public GPCR-related Chemical Genomics database that is primarily
	focused on the integration of information between GPCRs and their
	ligands. It provides interaction data between GPCRs and their ligands,
	along with chemical information on the ligands, as well as biological
	information regarding GPCRs. These data are connected with each other
	in a relational database, allowing users in the field of Chemical
	Genomics research to easily retrieve such information from either
	biological or chemical starting points. GLIDA includes a variety
	of similarity search functions for the GPCRs and for their ligands.
	Thus, GLIDA can provide correlation maps linking the searched homologous
	GPCRs (or ligands) with their ligands (or GPCRs). By analyzing the
	correlation patterns between GPCRs and ligands, we can gain more
	detailed knowledge about their conserved molecular recognition patterns
	and improve drug design efforts by focusing on inferred candidates
	for GPCR-specific drugs. This article provides a summary of the GLIDA
	database and user facilities, and describes recent improvements to
	database design, data contents, ligand classification programs, similarity
	search options and graphical interfaces. GLIDA is publicly available
	at http://pharminfo.pharm.kyoto-u.ac.jp/services/glida/. We hope
	that it will prove very useful for Chemical Genomics research and
	GPCR-related drug discovery.}
}

@article{Okuno2006GLIDA,
  author    = {Okuno, Y. and Yang, J. and Taneishi, K. and Yabuuchi, H. and Tsujimoto,
	G.},
  title     = {{GLIDA}: {GPCR}-ligand database for chemical genomic drug discovery},
  journal   = {Nucleic Acids Res.},
  year      = {2006},
  month     = {Jan},
  volume    = {34},
  number    = {Database issue},
  pages     = {D673--D677},
  doi       = {10.1093/nar/gkj028},
  url       = {http://dx.doi.org/10.1093/nar/gkj028},
  pii       = {34/suppl_1/D673},
  pmid      = {16381956},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.01.15},
  abstract  = {G-protein coupled receptors (GPCRs) represent one of the most important
	families of drug targets in pharmaceutical development. GPCR-LIgand
	DAtabase (GLIDA) is a novel public GPCR-related chemical genomic
	database that is primarily focused on the correlation of information
	between GPCRs and their ligands. It provides correlation data between
	GPCRs and their ligands, along with chemical information on the ligands,
	as well as access information to the various web databases regarding
	GPCRs. These data are connected with each other in a relational database,
	allowing users in the field of GPCR-related drug discovery to easily
	retrieve such information from either biological or chemical starting
	points. GLIDA includes structure similarity search functions for
	the GPCRs and for their ligands. Thus, GLIDA can provide correlation
	maps linking the searched homologous GPCRs (or ligands) with their
	ligands (or GPCRs). By analyzing the correlation patterns between
	GPCRs and ligands, we can gain more detailed knowledge about their
	interactions and improve drug design efforts by focusing on inferred
	candidates for GPCR-specific drugs. GLIDA is publicly available at
	http://gdds.pharm.kyoto-u.ac.jp:8081/glida. We hope that it will
	prove very useful for chemical genomic research and GPCR-related
	drug discovery.}
}

@article{Oloff2006Chemometric,
  author = {Scott Oloff and Shuxing Zhang and Nagamani Sukumar and Curt Breneman
	and Alexander Tropsha},
  title = {Chemometric analysis of ligand receptor complementarity: identifying
	Complementary Ligands Based on Receptor Information ({CoLiBRI}).},
  journal = {J. Chem. Inf. Model.},
  year = {2006},
  volume = {46},
  pages = {844--851},
  number = {2},
  abstract = {We have developed a novel structure-based chemoinformatics approach
	to search for Complimentary Ligands Based on Receptor Information
	(CoLiBRI). CoLiBRI is based on the representation of both receptor
	binding sites and their respective ligands in a space of universal
	chemical descriptors. The binding site atoms involved in the interaction
	with ligands are identified by the means of a computational geometry
	technique known as Delaunay tessellation as applied to X-ray characterized
	ligand-receptor complexes. TAE/RECON multiple chemical descriptors
	are calculated independently for each ligand as well as for its active
	site atoms. The representation of both ligands and active sites using
	chemical descriptors allows the application of well-known chemometric
	techniques in order to correlate chemical similarities between active
	sites and their respective ligands. We have established a protocol
	to map patterns of nearest neighbor active site vectors in a multidimensional
	TAE/RECON space onto those of their complementary ligands and vice
	versa. This protocol affords the prediction of a virtual complementary
	ligand vector in the ligand chemical space from the position of a
	known active site vector. This prediction is followed by chemical
	similarity calculations between this virtual ligand vector and those
	calculated for molecules in a chemical database to identify real
	compounds most similar to the virtual ligand. Consequently, the knowledge
	of the receptor active site structure affords straightforward and
	efficient identification of its complementary ligands in large databases
	of chemical compounds using rapid chemical similarity searches. Conversely,
	starting from the ligand chemical structure, one may identify possible
	complementary receptor cavities as well. We have applied the CoLiBRI
	approach to a data set of 800 X-ray characterized ligand-receptor
	complexes in the PDBbind database. Using a k nearest neighbor (kNN)
	pattern recognition approach and variable selection, we have shown
	that knowledge of the active site structure affords identification
	of its complimentary ligand among the top 1\% of a large chemical
	database in over 90\% of all test active sites when a binding site
	of the same protein family was present in the training set. In the
	case where test receptors are highly dissimilar and not present among
	the receptor families in the training set, the prediction accuracy
	is decreased; however, CoLiBRI was still able to quickly eliminate
	75\% of the chemical database as improbable ligands. CoLiBRI affords
	rapid prefiltering of a large chemical database to eliminate compounds
	that have little chance of binding to a receptor active site.},
  doi = {10.1021/ci050065r},
  keywords = {Algorithms; Binding Sites; Binding, Competitive; Computational Biology;
	Databases, Factual; Drug Design; Drug Evaluation, Preclinical; Ligands;
	Models, Biological; Structure-Activity Relationship},
  owner = {laurent},
  pmid = {16563016},
  timestamp = {2007.09.22},
  url = {http://dx.doi.org/10.1021/ci050065r}
}

@article{Olson2005Closed-loop,
  author = {Byron P Olson and Jennie Si and Jing Hu and Jiping He},
  title = {Closed-loop cortical control of direction using support vector machines.},
  journal = {{IEEE} Trans Neural Syst Rehabil Eng},
  year = {2005},
  volume = {13},
  pages = {72--80},
  number = {1},
  month = {Mar},
  abstract = {Motor neuroprosthetics research has focused on reproducing natural
	limb motions by correlating firing rates of cortical neurons to continuous
	movement parameters. {W}e propose an alternative system where specific
	spatial-temporal spike patterns, emerging in tasks, allow detection
	of classes of behavior with the aid of sophisticated nonlinear classification
	algorithms. {S}pecifically, we attempt to examine ensemble activity
	from motor cortical neurons, not to reproduce the action this neural
	activity normally precedes, but rather to predict an output supervisory
	command to potentially control a vehicle. {T}o demonstrate the principle,
	this design approach was implemented in a discrete directional task
	taking a small number of motor cortical signals (8-10 single units)
	fed into a support vector machine ({SVM}) to produce the commands
	{L}eft and {R}ight. {I}n this study, rats were placed in a conditioning
	chamber performing a binary paddle pressing task mimicking the control
	of a wheelchair turning left or right. {F}our animal subjects (male
	{S}prague-{D}awley rats) were able to use such a brain-machine interface
	({BMI}) with an average accuracy of 78\% on their first day of exposure.
	{A}dditionally, one animal continued to use the interface for three
	consecutive days with an average accuracy over 90\%.},
  keywords = {Algorithms, Animals, Artificial Intelligence, Computer-Assisted, Diagnosis,
	Electrodes, Electroencephalography, Feedback, Implanted, Male, Motor
	Cortex, Movement, Non-P.H.S., Non-U.S. Gov't, Rats, Research Support,
	Sprague-Dawley, Therapy, U.S. Gov't, User-Computer Interface},
  pmid = {15813408}
}

@article{Ong2002Stable,
  author = {Shao-En Ong and Blagoy Blagoev and Irina Kratchmarova and Dan Bach
	Kristensen and Hanno Steen and Akhilesh Pandey and Matthias Mann},
  title = {Stable isotope labeling by amino acids in cell culture, {SILAC}, as
	a simple and accurate approach to expression proteomics.},
  journal = {Mol Cell Proteomics},
  year = {2002},
  volume = {1},
  pages = {376--386},
  number = {5},
  month = {May},
  abstract = {Quantitative proteomics has traditionally been performed by two-dimensional
	gel electrophoresis, but recently, mass spectrometric methods based
	on stable isotope quantitation have shown great promise for the simultaneous
	and automated identification and quantitation of complex protein
	mixtures. Here we describe a method, termed SILAC, for stable isotope
	labeling by amino acids in cell culture, for the in vivo incorporation
	of specific amino acids into all mammalian proteins. Mammalian cell
	lines are grown in media lacking a standard essential amino acid
	but supplemented with a non-radioactive, isotopically labeled form
	of that amino acid, in this case deuterated leucine (Leu-d3). We
	find that growth of cells maintained in these media is no different
	from growth in normal media as evidenced by cell morphology, doubling
	time, and ability to differentiate. Complete incorporation of Leu-d3
	occurred after five doublings in the cell lines and proteins studied.
	Protein populations from experimental and control samples are mixed
	directly after harvesting, and mass spectrometric identification
	is straightforward as every leucine-containing peptide incorporates
	either all normal leucine or all Leu-d3. We have applied this technique
	to the relative quantitation of changes in protein expression during
	the process of muscle cell differentiation. Proteins that were found
	to be up-regulated during this process include glyceraldehyde-3-phosphate
	dehydrogenase, fibronectin, and pyruvate kinase M2. SILAC is a simple,
	inexpensive, and accurate procedure that can be used as a quantitative
	proteomic approach in any cell culture system.},
  institution = {Protein Interaction Laboratory, University of Southern Denmark, Odense,
	Denmark.},
  keywords = {3T3 Cells; Amino Acids; Animals; Cell Culture Techniques; Cell Differentiation;
	Cell Line; Deuterium; Genetic Techniques; Hydrogen-Ion Concentration;
	Leucine; Mice; Muscles; Peptides; Proteomics; Time Factors; Up-Regulation},
  owner = {phupe},
  pmid = {12118079},
  timestamp = {2010.08.19}
}

@article{Opper2001Universal,
  author = {M. Opper and R. Urbanczik},
  title = {Universal learning curves of support vector machines.},
  journal = {Phys Rev Lett},
  year = {2001},
  volume = {86},
  pages = {4410--4413},
  number = {19},
  month = {May},
  abstract = {Using methods of statistical physics, we investigate the role of model
	complexity in learning with support vector machines ({SVM}s), which
	are an important alternative to neural networks. {W}e show the advantages
	of using {SVM}s with kernels of infinite complexity on noisy target
	rules, which, in contrast to common theoretical beliefs, are found
	to achieve optimal generalization error although the training error
	does not converge to the generalization error. {M}oreover, we find
	a universal asymptotics of the learning curves which depend only
	on the target rule but not on the {SVM} kernel.},
  keywords = {Algorithms, Amino Acid Sequence, Artificial Intelligence, Biological,
	Cell Compartmentation, Chemistry, Comparative Study, Computational
	Biology, Computer Simulation, Computer-Assisted, Databases, Decision
	Trees, Diagnosis, Discriminant Analysis, Electrophysiology, Factual,
	Gastric Emptying, Humans, Logistic Models, Melanoma, Models, Neural
	Networks (Computer), Nevus, Non-U.S. Gov't, Organelles, P.H.S., Physical,
	Pigmented, Predictive Value of Tests, Proteins, Proteome, Reproducibility
	of Results, Research Support, Skin Diseases, Skin Neoplasms, Skin
	Pigmentation, Software, Stomach Diseases, U.S. Gov't},
  pmid = {11328187}
}

@article{Opper2000Gaussian,
  author = {M. Opper and O. Winther},
  title = {Gaussian processes for classification: mean-field algorithms.},
  journal = {Neural Comput},
  year = {2000},
  volume = {12},
  pages = {2655--2684},
  number = {11},
  month = {Nov},
  abstract = {We derive a mean-field algorithm for binary classification with gaussian
	processes that is based on the {TAP} approach originally proposed
	in statistical physics of disordered systems. {T}he theory also yields
	an approximate leave-one-out estimator for the generalization error,
	which is computed with no extra computational cost. {W}e show that
	from the {TAP} approach, it is possible to derive both a simpler
	"naive" mean-field theory and support vector machines ({SVM}s) as
	limiting cases. {F}or both mean-field algorithms and support vector
	machines, simulation results for three small benchmark data sets
	are presented. {T}hey show that one may get state-of-the-art performance
	by using the leave-one-out estimator for model selection and the
	built-in leave-one-out estimators are extremely precise when compared
	to the exact leave-one-out estimate. {T}he second result is taken
	as strong support for the internal consistency of the mean-field
	approach.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial
	Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding
	Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation,
	Chemistry, Child, Chromosome Aberrations, Classification, Colonic
	Neoplasms, Comparative Study, Computational Biology, Computer Simulation,
	Computer-Assisted, DNA, Data Interpretation, Databases, Decision
	Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis, Discrimination
	Learning, Electric Conductivity, Electrophysiology, Escherichia coli
	Proteins, Factual, Feedback, Female, Fungal, Gastric Emptying, Gene
	Expression Profiling, Gene Expression Regulation, Genes, Genetic,
	Genetic Markers, Genetic Predisposition to Disease, Hemolysins, Humans,
	Indians, Ion Channels, Kinetics, Leukemia, Likelihood Functions,
	Lipid Bilayers, Logistic Models, Lymphocytic, Male, Markov Chains,
	Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplasms, Neoplastic,
	Neural Networks (Computer), Neurological, Nevus, Non-P.H.S., Non-U.S.
	Gov't, Nonlinear Dynamics, Normal Distribution, North American, Nucleic
	Acid Conformation, Oligonucleotide Array Sequence Analysis, Organ
	Specificity, Organelles, Ovarian Neoplasms, Ovary, P.H.S., Pattern
	Recognition, Physical, Pigmented, Predictive Value of Tests, Promoter
	Regions (Genetics), Protein Folding, Protein Structure, Proteins,
	Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment,
	Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation,
	Software, Sound Spectrography, Statistical, Stomach Diseases, T-Lymphocytes,
	Thermodynamics, Transcription, Transcription Factors, Tumor Markers,
	Type 2, U.S. Gov't},
  pmid = {11110131}
}

@article{Ordonez2005Learning,
  author = {Ord{\'o}{\~n}ez, Celestino and Taboada, Javier and Bastante, Fernando and
	Mat{\'\i}as, Jose Mar{\'\i}a and Felic{\'\i}simo, Angel Manuel},
  title = {Learning machines applied to potential forest distribution.},
  journal = {Environ Manage},
  year = {2005},
  volume = {35},
  pages = {109--120},
  number = {1},
  month = {Jan},
  abstract = {The clearing of forests to obtain land for pasture and agriculture
	and the replacement of autochthonous species by other faster-growing
	varieties of trees for timber have both led to the loss of vast areas
	of forest worldwide. {A}t present, many developed countries are attempting
	to reverse these effects, establishing policies for the restoration
	of older woodland systems. {R}eforestation is a complex matter, planned
	and carried out by experts who need objective information regarding
	the type of forest that can be sustained in each area. {T}his information
	is obtained by drawing up feasibility models constructed using statistical
	methods that make use of the information provided by morphological
	and environmental variables (height, gradient, rainfall, etc.) that
	partially condition the presence or absence of a specific kind of
	forestation in an area. {T}he aim of this work is to construct a
	set of feasibility models for woodland located in the basin of the
	{R}iver {L}i{\'e}bana ({NW} {S}pain), to serve as a support tool for
	the experts entrusted with carrying out the reforestation project.
	{T}he techniques used are multilayer perceptron neural networks and
	support vector machines. {T}heir results will be compared to the
	results obtained by traditional techniques (such as discriminant
	analysis and logistic regression) by measuring the degree of fit
	between each model and the existing distribution of woodlands. {T}he
	interpretation and problems of the feasibility models are commented
	on in the {D}iscussion section.},
  keywords = {Artificial Intelligence, Conservation of Natural Resources, Decision
	Support Techniques, Ecosystem, Environment, Forestry, Regression
	Analysis, Spain},
  pmid = {15984068}
}

@article{Ornstein1993Entropy,
  author = {Ornstein, D. S. and Weiss, B.},
  title = {Entropy and data compression schemes},
  journal = {{IEEE} Trans. Inform. Theory},
  year = {1993},
  volume = {39},
  pages = {78--83},
  number = {1},
  month = {Jan},
  abstract = {Some new ways of defining the entropy of a process by observing a
	single typical output sequence as well as a new kind of {S}hannon-{M}c{M}illan-{B}reiman
	theorem are presented. {T}his provides a new and conceptually very
	simple ways of estimating the entropy of an ergodic stationary source
	as well as new insight into the workings of such well-known data
	compression schemes as the {L}empel-{Z}iv algorithm},
  pdf = {../local/Ornstein1993Entropy.pdf},
  file = {Ornstein1993Entropy.pdf:local/Ornstein1993Entropy.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Ortiz-Estevez2010ACNE,
  author = {Maria Ortiz-Estevez and Henrik Bengtsson and Angel Rubio},
  title = {{ACNE}: a summarization method to estimate allele-specific copy numbers
	for {Affymetrix} {SNP} arrays.},
  journal = {Bioinformatics},
  year = {2010},
  volume = {26},
  pages = {1827--1833},
  number = {15},
  month = {Aug},
  abstract = {MOTIVATION: Current algorithms for estimating DNA copy numbers (CNs)
	borrow concepts from gene expression analysis methods. However, single
	nucleotide polymorphism (SNP) arrays have special characteristics
	that, if taken into account, can improve the overall performance.
	For example, cross hybridization between alleles occurs in SNP probe
	pairs. In addition, most of the current CN methods are focused on
	total CNs, while it has been shown that allele-specific CNs are of
	paramount importance for some studies. Therefore, we have developed
	a summarization method that estimates high-quality allele-specific
	CNs. RESULTS: The proposed method estimates the allele-specific DNA
	CNs for all Affymetrix SNP arrays dealing directly with the cross
	hybridization between probes within SNP probesets. This algorithm
	outperforms (or at least it performs as well as) other state-of-the-art
	algorithms for computing DNA CNs. It better discerns an aberration
	from a normal state and it also gives more precise allele-specific
	CNs. AVAILABILITY: The method is available in the open-source R package
	ACNE, which also includes an add on to the aroma.affymetrix framework
	(http://www.aroma-project.org/).},
  doi = {10.1093/bioinformatics/btq300},
  institution = {Group of Bioinformatics, CEIT and TECNUN, University of Navarra,
	San Sebastian, Spain.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {btq300},
  pmid = {20529889},
  timestamp = {2010.08.05},
  url = {http://dx.doi.org/10.1093/bioinformatics/btq300}
}

@article{Osborne1999A,
  author = {M. R. Osborne and Brett Presnell and B. A. Turlach},
  title = {A New Approach to Variable Selection in Least Squares Problems},
  journal = {{IMA} Journal of Numerical Analysis},
  year = {2000},
  volume = {20},
  pages = {389--404},
  number = {3}
}

@article{Osborne1999On,
  author = {Michael R. Osborne and Brett Presnell and Berwin A. Turlach},
  title = {On the {LASSO} and Its Dual},
  journal = {Journal of Computational and Graphical Statistics},
  year = {2000},
  volume = {9},
  pages = {319--337},
  number = {2}
}

@article{Osowski2004Support,
  author = {Stanislaw Osowski and Linh Tran Hoai and Tomasz Markiewicz},
  title = {Support vector machine-based expert system for reliable heartbeat
	recognition.},
  journal = {{IEEE} Trans Biomed Eng},
  year = {2004},
  volume = {51},
  pages = {582--589},
  number = {4},
  month = {Apr},
  abstract = {This paper presents a new solution to the expert system for reliable
	heartbeat recognition. {T}he recognition system uses the support
	vector machine ({SVM}) working in the classification mode. {T}wo
	different preprocessing methods for generation of features are applied.
	{O}ne method involves the higher order statistics ({HOS}) while the
	second the {H}ermite characterization of {QRS} complex of the registered
	electrocardiogram ({ECG}) waveform. {C}ombining the {SVM} network
	with these preprocessing methods yields two neural classifiers, which
	have been combined into one final expert system. {T}he combination
	of classifiers utilizes the least mean square method to optimize
	the weights of the weighted voting integrating scheme. {T}he results
	of the performed numerical experiments for the recognition of 13
	heart rhythm types on the basis of {ECG} waveforms confirmed the
	reliability and advantage of the proposed approach.}
}

@article{Oti2008Conserved,
  author = {Oti, M. and van Reeuwijk, J. and Huynen, M. A. and Brunner, H. G.},
  title = {Conserved co-expression for candidate disease gene prioritization.},
  journal = {BMC Bioinformatics},
  year = {2008},
  volume = {9},
  pages = {208},
  abstract = {BACKGROUND: Genes that are co-expressed tend to be involved in the
	same biological process. However, co-expression is not a very reliable
	predictor of functional links between genes. The evolutionary conservation
	of co-expression between species can be used to predict protein function
	more reliably than co-expression in a single species. Here we examine
	whether co-expression across multiple species is also a better prioritizer
	of disease genes than is co-expression between human genes alone.
	RESULTS: We use co-expression data from yeast (S. cerevisiae), nematode
	worm (C. elegans), fruit fly (D. melanogaster), mouse and human and
	find that the use of evolutionary conservation can indeed improve
	the predictive value of co-expression. The effect that genes causing
	the same disease have higher co-expression than do other genes from
	their associated disease loci, is significantly enhanced when co-expression
	data are combined across evolutionarily distant species. We also
	find that performance can vary significantly depending on the co-expression
	datasets used, and just using more data does not necessarily lead
	to better prioritization. Instead, we find that dataset quality is
	more important than quantity, and using a consistent microarray platform
	per species leads to better performance than using more inclusive
	datasets pooled from various platforms. CONCLUSION: We find that
	evolutionarily conserved gene co-expression prioritizes disease candidate
	genes better than human gene co-expression alone, and provide the
	integrated data as a new resource for disease gene prioritization
	tools.},
  doi = {10.1186/1471-2105-9-208},
  institution = {Centre for Molecular and Biomolecular Informatics, Nijmegen Centre
	for Molecular Life Sciences, Radboud University Nijmegen Medical
	Centre, Geert Grooteplein 26-28, 6525 GA, Nijmegen, The Netherlands.
	m.oti@cmbi.ru.nl},
  keywords = {Animals; Base Sequence; Caenorhabditis elegans; Conserved Sequence;
	Databases, Genetic; Disease; Drosophila melanogaster; Evolution,
	Molecular; Gene Dosage; Gene Expression; Gene Expression Profiling;
	Gene Frequency; Genetic Predisposition to Disease; Humans; Mice;
	Oligonucleotide Array Sequence Analysis; Penetrance; Predictive Value
	of Tests; Saccharomyces cerevisiae; Sample Size; Species Specificity},
  owner = {mordelet},
  pii = {1471-2105-9-208},
  pmid = {18433471},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1186/1471-2105-9-208}
}

@article{Oti2006Predicting,
  author    = {M. Oti and B. Snel and M. A. Huynen and H. G. Brunner},
  title     = {Predicting disease genes using protein-protein interactions.},
  journal   = {J Med Genet},
  year      = {2006},
  month     = {Aug},
  volume    = {43},
  number    = {8},
  pages     = {691--698},
  abstract  = {BACKGROUND: The responsible genes have not yet been identified for
	many genetically mapped disease loci. Physically interacting proteins
	tend to be involved in the same cellular process, and mutations in
	their genes may lead to similar disease phenotypes. OBJECTIVE: To
	investigate whether protein-protein interactions can predict genes
	for genetically heterogeneous diseases. METHODS: 72,940 protein-protein
	interactions between 10,894 human proteins were used to search 432
	loci for candidate disease genes representing 383 genetically heterogeneous
	hereditary diseases. For each disease, the protein interaction partners
	of its known causative genes were compared with the disease associated
	loci lacking identified causative genes. Interaction partners located
	within such loci were considered candidate disease gene predictions.
	Prediction accuracy was tested using a benchmark set of known disease
	genes. RESULTS: Almost 300 candidate disease gene predictions were
	made. Some of these have since been confirmed. On average, 10\% or
	more are expected to be genuine disease genes, representing a 10-fold
	enrichment compared with positional information only. Examples of
	interesting candidates are AKAP6 for arrythmogenic right ventricular
	dysplasia 3 and SYN3 for familial partial epilepsy with variable
	foci. CONCLUSIONS: Exploiting protein-protein interactions can greatly
	increase the likelihood of finding positional candidate disease genes.
	When applied on a large scale they can lead to novel candidate gene
	predictions.},
  keywords  = {Animals; Benchmarking; Databases, Protein; Disease; Genetic Predisposition
	to Disease; Humans; Protein Binding; Proteins},
  doi       = {10.1136/jmg.2006.041376},
  pii       = {jmg.2006.041376},
  pmid      = {16611749},
  url       = {http://dx.doi.org/10.1136/jmg.2006.041376},
  owner     = {mordelet},
  timestamp = {2010.09.28}
}

@article{Overbeek2000WIT,
  author = {Overbeek, R. and Larsen, N. and Pusch, G. D. and D'Souza, M. and
	Selkov, Jr., E. and Kyrpides, N. and Fonstein, M. and Maltsev, N.
	and Selkov, E.},
  title = {{WIT}: integrated system for high-throughput genome sequence analysis
	and metabolic reconstruction},
  journal = {Nucleic Acids Res.},
  year = {2000},
  volume = {28},
  pages = {123--125},
  number = {1},
  pdf = {../local/Overbeek2000WIT.pdf},
  file = {Overbeek2000WIT.pdf:local/Overbeek2000WIT.pdf:PDF},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/28/1/123}
}

@article{Overhoff2005Local,
  author = {Overhoff, M. and Alken, M. and Far, R. K. and Lemaitre, M. and Lebleu,
	B. and Sczakiel, G. and Robbins, I.},
  title = {Local {RNA} target structure influences {siRNA} efficacy: a systematic
	global analysis.},
  journal = {J. Mol. Biol.},
  year = {2005},
  volume = {348},
  pages = {871--881},
  number = {4},
  month = {May},
  abstract = {The efficiency with which small interfering RNAs (siRNAs) down-regulate
	specific gene expression in living cells is variable and a number
	of sequence-governed, biochemical parameters of the siRNA duplex
	have been proposed for the design of an efficient siRNA. Some of
	these parameters have been clearly identified to influence the assembly
	of the RNA-induced silencing complex (RISC), or to favour the sequence
	preferences of the RISC endonuclease. For other parameters, it is
	difficult to ascertain whether the influence is a determinant of
	the siRNA per se, or a determinant of the target RNA, especially
	its local structural characteristics. In order to gain an insight
	into the effects of local target structure on the biological activity
	of siRNA, we have used large sets of siRNAs directed against local
	targets of the mRNAs of ICAM-1 and survivin. Target structures were
	classified as accessible or inaccessible using an original, iterative
	computational approach and by experimental RNase H mapping. The effectiveness
	of siRNA was characterized by measuring the IC50 values in cell culture
	and the maximal extent of target suppression. Mean IC50 values were
	tenfold lower for accessible local target sites, with respect to
	inaccessible ones. Mean maximal target suppression was improved.
	These data illustrate that local target structure does, indeed, influence
	the activity of siRNA. We suggest that local target screening can
	significantly improve the hit rate in the design of biologically
	active siRNAs.},
  doi = {10.1016/j.jmb.2005.03.012},
  keywords = {sirna},
  owner = {vert},
  pii = {S0022-2836(05)00270-6},
  pmid = {15843019},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1016/j.jmb.2005.03.012}
}

@article{Oyang2005Data,
  author = {Yen-Jen Oyang and Shien-Ching Hwang and Yu-Yen Ou and Chien-Yu Chen
	and Zhi-Wei Chen},
  title = {Data classification with radial basis function networks based on
	a novel kernel density estimation algorithm.},
  journal = {{IEEE} Trans Neural Netw},
  year = {2005},
  volume = {16},
  pages = {225--236},
  number = {1},
  month = {Jan},
  abstract = {This paper presents a novel learning algorithm for efficient construction
	of the radial basis function ({RBF}) networks that can deliver the
	same level of accuracy as the support vector machines ({SVM}s) in
	data classification applications. {T}he proposed learning algorithm
	works by constructing one {RBF} subnetwork to approximate the probability
	density function of each class of objects in the training data set.
	{W}ith respect to algorithm design, the main distinction of the proposed
	learning algorithm is the novel kernel density estimation algorithm
	that features an average time complexity of {O}(n log n), where n
	is the number of samples in the training data set. {O}ne important
	advantage of the proposed learning algorithm, in comparison with
	the {SVM}, is that the proposed learning algorithm generally takes
	far less time to construct a data classifier with an optimized parameter
	setting. {T}his feature is of significance for many contemporary
	applications, in particular, for those applications in which new
	objects are continuously added into an already large database. {A}nother
	desirable feature of the proposed learning algorithm is that the
	{RBF} networks constructed are capable of carrying out data classification
	with more than two classes of objects in one single run. {I}n other
	words, unlike with the {SVM}, there is no need to resort to mechanisms
	such as one-against-one or one-against-all for handling datasets
	with more than two classes of objects. {T}he comparison with {SVM}
	is of particular interest, because it has been shown in a number
	of recent studies that {SVM} generally are able to deliver higher
	classification accuracy than the other existing data classification
	algorithms. {A}s the proposed learning algorithm is instance-based,
	the data reduction issue is also addressed in this paper. {O}ne interesting
	observation in this regard is that, for all three data sets used
	in data reduction experiments, the number of training samples remaining
	after a naive data reduction mechanism is applied is quite close
	to the number of support vectors identified by the {SVM} software.
	{T}his paper also compares the performance of the {RBF} networks
	constructed with the proposed learning algorithm and those constructed
	with a conventional cluster-based learning algorithm. {T}he most
	interesting observation learned is that, with respect to data classification,
	the distributions of training samples near the boundaries between
	different classes of objects carry more crucial information than
	the distributions of samples in the inner parts of the clusters.}
}

@article{Polya1934,
  author = {P{\'o}lya, G.},
  title = {Algebraische {Berechnung} der {Anzahl} der {Isomeren} einiger organischer
	{Verbindungen}},
  journal = {Z. Kristal.},
  year = {1936},
  volume = {93},
  pages = {415--443}
}

@article{Paatero1994Positive,
  author    = {Paatero, P. and Tapper, U.},
  title     = {Positive matrix factorization: A non-negative factor model with optimal
	utilization of error estimates of data values},
  journal   = {Environmetrics},
  year      = {1994},
  volume    = {5},
  number    = {2},
  pages     = {111--126},
  doi       = {10.1002/env.3170050203},
  url       = {http://dx.doi.org/10.1002/env.3170050203},
  owner     = {jp},
  timestamp = {2011.12.30}
}

@techreport{Pachter2011Models,
  author = {Pachter, L.},
  title = {Models for transcript quantification from {RNA-Seq}},
  institution = {arXiv},
  year = {2011},
  number = {1104.3889},
  eprint = {1104.3889},
  archiveprefix = {arXiv},
  pdf = {../local/Pachter2011Models.pdf},
  file = {Pachter2011Models.pdf:Pachter2011Models.pdf:PDF},
  owner = {jp},
  timestamp = {2013.03.29}
}

@article{Pahikkala2005Contextual,
  author = {Tapio Pahikkala and Filip Ginter and Jorma Boberg and Jouni Jarvinen
	and Tapio Salakoski},
  title = {Contextual weighting for {Support} {Vector} {Machines} in literature
	mining: an application to gene versus protein name disambiguation.},
  journal = {{BMC} Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {157},
  number = {1},
  month = {Jun},
  abstract = {B{ACKGROUND}: {T}he ability to distinguish between genes and proteins
	is essential for understanding biological text. {S}upport {V}ector
	{M}achines ({SVM}s) have been proven to be very efficient in general
	data mining tasks. {W}e explore their capability for the gene versus
	protein name disambiguation task. {RESULTS}: {W}e incorporated into
	the conventional {SVM} a weighting scheme based on distances of context
	words from the word to be disambiguated. {T}his weighting scheme
	increased the performance of {SVM}s by five percentage points giving
	performance better than 85\% as measured by the area under {ROC}
	curve and outperformed the {W}eighted {A}dditive {C}lassifier, which
	also incorporates the weighting, and the {N}aive {B}ayes classifier.
	{CONCLUSIONS}: {W}e show that the performance of {SVM}s can be improved
	by the proposed weighting scheme. {F}urthermore, our results suggest
	that in this study the increase of the classification performance
	due to the weighting is greater than that obtained by selecting the
	underlying classifier or the kernel part of the {SVM}.},
  doi = {10.1186/1471-2105-6-157},
  pdf = {../local/Pahikkala2005Contextual.pdf},
  file = {Pahikkala2005Contextual.pdf:local/Pahikkala2005Contextual.pdf:PDF},
  keywords = {biosvm},
  pii = {1471-2105-6-157},
  url = {http://dx.doi.org/10.1186/1471-2105-6-157}
}

@article{Pai2006Prospects,
  author = {Pai, S. I. and Lin, Y.-Y. and Macaes, B. and Meneshian, A. and Hung, C.-F.
	and Wu, T.-C.},
  title = {Prospects of {RNA} interference therapy for cancer.},
  journal = {Gene Ther.},
  year = {2006},
  volume = {13},
  pages = {464--477},
  number = {6},
  month = {Mar},
  abstract = {RNA interference (RNAi) is a powerful gene-silencing process that
	holds great promise in the field of cancer therapy. The discovery
	of RNAi has generated enthusiasm within the scientific community,
	not only because it has been used to rapidly identify key molecules
	involved in many disease processes including cancer, but also because
	RNAi has the potential to be translated into a technology with major
	therapeutic applications. Our evolving understanding of the molecular
	pathways important for carcinogenesis has created opportunities for
	cancer therapy employing RNAi technology to target the key molecules
	within these pathways. Many gene products involved in carcinogenesis
	have already been explored as targets for RNAi intervention, and
	RNAi targeting of molecules crucial for tumor-host interactions and
	tumor resistance to chemo- or radiotherapy has also been investigated.
	In most of these studies, the silencing of critical gene products
	by RNAi technology has generated significant antiproliferative and/or
	proapoptotic effects in cell-culture systems or in preclinical animal
	models. Nevertheless, significant obstacles, such as in vivo delivery,
	incomplete suppression of target genes, nonspecific immune responses
	and the so-called off-target effects, need to be overcome before
	this technology can be successfully translated into the clinical
	arena. Significant progress has already been made in addressing some
	of these issues, and it is foreseen that early phase clinical trials
	will be initiated in the very near future.},
  doi = {10.1038/sj.gt.3302694},
  keywords = {sirna},
  owner = {vert},
  pii = {3302694},
  pmid = {16341059},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1038/sj.gt.3302694}
}

@article{Paik2007Development,
  author = {Paik, Soonmyung},
  title = {Development and clinical utility of a 21-gene recurrence score prognostic
	assay in patients with early breast cancer treated with tamoxifen},
  journal = {Oncologist},
  year = {2007},
  volume = {12},
  pages = {631--635},
  number = {6},
  month = {Jun},
  abstract = {Although patients diagnosed with axillary node-negative estrogen receptor-positive
	breast cancer have an excellent prognosis, about 15\% of them fail
	after 5 years of tamoxifen treatment. Clinical trials have provided
	evidence that there is a significant benefit from chemotherapy for
	these patients, but it would be significant overtreatment if all
	of them were treated with chemotherapy. Therefore, context-specific
	prognostic assays that can identify those who need chemotherapy in
	addition to tamoxifen, or those who are essentially cured by tamoxifen
	alone, and can be performed using routinely processed tumor biopsy
	tissue would be clinically useful. Using a stepwise approach of going
	through independent model-building and validation sets, a 21-gene
	recurrence score (RS), based on monitoring of mRNA expression levels
	of 16 cancer-related genes in relation to five reference genes, has
	been developed. The RS identified approximately 50\% of the patients
	who had excellent prognosis after tamoxifen alone. Subsequent study
	suggested that high-risk patients identified with the RS preferentially
	benefit from chemotherapy. Ideally the RS should be used as a continuous
	variable. A prospective study---the Trial Assigning Individualized
	Options for Treatment (Rx) (TAILORx)---to examine whether chemotherapy
	is required for the intermediate-risk group defined by the RS is
	accruing in North America.},
  doi = {10.1634/theoncologist.12-6-631},
  institution = {Division of Pathology, National Surgical Adjuvant Breast and Bowel
	Project, Pittsburgh, Pennsylvania 15206, USA. soon.paik@nsabp.org},
  keywords = {Breast Neoplasms, drug therapy/genetics/pathology; Estrogen Antagonists,
	therapeutic use; Female; Gene Expression Profiling; Gene Expression
	Regulation, Neoplastic; Genetic Predisposition to Disease; Humans;
	Neoplasm Recurrence, Local; Prognosis; Risk Factors; Tamoxifen, therapeutic
	use; Time Factors; Treatment Outcome},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {12/6/631},
  pmid = {17602054},
  timestamp = {2012.03.09},
  url = {http://dx.doi.org/10.1634/theoncologist.12-6-631}
}

@article{Paik2006Gene,
  author = {Paik, Soonmyung and Tang, Gong and Shak, Steven and Kim, Chungyeul
	and Baker, Joffre and Kim, Wanseop and Cronin, Maureen and Baehner,
	Frederick L. and Watson, Drew and Bryant, John and Costantino, Joseph
	P. and Geyer, Jr, Charles E and Wickerham, D Lawrence and Wolmark,
	Norman},
  title = {Gene expression and benefit of chemotherapy in women with node-negative,
	estrogen receptor-positive breast cancer},
  journal = {J Clin Oncol},
  year = {2006},
  volume = {24},
  pages = {3726--3734},
  number = {23},
  month = {Aug},
  abstract = {The 21-gene recurrence score (RS) assay quantifies the likelihood
	of distant recurrence in women with estrogen receptor-positive, lymph
	node-negative breast cancer treated with adjuvant tamoxifen. The
	relationship between the RS and chemotherapy benefit is not known. The
	RS was measured in tumors from the tamoxifen-treated and tamoxifen
	plus chemotherapy-treated patients in the National Surgical Adjuvant
	Breast and Bowel Project (NSABP) B20 trial. Cox proportional hazards
	models were utilized to test for interaction between chemotherapy
	treatment and the RS. A total of 651 patients were assessable (227
	randomly assigned to tamoxifen and 424 randomly assigned to tamoxifen
	plus chemotherapy). The test for interaction between chemotherapy
	treatment and RS was statistically significant (P = .038). Patients
	with high-RS (> or = 31) tumors (ie, high risk of recurrence) had
	a large benefit from chemotherapy (relative risk, 0.26; 95\% CI,
	0.13 to 0.53; absolute decrease in 10-year distant recurrence rate:
	mean, 27.6\%; SE, 8.0\%). Patients with low-RS (< 18) tumors derived
	minimal, if any, benefit from chemotherapy treatment (relative risk,
	1.31; 95\% CI, 0.46 to 3.78; absolute decrease in distant recurrence
	rate at 10 years: mean, -1.1\%; SE, 2.2\%). Patients with intermediate-RS
	tumors did not appear to have a large benefit, but the uncertainty
	in the estimate can not exclude a clinically important benefit. The
	RS assay not only quantifies the likelihood of breast cancer recurrence
	in women with node-negative, estrogen receptor-positive breast cancer,
	but also predicts the magnitude of chemotherapy benefit.},
  doi = {10.1200/JCO.2005.04.7985},
  institution = {Division of Pathology, Operations Center, and Biostatistical Center,
	National Surgical Adjuvant Breast and Bowel Project, Pittsburgh,
	PA 15212, USA. soon.paik@nsabp.org},
  keywords = {Adult; Aged; Antineoplastic Combined Chemotherapy Protocols, administration
	/&/ dosage/therapeutic use; Breast Neoplasms, drug therapy/metabolism/pathology/prevention
	/&/ control; Cisplatin, administration /&/ dosage; Female; Fluorouracil,
	administration /&/ dosage; Gene Expression Regulation, Neoplastic;
	Humans; Linear Models; Lymphatic Metastasis; Methotrexate, administration
	/&/ dosage; Middle Aged; Mitomycins, administration /&/ dosage; Neoplasm
	Proteins, metabolism; Neoplasm Recurrence, Local, metabolism/prevention
	/&/ control; Odds Ratio; Predictive Value of Tests; Prognosis; Proportional
	Hazards Models; Randomized Controlled Trials as Topic; Receptors,
	Estrogen, metabolism; Recurrence, prevention /&/ control; Reverse
	Transcriptase Polymerase Chain Reaction; Risk Assessment; Risk Factors;
	Tamoxifen, administration /&/ dosage; Tumor Markers, Biological,
	metabolism},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {JCO.2005.04.7985},
  pmid = {16720680},
  timestamp = {2012.03.09},
  url = {http://dx.doi.org/10.1200/JCO.2005.04.7985}
}

@article{Pajares2004On,
  author = {Gonzalo Pajares and Jes{\'u}s M de la Cruz},
  title = {On combining support vector machines and simulated annealing in stereovision
	matching},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2004},
  volume = {34},
  pages = {1646--1657},
  number = {4},
  month = {Aug},
  abstract = {This paper outlines a method for solving the stereovision matching
	problem using edge segments as the primitives. {I}n stereovision
	matching, the following constraints are commonly used: epipolar,
	similarity, smoothness, ordering, and uniqueness. {W}e propose a
	new strategy in which such constraints are sequentially combined.
	{T}he goal is to achieve high performance in terms of correct matches
	by combining several strategies. {T}he contributions of this paper
	are reflected in the development of a similarity measure through
	a support vector machines classification approach; the transformation
	of the smoothness, ordering and epipolar constraints into the form
	of an energy function, through an optimization simulated annealing
	approach, whose minimum value corresponds to a good matching solution
	and by introducing specific conditions to overcome the violation
	of the smoothness and ordering constraints. {T}he performance of
	the proposed method is illustrated by comparative analysis against
	some recent global matching methods.}
}

@article{Palmer2003Comparison,
  author = {Gregory M Palmer and Changfang Zhu and Tara M Breslin and Fushen
	Xu and Kennedy W Gilchrist and Nirmala Ramanujam},
  title = {Comparison of multiexcitation fluorescence and diffuse reflectance
	spectroscopy for the diagnosis of breast cancer ({M}arch 2003)},
  journal = {I{EEE} {T}rans {B}iomed {E}ng},
  year = {2003},
  volume = {50},
  pages = {1233--1242},
  number = {11},
  month = {Nov},
  abstract = {Nonmalignant (n = 36) and malignant (n = 20) tissue samples were obtained
	from breast cancer and breast reduction surgeries. {T}hese tissues
	were characterized using multiple excitation wavelength fluorescence
	spectroscopy and diffuse reflectance spectroscopy in the ultraviolet-visible
	wavelength range, immediately after excision. {S}pectra were then
	analyzed using principal component analysis ({PCA}) as a data reduction
	technique. {PCA} was performed on each fluorescence spectrum, as
	well as on the diffuse reflectance spectrum individually, to establish
	a set of principal components for each spectrum. {A} {W}ilcoxon rank-sum
	test was used to determine which principal components show statistically
	significant differences between malignant and nonmalignant tissues.
	{F}inally, a support vector machine ({SVM}) algorithm was utilized
	to classify the samples based on the diagnostically useful principal
	components. {C}ross-validation of this nonparametric algorithm was
	carried out to determine its classification accuracy in an unbiased
	manner. {M}ultiexcitation fluorescence spectroscopy was successful
	in discriminating malignant and nonmalignant tissues, with a sensitivity
	and specificity of 70\% and 92\%, respectively. {T}he sensitivity
	(30\%) and specificity (78\%) of diffuse reflectance spectroscopy
	alone was significantly lower. {C}ombining fluorescence and diffuse
	reflectance spectra did not improve the classification accuracy of
	an algorithm based on fluorescence spectra alone. {T}he fluorescence
	excitation-emission wavelengths identified as being diagnostic from
	the {PCA}-{SVM} algorithm suggest that the important fluorophores
	for breast cancer diagnosis are most likely tryptophan, {NAD}({P}){H}
	and flavoproteins.}
}

@article{Pan2004Comprehensive,
  author = {Fei Pan and Baoying Wang and Xin Hu and William Perrizo},
  title = {Comprehensive vertical sample-based {KNN}/{LSVM} classification for
	gene expression analysis},
  journal = {J {B}iomed {I}nform},
  year = {2004},
  volume = {37},
  pages = {240--248},
  number = {4},
  month = {Aug},
  abstract = {Classification analysis of microarray gene expression data has been
	widely used to uncover biological features and to distinguish closely
	related cell types that often appear in the diagnosis of cancer.
	{H}owever, the number of dimensions of gene expression data is often
	very high, e.g., in the hundreds or thousands. {A}ccurate and efficient
	classification of such high-dimensional data remains a contemporary
	challenge. {I}n this paper, we propose a comprehensive vertical sample-based
	{KNN}/{LSVM} classification approach with weights optimized by genetic
	algorithms for high-dimensional data. {E}xperiments on common gene
	expression datasets demonstrated that our approach can achieve high
	accuracy and efficiency at the same time. {T}he improvement of speed
	is mainly related to the vertical data representation, {P}-tree,
	and its optimized logical algebra ({P}atents are pending on the {P}-tree
	technology; this work is partially supported by {GSA} {G}rant
	{ACT}\#:{K}96130308). {T}he high accuracy is due to the combination of
	a {KNN} majority voting approach and a local support vector machine
	approach that makes optimal decisions at the local level. {A}s a
	result, our approach could be a powerful tool for high-dimensional
	gene expression data analysis.},
  doi = {10.1016/j.jbi.2004.07.003},
  pdf = {../local/Pan2004Comprehensive.pdf},
  file = {Pan2004Comprehensive.pdf:local/Pan2004Comprehensive.pdf:PDF},
  keywords = {biosvm},
  pii = {S1532-0464(04)00070-X},
  url = {http://dx.doi.org/10.1016/j.jbi.2004.07.003}
}

@techreport{Pan2008A,
  author      = {Pan, Sinno Jialin and Yang, Qiang},
  title       = {A Survey on Transfer Learning},
  institution = {Department of Computer Science and Engineering, Hong Kong University
	of Science and Technology, Hong Kong, China},
  number      = {HKUST-CS08-08},
  month       = {November},
  year        = {2008},
  url         = {http://www.cse.ust.hk/~sinnopan/publications/TLsurvey\_0822.pdf}
}

@article{Pandey2000Proteomics,
  author = {Pandey, A. and Mann, M.},
  title = {Proteomics to study genes and genomes},
  journal = {Nature},
  year = {2000},
  volume = {405},
  pages = {837--846},
  number = {6788},
  pdf = {../local/pand00.pdf},
  file = {pand00.pdf:local/pand00.pdf:PDF},
  subject = {bioprot},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v405/n6788/full/405837a0_fs.html&content_filetype=pdf}
}

@article{Pang2005Face,
  author = {Shaoning Pang and Daijin Kim and Sung Yang Bang},
  title = {Face membership authentication using {SVM} classification tree generated
	by membership-based {LLE} data partition},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2005},
  volume = {16},
  pages = {436--446},
  number = {2},
  month = {Mar},
  abstract = {This paper presents a new membership authentication method by face
	classification using a support vector machine ({SVM}) classification
	tree, in which the size of membership group and the members in the
	membership group can be changed dynamically. {U}nlike our previous
	{SVM} ensemble-based method, which performed only one face classification
	in the whole feature space, the proposed method employed a divide
	and conquer strategy that first performs a recursive data partition
	by membership-based locally linear embedding ({LLE}) data clustering,
	then does the {SVM} classification in each partitioned feature subset.
	{O}ur experimental results show that the proposed {SVM} tree not
	only keeps the good properties that the {SVM} ensemble method has,
	such as a good authentication accuracy and the robustness to the
	change of members, but also has a considerable improvement on the
	stability under the change of membership group size.},
  keywords = {80 and over, Aged, Algorithms, Area Under Curve, Cross-Sectional Studies,
	Decision Trees, Diagnostic Imaging, Diagnostic Techniques, Face,
	Glaucoma, Humans, Lasers, Least-Squares Analysis, Middle Aged, Nerve
	Fibers, Non-U.S. Gov't, Ophthalmological, Optic Nerve Diseases, P.H.S.,
	Photic Stimulation, ROC Curve, Research Support, Retinal Ganglion
	Cells, Sensitivity and Specificity, Statistics, U.S. Gov't, 15787150}
}

@article{Papadakis2007Efficient,
  author = {P. Papadakis and I. Pratikakis and S. Perantonis and T. Theoharis},
  title = {Efficient {3D} shape matching and retrieval using a concrete radialized
	spherical projection representation},
  journal = {Pattern Recogn.},
  year = {2007},
  volume = {40},
  pages = {2437--2452},
  number = {9},
  address = {New York, NY, USA},
  doi = {10.1016/j.patcog.2006.12.026},
  issn = {0031-3203},
  publisher = {Elsevier Science Inc.}
}

@article{Papadopoulos2005Characterization,
  author = {A. Papadopoulos and D. I. Fotiadis and A. Likas},
  title = {Characterization of clustered microcalcifications in digitized mammograms
	using neural networks and support vector machines},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2005},
  volume = {34},
  pages = {141--150},
  number = {2},
  month = {Jun},
  abstract = {O{BJECTIVE}: {D}etection and characterization of microcalcification
	clusters in mammograms is vital in daily clinical practice. {T}he
	scope of this work is to present a novel computer-based automated
	method for the characterization of microcalcification clusters in
	digitized mammograms. {METHODS} {AND} {MATERIAL}: {T}he proposed
	method has been implemented in three stages: (a) the cluster detection
	stage to identify clusters of microcalcifications, (b) the feature
	extraction stage to compute the important features of each cluster
	and (c) the classification stage, which provides with the final characterization.
	{I}n the classification stage, a rule-based system, an artificial
	neural network ({ANN}) and a support vector machine ({SVM}) have
	been implemented and evaluated using receiver operating characteristic
	({ROC}) analysis. {T}he proposed method was evaluated using the {N}ijmegen
	and {M}ammographic {I}mage {A}nalysis {S}ociety ({MIAS}) mammographic
	databases. {T}he original feature set was enhanced by the addition
	of four rule-based features. {RESULTS} {AND} {CONCLUSIONS}: {I}n
	the case of {N}ijmegen dataset, the performance of the {SVM} was
	{A}z=0.79 and 0.77 for the original and enhanced feature set, respectively,
	while for the {MIAS} dataset the corresponding characterization scores
	were {A}z=0.81 and 0.80. {U}tilizing neural network classification
	methodology, the corresponding performance for the {N}ijmegen dataset
	was {A}z=0.70 and 0.76 while for the {MIAS} dataset it was {A}z=0.73
	and 0.78. {A}lthough the obtained high classification performance
	can be successfully applied to microcalcification clusters characterization,
	further studies must be carried out for the clinical evaluation of
	the system using larger datasets. {T}he use of additional features
	originating either from the image itself (such as cluster location
	and orientation) or from the patient data may further improve the
	diagnostic value of the system.},
  doi = {10.1016/j.artmed.2004.10.001},
  pdf = {../local/Papadopoulos2005Characterization.pdf},
  file = {Papadopoulos2005Characterization.pdf:local/Papadopoulos2005Characterization.pdf:PDF},
  keywords = {Apoptosis, Gene Expression Profiling, Humans, Neoplasms, Non-U.S.
	Gov't, Oligonucleotide Array Sequence Analysis, Polymerase Chain
	Reaction, Proteins, Research Support, Subcellular Fractions, Unknown
	Primary, 15894178},
  pii = {S0933-3657(04)00154-X},
  url = {http://dx.doi.org/10.1016/j.artmed.2004.10.001}
}

@techreport{BLEU,
  author = {Papineni, K. and Roukos, S. and Ward, T. and Zhu, W. J.},
  title = {{BLEU}: A Method for Automatic Evaluation of Machine Translation},
  institution = {IBM},
  year = {2001},
  type = {Research Report},
  number = {RC22176}
}

@inproceedings{Pardalos95aparallel,
  author = {P. M. Pardalos and L. S. Pitsoulis and M. G. C. Resende},
  title = {A Parallel {GRASP} Implementation for the Quadratic Assignment Problem},
  booktitle = {Parallel Algorithms for Irregularly Structured Problems},
  year = {1995},
  pages = {111--130},
  publisher = {Kluwer Academic Publishers}
}

@book{Parham2004The,
  author    = {Parham, Peter},
  title     = {The {Immune} {System}},
  publisher = {Garland {Science} {Publishing}},
  year      = {2004}
}

@article{Park2003Prediction,
  author = {Park, K.-J. and Kanehisa, M.},
  title = {Prediction of protein subcellular locations by support vector machines
	using compositions of amino acids and amino acid pairs},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1656--1663},
  number = {13},
  abstract = {Motivation: {T}he subcellular location of a protein is closely correlated
	to its function. {T}hus, computational prediction of subcellular
	locations from the amino acid sequence information would help annotation
	and functional prediction of protein coding genes in complete genomes.
	{W}e have developed a method based on support vector machines ({SVM}s).
	{R}esults: {W}e considered 12 subcellular locations in eukaryotic
	cells: chloroplast, cytoplasm, cytoskeleton, endoplasmic reticulum,
	extracellular medium, {G}olgi apparatus, lysosome, mitochondrion,
	nucleus, peroxisome, plasma membrane, and vacuole. {W}e constructed
	a data set of proteins with known locations from the {SWISS}-{PROT}
	database. {A} set of {SVM}s was trained to predict the subcellular
	location of a given protein based on its amino acid, amino acid pair,
	and gapped amino acid pair compositions. {T}he predictors based on
	these different compositions were then combined using a voting scheme.
	{R}esults obtained through 5-fold cross-validation tests showed an
	improvement in prediction accuracy over the algorithm based on the
	amino acid composition only. {T}his prediction method is available
	via the {I}nternet. {A}vailability: http://www.genome.ad.jp/{SIT}/ploc.html
	{S}upplementary information: http://web.kuicr.kyoto-u.ac.jp/~park/{S}eqdata/},
  pdf = {../local/Park2003Prediction.pdf},
  file = {Park2003Prediction.pdf:local/Park2003Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/13/1656}
}

@article{Park2009ChIP,
  author = {Peter J Park},
  title = {{ChIP}-seq: advantages and challenges of a maturing technology},
  journal = {Nat Rev Genet},
  year = {2009},
  volume = {10},
  pages = {669--680},
  number = {10},
  month = {Oct},
  abstract = {Chromatin immunoprecipitation followed by sequencing (ChIP-seq) is
	a technique for genome-wide profiling of DNA-binding proteins, histone
	modifications or nucleosomes. Owing to the tremendous progress in
	next-generation sequencing technology, ChIP-seq offers higher resolution,
	less noise and greater coverage than its array-based predecessor
	ChIP-chip. With the decreasing cost of sequencing, ChIP-seq has become
	an indispensable tool for studying gene regulation and epigenetic
	mechanisms. In this Review, I describe the benefits and challenges
	in harnessing this technique with an emphasis on issues related to
	experimental design and data analysis. ChIP-seq experiments generate
	large quantities of data, and effective computational analysis will
	be crucial for uncovering biological mechanisms.},
  doi = {10.1038/nrg2641},
  institution = {Harvard Medical School, 10 Shattuck Street, Boston, MA 02115, USA.
	peter_park@harvard.edu},
  keywords = {Animals; Chromatin Immunoprecipitation, methods; Computational Biology;
	DNA-Binding Proteins, genetics; Epigenesis, Genetic; Humans; Nucleosomes,
	genetics; Sequence Analysis, DNA, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nrg2641},
  pmid = {19736561},
  timestamp = {2010.08.05},
  url = {http://dx.doi.org/10.1038/nrg2641}
}

@article{Parker2009Supervised,
  author = {Joel S Parker and Michael Mullins and Maggie C U Cheang and Samuel
	Leung and David Voduc and Tammi Vickery and Sherri Davies and Christiane
	Fauron and Xiaping He and Zhiyuan Hu and John F Quackenbush and Inge
	J Stijleman and Juan Palazzo and J. S. Marron and Andrew B Nobel
	and Elaine Mardis and Torsten O Nielsen and Matthew J Ellis and Charles
	M Perou and Philip S Bernard},
  title = {Supervised risk predictor of breast cancer based on intrinsic subtypes},
  journal = {J Clin Oncol},
  year = {2009},
  volume = {27},
  pages = {1160--1167},
  number = {8},
  month = {Mar},
  abstract = {PURPOSE: To improve on current standards for breast cancer prognosis
	and prediction of chemotherapy benefit by developing a risk model
	that incorporates the gene expression-based "intrinsic" subtypes
	luminal A, luminal B, HER2-enriched, and basal-like. METHODS: A 50-gene
	subtype predictor was developed using microarray and quantitative
	reverse transcriptase polymerase chain reaction data from 189 prototype
	samples. Test sets from 761 patients (no systemic therapy) were evaluated
	for prognosis, and 133 patients were evaluated for prediction of
	pathologic complete response (pCR) to a taxane and anthracycline
	regimen. RESULTS: The intrinsic subtypes as discrete entities showed
	prognostic significance (P = 2.26E-12) and remained significant in
	multivariable analyses that incorporated standard parameters (estrogen
	receptor status, histologic grade, tumor size, and node status).
	A prognostic model for node-negative breast cancer was built using
	intrinsic subtype and clinical information. The C-index estimate
	for the combined model (subtype and tumor size) was a significant
	improvement on either the clinicopathologic model or subtype model
	alone. The intrinsic subtype model predicted neoadjuvant chemotherapy
	efficacy with a negative predictive value for pCR of 97\%. CONCLUSION:
	Diagnosis by intrinsic subtype adds significant prognostic and predictive
	information to standard parameters for patients with breast cancer.
	The prognostic properties of the continuous risk score will be of
	value for the management of node-negative breast cancers. The subtypes
	and risk score can also be used to assess the likelihood of efficacy
	from neoadjuvant chemotherapy.},
  doi = {10.1200/JCO.2008.18.1370},
  institution = {Lineberger Comprehensive Cancer Center, Carolina Center for Genome
	Sciences, University of North Carolina at Chapel Hill, Chapel Hill,
	NC, USA.},
  keywords = {Adult; Aged; Breast Neoplasms, classification/drug therapy/etiology/mortality;
	Chemotherapy, Adjuvant; Female; Humans; Middle Aged; Neoplasm Recurrence,
	Local, etiology; Prognosis; Receptor, erbB-2, analysis; Receptors,
	Estrogen, analysis; Reverse Transcriptase Polymerase Chain Reaction;
	Risk},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {JCO.2008.18.1370},
  pmid = {19204204},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1200/JCO.2008.18.1370}
}

@article{Parker1994Scheme,
  author    = {Parker, K. C. and Bednarek, M. A. and Coligan, J. E.},
  title     = {Scheme for ranking potential {HLA-A2} binding peptides based
	on independent binding of individual peptide side-chains},
  journal   = {J. Immunol.},
  year      = {1994},
  volume    = {152},
  number    = {1},
  pages     = {163--175},
  month     = {Jan},
  abstract  = {A method to predict the relative binding strengths of all possible
	nonapeptides to the MHC class I molecule HLA-A2 has been developed
	based on experimental peptide binding data. These data indicate that,
	for most peptides, each side-chain of the peptide contributes a certain
	amount to the stability of the HLA-A2 complex that is independent
	of the sequence of the peptide. To quantify these contributions,
	the binding data from a set of 154 peptides were combined together
	to generate a table containing 180 coefficients (20 amino acids x
	9 positions), each of which represents the contribution of one particular
	amino acid residue at a specified position within the peptide to
	binding to HLA-A2. Eighty peptides formed stable HLA-A2 complexes,
	as assessed by measuring the rate of dissociation of beta 2m. The
	remaining 74 peptides formed complexes that had a half-life of beta
	2m dissociation of less than 5 min at 37 degrees C, or did not bind
	to HLA-A2, and were included because they could be used to constrain
	the values of some of the coefficients. The "theoretical" binding
	stability (calculated by multiplying together the corresponding coefficients)
	matched the experimental binding stability to within a factor of
	5. The coefficients were then used to calculate the theoretical binding
	stability for all the previously identified self or antigenic nonamer
	peptides known to bind to HLA-A2. The binding stability for all other
	nonamer peptides that could be generated from the proteins from which
	these peptides were derived was also predicted. In every case, the
	previously described HLA-A2 binding peptides were ranked in the top
	2\% of all possible nonamers for each source protein. Therefore,
	most biologically relevant nonamer peptides should be identifiable
	using the table of coefficients. We conclude that the side-chains
	of most nonamer peptides to the first approximation bind independently
	of one another to the HLA-A2 molecule.},
  keywords  = {immunoinformatics},
  pmid      = {8254189},
  timestamp = {2007.01.25}
}

@article{Parkhomenko2009Sparse,
  author = {Parkhomenko, E. and Tritchler, D. and Beyene, J.},
  title = {Sparse canonical correlation analysis with application to genomic
	data integration},
  journal = {Stat Appl Genet Mol Biol},
  year = {2009},
  volume = {8},
  pages = {Article 1},
  number = {1},
  month = {Jan},
  abstract = {Large scale genomic studies with multiple phenotypic or genotypic
	measures may require the identification of complex multivariate relationships.
	In multivariate analysis a common way to inspect the relationship
	between two sets of variables based on their correlation is canonical
	correlation analysis, which determines linear combinations of all
	variables of each type with maximal correlation between the two linear
	combinations. However, in high dimensional data analysis, when the
	number of variables under consideration exceeds tens of thousands,
	linear combinations of the entire sets of features may lack biological
	plausibility and interpretability. In addition, insufficient sample
	size may lead to computational problems, inaccurate estimates of
	parameters and non-generalizable results. These problems may be solved
	by selecting sparse subsets of variables, i.e. obtaining sparse loadings
	in the linear combinations of variables of each type. In this paper
	we present Sparse Canonical Correlation Analysis (SCCA) which examines
	the relationships between two types of variables and provides sparse
	solutions that include only small subsets of variables of each type
	by maximizing the correlation between the subsets of variables of
	different types while performing variable selection. We also present
	an extension of SCCA--adaptive SCCA. We evaluate their properties
	using simulated data and illustrate practical use by applying both
	methods to the study of natural variation in human gene expression.},
  doi = {10.2202/1544-6115.1406},
  institution = {Hospital for Sick Children Research Institute. elena@utstat.toronto.edu},
  keywords = {Algorithms; Genomics, statistics /&/ numerical data; Humans; Models,
	Statistical; Sample Size},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {19222376},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.2202/1544-6115.1406}
}

@article{Passerini2004Learning,
  author = {Passerini, A. and Frasconi, P.},
  title = {Learning to discriminate between ligand-bound and disulfide-bound
	cysteines},
  journal = {Protein {E}ng. {D}es. {S}el.},
  year = {2004},
  volume = {17},
  pages = {367--373},
  number = {4},
  abstract = {We present a machine learning method to discriminate between cysteines
	involved in ligand binding and cysteines forming disulfide bridges.
	{O}ur method uses a window of multiple alignment profiles to represent
	each instance and support vector machines with a polynomial kernel
	as the learning algorithm. {W}e also report results obtained with
	two new kernel functions based on similarity matrices. {E}xperimental
	results indicate that binding type can be predicted at significantly
	higher accuracy than using {PROSITE} patterns.},
  doi = {10.1093/protein/gzh042},
  pdf = {../local/Passerini2004Learning.pdf},
  file = {Passerini2004Learning.pdf:local/Passerini2004Learning.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/protein/gzh042}
}

@article{Passerini2004New,
  author = {Andrea Passerini and Massimiliano Pontil and Paolo Frasconi},
  title = {New results on error correcting output codes of kernel machines},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2004},
  volume = {15},
  pages = {45--54},
  number = {1},
  month = {Jan},
  abstract = {We study the problem of multiclass classification within the framework
	of error correcting output codes ({ECOC}) using margin-based binary
	classifiers. {S}pecifically, we address two important open problems
	in this context: decoding and model selection. {T}he decoding problem
	concerns how to map the outputs of the classifiers into class codewords.
	{I}n this paper we introduce a new decoding function that combines
	the margins through an estimate of their class conditional probabilities.
	{C}oncerning model selection, we present new theoretical results
	bounding the leave-one-out ({LOO}) error of {ECOC} of kernel machines,
	which can be used to tune kernel hyperparameters. {W}e report experiments
	using support vector machines as the base binary classifiers, showing
	the advantage of the proposed decoding function over other functions
	of the margin commonly used in practice. {M}oreover, our empirical
	evaluations on model selection indicate that the bound leads to good
	estimates of kernel parameters.},
  keywords = {Neural Networks (Computer), Research Design, 15387246}
}

@techreport{Pastor-Satorras2002Evolving,
  author = {Pastor-Satorras, R. and Smith, E. D. and Sol{\'e}, R. V.},
  title = {Evolving protein interaction networks through gene duplication},
  institution = {Santa Fe Institute},
  year = {2002},
  type = {Working paper},
  number = {02-02-008},
  pdf = {../local/past02.pdf},
  file = {past02.pdf:local/past02.pdf:PDF},
  subject = {bionetprot},
  url = {http://www.santafe.edu/sfi/publications/Abstracts/02-02-008abs.html}
}

@article{Patil2005Uncovering,
  author      = {Patil, K. R. and Nielsen, J.},
  title       = {Uncovering transcriptional regulation of metabolism by using metabolic
    network topology.},
  journal     = {Proc. Natl. Acad. Sci. U. S. A.},
  year        = {2005},
  month       = {Feb},
  volume      = {102},
  number      = {8},
  pages       = {2685--2689},
  doi         = {10.1073/pnas.0406811102},
  url         = {http://dx.doi.org/10.1073/pnas.0406811102},
  pmid        = {15710883},
  pii         = {0406811102},
  pdf         = {../local/Patil2005Uncovering.pdf},
  file        = {Patil2005Uncovering.pdf:Patil2005Uncovering.pdf:PDF},
  institution = {Center for Microbial Biotechnology, BioCentrum-DTU, Technical University
    of Denmark, Building 223, DK-2800 Kgs. Lyngby, Denmark.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2011.10.08},
  abstract    = {Cellular response to genetic and environmental perturbations is often
    reflected and/or mediated through changes in the metabolism, because
    the latter plays a key role in providing Gibbs free energy and precursors
    for biosynthesis. Such metabolic changes are often exerted through
    transcriptional changes induced by complex regulatory mechanisms
    coordinating the activity of different metabolic pathways. It is
    difficult to map such global transcriptional responses by using traditional
    methods, because many genes in the metabolic network have relatively
    small changes at their transcription level. We therefore developed
    an algorithm that is based on hypothesis-driven data analysis to
    uncover the transcriptional regulatory architecture of metabolic
    networks. By using information on the metabolic network topology
    from genome-scale metabolic reconstruction, we show that it is possible
    to reveal patterns in the metabolic network that follow a common
    transcriptional response. Thus, the algorithm enables identification
    of so-called reporter metabolites (metabolites around which the most
    significant transcriptional changes occur) and a set of connected
    genes with significant and coordinated response to genetic or environmental
    perturbations. We find that cells respond to perturbations by changing
    the expression pattern of several genes involved in the specific
    part(s) of the metabolism in which a perturbation is introduced.
    These changes then are propagated through the metabolic network because
    of the highly connected nature of metabolism.}
}

@inproceedings{Patterson2002Pre-mRNA,
  author = {Patterson, D.J. and Yasuhara, K. and Ruzzo, W.L.},
  title = {Pre-{m{RNA}} secondary structure prediction aids splice site prediction.},
  booktitle = {Proceedings of the {P}acific {S}ymposium on {B}iocomputing 2002},
  year = {2002},
  editor = {Russ B. Altman and A. Keith Dunker and Lawrence Hunter and Kevin
	Lauderdale and Teri E. Klein},
  pages = {223--234},
  publisher = {World Scientific},
  abstract = {Accurate splice site prediction is a critical component of any computational
	approach to gene prediction in higher organisms. {E}xisting approaches
	generally use sequence-based models that capture local dependencies
	among nucleotides in a small window around the splice site. {W}e
	present evidence that computationally predicted secondary structure
	of moderate length pre-m{RNA} subsequences contains information
	that can be exploited to improve acceptor splice site prediction
	beyond that possible with conventional sequence-based approaches.
	{B}oth decision tree and support vector machine classifiers, using
	folding energy and structure metrics characterizing helix formation
	near the splice site, achieve a 5-10\% reduction in error rate with
	a human data set. {B}ased on our data, we hypothesize that acceptors
	preferentially exhibit short helices at the splice site.},
  pdf = {../local/Patterson2002Pre-mRNA.pdf},
  file = {Patterson2002Pre-mRNA.pdf:local/Patterson2002Pre-mRNA.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://www.smi.stanford.edu/projects/helix/psb02/patterson.pdf}
}

@article{Patterson2003Proteomics,
  author      = {Patterson, Scott D and Aebersold, Ruedi H},
  title       = {Proteomics: the first decade and beyond.},
  journal     = {Nat Genet},
  year        = {2003},
  month       = {Mar},
  volume      = {33 Suppl},
  pages       = {311--323},
  doi         = {10.1038/ng1106},
  url         = {http://dx.doi.org/10.1038/ng1106},
  pmid        = {12610541},
  pii         = {ng1106},
  institution = {Celera Genomics Corporation, 45 West Gude Drive, Rockville, Maryland
    20850, USA. scottp@farmalbiomed.com},
  keywords    = {Amino Acid Sequence; Base Sequence; Chromatography, Liquid; Computational
    Biology; DNA; Genetic Techniques; History, 20th Century; History,
    21st Century; Mass Spectrometry; Oligonucleotide Array Sequence Analysis;
    Proteins; Proteomics},
  owner       = {phupe},
  timestamp   = {2010.08.13},
  abstract    = {Proteomics is the systematic study of the many and diverse properties
    of proteins in a parallel manner with the aim of providing detailed
    descriptions of the structure, function and control of biological
    systems in health and disease. Advances in methods and technologies
    have catalyzed an expansion of the scope of biological studies from
    the reductionist biochemical analysis of single proteins to proteome-wide
    measurements. Proteomics and other complementary analysis methods
    are essential components of the emerging 'systems biology' approach
    that seeks to comprehensively describe biological systems through
    integration of diverse types of data and, in the future, to ultimately
    allow computational simulations of complex biological systems.}
}

@article{Pavey2004Microarray,
  author = {Pavey, S. and Johansson, P. and Packer, L. and Taylor, J. and Stark,
	M. and Pollock, P.M. and Walker, G.J. and Boyle, G.M. and Harper,
	U. and Cozzi, S.J. and Hansen, K. and Yudt, L. and Schmidt, C. and
	Hersey, P. and Ellem, K.A. and O'Rourke, M.G. and Parsons, P.G. and
	Meltzer, P. and Ringner, M. and Hayward, N.K.},
  title = {Microarray expression profiling in melanoma reveals a {BRAF} mutation
	signature},
  journal = {Oncogene},
  year = {2004},
  volume = {23},
  pages = {4060--4067},
  number = {23},
  month = {May},
  abstract = {We have used microarray gene expression profiling and machine learning
	to predict the presence of {BRAF} mutations in a panel of 61 melanoma
	cell lines. {T}he {BRAF} gene was found to be mutated in 42 samples
	(69\%) and intragenic mutations of the {NRAS} gene were detected in
	seven samples (11\%). {N}o cell line carried mutations of both genes.
	{U}sing support vector machines, we have built a classifier that
	differentiates between melanoma cell lines based on {BRAF} mutation
	status. {A}s few as 83 genes are able to discriminate between {BRAF}
	mutant and {BRAF} wild-type samples with clear separation observed
	using hierarchical clustering. {M}ultidimensional scaling was used
	to visualize the relationship between a {BRAF} mutation signature
	and that of a generalized mitogen-activated protein kinase ({MAPK})
	activation (either {BRAF} or {NRAS} mutation) in the context of the
	discriminating gene list. {W}e observed that samples carrying {NRAS}
	mutations lie somewhere between those with or without {BRAF} mutations.
	{T}hese observations suggest that there are gene-specific mutation
	signals in addition to a common {MAPK} activation that result from
	the pleiotropic effects of either {BRAF} or {NRAS} on other signaling
	pathways, leading to measurably different transcriptional changes.},
  doi = {10.1038/sj.onc.1207563},
  pdf = {../local/Pavey2004Microarray.pdf},
  file = {Pavey2004Microarray.pdf:local/Pavey2004Microarray.pdf:PDF},
  keywords = {biosvm microarray},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1038/sj.onc.1207563}
}

@inproceedings{Pavlidis2001Promoter,
  author    = {Pavlidis, P. and Furey, T. S. and Liberto, M. and Haussler, D. and
    Grundy, W. N.},
  title     = {Promoter {R}egion-{B}ased {C}lassification of {G}enes},
  booktitle = {Pacific {S}ymposium on {B}iocomputing},
  year      = {2001},
  pages     = {139--150},
  url       = {http://www.smi.stanford.edu/projects/helix/psb01/pavlidis.pdf},
  pdf       = {../local/pavl01b.pdf},
  file      = {pavl01b.pdf:local/pavl01b.pdf:PDF},
  keywords  = {biosvm},
  subject   = {biokernel}
}

@article{Pavlidis2002Exploring,
  author      = {Pavlidis, P. and Lewis, D. P. and Noble, W. S.},
  title       = {Exploring gene expression data with class scores.},
  journal     = {Pac. Symp. Biocomput.},
  year        = {2002},
  pages       = {474--485},
  pmid        = {11928500},
  pdf         = {../local/Pavlidis2002Exploring.pdf},
  file        = {Pavlidis2002Exploring.pdf:Pavlidis2002Exploring.pdf:PDF},
  institution = {Columbia Genome Center, Columbia University, USA. pp175@columbia.edu},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2011.10.04},
  abstract    = {We address a commonly asked question about gene expression data sets:
    "What functional classes of genes are most interesting in the data?"
    In the methods we present, expression data is partitioned into classes
    based on existing annotation schemes. Each class is then given three
    separately derived "interest" scores. The first score is based on
    an assessment of the statistical significance of gene expression
    changes experienced by members of the class, in the context of the
    experimental design. The second is based on the co-expression of
    genes in the class. The third score is based on the learnability
    of the classification. We show that all three methods reveal significant
    classes in each of three different gene expression data sets. Many
    classes are identified by one method but not the others, indicating
    that the methods are complementary. The classes identified are in
    many cases of clear relevance to the experiment. Our results suggest
    that these class scoring methods are useful tools for exploring gene
    expression data.}
}

@article{Pavlidis2004Support,
  author = {Paul Pavlidis and Ilan Wapinski and William Stafford Noble},
  title = {Support vector machine classification on the web.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {586--587},
  number = {4},
  month = {Mar},
  abstract = {The support vector machine ({SVM}) learning algorithm has been widely
	applied in bioinformatics. {W}e have developed a simple web interface
	to our implementation of the {SVM} algorithm, called {G}ist. {T}his
	interface allows novice or occasional users to apply a sophisticated
	machine learning algorithm easily to their data. {M}ore advanced
	users can download the software and source code for local installation.
	{T}he availability of these tools will permit more widespread application
	of this powerful learning algorithm in bioinformatics.},
  doi = {10.1093/bioinformatics/btg461},
  pdf = {../local/Pavlidis2004Support.pdf},
  file = {Pavlidis2004Support.pdf:local/Pavlidis2004Support.pdf:PDF},
  keywords = {Adaptation, Algorithms, Ambergris, Amino Acid Sequence, Animals, Artifacts,
	Artificial Intelligence, Automated, Cadmium, Candida, Candida albicans,
	Capillary, Clinical, Cluster Analysis, Combinatorial Chemistry Techniques,
	Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted,
	Computing Methodologies, Databases, Decision Support Systems, Electrophoresis,
	Enzymes, Europe, Eye Enucleation, Humans, Image Interpretation, Image
	Processing, Information Storage and Retrieval, Internet, Magnetic
	Resonance Imaging, Magnetic Resonance Spectroscopy, Markov Chains,
	Melanoma, Models, Molecular, Molecular Conformation, Molecular Sequence
	Data, Molecular Structure, Neural Networks (Computer), Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Odors, P.H.S., Pattern Recognition,
	Perfume, Physiological, Predictive Value of Tests, Prognosis, Prospective
	Studies, Protein, Protein Structure, Proteins, Proteomics, Quantitative
	Structure-Activity Relationship, Rats, Reproducibility of Results,
	Research Support, Saccharomyces cerevisiae, Saccharomyces cerevisiae
	Proteins, Secondary, Sensitivity and Specificity, Signal Processing,
	Single-Blind Method, Soft Tissue Neoplasms, Software, Statistical,
	U.S. Gov't, Uveal Neoplasms, Visual, 14990457},
  pii = {btg461},
  pmid = {14990457},
  url = {http://dx.doi.org/10.1093/bioinformatics/btg461}
}

@inproceedings{Pavlidis2001Gene,
  author    = {Pavlidis, P. and Weston, J. and Cai, J. and Grundy, W.N.},
  title     = {Gene functional classification from heterogeneous data},
  booktitle = {Proceedings of the {F}ifth {A}nnual {I}nternational {C}onference
    on {C}omputational {B}iology},
  year      = {2001},
  pages     = {249--255},
  url       = {http://www.cs.columbia.edu/compbio/papers/exp-phylo.pdf},
  pdf       = {../local/pavl01.pdf},
  file      = {pavl01.pdf:local/pavl01.pdf:PDF},
  keywords  = {biosvm},
  subject   = {biokernel}
}

@article{Pavlidis2002Learning,
  author   = {Pavlidis, P. and Weston, J. and Cai, J. and Noble, W.S.},
  title    = {Learning Gene Functional Classifications from Multiple Data Types},
  journal  = {J. Comput. Biol.},
  year     = {2002},
  volume   = {9},
  number   = {2},
  pages    = {401--411},
  doi      = {10.1089/10665270252935539},
  pdf      = {../local/Pavlidis2002Learning.pdf},
  file     = {Pavlidis2002Learning.pdf:local/Pavlidis2002Learning.pdf:PDF},
  keywords = {biosvm},
  owner    = {vert},
  abstract = {In our attempts to understand cellular function at the molecular level,
    we must be able to synthesize information from disparate types of
    genomic data. {W}e consider the problem of inferring gene functional
    classifications from a heterogeneous data set consisting of {DNA}
    microarray expression measurements and phylogenetic profiles from
    whole-genome sequence comparisons. {W}e demonstrate the application
    of the support vector machine ({SVM}) learning algorithm to this
    functional inference task. {O}ur results suggest the importance of
    exploiting prior information about the heterogeneity of the data.
    {I}n particular, we propose an {SVM} kernel function that is explicitly
    heterogeneous. {I}n addition, we describe feature scaling methods
    for further exploiting prior knowledge of heterogeneity by giving
    each data type different weights.}
}

@article{Pawitan2005Gene,
  author    = {Pawitan, Y. and Bj{\"o}hle, J. and Amler, L. and Borg, A.L. and
    Egyhazi, S. and Hall, P. and Han, X. and Holmberg, L. and Huang, F. and
    Klaar, S. and others},
  title     = {Gene expression profiling spares early breast cancer patients from
    adjuvant therapy: derived and validated in two population-based cohorts},
  journal   = {Breast Cancer Research},
  publisher = {BioMed Central Ltd},
  year      = {2005},
  volume    = {7},
  number    = {6},
  pages     = {R953}
}

@article{Pazos2001Similarity,
  author = {Pazos, F. and Valencia, A.},
  title = {Similarity of phylogenetic trees as indicator of protein-protein
	interaction},
  journal = {Protein {E}ng.},
  year = {2001},
  volume = {14},
  pages = {609--614},
  number = {9},
  pdf = {../local/Pazos2001Similarity.pdf},
  file = {Pazos2001Similarity.pdf:local/Pazos2001Similarity.pdf:PDF},
  owner = {vert},
  url = {http://peds.oxfordjournals.org/cgi/content/abstract/14/9/609}
}

@book{Pearl1988Probabilistic,
  author    = {Pearl, J.},
  title     = {Probabilistic {R}easoning in {I}ntelligent {S}ystems: {N}etworks
    of {P}lausible {I}nference},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo},
  year      = {1988},
  note      = {The classic original book on belief networks, which was certainly
    motivated by the idea that belief networks might have relevance to
    brains},
  owner     = {vert}
}

@article{Pearlstein2003Characterization,
  author    = {Pearlstein, R. A. and Vaz, R. J. and Kang, J. and Chen, X.-L. and
    Preobrazhenskaya, M. and Shchekotikhin, A. E. and Korolev, A. M.
    and Lysenkova, L. N. and Miroshnikova, O. V. and Hendrix, J. and
    Rampe, D.},
  title     = {{C}haracterization of {HERG} potassium channel inhibition using {C}o{MS}i{A}
    3{D} {QSAR} and homology modeling approaches.},
  journal   = {Bioorg. Med. Chem. Lett.},
  year      = {2003},
  month     = {May},
  volume    = {13},
  number    = {10},
  pages     = {1829--1835},
  pmid      = {12729675},
  pii       = {S0960894X03001963},
  keywords  = {chemoinformatics herg},
  timestamp = {2006.10.06},
  abstract  = {A data set consisting of twenty-two sertindole analogues and ten structurally
    diverse inhibitors, spanning a wide range in potency, was analyzed
    using CoMSiA. A homology model of HERG was constructed from the crystal
    structure of the open MthK potassium channel. A complementary relationship
    between our CoMSiA and homology models is apparent when the long
    inhibitor axis is oriented parallel to the longitudinal axis of the
    pore, with the tail region pointed toward the selectivity filter.
    The key elements of the pharmacophore, the CoMSiA and the homology
    model are: (1) The hydrophobic feature optimally consists of an aromatic
    group that is capable of engaging in pi-stacking with a Phe656 side
    chain. Optionally, a second aromatic or hydrophobic group present
    in some inhibitors may contact an additional Phe656 side chain. (2)
    The basic nitrogen appears to undergo a pi-cation interaction with
    Tyr652. (3) The pore diameter (12A+), and depth of the selectivity
    loop relative to the intracellular opening, act as constraints on
    the conformation-dependent inhibitor dimensions.}
}

@article{Pearlstein2003Understanding,
  author    = {Pearlstein, R. and Vaz, R. and Rampe, D.},
  title     = {{U}nderstanding the structure-activity relationship of the human
    ether-a-go-go-related gene cardiac {K}+ channel. {A} model for bad
    behavior.},
  journal   = {J. Med. Chem.},
  year      = {2003},
  month     = {May},
  volume    = {46},
  number    = {11},
  pages     = {2017--2022},
  doi       = {10.1021/jm0205651},
  url       = {http://dx.doi.org/10.1021/jm0205651},
  pmid      = {12747773},
  keywords  = {chemoinformatics herg},
  timestamp = {2006.10.06}
}

@article{Pearson1901On,
  author = {Pearson, K.},
  title = {On lines and planes of closest fit to systems of points in space},
  journal = {Philos. Mag.},
  year = {1901},
  series = {6},
  volume = {2},
  pages = {559--572},
  number = {11},
  citeulike-article-id = {2013414},
  keywords = {pca},
  posted-at = {2007-11-29 10:41:36},
  priority = {2}
}

@article{Pearson1990Rapid,
  author  = {Pearson, W. R.},
  title   = {Rapid and sensitive sequence comparisons with {FASTP} and {FASTA}},
  journal = {Meth. {E}nzymol.},
  year    = {1990},
  volume  = {183},
  pages   = {63--98}
}

@inproceedings{Pelckmans2009Transductively,
  author    = {Pelckmans, K. and Suykens, J.A.K.},
  title     = {Transductively Learning from Positive Examples Only},
  booktitle = {Proc. of the European Symposium on Artificial Neural Networks (ESANN
    2009)},
  year      = {2009},
  url       = {ftp://ftp.esat.kuleuven.be/pub/SISTA/kpelckma/esann09_ssl.pdf},
  pdf       = {../local/Pelckmans2009Transductively.pdf},
  file      = {Pelckmans2009Transductively.pdf:Pelckmans2009Transductively.pdf:PDF},
  keywords  = {PUlearning},
  owner     = {jp},
  timestamp = {2010.01.29}
}

@inproceedings{Pelleg2000X-means,
  author = {Pelleg, D. and Moore, A.},
  title = {{X-means}: Extending {K-means} with efficient estimation of the number
	of clusters},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine
	Learning},
  year = {2000},
  pages = {727--734},
  address = {San Francisco},
  publisher = {Morgan Kaufmann},
  owner = {jp},
  timestamp = {2011.12.29}
}

@article{Pellegrini1999Assigning,
  author = {Pellegrini, M. and Marcotte, E. M. and Thompson, M. J. and Eisenberg,
	D. and Yeates, T. O.},
  title = {Assigning protein functions by comparative genome analysis: {P}rotein
	phylogenetic profiles},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {1999},
  volume = {96},
  pages = {4285--4288},
  number = {8},
  month = {April},
  pdf = {../local/pell99.pdf},
  file = {pell99.pdf:local/pell99.pdf:PDF},
  subject = {bio},
  url = {http://www.pnas.org/cgi/reprint/96/8/4285.pdf}
}

@article{Peng2005Feature,
  author = {Peng, H. and Long, F. and Ding, C.},
  title = {Feature selection based on mutual information criteria of max-dependency,
	max-relevance, and min-redundancy},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year = {2005},
  volume = {27},
  pages = {1226--1238},
  number = {8},
  publisher = {IEEE}
}

@article{Peng2010Regularized,
  author = {Jie Peng and Ji Zhu and Anna Bergamaschi and Wonshik Han and Dong-Young
	Noh and Jonathan R. Pollack and Pei Wang},
  title = {Regularized Multivariate Regression for Identifying Master Predictors
	with Application to Integrative Genomics Study of Breast Cancer},
  journal = {Ann. Appl. Stat.},
  year = {2010},
  volume = {4},
  pages = {53--77},
  number = {1},
  doi = {10.1214/09-AOAS271},
  owner = {jp},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1214/09-AOAS271}
}

@article{Peng2003Splicing-site,
  author = {Si-hua Peng and Long-jiang Fan and Xiao-ning Peng and Shu-lin Zhuang
	and Wei Du and Liang-biao Chen},
  title = {Splicing-site recognition of rice ({O}ryza sativa {L}.) {DNA} sequences
	by support vector machines.},
  journal = {J {Z}hejiang {U}niv {S}ci},
  year = {2003},
  volume = {4},
  pages = {573--577},
  number = {5},
  abstract = {{MOTIVATION}: {I}t was found that high accuracy splicing-site recognition
	of rice ({O}ryza sativa {L}.) {DNA} sequence is especially difficult.
	{W}e described a new method for the splicing-site recognition of
	rice {DNA} sequences. {METHOD}: {B}ased on the intron in eukaryotic
	organisms conforming to the principle of {GT}-{AG}, we used support
	vector machines ({SVM}) to predict the splicing sites. {B}y machine
	learning, we built a model and used it to test the effect of the
	test data set of true and pseudo splicing sites. {RESULTS}: {T}he
	prediction accuracy we obtained was 87.53\% at the true 5' end splicing
	site and 87.37\% at the true 3' end splicing sites. {T}he results
	suggested that the {SVM} approach could achieve higher accuracy than
	the previous approaches.}
}

@article{Peng2003Molecular,
  author = {Peng, S. and Xu, Q. and Ling, X.B. and Peng, X. and Du, W. and Chen,
	L.},
  title = {Molecular classification of cancer types from microarray data using
	the combination of genetic algorithms and support vector machines.},
  journal = {F{EBS} {L}ett.},
  year = {2003},
  volume = {555},
  pages = {358--362},
  number = {2},
  abstract = {Simultaneous multiclass classification of tumor types is essential
	for future clinical implementations of microarray-based cancer diagnosis.
	{I}n this study, we have combined genetic algorithms ({GA}s) and
	all paired support vector machines ({SVM}s) for multiclass cancer
	identification. {T}he predictive features have been selected through
	iterative {SVM}s/{GA}s, and recursive feature elimination post-processing
	steps, leading to a very compact cancer-related predictive gene set.
	{L}eave-one-out cross-validations yielded accuracies of 87.93\% for
	the eight-class and 85.19\% for the fourteen-class cancer classifications,
	outperforming the results derived from previously published methods.},
  doi = {10.1016/S0014-5793(03)01275-4},
  pdf = {../local/Peng2003Molecular.pdf},
  file = {Peng2003Molecular.pdf:local/Peng2003Molecular.pdf:PDF},
  keywords = {biosvm microarray},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0014-5793(03)01275-4}
}

@article{Pepperrell1991Techniques,
  author    = {Pepperrell, C. A. and Willett, P.},
  title     = {{T}echniques for the calculation of three-dimensional structural
    similarity using inter-atomic distances.},
  journal   = {J Comput Aided Mol Des},
  year      = {1991},
  month     = {Oct},
  volume    = {5},
  number    = {5},
  pages     = {455--474},
  pmid      = {1770381},
  keywords  = {Algorithms, Binding Sites, Chemical, Chemistry, Comparative Study,
    Computer Simulation, Databases, Factual, Macromolecular Substances,
    Models, Molecular Conformation, Molecular Structure, Non-U.S. Gov't,
    Physical, Protein Conformation, Protein Structure, Proteins, Research
    Support, Structure-Activity Relationship, Tertiary, 1770381},
  owner     = {mahe},
  timestamp = {2006.08.22},
  abstract  = {This paper reports a comparison of several methods for measuring the
    degree of similarity between pairs of 3-D chemical structures that
    are represented by inter-atomic distance matrices. The methods that
    have been tested use the distance information in very different ways
    and have very different computational requirements. Experiments with
    10 small datasets, for which both structural and biological activity
    data are available, suggest that the most cost-effective technique
    is based on a mapping procedure that tries to match pairs of atoms,
    one from each of the molecules that are being compared, that have
    neighbouring atoms at approximately the same distances.}
}

@article{Perez-Iratxeta2002Association,
  author      = {Perez-Iratxeta, C. and Bork, P. and Andrade, M. A.},
  title       = {Association of genes to genetically inherited diseases using data
    mining},
  journal     = {Nat. Genet.},
  year        = {2002},
  month       = {Jul},
  volume      = {31},
  number      = {3},
  pages       = {316--319},
  doi         = {10.1038/ng895},
  url         = {http://dx.doi.org/10.1038/ng895},
  pmid        = {12006977},
  pii         = {ng895},
  pdf         = {../local/Perez-Iratxeta2002Association.pdf},
  file        = {Perez-Iratxeta2002Association.pdf:Perez-Iratxeta2002Association.pdf:PDF},
  institution = {European Molecular Biology Laboratory, Meyerhofstr.1, Heidelberg
    69012, Germany.},
  owner       = {jp},
  timestamp   = {2009.03.18},
  abstract    = {Although approximately one-quarter of the roughly 4,000 genetically
    inherited diseases currently recorded in respective databases (LocusLink,
    OMIM) are already linked to a region of the human genome, about 450
    have no known associated gene. Finding disease-related genes requires
    laborious examination of hundreds of possible candidate genes (sometimes,
    these are not even annotated; see, for example, refs 3,4). The public
    availability of the human genome draft sequence has fostered new
    strategies to map molecular functional features of gene products
    to complex phenotypic descriptions, such as those of genetically
    inherited diseases. Owing to recent progress in the systematic annotation
    of genes using controlled vocabularies, we have developed a scoring
    system for the possible functional relationships of human genes to
    455 genetically inherited diseases that have been mapped to chromosomal
    regions without assignment of a particular gene. In a benchmark of
    the system with 100 known disease-associated genes, the disease-associated
    gene was among the 8 best-scoring genes with a 25\% chance, and among
    the best 30 genes with a 50\% chance, showing that there is a relationship
    between the score of a gene and its likelihood of being associated
    with a particular disease. The scoring also indicates that for some
    diseases, the chance of identifying the underlying gene is higher.}
}

@article{Perez-Iratxeta2005,
  author      = {Perez-Iratxeta, Carolina and Wjst, Matthias and Bork, Peer and
    Andrade, Miguel A},
  title       = {G2D: a tool for mining genes associated with disease.},
  journal     = {BMC Genet},
  year        = {2005},
  volume      = {6},
  pages       = {45},
  doi         = {10.1186/1471-2156-6-45},
  url         = {http://dx.doi.org/10.1186/1471-2156-6-45},
  pmid        = {16115313},
  pii         = {1471-2156-6-45},
  institution = {Ontario Genomics Innovation Centre, Ottawa Health Research Institute,
    ON K1H 8L6, Ottawa, Canada. cperez-iratxeta@ohri.ca},
  keywords    = {Algorithms; Alzheimer Disease; Asthma; Genetic Diseases, Inborn; Genetic
    Predisposition to Disease; Humans; Internet; Linkage (Genetics)},
  owner       = {mordelet},
  timestamp   = {2010.09.27},
  abstract    = {BACKGROUND: Human inherited diseases can be associated by genetic
    linkage with one or more genomic regions. The availability of the
    complete sequence of the human genome allows examining those locations
    for an associated gene. We previously developed an algorithm to prioritize
    genes on a chromosomal region according to their possible relation
    to an inherited disease using a combination of data mining on biomedical
    databases and gene sequence analysis. RESULTS: We have implemented
    this method as a web application in our site G2D (Genes to Diseases).
    It allows users to inspect any region of the human genome to find
    candidate genes related to a genetic disease of their interest. In
    addition, the G2D server includes pre-computed analyses of candidate
    genes for 552 linked monogenic diseases without an associated gene,
    and the analysis of 18 asthma loci. CONCLUSION: G2D can be publicly
    accessed at http://www.ogic.ca/projects/g2d_2/.}
}

@article{Perkins2005Expanding,
  author = {D O Perkins and C Jeffries and P Sullivan},
  title = {Expanding the 'central dogma': the regulatory role of nonprotein
	coding genes and implications for the genetic liability to schizophrenia},
  journal = {Molecular Psychiatry},
  year = {2005},
  volume = {10},
  pages = {69--78},
  keywords = {csbcbook}
}

@article{Perlman2004Multidimensional,
  author      = {Perlman, Z. E. and Slack, M. D. and Feng, Y. and Mitchison, T. J.
    and Wu, L. F. and Altschuler, S. J.},
  title       = {Multidimensional drug profiling by automated microscopy},
  journal     = {Science},
  year        = {2004},
  month       = {Nov},
  volume      = {306},
  number      = {5699},
  pages       = {1194--1198},
  doi         = {10.1126/science.1100709},
  url         = {http://dx.doi.org/10.1126/science.1100709},
  pmid        = {15539606},
  pii         = {306/5699/1194},
  pdf         = {../local/Perlman2004Multidimensional.pdf},
  file        = {Perlman2004Multidimensional.pdf:Perlman2004Multidimensional.pdf:PDF},
  institution = {Institute of Chemistry and Cell Biology, Harvard Medical School,
    Boston, MA 02115, USA.},
  keywords    = {chemogenomics, highcontentscreening},
  owner       = {jp},
  timestamp   = {2009.03.26},
  abstract    = {We present a method for high-throughput cytological profiling by microscopy.
    Our system provides quantitative multidimensional measures of individual
    cell states over wide ranges of perturbations. We profile dose-dependent
    phenotypic effects of drugs in human cell culture with a titration-invariant
    similarity score (TISS). This method successfully categorized blinded
    drugs and suggested targets for drugs of uncertain mechanism. Multivariate
    single-cell analysis is a starting point for identifying relationships
    among drug effects at a systems level and a step toward phenotypic
    profiling at the single-cell level. Our methods will be useful for
    discovering the mechanism and predicting the toxicity of new drugs.}
}

@article{Perola2004Conformational,
  author    = {Emanuele Perola and Paul S Charifson},
  title     = {Conformational analysis of drug-like molecules bound to proteins:
    an extensive study of ligand reorganization upon binding.},
  journal   = {J. Med. Chem.},
  year      = {2004},
  month     = {May},
  volume    = {47},
  number    = {10},
  pages     = {2499--2510},
  abstract  = {This paper describes a large-scale study on the nature and the energetics
    of the conformational changes drug-like molecules experience upon
    binding. Ligand strain energies and conformational reorganization
    were analyzed with different computational methods on 150 crystal
    structures of pharmaceutically relevant protein-ligand complexes.
    The common knowledge that ligands rarely bind in their lowest calculated
    energy conformation was confirmed. Additionally, we found that over
    60\% of the ligands do not bind in a local minimum conformation.
    While approximately 60\% of the ligands were calculated to bind with
    strain energies lower than 5 kcal/mol, strain energies over 9 kcal/mol
    were calculated in at least 10\% of the cases regardless of the method
    used. A clear correlation was found between acceptable strain energy
    and ligand flexibility, while there was no correlation between strain
    energy and binding affinity, thus indicating that expensive conformational
    rearrangements can be tolerated in some cases without overly penalizing
    the tightness of binding. On the basis of the trends observed, thresholds
    for the acceptable strain energies of bioactive conformations were
    defined with consideration of the impact of ligand flexibility. An
    analysis of the degree of folding of the bound ligands confirmed
    the general tendency of small molecules to bind in an extended conformation.
    The results suggest that the unfolding of hydrophobic ligands during
    binding, which exposes hydrophobic surfaces to contact with protein
    residues, could be one of the factors accounting for high reorganization
    energies. Finally, different methods for conformational analysis
    were evaluated, and guidelines were defined to maximize the prevalence
    of bioactive conformations in computationally generated ensembles.},
  doi       = {10.1021/jm030563w},
  keywords  = {Drug Design; Endopeptidases; Ligands; Molecular Conformation; Pharmaceutical
    Preparations; Phosphotransferases; Protein Binding; Protein Folding;
    Proteins; Thermodynamics},
  owner     = {laurent},
  pmid      = {15115393},
  timestamp = {2008.01.16},
  url       = {http://dx.doi.org/10.1021/jm030563w}
}

@article{Perou1999Distinctive,
  author      = {Perou, C. M. and Jeffrey, S. S. and {van de Rijn}, M. and Rees, C.
    A. and Eisen, M. B. and Ross, D. T. and Pergamenschikov, A. and Williams,
    C. F. and Zhu, S. X. and Lee, J. C. and Lashkari, D. and Shalon,
    D. and Brown, P. O. and Botstein, D.},
  title       = {Distinctive gene expression patterns in human mammary epithelial
    cells and breast cancers.},
  journal     = {Proc. Natl. Acad. Sci. U S A},
  year        = {1999},
  month       = {Aug},
  volume      = {96},
  number      = {16},
  pages       = {9212--9217},
  abstract    = {cDNA microarrays and a clustering algorithm were used to identify
    patterns of gene expression in human mammary epithelial cells growing
    in culture and in primary human breast tumors. Clusters of coexpressed
    genes identified through manipulations of mammary epithelial cells
    in vitro also showed consistent patterns of variation in expression
    among breast tumor samples. By using immunohistochemistry with antibodies
    against proteins encoded by a particular gene in a cluster, the identity
    of the cell type within the tumor specimen that contributed the observed
    gene expression pattern could be determined. Clusters of genes with
    coherent expression patterns in cultured cells and in the breast
    tumors samples could be related to specific features of biological
    variation among the samples. Two such clusters were found to have
    patterns that correlated with variation in cell proliferation rates
    and with activation of the IFN-regulated signal transduction pathway,
    respectively. Clusters of genes expressed by stromal cells and lymphocytes
    in the breast tumors also were identified in this analysis. These
    results support the feasibility and usefulness of this systematic
    approach to studying variation in gene expression patterns in human
    cancers as a means to dissect and classify solid tumors.},
  doi         = {10.1073/pnas.96.16.9212},
  pdf         = {../local/Perou1999Distinctive.pdf},
  file        = {Perou1999Distinctive.pdf:Perou1999Distinctive.pdf:PDF},
  institution = {Department of Genetics, Stanford University School of Medicine, Stanford,
    CA 94305, USA.},
  keywords    = {csbcbook, csbcbook-ch3},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pmid        = {10430922},
  timestamp   = {2011.11.30},
  url         = {http://dx.doi.org/10.1073/pnas.96.16.9212}
}

@article{Perou2000Molecular,
  author = {Perou, C. M. and S{\o}rlie, T. and Eisen, M. B. and van de Rijn, M.
	and Jeffrey, S. S. and Rees, C. A. and Pollack, J. R. and Ross, D.
	T. and Johnsen, H. and Akslen, L. A. and Fluge, O. and Pergamenschikov,
	A. and Williams, C. and Zhu, S. X. and L{\o}nning, P. E. and B{\o}rresen-Dale,
	A. L. and Brown, P. O. and Botstein, D.},
  title = {Molecular portraits of human breast tumours},
  journal = {Nature},
  year = {2000},
  volume = {406},
  pages = {747--752},
  number = {6797},
  month = {Aug},
  abstract = {Human breast tumours are diverse in their natural history and in their
	responsiveness to treatments. Variation in transcriptional programs
	accounts for much of the biological diversity of human cells and
	tumours. In each cell, signal transduction and regulatory systems
	transduce information from the cell's identity to its environmental
	status, thereby controlling the level of expression of every gene
	in the genome. Here we have characterized variation in gene expression
	patterns in a set of 65 surgical specimens of human breast tumours
	from 42 different individuals, using complementary DNA microarrays
	representing 8,102 human genes. These patterns provided a distinctive
	molecular portrait of each tumour. Twenty of the tumours were sampled
	twice, before and after a 16-week course of doxorubicin chemotherapy,
	and two tumours were paired with a lymph node metastasis from the
	same patient. Gene expression patterns in two tumour samples from
	the same individual were almost always more similar to each other
	than either was to any other sample. Sets of co-expressed genes were
	identified for which variation in messenger RNA levels could be related
	to specific features of physiological variation. The tumours could
	be classified into subtypes distinguished by pervasive differences
	in their gene expression patterns.},
  doi = {10.1038/35021093},
  pdf = {../local/Perou2000Molecular.pdf},
  file = {Perou2000Molecular.pdf:Perou2000Molecular.pdf:PDF},
  institution = {Department of Genetics, Stanford University School of Medicine, California
	94305, USA.},
  keywords = {breastcancer, csbcbook, csbcbook-ch3},
  owner = {jp},
  pmid = {10963602},
  timestamp = {2009.02.04},
  url = {http://dx.doi.org/10.1038/35021093}
}

@article{Perrin2003Gene,
  author = {Perrin, B. E. and Ralaivola, L. and Mazurie, A. and Bottani, S. and
	Mallet, J. and d'Alch{\'e}-Buc, F.},
  title = {Gene networks inference using dynamic {Bayesian} networks},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {ii138--ii148},
  number = {suppl 2},
  publisher = {Oxford Univ Press}
}

@article{Perry2007Diet,
  author      = {George H Perry and Nathaniel J Dominy and Katrina G Claw and Arthur
    S Lee and Heike Fiegler and Richard Redon and John Werner and Fernando
    A Villanea and Joanna L Mountain and Rajeev Misra and Nigel P Carter
    and Charles Lee and Anne C Stone},
  title       = {Diet and the evolution of human amylase gene copy number variation.},
  journal     = {Nat Genet},
  year        = {2007},
  month       = {Oct},
  volume      = {39},
  number      = {10},
  pages       = {1256--1260},
  abstract    = {Starch consumption is a prominent characteristic of agricultural societies
    and hunter-gatherers in arid environments. In contrast, rainforest
    and circum-arctic hunter-gatherers and some pastoralists consume
    much less starch. This behavioral variation raises the possibility
    that different selective pressures have acted on amylase, the enzyme
    responsible for starch hydrolysis. We found that copy number of the
    salivary amylase gene (AMY1) is correlated positively with salivary
    amylase protein level and that individuals from populations with
    high-starch diets have, on average, more AMY1 copies than those with
    traditionally low-starch diets. Comparisons with other loci in a
    subset of these populations suggest that the extent of AMY1 copy
    number differentiation is highly unusual. This example of positive
    selection on a copy number-variable gene is, to our knowledge, one
    of the first discovered in the human genome. Higher AMY1 copy numbers
    and protein levels probably improve the digestion of starchy foods
    and may buffer against the fitness-reducing effects of intestinal
    disease.},
  doi         = {10.1038/ng2123},
  institution = {School of Human Evolution and Social Change, Arizona State University,
    Tempe, Arizona 85287, USA.},
  keywords    = {Animals; Diet; Evolution, Molecular; Gene Dosage; Genetic Variation;
    Humans; Starch, metabolism; alpha-Amylases, genetics},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  pii         = {ng2123},
  pmid        = {17828263},
  timestamp   = {2010.08.01},
  url         = {http://dx.doi.org/10.1038/ng2123}
}

@article{Perry1992Database,
  author = {N. Perry and V. J. van Geerestein},
  title = {Database {S}earching on the basis of {T}hree-{D}imensional {S}imilarity
	{U}sing the {SPERM} {P}rogram},
  journal = {J Chem Inf Comput Sci},
  year = {1992},
  volume = {32},
  pages = {607--616},
  owner = {mahe},
  timestamp = {2006.08.22}
}

@article{Peters2006community,
  author    = {Bjoern Peters and Huynh-Hoa Bui and Sune Frankild and Morten Nielson
    and Claus Lundegaard and Emrah Kostem and Derek Basch and Kasper
    Lamberth and Mikkel Harndahl and Ward Fleri and Stephen S Wilson
    and John Sidney and Ole Lund and Soren Buus and Alessandro Sette},
  title     = {A community resource benchmarking predictions of peptide binding
    to {MHC-I} molecules.},
  journal   = {PLoS Comput. Biol.},
  year      = {2006},
  month     = {Jun},
  volume    = {2},
  number    = {6},
  pages     = {e65},
  abstract  = {Recognition of peptides bound to major histocompatibility complex
    (MHC) class I molecules by T lymphocytes is an essential part of
    immune surveillance. Each MHC allele has a characteristic peptide
    binding preference, which can be captured in prediction algorithms,
    allowing for the rapid scan of entire pathogen proteomes for peptide
    likely to bind MHC. Here we make public a large set of 48,828 quantitative
    peptide-binding affinity measurements relating to 48 different mouse,
    human, macaque, and chimpanzee MHC class I alleles. We use this data
    to establish a set of benchmark predictions with one neural network
    method and two matrix-based prediction methods extensively utilized
    in our groups. In general, the neural network outperforms the matrix-based
    predictions mainly due to its ability to generalize even on a small
    amount of data. We also retrieved predictions from tools publicly
    available on the internet. While differences in the data used to
    generate these predictions hamper direct comparisons, we do conclude
    that tools based on combinatorial peptide libraries perform remarkably
    well. The transparent prediction evaluation on this dataset provides
    tool developers with a benchmark for comparison of newly developed
    prediction methods. In addition, to generate and evaluate our own
    prediction methods, we have established an easily extensible web-based
    prediction framework that allows automated side-by-side comparisons
    of prediction methods implemented by experts. This is an advance
    over the current practice of tool developers having to generate reference
    predictions themselves, which can lead to underestimating the performance
    of prediction methods they are not as familiar with as their own.
    The overall goal of this effort is to provide a transparent prediction
    evaluation allowing bioinformaticians to identify promising features
    of prediction methods and providing guidance to immunologists regarding
    the reliability of prediction tools.},
  doi       = {10.1371/journal.pcbi.0020065},
  keywords  = {Animals; Databases, Factual; HLA Antigens; Histocompatibility Antigens
    Class I; Humans; Inhibitory Concentration 50; Macaca; Mice; Neural
    Networks (Computer); Pan troglodytes; Peptides; ROC Curve; Software},
  owner     = {laurent},
  pii       = {06-PLCB-RA-0058R2},
  pmid      = {16789818},
  timestamp = {2007.01.30},
  url       = {http://dx.doi.org/10.1371/journal.pcbi.0020065}
}

@article{Peters2005Generating,
  author = {Bjoern Peters and Alessandro Sette},
  title = {Generating quantitative models describing the sequence specificity
	of biological processes with the stabilized matrix method.},
  journal = {BMC Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {132},
  abstract = {BACKGROUND: Many processes in molecular biology involve the recognition
	of short sequences of nucleic-or amino acids, such as the binding
	of immunogenic peptides to major histocompatibility complex (MHC)
	molecules. From experimental data, a model of the sequence specificity
	of these processes can be constructed, such as a sequence motif,
	a scoring matrix or an artificial neural network. The purpose of
	these models is two-fold. First, they can provide a summary of experimental
	results, allowing for a deeper understanding of the mechanisms involved
	in sequence recognition. Second, such models can be used to predict
	the experimental outcome for yet untested sequences. In the past
	we reported the development of a method to generate such models called
	the Stabilized Matrix Method (SMM). This method has been successfully
	applied to predicting peptide binding to MHC molecules, peptide transport
	by the transporter associated with antigen presentation (TAP) and
	proteasomal cleavage of protein sequences. RESULTS: Herein we report
	the implementation of the SMM algorithm as a publicly available software
	package. Specific features determining the type of problems the method
	is most appropriate for are discussed. Advantageous features of the
	package are: (1) the output generated is easy to interpret, (2) input
	and output are both quantitative, (3) specific computational strategies
	to handle experimental noise are built in, (4) the algorithm is designed
	to effectively handle bounded experimental data, (5) experimental
	data from randomized peptide libraries and conventional peptides
	can easily be combined, and (6) it is possible to incorporate pair
	interactions between positions of a sequence. CONCLUSION: Making
	the SMM method publicly available enables bioinformaticians and experimental
	biologists to easily access it, to compare its performance to other
	prediction methods, and to extend it to other applications.},
  doi = {10.1186/1471-2105-6-132},
  keywords = {Algorithms; Amino Acid Sequence; Biology; Computational Biology; Computer
	Simulation; Data Interpretation, Statistical; Databases, Protein;
	Models, Biological; Models, Statistical; Neural Networks (Computer);
	Peptide Library; Peptides; Programming Languages; Protein Binding;
	Sensitivity and Specificity; Software},
  owner = {laurent},
  pii = {1471-2105-6-132},
  pmid = {15927070},
  timestamp = {2007.07.12},
  url = {http://dx.doi.org/10.1186/1471-2105-6-132}
}

@inproceedings{Pfeifer2008Multiple,
  author = {Pfeifer, Nico and Kohlbacher, Oliver},
  title = {Multiple Instance Learning Allows {MHC} Class {II} Epitope Predictions
	Across Alleles},
  booktitle = {WABI '08: Proceedings of the 8th international workshop on Algorithms
	in Bioinformatics},
  year = {2008},
  pages = {210--221},
  address = {Berlin, Heidelberg},
  publisher = {Springer-Verlag},
  doi = {10.1007/978-3-540-87361-7_18},
  isbn = {978-3-540-87360-0},
  location = {Karlsruhe, Germany},
  url = {http://dx.doi.org/10.1007/978-3-540-87361-7_18}
}

@article{Pham2005Support,
  author = {Tho Hoan Pham and Kenji Satou and Tu Bao Ho},
  title = {Support vector machines for prediction and analysis of beta and gamma-turns
	in proteins.},
  journal = {J. {B}ioinform. {C}omput. {B}iol.},
  year = {2005},
  volume = {3},
  pages = {343--358},
  number = {2},
  month = {Apr},
  abstract = {Tight turns have long been recognized as one of the three important
	features of proteins, together with alpha-helix and beta-sheet. {T}ight
	turns play an important role in globular proteins from both the structural
	and functional points of view. {M}ore than 90\% tight turns are beta-turns
	and most of the rest are gamma-turns. {A}nalysis and prediction of
	beta-turns and gamma-turns is very useful for design of new molecules
	such as drugs, pesticides, and antigens. {I}n this paper we investigated
	two aspects of applying support vector machine ({SVM}), a promising
	machine learning method for bioinformatics, to prediction and analysis
	of beta-turns and gamma-turns. {F}irst, we developed two {SVM}-based
	methods, called {BTSVM} and {GTSVM}, which predict beta-turns and
	gamma-turns in a protein from its sequence. {W}hen compared with
	other methods, {BTSVM} has a superior performance and {GTSVM} is
	competitive. {S}econd, we used {SVM}s with a linear kernel to estimate
	the support of amino acids for the formation of beta-turns and gamma-turns
	depending on their position in a protein. {O}ur analysis results
	are more comprehensive and easier to use than the previous results
	in designing turns in proteins.},
  keywords = {biosvm},
  pii = {S0219720005001089}
}

@article{Pham2003Prediction,
  author = {Tho Hoan Pham and Kenji Satou and Tu Bao Ho},
  title = {Prediction and analysis of beta-turns in proteins by support vector
	machine.},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2003},
  volume = {14},
  pages = {196--205},
  abstract = {Tight turn has long been recognized as one of the three important
	features of proteins after the alpha-helix and beta-sheet. {T}ight
	turns play an important role in globular proteins from both the structural
	and functional points of view. {M}ore than 90\% tight turns are beta-turns.
	{A}nalysis and prediction of beta-turns in particular and tight turns
	in general are very useful for the design of new molecules such as
	drugs, pesticides, and antigens. {I}n this paper, we introduce a
	support vector machine ({SVM}) approach to prediction and analysis
	of beta-turns. {W}e have investigated two aspects of applying {SVM}
	to the prediction and analysis of beta-turns. {F}irst, we developed
	a new {SVM} method, called {BTSVM}, which predicts beta-turns of
	a protein from its sequence. {T}he prediction results on the dataset
	of 426 non-homologous protein chains by sevenfold cross-validation
	technique showed that our method is superior to the other previous
	methods. {S}econd, we analyzed how amino acid positions support (or
	prevent) the formation of beta-turns based on the "multivariable"
	classification model of a linear {SVM}. {T}his model is more general
	than the other ones of previous statistical methods. {O}ur analysis
	results are more comprehensive and easier to use than previously
	published analysis results.},
  keywords = {biosvm}
}

@article{Philippi2009BMCSysBio,
  author   = {Philippi, N. and Walter, D. and Schlatter, R. and Ferreira, K. and
    Ederer, M. and Sawodny, O. and Timmer, J. and Borner, C. and Dandekar,
    T.},
  title    = {Modeling system states in liver cells: survival, apoptosis and their
    modifications in response to viral infection},
  journal  = {BMC Syst Biol},
  year     = {2009},
  volume   = {3},
  pages    = {97},
  abstract = {BACKGROUND: The decision pro- or contra apoptosis is complex, involves
    a number of different inputs, and is central for the homeostasis
    of an individual cell as well as for the maintenance and regeneration
    of the complete organism. RESULTS: This study centers on Fas ligand
    (FasL)-mediated apoptosis, and a complex and internally strongly
    linked network is assembled around the central FasL-mediated apoptosis
    cascade. Different bioinformatical techniques are employed and different
    crosstalk possibilities including the integrin pathway are considered.
    This network is translated into a Boolean network (74 nodes, 108
    edges). System stability is dynamically sampled and investigated
    using the software SQUAD. Testing a number of alternative crosstalk
    possibilities and networks we find that there are four stable system
    states, two states comprising cell survival and two states describing
    apoptosis by the intrinsic and the extrinsic pathways, respectively.
    The model is validated by comparing it to experimental data from
    kinetics of cytochrome c release and caspase activation in wildtype
    and Bid knockout cells grown on different substrates. Pathophysiological
    modifications such as input from cytomegalovirus proteins M36 and
    M45 again produces output behavior that well agrees with experimental
    data. CONCLUSION: A network model for apoptosis and crosstalk in
    hepatocytes shows four different system states and reproduces a number
    of different conditions around apoptosis including effects of different
    growth substrates and viral infections. It produces semi-quantitative
    predictions on the activity of individual nodes, agreeing with experimental
    data. The model (SBML format) and all data are available for further
    predictions and development.},
  keywords = {csbcbook}
}

@article{Philips1962A,
  author = {D. L. Phillips},
  title = {A technique for the numerical solution of certain integral equations
	of the first kind},
  journal = {J. Assoc. Comput. Mach.},
  year = {1962},
  volume = {9},
  pages = {84--97},
  number = {1}
}

@article{Picard2005statistical,
  author      = {Picard, F. and Robin, S. and Lavielle, M. and Vaisse, C. and Daudin,
    J.-J.},
  title       = {A statistical approach for array {CGH} data analysis.},
  journal     = {BMC Bioinformatics},
  year        = {2005},
  volume      = {6},
  pages       = {27},
  abstract    = {BACKGROUND: Microarray-CGH experiments are used to detect and map
    chromosomal imbalances, by hybridizing targets of genomic DNA from
    a test and a reference sample to sequences immobilized on a slide.
    These probes are genomic DNA sequences (BACs) that are mapped on
    the genome. The signal has a spatial coherence that can be handled
    by specific statistical tools. Segmentation methods seem to be a
    natural framework for this purpose. A CGH profile can be viewed as
    a succession of segments that represent homogeneous regions in the
    genome whose BACs share the same relative copy number on average.
    We model a CGH profile by a random Gaussian process whose distribution
    parameters are affected by abrupt changes at unknown coordinates.
    Two major problems arise: to determine which parameters are affected
    by the abrupt changes (the mean and the variance, or the mean only),
    and the selection of the number of segments in the profile. RESULTS:
    We demonstrate that existing methods for estimating the number of
    segments are not well adapted in the case of array CGH data, and
    we propose an adaptive criterion that detects previously mapped chromosomal
    aberrations. The performances of this method are discussed based
    on simulations and publicly available data sets. Then we discuss
    the choice of modeling for array CGH data and show that the model
    with a homogeneous variance is adapted to this context. CONCLUSIONS:
    Array CGH data analysis is an emerging field that needs appropriate
    statistical tools. Process segmentation and model selection provide
    a theoretical framework that allows precise biological interpretations.
    Adaptive methods for model selection give promising results concerning
    the estimation of the number of altered regions on the genome.},
  doi         = {10.1186/1471-2105-6-27},
  pdf         = {../local/Picard2005statistical.pdf},
  file        = {Picard2005statistical.pdf:Picard2005statistical.pdf:PDF},
  institution = {Institut National Agronomique Paris-Grignon, UMR INAPG/ENGREF/INRA
    MIA 518, Paris, France. picard@inapg.fr},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {jp},
  pii         = {1471-2105-6-27},
  pmid        = {15705208},
  timestamp   = {2010.05.19},
  url         = {http://dx.doi.org/10.1186/1471-2105-6-27}
}

@article{Pickett1996Diversity,
  author = {Pickett, S. D. and Mason, J. S. and McLay, I. M.},
  title = {Diversity profiling and design using 3{D} pharmacophores: {P}harmacophore-{D}erived
	{Q}ueries ({PDQ})},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {1996},
  volume = {36},
  pages = {1214--1223},
  number = {6},
  abstract = {The current interest in combinatorial chemistry for lead generation
	has necessitated the development of methods for design and evaluation
	of the diversity of the resultant compound libraries. Such methods
	also have application in selecting diverse sets of compounds for
	general screening from corporate databases and in the analysis of
	large sets of structures to identify common patterns. In this paper
	we describe a novel methodology for calculating diversity and identifying
	common features based on the three-point pharmacophores expressed
	by a compound. The method has been implemented within the environment
	of the Chem-X molecular modeling package (ChemDBS-3D), using a systematic
	analysis of 3D distance space with three point combinations of six
	pharmacophoric groups. The strategy used to define the pharmacophores
	is discussed, including an in-house developed atom type parameterization.
	The method is compared with the related approach being developed
	into the ChemDiverse module of Chem-X. Results from an analysis of
	a large corporate database and examples of combinatorial library
	profiling with both methods are presented. The use of 3D pharmacophores
	for assessing diversity, and the application of such methods to combinatorial
	library design, is discussed.},
  doi = {10.1021/ci960039g},
  pdf = {../local/Pickett1996Diversity.pdf},
  file = {Pickett1996Diversity.pdf:Pickett1996Diversity.pdf:PDF},
  keywords = {chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci960039g}
}

@article{Piliouras2004Development,
  author = {N. Piliouras and I. Kalatzis and N. Dimitropoulos and D. Cavouras},
  title = {Development of the cubic least squares mapping linear-kernel support
	vector machine classifier for improving the characterization of breast
	lesions on ultrasound.},
  journal = {Comput {M}ed {I}maging {G}raph},
  year = {2004},
  volume = {28},
  pages = {247--255},
  number = {5},
  month = {Jul},
  abstract = {An efficient classification algorithm is proposed for characterizing
	breast lesions. {T}he algorithm is based on the cubic least squares
	mapping and the linear-kernel support vector machine ({SVM}({LSM}))
	classifier. {U}ltrasound images of 154 confirmed lesions (59 benign
	and 52 malignant solid masses, 7 simple cysts, and 32 complicated
	cysts) were manually segmented by a physician using a custom developed
	software. {T}exture and outline features and the {SVM}({LSM}) algorithm
	were used to design a hierarchical tree classification system. {C}lassification
	accuracy was 98.7\%, misdiagnosing 1 malignant an 1 benign solid
	lesions only. {T}his system may be used as a second opinion tool
	to the radiologists.},
  doi = {10.1016/j.compmedimag.2004.04.003},
  pii = {S0895611104000515},
  url = {http://dx.doi.org/10.1016/j.compmedimag.2004.04.003}
}

@article{Pilpel2001Identifying,
  author = {Pilpel, Y. and Sudarsanam, P. and Church, G. M.},
  title = {Identifying regulatory networks by combinatorial analysis of promoter
	elements},
  journal = {Nat. Genet.},
  year = {2001},
  volume = {29},
  pages = {153--159},
  number = {2},
  doi = {10.1038/ng724},
  pdf = {../local/pilp01.pdf},
  file = {pilp01.pdf:local/pilp01.pdf:PDF},
  subject = {microarray},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/ng/journal/v29/n2/full/ng724.html&filetype=PDF}
}

@article{Pinkel2005Array,
  author      = {Pinkel, D. and Albertson, D. G.},
  title       = {Array comparative genomic hybridization and its applications in cancer},
  journal     = {Nat. Genet.},
  year        = {2005},
  month       = {Jun},
  volume      = {37 Suppl},
  pages       = {S11--S17},
  abstract    = {Alteration in DNA copy number is one of the many ways in which gene
    expression and function may be modified. Some variations are found
    among normal individuals, others occur in the course of normal processes
    in some species and still others participate in causing various disease
    states. For example, many defects in human development are due to
    gains and losses of chromosomes and chromosomal segments that occur
    before or shortly after fertilization, and DNA dosage-alteration
    changes occurring in somatic cells are frequent contributors to cancer.
    Detecting these aberrations and interpreting them in the context
    of broader knowledge facilitates the identification of crucial genes
    and pathways involved in biological processes and disease. Over the
    past several years, array comparative genomic hybridization has proven
    its value for analyzing DNA copy-number variations. Here, we discuss
    the state of the art of array comparative genomic hybridization and
    its applications in cancer, emphasizing general concepts rather than
    specific results.},
  doi         = {10.1038/ng1569},
  pdf         = {../local/Pinkel2005Array.pdf},
  file        = {Pinkel2005Array.pdf:Pinkel2005Array.pdf:PDF},
  institution = {Department of Laboratory Medicine and Comprehensive Cancer Center,
    University of California San Francisco, Box 0808, San Francisco,
    California 94143, USA. pinkel@cc.ucsf.edu},
  keywords    = {csbcbook, cgh, csbcbook-ch2},
  owner       = {jp},
  pii         = {ng1569},
  pmid        = {15920524},
  timestamp   = {2009.10.08},
  url         = {http://dx.doi.org/10.1038/ng1569}
}

@article{Pinkel1998High,
  author    = {Pinkel, D. and Segraves, R. and Sudar, D. and Clark, S. and Poole,
    I. and Kowbel, D. and Collins, C. and Kuo, W. L. and Chen, C. and
    Zhai, Y. and Dairkee, S. H. and Ljung, B. M. and Gray, J. W. and
    Albertson, D. G.},
  title     = {High resolution analysis of {DNA} copy number variation using comparative
    genomic hybridization to microarrays},
  journal   = {Nat. Genet.},
  year      = {1998},
  month     = {Oct},
  volume    = {20},
  number    = {2},
  pages     = {207--211},
  abstract  = {Gene dosage variations occur in many diseases. In cancer, deletions
    and copy number increases contribute to alterations in the expression
    of tumour-suppressor genes and oncogenes, respectively. Developmental
    abnormalities, such as Down, Prader Willi, Angelman and Cri du Chat
    syndromes, result from gain or loss of one copy of a chromosome or
    chromosomal region. Thus, detection and mapping of copy number abnormalities
    provide an approach for associating aberrations with disease phenotype
    and for localizing critical genes. Comparative genomic hybridization
    (CGH) was developed for genome-wide analysis of DNA sequence copy
    number in a single experiment. In CGH, differentially labelled total
    genomic DNA from a 'test' and a 'reference' cell population are cohybridized
    to normal metaphase chromosomes, using blocking DNA to suppress signals
    from repetitive sequences. The resulting ratio of the fluorescence
    intensities at a location on the 'cytogenetic map', provided by the
    chromosomes, is approximately proportional to the ratio of the copy
    numbers of the corresponding DNA sequences in the test and reference
    genomes. CGH has been broadly applied to human and mouse malignancies.
    The use of metaphase chromosomes, however, limits detection of events
    involving small regions (of less than 20 Mb) of the genome, resolution
    of closely spaced aberrations and linking ratio changes to genomic/genetic
    markers. Therefore, more laborious locus-by-locus techniques have
    been required for higher resolution studies. Hybridization to an
    array of mapped sequences instead of metaphase chromosomes could
    overcome the limitations of conventional CGH (ref. 6) if adequate
    performance could be achieved. Copy number would be related to the
    test/reference fluorescence ratio on the array targets, and genomic
    resolution could be determined by the map distance between the targets,
    or by the length of the cloned DNA segments. We describe here our
    implementation of array CGH. We demonstrate its ability to measure
    copy number with high precision in the human genome, and to analyse
    clinical specimens by obtaining new information on chromosome 20
    aberrations in breast cancer.},
  doi       = {10.1038/2524},
  pdf       = {../local/Pinkel1998High.pdf},
  file      = {Pinkel1998High.pdf:Pinkel1998High.pdf:PDF},
  keywords  = {cgh, csbcbook},
  owner     = {franck},
  pmid      = {9771718},
  timestamp = {2007.09.14},
  url       = {http://dx.doi.org/10.1038/2524}
}

@inproceedings{Platt1999Fast,
  author = {J. Platt},
  title = {Fast Training of Support Vector Machines using Sequential Minimal
	Optimization},
  booktitle = {Advances in Kernel Methods - Support Vector Learning},
  year = {1999},
  editor = {B. Sch{\"o}lkopf and C. Burges and A. Smola},
  pages = {185--208},
  publisher = {MIT Press},
  address = {Cambridge, MA, USA},
  keywords = {kernel-theory},
  owner = {mahe},
  timestamp = {2006.08.31}
}

@article{Plewczyski2005support,
  author = {Dariusz Plewczynski and Adrian Tkacz and Adam Godzik and Leszek Rychlewski},
  title = {A support vector machine approach to the identification of phosphorylation
	sites},
  journal = {Cell Mol Biol Lett},
  year = {2005},
  volume = {10},
  pages = {73--89},
  number = {1},
  abstract = {We describe a bioinformatics tool that can be used to predict the
	position of phosphorylation sites in proteins based only on sequence
	information. {T}he method uses the support vector machine ({SVM})
	statistical learning theory. {T}he statistical models for phosphorylation
	by various types of kinases are built using a dataset of short (9-amino
	acid long) sequence fragments. {T}he sequence segments are dissected
	around post-translationally modified sites of proteins that are on
	the current release of the {S}wiss-{P}rot database, and that were
	experimentally confirmed to be phosphorylated by any kinase. {W}e
	represent them as vectors in a multidimensional abstract space of
	short sequence fragments. {T}he prediction method is as follows.
	{F}irst, a given query protein sequence is dissected into overlapping
	short segments. {A}ll the fragments are then projected into the multidimensional
	space of sequence fragments via a collection of different representations.
	{T}hose points are classified with pre-built statistical models (the
	{SVM} method with linear, polynomial and radial kernel functions)
	either as phosphorylated or inactive ones. {T}he resulting list of
	plausible sites for phosphorylation by various types of kinases in
	the query protein is returned to the user. {T}he efficiency of the
	method for each type of phosphorylation is estimated using leave-one-out
	tests and presented here. {T}he sensitivities of the models can reach
	over 70\%, depending on the type of kinase. {T}he additional information
	from profile representations of short sequence fragments helps in
	gaining a higher degree of accuracy in some phosphorylation types.
	{T}he further development of an automatic phosphorylation site annotation
	predictor based on our algorithm should yield a significant improvement
	when using statistical algorithms in order to quantify the results.},
  pdf = {../local/Plewczyski2005support.pdf},
  file = {Plewczyski2005support.pdf:local/Plewczyski2005support.pdf:PDF},
  keywords = {biosvm}
}

@article{Plewczynski2005AutoMotif,
  author = {Dariusz Plewczynski and Adrian Tkacz and Lucjan Stanislaw Wyrwicz
	and Leszek Rychlewski},
  title = {{AutoMotif} server: prediction of single residue post-translational
	modifications in proteins},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2525--2527},
  number = {10},
  month = may,
  abstract = {The {A}uto{M}otif {S}erver allows for identification of post-translational
	modification ({PTM}) sites in proteins based only on local sequence
	information. {T}he local sequence preferences of short segments around
	{PTM} residues are described here as linear functional motifs ({LFM}s).
	{S}equence models for all types of {PTM}s are trained by support
	vector machine on short-sequence fragments of proteins in the current
	release of {S}wiss-{P}rot database (phosphorylation by various protein
	kinases, sulfation, acetylation, methylation, amidation, etc.). {T}he
	accuracy of the identification is estimated using the standard leave-one-out
	procedure. {T}he sensitivities for all types of short {LFM}s are
	in the range of 70\%. {AVAILABILITY}: {T}he {A}uto{M}otif {S}erver
	is available free for academic use at http://automotif.bioinfo.pl/},
  doi = {10.1093/bioinformatics/bti333},
  pdf = {../local/Plewczynski2005AutoMotif.pdf},
  file = {Plewczynski2005AutoMotif.pdf:local/Plewczynski2005AutoMotif.pdf:PDF},
  keywords = {biosvm},
  pii = {bti333},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti333}
}

@article{Pochet2004Systematic,
  author = {Pochet, N. and De Smet, F. and Suykens, J. A. K. and De Moor, B.
	L. R.},
  title = {Systematic benchmarking of microarray data classification: assessing
	the role of non-linearity and dimensionality reduction},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {3185--3195},
  number = {17},
  month = nov,
  abstract = {Motivation: {M}icroarrays are capable of determining the expression
	levels of thousands of genes simultaneously. {I}n combination with
	classification methods, this technology can be useful to support
	clinical management decisions for individual patients, e.g. in oncology.
	{T}he aim of this paper is to systematically benchmark the role of
	non-linear versus linear techniques and dimensionality reduction
	methods. {R}esults: {A} systematic benchmarking study is performed
	by comparing linear versions of standard classification and dimensionality
	reduction techniques with their non-linear versions based on non-linear
	kernel functions with a radial basis function ({RBF}) kernel. {A}
	total of 9 binary cancer classification problems, derived from 7
	publicly available microarray datasets, and 20 randomizations of
	each problem are examined. {C}onclusions: {T}hree main conclusions
	can be formulated based on the performances on independent test sets.
	(1) {W}hen performing classification with least squares support vector
	machines ({LS}-{SVM}s) (without dimensionality reduction), {RBF}
	kernels can be used without risking too much overfitting. {T}he results
	obtained with well-tuned {RBF} kernels are never worse and sometimes
	even statistically significantly better compared to results obtained
	with a linear kernel in terms of test set receiver operating characteristic
	and test set accuracy performances. (2) {E}ven for classification
	with linear classifiers like {LS}-{SVM} with linear kernel, using
	regularization is very important. (3) {W}hen performing kernel principal
	component analysis (kernel {PCA}) before classification, using an
	{RBF} kernel for kernel {PCA} tends to result in overfitting, especially
	when using supervised feature selection. {I}t has been observed that
	an optimal selection of a large number of features is often an indication
	for overfitting. {K}ernel {PCA} with linear kernel gives better results.
	{A}vailability: {M}atlab scripts are available on request. {S}upplementary
	information: http://www.esat.kuleuven.ac.be/~npochet/{B}ioinformatics/},
  doi = {10.1093/bioinformatics/bth383},
  pdf = {../local/Pochet2004Systematic.pdf},
  file = {Pochet2004Systematic.pdf:local/Pochet2004Systematic.pdf:PDF},
  keywords = {biosvm microarray},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth383}
}

@article{Podani2001Comparable,
  author = {J. Podani and Z. N. Oltvai and H. Jeong and B. Tombor and A.-L. Barab{\'a}si
	and E. Szathm{\'a}ry},
  title = {Comparable system-level organization of {Archaea} and {Eukaryotes}},
  journal = {Nat. Genet.},
  year = {2001},
  volume = {29},
  pages = {54--56},
  pdf = {../local/poda01.pdf},
  file = {poda01.pdf:local/poda01.pdf:PDF},
  subject = {bionet},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/ng/journal/v29/n1/full/ng708.html&filetype=PDF}
}

@article{Poggio1998Sparse,
  author = {T. Poggio and F. Girosi},
  title = {A Sparse Representation for Function Approximation},
  journal = {Neural Comput},
  year = {1998},
  volume = {10},
  pages = {1445--1454},
  number = {6},
  month = jul,
  abstract = {We derive a new general representation for a function as a linear
	combination of local correlation kernels at optimal sparse locations
	(and scales) and characterize its relation to principal component
	analysis, regularization, sparsity principles, and support vector
	machines.},
  keywords = {Algorithms, Automated, Biometry, Computers, DNA, Databases, Factual,
	Fungal, Fungal Proteins, GTP-Binding Proteins, Gene Expression, Genes,
	Learning, Markov Chains, Models, Neural Networks (Computer), Neurological,
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Hybridization, Open Reading
	Frames, P.H.S., Pattern Recognition, Protein, Protein Structure,
	Proteins, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical,
	Tertiary, U.S. Gov't, 9698352}
}

@article{Pollack1999Genome,
  author = {Jonathan R. Pollack and Charles M. Perou and Ash A. Alizadeh and
	Michael B. Eisen and Alexander Pergamenschikov and Cheryl F. Williams
	and Stefanie S. Jeffrey and David Botstein and Patrick O. Brown},
  title = {Genome-wide analysis of {DNA} copy-number changes using {cDNA} microarrays},
  journal = {Nat. Genet.},
  year = {1999},
  volume = {23},
  pages = {41--46},
  keywords = {csbcbook, csbcbook-ch2}
}

@article{Pollack2002Microarray,
  author = {Jonathan R Pollack and Therese S{\o}rlie and Charles M Perou and Christian
	A Rees and Stefanie S Jeffrey and Per E L{\o}nning and Robert Tibshirani
	and David Botstein and Anne-Lise B{\o}rresen-Dale and Patrick O Brown},
  title = {Microarray analysis reveals a major direct role of {DNA} copy number
	alteration in the transcriptional program of human breast tumors},
  journal = {Proc Natl Acad Sci U S A},
  year = {2002},
  volume = {99},
  pages = {12963--12968},
  number = {20},
  month = oct,
  abstract = {Genomic DNA copy number alterations are key genetic events in the
	development and progression of human cancers. Here we report a genome-wide
	microarray comparative genomic hybridization (array CGH) analysis
	of DNA copy number variation in a series of primary human breast
	tumors. We have profiled DNA copy number alteration across 6,691
	mapped human genes, in 44 predominantly advanced, primary breast
	tumors and 10 breast cancer cell lines. While the overall patterns
	of DNA amplification and deletion corroborate previous cytogenetic
	studies, the high-resolution (gene-by-gene) mapping of amplicon boundaries
	and the quantitative analysis of amplicon shape provide significant
	improvement in the localization of candidate oncogenes. Parallel
	microarray measurements of mRNA levels reveal the remarkable degree
	to which variation in gene copy number contributes to variation in
	gene expression in tumor cells. Specifically, we find that 62\% of
	highly amplified genes show moderately or highly elevated expression,
	that DNA copy number influences gene expression across a wide range
	of DNA copy number alterations (deletion, low-, mid- and high-level
	amplification), that on average, a 2-fold change in DNA copy number
	is associated with a corresponding 1.5-fold change in mRNA levels,
	and that overall, at least 12\% of all the variation in gene expression
	among the breast tumors is directly attributable to underlying variation
	in gene copy number. These findings provide evidence that widespread
	DNA copy number alteration can lead directly to global deregulation
	of gene expression, which may contribute to the development or progression
	of cancer.},
  doi = {10.1073/pnas.162471999},
  institution = {Departments of Pathology, Genetics, Surgery, Health Research and
	Policy, and Biochemistry, and Howard Hughes Medical Institute, Stanford
	University School of Medicine, Stanford, CA 94305, USA. pollack1@stanford.edu},
  keywords = {Breast Neoplasms, genetics; Chromosome Aberrations; Disease Progression;
	Gene Dosage; Genome; Humans; Oligonucleotide Array Sequence Analysis;
	RNA, Messenger, metabolism; Transcription, Genetic; Tumor Cells,
	Cultured},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {162471999},
  pmid = {12297621},
  timestamp = {2011.06.03},
  url = {http://dx.doi.org/10.1073/pnas.162471999}
}

@article{Polonik1997Minimum,
  author = {W. Polonik},
  title = {Minimum volume sets and generalized quantile processes},
  journal = {Stochastic Processes and their Applications},
  year = {1997},
  volume = {69},
  pages = {1--24},
  publisher = {Elsevier}
}

@article{Polonik1995Measuring,
  author = {W. Polonik},
  title = {Measuring Mass Concentrations and Estimating Density Contour
	Clusters---An Excess Mass Approach},
  journal = {Ann. Stat.},
  year = {1995},
  volume = {23},
  pages = {855--881},
  number = {3},
  pdf = {../local/Polonik1995Measuring.pdf},
  file = {Polonik1995Measuring.pdf:local/Polonik1995Measuring.pdf:PDF},
  url = {http://links.jstor.org/sici?sici=0090-5364%28199506%2923%3A3%3C855%3AMMCAED%3E2.0.CO%3B2-K}
}

@article{Pontil1998Properties,
  author = {M. Pontil and A. Verri},
  title = {Properties of support vector machines},
  journal = {Neural Comput},
  year = {1998},
  volume = {10},
  pages = {955--974},
  number = {4},
  month = may,
  abstract = {Support vector machines ({SVM}s) perform pattern recognition between
	two point classes by finding a decision surface determined by certain
	points of the training set, termed support vectors ({SV}). {T}his
	surface, which in some feature space of possibly infinite dimension
	can be regarded as a hyperplane, is obtained from the solution of
	a problem of quadratic programming that depends on a regularization
	parameter. {I}n this article, we study some mathematical properties
	of support vectors and show that the decision surface can be written
	as the sum of two orthogonal terms, the first depending on only the
	margin vectors (which are {SV}s lying on the margin), the second
	proportional to the regularization parameter. {F}or almost all values
	of the parameter, this enables us to predict how the decision surface
	varies for small parameter changes. {I}n the special but important
	case of feature space of finite dimension m, we also show that m
	+ 1 {SV}s are usually sufficient to determine the decision surface
	fully. {F}or relatively small m, this latter result leads to a consistent
	reduction of the {SV} number.},
  keywords = {Algorithms, Artificial Intelligence, Automated, Biometry, Computers,
	DNA, Databases, Factual, Fungal, Fungal Proteins, GTP-Binding Proteins,
	Gene Expression, Genes, Learning, Linear Models, Markov Chains, Mathematics,
	Models, Neural Networks (Computer), Neurological, Non-P.H.S., Non-U.S.
	Gov't, Nonlinear Dynamics, Nucleic Acid Hybridization, Open Reading
	Frames, P.H.S., Pattern Recognition, Protein, Protein Structure,
	Proteins, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical,
	Tertiary, U.S. Gov't, 9573414}
}

@book{Poor2008Quickest,
  author    = {Poor, H. V. and Hadjiliadis, O.},
  title     = {Quickest Detection},
  publisher = {Cambridge University Press},
  year      = {2008},
  keywords  = {segmentation},
  owner     = {jp},
  timestamp = {2010.05.29}
}

@misc{Popat,
  author      = {K. Popat and D. H. Greene and J. K. Romberg and D. S. Bloomberg},
  title       = {Adding Linguistic Constraints to Document Image Decoding: Comparing
	the Iterated Complete Path and Stack Algorithms},
  institution = {Xerox Parc},
  year        = {2001}
}

@article{Popova2009Genome,
  author = {Tatiana Popova and Elodie Mani{\'e} and Dominique Stoppa-Lyonnet and
	Guillem Rigaill and Emmanuel Barillot and Marc Henri Stern},
  title = {{Genome Alteration Print} ({GAP}): a tool to visualize and mine complex
	cancer genomic profiles obtained by {SNP} arrays},
  journal = {Genome Biol},
  year = {2009},
  volume = {10},
  pages = {R128},
  number = {11},
  abstract = {We describe a method for automatic detection of absolute segmental
	copy numbers and genotype status in complex cancer genome profiles
	measured with single-nucleotide polymorphism (SNP) arrays. The method
	is based on pattern recognition of segmented and smoothed copy number
	and allelic imbalance profiles. Assignments were verified by DNA
	indexes of primary tumors and karyotypes of cell lines. The method
	performs well even for poor-quality data, low tumor content, and
	highly rearranged tumor genomes.},
  doi = {10.1186/gb-2009-10-11-r128},
  institution = {Centre de Recherche, Institut Curie, 26 rue d'Ulm, Paris, 75248,
	France. tatiana.popova@curie.fr},
  keywords = {Allelic Imbalance; Automation; Breast Neoplasms, genetics; Gene Expression
	Profiling; Gene Expression Regulation, Neoplastic; Genome; Genomics;
	Genotype; Homozygote; Humans; Karyotyping; Loss of Heterozygosity;
	Models, Genetic; Ploidies; Polymorphism, Single Nucleotide},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {gb-2009-10-11-r128},
  pmid = {19903341},
  timestamp = {2010.08.01},
  url = {http://dx.doi.org/10.1186/gb-2009-10-11-r128}
}

@article{Popovici2010Effect,
  author = {Popovici, V. and Chen, W. and Gallas, B. G. and Hatzis, C. and Shi,
	W. and Samuelson, F. W. and Nikolsky, Y. and Tsyganova, M. and Ishkin,
	A. and Nikolskaya, T. and others},
  title = {Effect of training-sample size and classification difficulty on the
	accuracy of genomic predictors},
  journal = {Breast Cancer Res},
  year = {2010},
  volume = {12},
  pages = {R5},
  number = {1}
}

@article{Portela2010Epigenetic,
  author = {Anna Portela and Manel Esteller},
  title = {Epigenetic modifications and human disease},
  journal = {Nat Biotechnol},
  year = {2010},
  volume = {28},
  pages = {1057--1068},
  number = {10},
  month = oct,
  abstract = {Epigenetics is one of the most rapidly expanding fields in biology.
	The recent characterization of a human DNA methylome at single nucleotide
	resolution, the discovery of the CpG island shores, the finding of
	new histone variants and modifications, and the unveiling of genome-wide
	nucleosome positioning maps highlight the accelerating speed of discovery
	over the past two years. Increasing interest in epigenetics has been
	accompanied by technological breakthroughs that now make it possible
	to undertake large-scale epigenomic studies. These allow the mapping
	of epigenetic marks, such as DNA methylation, histone modifications
	and nucleosome positioning, which are critical for regulating gene
	and noncoding RNA expression. In turn, we are learning how aberrant
	placement of these epigenetic marks and mutations in the epigenetic
	machinery is involved in disease. Thus, a comprehensive understanding
	of epigenetic mechanisms, their interactions and alterations in health
	and disease, has become a priority in biomedical research.},
  doi = {10.1038/nbt.1685},
  institution = {Cancer Epigenetics and Biology Program, Bellvitge Biomedical Research
	Institute, Barcelona, Catalonia, Spain.},
  keywords = {Amino Acid Sequence; Autoimmune Diseases, genetics; DNA Methylation,
	genetics; Disease, genetics; Epigenesis, Genetic; Histones, chemistry/metabolism;
	Humans; Molecular Sequence Data; Nerve Degeneration, genetics},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nbt.1685},
  pmid = {20944598},
  timestamp = {2011.06.04},
  url = {http://dx.doi.org/10.1038/nbt.1685}
}

@article{Post2008Extensions,
  author = {Post, T. M. and Freijer, J. I. and Ploeger, B. A. and Danhof, M.},
  title = {Extensions to the Visual Predictive Check to Facilitate Model
	Performance Evaluation},
  journal = {J Pharmacokinet Pharmacodyn},
  year = {2008},
  volume = {35},
  pages = {185--202},
  number = {2},
  doi = {10.1007/s10928-007-9081-1},
  owner = {kb}
}

@article{Prados2004Mining,
  author = {Prados, J. and Kalousis, A. and Sanchez, J. C. and Allard, L. and
	Carrette, O. and Hilario, M.},
  title = {Mining mass spectra for diagnosis and biomarker discovery of cerebral
	accidents},
  journal = {Proteomics},
  year = {2004},
  volume = {4},
  pages = {2320--2332},
  number = {8},
  abstract = {In this paper we try to identify potential biomarkers for early stroke
	diagnosis using surface-enhanced laser desorption/ionization mass
	spectrometry coupled with analysis tools from machine learning and
	data mining. {D}ata consist of 42 specimen samples, i.e., mass spectra
	divided in two big categories, stroke and control specimens. {A}mong
	the stroke specimens two further categories exist that correspond
	to ischemic and hemorrhagic stroke; in this paper we limit our data
	analysis to discriminating between control and stroke specimens.
	{W}e performed two suites of experiments. {I}n the first one we simply
	applied a number of different machine learning algorithms; in the
	second one we have chosen the best performing algorithm as it was
	determined from the first phase and coupled it with a number of different
	feature selection methods. {T}he reason for this was 2-fold, first
	to establish whether feature selection can indeed improve performance,
	which in our case it did not seem to confirm, but more importantly
	to acquire a small list of potentially interesting biomarkers. {O}f
	the different methods explored the most promising one was support
	vector machines which gave us high levels of sensitivity and specificity.
	{F}inally, by analyzing the models constructed by support vector
	machines we produced a small set of 13 features that could be used
	as potential biomarkers, and which exhibited good performance both
	in terms of sensitivity, specificity and model stability.},
  doi = {10.1002/pmic.200400857},
  pdf = {../local/Prados2004Mining.pdf},
  file = {Prados2004Mining.pdf:local/Prados2004Mining.pdf:PDF},
  keywords = {biosvm proteomics},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/pmic.200400857}
}

@article{Prakash2002Fetal,
  author = {K. N Bhanu Prakash and A. G. Ramakrishnan and S. Suresh and Teresa
	W P Chow},
  title = {Fetal lung maturity analysis using ultrasound image features},
  journal = {IEEE Trans Inf Technol Biomed},
  year = {2002},
  volume = {6},
  pages = {38--45},
  number = {1},
  month = mar,
  abstract = {This pilot study was carried out to find the feasibility of analyzing
	the maturity of the fetal lung using ultrasound images. {D}ata were
	collected from normal pregnant women at intervals of two weeks from
	the gestation age of 24 to 38 weeks. {I}mages were acquired at two
	centers located at different geographical locations. {T}he total
	data acquired consisted of 750 images of immature and 250 images
	of mature class. {A} region of interest of 64 x 64 pixels was used
	for extracting the features. {V}arious textural features were computed
	from the fetal lung and liver images. {T}he ratios of fetal lung
	to liver feature values were investigated as possible indexes for
	classifying the images into those from mature (reduced pulmonary
	risk) and immature (possible pulmonary risk) lung. {T}he features
	used are fractal dimension, lacunarity, and features derived from
	the histogram of the images. {T}he following classifiers were used
	to classify the fetal lung images as belonging to mature or immature
	lung: nearest neighbor, k-nearest neighbor, modified k-nearest neighbor,
	multilayer perceptron, radial basis function network, and support
	vector machines. {T}he classification accuracy obtained for the testing
	set ranges from 73\% to 96\%.}
}

@article{Praz2009CleanEx:,
  author = {Viviane Praz and Philipp Bucher},
  title = {{CleanEx}: new data extraction and merging tools based on {MeSH} term
	annotation},
  journal = {Nucleic Acids Res},
  year = {2009},
  volume = {37},
  pages = {D880--D884},
  number = {Database issue},
  month = jan,
  abstract = {The CleanEx expression database (http://www.cleanex.isb-sib.ch) provides
	access to public gene expression data via unique gene names as well
	as via experiments biomedical characteristics. To reach this, a dual
	annotation of both sequences and experiments has been generated.
	First, the system links official gene symbols to any kind of sequences
	used for gene expression measurements (cDNA, Affymetrix, oligonucleotide
	arrays, SAGE or MPSS tags, Expressed Sequence Tags or other mRNA
	sequences, etc.). For the biomedical annotation, we re-annotate each
	experiment from the CleanEx database with the MeSH (Medical Subject
	Headings) terms, primarily used by NLM (National Library of Medicine)
	for indexing articles for the MEDLINE/PubMED database. This annotation
	allows a fast and easy retrieval of expression data with common biological
	or medical features. The numerical data can then be exported as matrix-like
	tab-delimited text files. Data can be extracted from either one dataset
	or from heterogeneous datasets.},
  doi = {10.1093/nar/gkn878},
  institution = {ISREC, Swiss Institute of Bioinformatics, Boveresses 155, Epalinges,
	VD 1066, Switzerland. viviane.praz@unil.ch},
  keywords = {Animals; Chromosome Mapping; Databases, Genetic; Gene Expression Profiling;
	Humans; Medical Subject Headings; Mice; Oligonucleotide Array Sequence
	Analysis; Software},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gkn878},
  pmid = {19073704},
  timestamp = {2011.09.21},
  url = {http://dx.doi.org/10.1093/nar/gkn878}
}

@article{Praz2004CleanEx,
  author = {Praz, V. and Jagannathan, V. and Bucher, P.},
  title = {{CleanEx}: a database of heterogeneous gene expression data based
	on a consistent gene nomenclature},
  journal = {Nucleic Acids Res.},
  year = {2004},
  volume = {32},
  pages = {D542--D547},
  number = {Database issue},
  month = jan,
  abstract = {The main goal of CleanEx is to provide access to public gene expression
	data via unique gene names. A second objective is to represent heterogeneous
	expression data produced by different technologies in a way that
	facilitates joint analysis and cross-data set comparisons. A consistent
	and up-to-date gene nomenclature is achieved by associating each
	single experiment with a permanent target identifier consisting of
	a physical description of the targeted RNA population or the hybridization
	reagent used. These targets are then mapped at regular intervals
	to the growing and evolving catalogues of human genes and genes from
	model organisms. The completely automatic mapping procedure relies
	partly on external genome information resources such as UniGene and
	RefSeq. The central part of CleanEx is a weekly built gene index
	containing cross-references to all public expression data already
	incorporated into the system. In addition, the expression target
	database of CleanEx provides gene mapping and quality control information
	for various types of experimental resource, such as cDNA clones or
	Affymetrix probe sets. The web-based query interfaces offer access
	to individual entries via text string searches or quantitative expression
	criteria. CleanEx is accessible at: http://www.cleanex.isb-sib.ch/.},
  doi = {10.1093/nar/gkh107},
  pdf = {../local/Praz2004CleanEx.pdf},
  file = {Praz2004CleanEx.pdf:Praz2004CleanEx.pdf:PDF},
  institution = {Swiss Institute of Bioinformatics, Ch. des Boveresses 155, 1066 Epalinges
	s/Lausanne, Switzerland.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {32/suppl_1/D542},
  pmid = {14681477},
  timestamp = {2011.09.21},
  url = {http://dx.doi.org/10.1093/nar/gkh107}
}

@article{Prill2005PlosBiol,
  author = {Robert J Prill and Pablo A Iglesias and Andre Levchenko},
  title = {Dynamic properties of network motifs contribute to biological network
	organization},
  journal = {PLoS Biol},
  year = {2005},
  volume = {3},
  pages = {e343},
  number = {11},
  month = nov,
  abstract = {Biological networks, such as those describing gene regulation, signal
	transduction, and neural synapses, are representations of large-scale
	dynamic systems. Discovery of organizing principles of biological
	networks can be enhanced by embracing the notion that there is a
	deep interplay between network structure and system dynamics. Recently,
	many structural characteristics of these non-random networks have
	been identified, but dynamical implications of the features have
	not been explored comprehensively. We demonstrate by exhaustive computational
	analysis that a dynamical property--stability or robustness to small
	perturbations--is highly correlated with the relative abundance of
	small subnetworks (network motifs) in several previously determined
	biological networks. We propose that robust dynamical stability is
	an influential property that can determine the non-random structure
	of biological networks.},
  doi = {10.1371/journal.pbio.0030343},
  institution = {Department of Biomedical Engineering, Johns Hopkins University, Baltimore,
	Maryland, USA.},
  keywords = {Animals; Caenorhabditis elegans, physiology; Computational Biology,
	methods; Computer Simulation; Drosophila melanogaster, physiology;
	Escherichia coli, physiology; Models, Biological; Nerve Net; Saccharomyces
	cerevisiae, physiology; Signal Transduction; Statistics as Topic;
	Systems Theory; Transcription, Genetic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pii = {05-PLBI-RA-0233R2},
  pmid = {16187794},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1371/journal.pbio.0030343}
}

@article{Puig2001tandem,
  author = {O. Puig and F. Caspary and G. Rigaut and B. Rutz and E. Bouveret
	and E. Bragado-Nilsson and M. Wilm and B. S{\'e}raphin},
  title = {The tandem affinity purification ({TAP}) method: a general procedure
	of protein complex purification},
  journal = {Methods},
  year = {2001},
  volume = {24},
  pages = {218--229},
  number = {3},
  month = jul,
  abstract = {Identification of components present in biological complexes requires
	their purification to near homogeneity. Methods of purification vary
	from protein to protein, making it impossible to design a general
	purification strategy valid for all cases. We have developed the
	tandem affinity purification (TAP) method as a tool that allows rapid
	purification under native conditions of complexes, even when expressed
	at their natural level. Prior knowledge of complex composition or
	function is not required. The TAP method requires fusion of the TAP
	tag, either N- or C-terminally, to the target protein of interest.
	Starting from a relatively small number of cells, active macromolecular
	complexes can be isolated and used for multiple applications. Variations
	of the method to specifically purify complexes containing two given
	components or to subtract undesired complexes can easily be implemented.
	The TAP method was initially developed in yeast but can be successfully
	adapted to various organisms. Its simplicity, high yield, and wide
	applicability make the TAP method a very useful procedure for protein
	purification and proteome exploration.},
  doi = {10.1006/meth.2001.1183},
  institution = {European Molecular Biology Laboratory Meyerhofstrasse 1, Heidelberg,
	D-69117, Germany.},
  keywords = {Bacterial Proteins; Blotting, Western; DNA, Bacterial; Fungal Proteins;
	Genetic Vectors; Methods; Mutation; Polymerase Chain Reaction; Proteins;
	Proteome; Ribonucleases; Ribonucleoproteins; Saccharomyces cerevisiae;
	Saccharomyces cerevisiae Proteins; Staphylococcus aureus},
  owner = {phupe},
  pii = {S1046-2023(01)91183-1},
  pmid = {11403571},
  timestamp = {2010.08.31},
  url = {http://dx.doi.org/10.1006/meth.2001.1183}
}

@article{Pushkarev2009Single,
  author      = {Pushkarev, Dmitry and Neff, Norma F and Quake, Stephen R},
  title       = {Single-molecule sequencing of an individual human genome.},
  journal     = {Nat Biotechnol},
  year        = {2009},
  month       = {Sep},
  volume      = {27},
  number      = {9},
  pages       = {847--852},
  doi         = {10.1038/nbt.1561},
  url         = {http://dx.doi.org/10.1038/nbt.1561},
  pmid        = {19668243},
  pii         = {nbt.1561},
  abstract    = {Recent advances in high-throughput DNA sequencing technologies have
	enabled order-of-magnitude improvements in both cost and throughput.
	Here we report the use of single-molecule methods to sequence an
	individual human genome. We aligned billions of 24- to 70-bp reads
	(32 bp average) to approximately 90\% of the National Center for
	Biotechnology Information (NCBI) reference genome, with 28x average
	coverage. Our results were obtained on one sequencing instrument
	by a single operator with four data collection runs. Single-molecule
	sequencing enabled analysis of human genomic information without
	the need for cloning, amplification or ligation. We determined approximately
	2.8 million single nucleotide polymorphisms (SNPs) with a false-positive
	rate of less than 1\% as validated by Sanger sequencing and 99.8\%
	concordance with SNP genotyping arrays. We identified 752 regions
	of copy number variation by analyzing coverage depth alone and validated
	27 of these using digital PCR. This milestone should allow widespread
	application of genome sequencing to many aspects of genetics and
	human health, including personal genomics.},
  institution = {Department of Bioengineering, Stanford University and Howard Hughes
	Medical Institute, Stanford, California, USA.},
  keywords    = {Computer Simulation; Genome, Human; Genomics, methods; Humans; Polymorphism,
	Single Nucleotide; Reproducibility of Results; Sequence Analysis,
	DNA, methods},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  timestamp   = {2010.07.28}
}

@article{Perez-Cruz2005Convergence,
  author = {P{\'e}rez-Cruz, Fernando and Bouso{\~n}o-Calz{\'o}n, Carlos and
	Art{\'e}s-Rodr{\'\i}guez, Antonio},
  title = {Convergence of the {IRWLS} {P}rocedure to the {S}upport {V}ector
	{M}achine {S}olution.},
  journal = {Neural {C}omput},
  year = {2005},
  volume = {17},
  pages = {7--18},
  number = {1},
  month = {Jan},
  abstract = {An iterative reweighted least squares ({IRWLS}) procedure recently
	proposed is shown to converge to the support vector machine solution.
	{T}he convergence to a stationary point is ensured by modifying the
	original {IRWLS} procedure.},
  keywords = {80 and over, Aged, Algorithms, Amino Acids, Animals, Area Under Curve,
	Automated, Brain Chemistry, Brain Neoplasms, Comparative Study, Computer-Assisted,
	Cross-Sectional Studies, Decision Trees, Diagnosis, Diagnostic Imaging,
	Diagnostic Techniques, Discriminant Analysis, Evolution, Face, Genetic,
	Glaucoma, Humans, Lasers, Least-Squares Analysis, Magnetic Resonance
	Imaging, Magnetic Resonance Spectroscopy, Middle Aged, Models, Molecular,
	Nerve Fibers, Non-U.S. Gov't, Numerical Analysis, Ophthalmological,
	Optic Nerve Diseases, P.H.S., Pattern Recognition, Photic Stimulation,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	U.S. Gov't, beta-Lactamases, 15779160}
}

@article{Qian2003Prediction,
  author = {Qian, J. and Lin, J. and Luscombe, N. M. and Yu, H. and Gerstein,
	M.},
  title = {Prediction of regulatory networks: genome-wide identification of
	transcription factor targets from gene expression data},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1917--1926},
  number = {15},
  abstract = {Motivation: {D}efining regulatory networks, linking transcription
	factors ({TF}s) to their targets, is a central problem in post-genomic
	biology. {O}ne might imagine one could readily determine these networks
	through inspection of gene expression data. {H}owever, the relationship
	between the expression timecourse of a transcription factor and its
	target is not obvious (e.g. simple correlation over the timecourse),
	and current analysis methods, such as hierarchical clustering, have
	not been very successful in deciphering them. {R}esults: {H}ere we
	introduce an approach based on support vector machines ({SVM}s) to
	predict the targets of a transcription factor by identifying subtle
	relationships between their expression profiles. {I}n particular,
	we used {SVM}s to predict the regulatory targets for 36 transcription
	factors in the {S}accharomyces cerevisiae genome based on the microarray
	expression data from many different physiological conditions. {W}e
	trained and tested our {SVM} on a data set constructed to include
	a significant number of both positive and negative examples, directly
	addressing data imbalance issues. {T}his was non-trivial given that
	most of the known experimental information is only for positives.
	{O}verall, we found that 63\% of our {TF}-target relationships were
	confirmed through cross-validation. {W}e further assessed the performance
	of our regulatory network identifications by comparing them with
	the results from two recent genome-wide {C}h{IP}-chip experiments.
	{O}verall, we find the agreement between our results and these experiments
	is comparable to the agreement (albeit low) between the two experiments.
	{W}e find that this network has a delocalized structure with respect
	to chromosomal positioning, with a given transcription factor having
	targets spread fairly uniformly across the genome. {A}vailability:
	{T}he overall network of the relationships is available on the web
	at http://bioinfo.mbb.yale.edu/expression/echipchip},
  pdf = {../local/Qian2003Prediction.pdf},
  file = {Qian2003Prediction.pdf:local/Qian2003Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/15/1917}
}

@article{Qian2001Protein,
  author  = {Qian, J. and Luscombe, N. M. and Gerstein, M.},
  title   = {Protein {Fold} and {Family} {Occurrence} in {Genomes}: {Power-Law}
	{Behaviour} and {Evolutionary} {Model}},
  journal = {J. {Mol}. {Biol}.},
  year    = {2001},
  volume  = {313},
  pages   = {673--681},
  url     = {http://partslist.org/powerlaw},
  pdf     = {../local/qian01.pdf},
  file    = {qian01.pdf:local/qian01.pdf:PDF},
  subject = {bionet}
}

@article{Qin2004[Automated,
  author = {Dong-mei Qin and Zhan-yi Hu and Yong-heng Zhao},
  title = {Automated classification of celestial spectra based on support vector
	machines},
  journal = {Guang {P}u {X}ue {Y}u {G}uang {P}u {F}en {X}i},
  year = {2004},
  volume = {24},
  pages = {507--511},
  number = {4},
  month = {Apr},
  abstract = {The main objective of an automatic recognition system of celestial
	objects via their spectra is to classify celestial spectra and estimate
	physical parameters automatically. {T}his paper proposes a new automatic
	classification method based on support vector machines to separate
	non-active objects from active objects via their spectra. {W}ith
	low {SNR} and unknown red-shift value, it is difficult to extract
	true spectral lines, and as a result, active objects can not be determined
	by finding strong spectral lines and the spectral classification
	between non-active and active objects becomes difficult. {T}he proposed
	method in this paper combines the principal component analysis with
	support vector machines, and can automatically recognize the spectra
	of active objects with unknown red-shift values from non-active objects.
	{I}t finds its applicability in the automatic processing of voluminous
	observed data from large sky surveys in astronomy.},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under
	Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain
	Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic
	Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic,
	Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis,
	Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male,
	Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical
	Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence,
	P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15766170}
}

@article{Qin2003Kernel,
  author = {Qin, J. and Lewis, D. P. and Noble, W. S.},
  title = {Kernel hierarchical gene clustering from microarray expression data},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {2097--2104},
  number = {16},
  abstract = {Motivation: {U}nsupervised analysis of microarray gene expression
	data attempts to find biologically significant patterns within a
	given collection of expression measurements. {F}or example, hierarchical
	clustering can be applied to expression profiles of genes across
	multiple experiments, identifying groups of genes that share similar
	expression profiles. {P}revious work using the support vector machine
	supervised learning algorithm with microarray data suggests that
	higher-order features, such as pairwise and tertiary correlations
	across multiple experiments, may provide significant benefit in learning
	to recognize classes of co-expressed genes. {R}esults: {W}e describe
	a generalization of the hierarchical clustering algorithm that efficiently
	incorporates these higher-order features by using a kernel function
	to map the data into a high-dimensional feature space. {W}e then
	evaluate the utility of the kernel hierarchical clustering algorithm
	using both internal and external validation. {T}he experiments demonstrate
	that the kernel representation itself is insufficient to provide
	improved clustering performance. {W}e conclude that mapping gene
	expression data into a high-dimensional feature space is only a good
	idea when combined with a learning algorithm, such as the support
	vector machine that does not suffer from the curse of dimensionality.
	{A}vailability: {S}upplementary data at www.cs.columbia.edu/compbio/hiclust.
	{S}oftware source code available by request.},
  pdf = {../local/Qin2003Kernel.pdf},
  file = {Qin2003Kernel.pdf:local/Qin2003Kernel.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/16/2097}
}

@article{Qin2005Application,
  author = {Zhong Qin and Qiang Yu and Jun Li and Zhi-Yi Wu and Bing-Min Hu},
  title = {Application of least squares vector machines in modelling water vapor
	and carbon dioxide fluxes over a cropland.},
  journal = {J {Z}hejiang {U}niv {S}ci},
  year = {2005},
  volume = {6},
  pages = {491--495},
  number = {6},
  month = {Jun},
  abstract = {Least squares support vector machines ({LS}-{SVM}s), a nonlinear kernel
	based machine was introduced to investigate the prospects of application
	of this approach in modelling water vapor and carbon dioxide fluxes
	above a summer maize field using the dataset obtained in the {N}orth
	{C}hina {P}lain with eddy covariance technique. {T}he performances
	of the {LS}-{SVM}s were compared to the corresponding models obtained
	with radial basis function ({RBF}) neural networks. {T}he results
	indicated the trained {LS}-{SVM}s with a radial basis function kernel
	had satisfactory performance in modelling surface fluxes; its excellent
	approximation and generalization property shed new light on the study
	on complex processes in ecosystem.},
  doi = {10.1631/jzus.2005.B0491},
  pdf = {../local/Qin2005Application.pdf},
  file = {Qin2005Application.pdf:local/Qin2005Application.pdf:PDF},
  url = {http://dx.doi.org/10.1631/jzus.2005.B0491}
}

@article{Qiu2007structural,
  author    = {Qiu, J. and Hue, J. and Ben-Hur, A. and Vert, J.-P. and Noble, W.
	S.},
  title     = {A structural alignment kernel for protein structures.},
  journal   = {Bioinformatics},
  year      = {2007},
  month     = {May},
  volume    = {23},
  number    = {9},
  pages     = {1090--1098},
  doi       = {10.1093/bioinformatics/btl642},
  url       = {http://dx.doi.org/10.1093/bioinformatics/btl642},
  pmid      = {17234638},
  pii       = {btl642},
  abstract  = {MOTIVATION: This work aims to develop computational methods to annotate
	protein structures in an automated fashion. We employ a support vector
	machine (SVM) classifier to map from a given class of structures
	to their corresponding structural (SCOP) or functional (Gene Ontology)
	annotation. In particular, we build upon recent work describing various
	kernels for protein structures, where a kernel is a similarity function
	that the classifier uses to compare pairs of structures. RESULTS:
	We describe a kernel that is derived in a straightforward fashion
	from an existing structural alignment program, MAMMOTH. We find in
	our benchmark experiments that this kernel significantly out-performs
	a variety of other kernels, including several previously described
	kernels. Furthermore, in both benchmarks, classifying structures
	using MAMMOTH alone does not work as well as using an SVM with the
	MAMMOTH kernel. AVAILABILITY: http://noble.gs.washington.edu/proj/3dkernel},
  keywords  = {Algorithms; Amino Acid Sequence; Artificial Intelligence; Molecular
	Sequence Data; Pattern Recognition, Automated; Proteins; Sequence
	Alignment; Sequence Analysis, Protein; Sequence Homology, Amino Acid},
  owner     = {laurent},
  timestamp = {2007.07.27}
}

@article{Qiu2005computational,
  author    = {Qiu, S. and Adema, C. M. and Lane, T.},
  title     = {{A} computational study of off-target effects of {RNA} interference.},
  journal   = {Nucleic Acids Res.},
  year      = {2005},
  volume    = {33},
  number    = {6},
  pages     = {1834--1847},
  doi       = {10.1093/nar/gki324},
  url       = {http://dx.doi.org/10.1093/nar/gki324},
  pmid      = {15800213},
  pii       = {33/6/1834},
  abstract  = {RNA interference (RNAi) is an intracellular mechanism for post-transcriptional
	gene silencing that is frequently used to study gene function. RNAi
	is initiated by short interfering RNA (siRNA) of approximately 21
	nt in length, either generated from the double-stranded RNA (dsRNA)
	by using the enzyme Dicer or introduced experimentally. Following
	association with an RNAi silencing complex, siRNA targets mRNA transcripts
	that have sequence identity for destruction. A phenotype resulting
	from this knockdown of expression may inform about the function of
	the targeted gene. However, 'off-target effects' compromise the specificity
	of RNAi if sequence identity between siRNA and random mRNA transcripts
	causes RNAi to knockdown expression of non-targeted genes. The complete
	off-target effects must be investigated systematically on each gene
	in a genome by adjusting a group of parameters, which is too expensive
	to conduct experimentally and motivates a study in silico. This computational
	study examined the potential for off-target effects of RNAi, employing
	the genome and transcriptome sequence data of Homo sapiens, Caenorhabditis
	elegans and Schizosaccharomyces pombe. The chance for RNAi off-target
	effects proved considerable, ranging from 5 to 80\% for each of the
	organisms, when using as parameter the exact identity between any
	possible siRNA sequences (arbitrary length ranging from 17 to 28
	nt) derived from a dsRNA (range 100-400 nt) representing the coding
	sequences of target genes and all other siRNAs within the genome.
	Remarkably, high-sequence specificity and low probability for off-target
	reactivity were optimally balanced for siRNA of 21 nt, the length
	observed mostly in vivo. The chance for off-target RNAi increased
	(although not always significantly) with greater length of the initial
	dsRNA sequence, inclusion into the analysis of available untranslated
	region sequences and allowing for mismatches between siRNA and target
	sequences. siRNA sequences from within 100 nt of the 5' termini of
	coding sequences had low chances for off-target reactivity. This
	may be owing to coding constraints for signal peptide-encoding regions
	of genes relative to regions that encode for mature proteins. Off-target
	distribution varied along the chromosomes of C.elegans, apparently
	owing to the use of more unique sequences in gene-dense regions.
	Finally, biological and thermodynamical descriptors of effective
	siRNA reduced the number of potential siRNAs compared with those
	identified by sequence identity alone, but off-target RNAi remained
	likely, with an off-target error rate of approximately 10\%. These
	results also suggest a direction for future in vivo studies that
	could both help in calibrating true off-target rates in living organisms
	and also in contributing evidence toward the debate of whether siRNA
	efficacy is correlated with, or independent of, the target molecule.
	In summary, off-target effects present a real but not prohibitive
	concern that should be considered for RNAi experiments.},
  keywords  = {sirna},
  owner     = {vert},
  timestamp = {2006.04.27}
}

@article{Qu2004Supervised,
  author = {Yi Qu and Shizhong Xu},
  title = {Supervised cluster analysis for microarray data based on multivariate
	{G}aussian mixture.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1905--1913},
  number = {12},
  month = {Aug},
  abstract = {M{OTIVATION}: {G}rouping genes having similar expression patterns
	is called gene clustering, which has been proved to be a useful tool
	for extracting underlying biological information of gene expression
	data. {M}any clustering procedures have shown success in microarray
	gene clustering; most of them belong to the family of heuristic clustering
	algorithms. {M}odel-based algorithms are alternative clustering algorithms,
	which are based on the assumption that the whole set of microarray
	data is a finite mixture of a certain type of distributions with
	different parameters. {A}pplication of the model-based algorithms
	to unsupervised clustering has been reported. {H}ere, for the first
	time, we demonstrated the use of the model-based algorithm in supervised
	clustering of microarray data. {RESULTS}: {W}e applied the proposed
	methods to real gene expression data and simulated data. {W}e showed
	that the supervised model-based algorithm is superior over the unsupervised
	method and the support vector machines ({SVM}) method. {AVAILABILITY}:
	{T}he program written in the {SAS} language implementing methods
	{I}-{III} in this report is available upon request. {T}he software
	of {SVM}s is available in the website http://svm.sdsc.edu/cgi-bin/nph-{SVM}submit.cgi},
  doi = {10.1093/bioinformatics/bth177},
  pdf = {../local/Qu2004Supervised.pdf},
  file = {Qu2004Supervised.pdf:local/Qu2004Supervised.pdf:PDF},
  pii = {bth177},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth177}
}

@article{Quackenbush2002Microarray,
  author      = {Quackenbush, John},
  title       = {Microarray data normalization and transformation.},
  journal     = {Nat Genet},
  year        = {2002},
  month       = {Dec},
  volume      = {32 Suppl},
  pages       = {496--501},
  doi         = {10.1038/ng1032},
  url         = {http://dx.doi.org/10.1038/ng1032},
  pmid        = {12454644},
  pii         = {ng1032},
  keywords    = {Animals; Data Interpretation, Statistical; Forecasting; Gene Expression
	Profiling, methods; Humans; Oligonucleotide Array Sequence Analysis,
	methods; Research Design},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  timestamp   = {2010.08.04}
}

@manual{RCoreTeam2012R,
  author       = {{R Core Team}},
  title        = {{R: A Language and Environment for Statistical Computing}},
  organization = {R Foundation for Statistical Computing},
  address      = {Vienna, Austria},
  year         = {2012},
  note         = {{ISBN} 3-900051-07-0},
  url          = {http://www.R-project.org},
  owner        = {jp},
  timestamp    = {2012.07.31}
}

@article{Rucker1993Counts,
  author = {R{\"u}cker, G. and R{\"u}cker, C.},
  title = {Counts of {A}ll {W}alks as {A}tomic and {M}olecular {D}escriptors},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {1993},
  volume = {33},
  pages = {683--695},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@inproceedings{Radulescu2006ECCS,
  author    = {Radulescu, O. and Gorban, A. and Vakulenko, S. and Zinovyev, A.},
  title     = {Hierarchies and modules in complex biological systems},
  booktitle = {Proceedings of European Conference on Complex Systems, Oxford, UK},
  year      = {2006},
  owner     = {Andrei Zinovyev},
  timestamp = {2011.04.08}
}

@article{Radulescu2005JBSI,
  author = {Radulescu, O. and Lagarrigue, S. and Siegel, A. and Le Borgne, M. and
	Veber, P.},
  title = {Topology and static response of interaction networks in molecular
	biology.},
  journal = {J. {R}. {S}oc. {I}nterface},
  year = {2005},
  note = {Published online}
}

@article{Raghava2005Correlation,
  author   = {Raghava, Gajendra P S and Han, Joon H},
  title    = {Correlation and prediction of gene expression level from amino acid
	and dipeptide composition of its protein.},
  journal  = {{BMC} {Bioinformatics}},
  year     = {2005},
  month    = {Mar},
  volume   = {6},
  number   = {1},
  pages    = {59},
  doi      = {10.1186/1471-2105-6-59},
  url      = {http://dx.doi.org/10.1186/1471-2105-6-59},
  pii      = {1471-2105-6-59},
  abstract = {B{ACKGROUND}: {A} large number of papers have been published on analysis
	of microarray data with particular emphasis on normalization of data,
	detection of differentially expressed genes, clustering of genes
	and regulatory network. {O}n other hand there are only few studies
	on relation between expression level and composition of nucleotide/protein
	sequence, using expression data. {T}here is a need to understand
	why particular genes/proteins express more in particular conditions.
	{I}n this study, we analyze 3468 genes of {S}accharomyces cerevisiae
	obtained from {H}olstege et al., (1998) to understand the relationship
	between expression level and amino acid composition. {RESULTS}: {W}e
	compute the correlation between expression of a gene and amino acid
	composition of its protein. {I}t was observed that some residues
	(like {A}la, {G}ly, {A}rg and {V}al) have significant positive correlation
	(r > 0.20) and some other residues ({L}ike {A}sp, {L}eu, {A}sn and
	{S}er) have negative correlation (r < -0.15) with the expression
	of genes. {A} significant negative correlation (r = -0.18) was also
	found between length and gene expression. {T}hese observations indicate
	the relationship between percent composition and gene expression
	level. {T}hus, attempts have been made to develop a {S}upport {V}ector
	{M}achine ({SVM}) based method for predicting the expression level
	of genes from its protein sequence. {I}n this method the {SVM} is
	trained with proteins whose gene expression data is known in a given
	condition. {T}hen trained {SVM} is used to predict the gene expression
	of other proteins of the same organism in the same condition. {A}
	correlation coefficient r = 0.70 was obtained between predicted and
	experimentally determined expression of genes, which improves from
	r = 0.70 to 0.72 when dipeptide composition was used instead of residue
	composition. {T}he method was evaluated using 5-fold cross validation
	test. {W}e also demonstrate that amino acid composition information
	along with gene expression data can be used for improving the function
	classification of proteins. {CONCLUSION}: {T}here is a correlation
	between gene expression and amino acid composition that can be used
	to predict the expression level of genes up to a certain extent.
	{A} web server based on the above strategy has been developed for
	calculating the correlation between amino acid composition and gene
	expression and prediction of expression level http://kiwi.postech.ac.kr/raghava/lgepred/.
	{T}his server will allow users to study the evolution from expression
	data.},
  keywords = {biosvm}
}

@article{Rahmann2004Mean,
  author = {Rahmann, S. and Gr{\"a}fe, C.},
  title = {Mean and variance of the {G}ibbs free energy of oligonucleotides
	in the nearest neighbor model under varying conditions.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {2928--2933},
  number = {17},
  month = {Nov},
  abstract = {M{OTIVATION}: {I}n order to assess the stability of {DNA}-{DNA} hybridizations-for
	example during {PCR} primer design or oligonucleotide selection for
	microarrays-one needs to predict the change in {G}ibbs free energy
	{D}elta{G} during hybridization. {T}he nearest neighbor model provides
	a good compromise between accuracy and computational simplicity for
	this task. {T}o determine optimal combinations of reaction parameters
	(temperature, salt concentration, oligonucleotide length and {GC}-content),
	one would like to understand how {D}elta{G} depends on all of these
	parameters simultaneously. {RESULTS}: {W}e derive analytic results
	about the distribution of nearest neighbor {D}elta{G} values for
	a {B}ernoulli random sequence model (specified by oligonucleotide
	length and average {GC}-content) under given experimental conditions.
	{W}e find that the distribution of {D}elta{G} values is approximately
	{G}aussian and provide exact formulas for expectation and variance.},
  doi = {10.1093/bioinformatics/bth334},
  pii = {bth334},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth334}
}

% NOTE(review): this entry appears to describe the same article as the entry
% keyed Rahnenfuehrer2004Calculating; the key is kept so existing citations
% keep resolving. Consider consolidating the two once citing documents are checked.
@article{Rahnenfuhrer2004,
  author = {Rahnenf{\"u}hrer, J. and Domingues, F. S. and Maydt, J. and Lengauer, T.},
  title = {Calculating the statistical significance of changes in pathway activity
	from gene expression data},
  journal = {Statistical {A}pplications in {G}enetics and {M}olecular {B}iology},
  year = {2004},
  volume = {3},
  pages = {Article 16},
  number = {1}
}

@article{Rahnenfuehrer2004Calculating,
  author = {Rahnenf{\"u}hrer, J. and Domingues, F. S. and Maydt, J. and Lengauer,
	T.},
  title = {Calculating the statistical significance of changes in pathway activity
	from gene expression data.},
  journal = {Stat. Appl. Genet. Mol. Biol.},
  year = {2004},
  volume = {3},
  pages = {Article 16},
  abstract = {We present a statistical approach to scoring changes in activity of
	metabolic pathways from gene expression data. The method identifies
	the biologically relevant pathways with corresponding statistical
	significance. Based on gene expression data alone, only local structures
	of genetic networks can be recovered. Instead of inferring such a
	network, we propose a hypothesis-based approach. We use given knowledge
	about biological networks to improve sensitivity and interpretability
	of findings from microarray experiments. Recently introduced methods
	test if members of predefined gene sets are enriched in a list of
	top-ranked genes in a microarray study. We improve this approach
	by defining scores that depend on all members of the gene set and
	that also take pairwise co-regulation of these genes into account.
	We calculate the significance of co-regulation of gene sets with
	a nonparametric permutation test. On two data sets the method is
	validated and its biological relevance is discussed. It turns out
	that useful measures for co-regulation of genes in a pathway can
	be identified adaptively. We refine our method in two aspects specific
	to pathways. First, to overcome the ambiguity of enzyme-to-gene mappings
	for a fixed pathway, we introduce algorithms for selecting the best
	fitting gene for a specific enzyme in a specific condition. In selected
	cases, functional assignment of genes to pathways is feasible. Second,
	the sensitivity of detecting relevant pathways is improved by integrating
	information about pathway topology. The distance of two enzymes is
	measured by the number of reactions needed to connect them, and enzyme
	pairs with a smaller distance receive a higher weight in the score
	calculation.},
  doi = {10.2202/1544-6115.1055},
  institution = {Max-Planck-Institute for Informatics, Saarbr{\"u}cken, Germany. rahnenfj@mpi-sb.mpg.de},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {16646794},
  timestamp = {2011.09.27},
  url = {http://dx.doi.org/10.2202/1544-6115.1055}
}

@article{Rain2001protein-protein,
  author  = {Rain, J.-C. and Selig, L. and De Reuse, H. and Battaglia, V. and
	Reverdy, C. and Simon, S. and Lenzen, G. and Petel, F. and Wojcik,
	J. and Sch{\"a}chter, V. and Chemama, Y. and Labigne, A. and Legrain,
	P.},
  title   = {The protein-protein interaction map of {Helicobacter} pylori},
  journal = {Nature},
  year    = {2001},
  volume  = {409},
  pages   = {211--215},
  url     = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v409/n6817/full/409211a0_fs.html&content_filetype=pdf},
  pdf     = {../local/rain01.pdf},
  file    = {rain01.pdf:local/rain01.pdf:PDF},
  subject = {bionetprot}
}

@article{Rajagopalan2005Inferring,
  author      = {Rajagopalan, D. and Agarwal, P.},
  title       = {Inferring pathways from gene lists using a literature-derived network
	of biological relationships.},
  journal     = {Bioinformatics},
  year        = {2005},
  month       = {Mar},
  volume      = {21},
  number      = {6},
  pages       = {788--793},
  doi         = {10.1093/bioinformatics/bti069},
  url         = {http://dx.doi.org/10.1093/bioinformatics/bti069},
  pmid        = {15509611},
  pii         = {bti069},
  abstract    = {MOTIVATION: A number of omic technologies such as transcriptional
	profiling, proteomics, literature searches, genetic association,
	etc. help in the identification of sets of important genes. A subset
	of these genes may act in a coordinated manner, possibly because
	they are part of the same biological pathway. Interpreting such gene
	lists and relating them to pathways is a challenging task. Databases
	of biological relationships between thousands of mammalian genes
	can help in deciphering omics data. The relationships between genes
	can be assembled into a biological network with each protein as a
	node and each relationship as an edge between two proteins (or nodes).
	This network may then be searched for subnetworks consisting largely
	of interesting genes from the omics experiment. The subset of genes
	in the subnetwork along with the web of relationships between them
	helps to decipher the underlying pathways. Finding such subnetworks
	that maximally include all proteins from the query set but few others
	is the focus for this paper. RESULTS: We present a heuristic algorithm
	and a scoring function that work well both on simulated data and
	on data from known pathways. The scoring function is an extension
	of a previous study for a single biological experiment. We use a
	simple set of heuristics that provide a more efficient solution than
	the simulated annealing method. We find that our method works on
	reasonably complex curated networks containing approximately 9000
	biological entities (genes and metabolites), and approximately 30,000
	biological relationships. We also show that our method can pick up
	a pathway signal from a query list including a moderate number of
	genes unrelated to the pathway. In addition, we quantify the sensitivity
	and specificity of the technique.},
  institution = {D, 709 Swedeland Road, UW2230, King of Prussia, PA 19406-0939, USA.
	dilip_rajagopalan@gsk.com},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Rajagopalan2005Inferring.pdf},
  file        = {Rajagopalan2005Inferring.pdf:Rajagopalan2005Inferring.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.10.08}
}

@inproceedings{Rajwan2000Universal,
  author = {Rajwan, D. and Feder, M.},
  title = {Universal finite memory machines for coding binary sequences},
  booktitle = {Proceedings of the {D}ata {C}ompression {C}onference ({DCC} 2000)},
  year = {2000},
  pages = {113--122},
  abstract = {In this work we consider the problem of universal sequential probability
	assignment, under self-information loss, where the machine for performing
	the universal probability assignment is constrained to have a finite
	memory. {S}equential probability assignment is equivalent to lossless
	source coding if we ignore the number of states required to convert
	the probability estimate into code bits. {W}e consider both the probabilistic
	setting where the sequence is generated by a probabilistic source
	(either {B}ernoulli {IID} source or q-th order {M}arkov source),
	and the deterministic setting where the sequence is a deterministic
	individual sequence. {W}e also consider the case where the universal
	machine is deterministic, randomized, time-invariant or time-variant.
	{W}e provide in most cases lower bounds and describe finite memory
	universal machines whose performance, in terms of the memory size,
	is compared to these bounds.},
  pdf = {../local/Rajwan2000Universal.pdf},
  file = {Rajwan2000Universal.pdf:local/Rajwan2000Universal.pdf:PDF},
  owner = {vert}
}

@article{Rakotomamonjy2008SimpleMKL,
  title = {{SimpleMKL}},
  author = {Rakotomamonjy, A. and Bach, F. and Canu, S. and Grandvalet, Y.},
  journal = {J. Mach. Learn. Res.},
  volume = {9},
  pages = {2491--2521},
  year = {2008},
  timestamp = {2008.12.08},
  owner = {jp}
}

@inproceedings{Rakotomamonjy2007More,
  author = {Rakotomamonjy, Alain and Bach, Francis and Canu, St{\'e}phane and
	Grandvalet, Yves},
  title = {More efficiency in multiple kernel learning},
  booktitle = {ICML '07: Proceedings of the 24th international conference on Machine
	learning},
  year = {2007},
  pages = {775--782},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1273496.1273594},
  isbn = {978-1-59593-793-3},
  location = {Corvallis, Oregon}
}

@article{Ralaivola2005Graph,
  author = {Ralaivola, L. and Swamidass, S. J. and Saigo, H. and Baldi, P.},
  title = {Graph kernels for chemical informatics.},
  journal = {Neural {N}etw.},
  volume = {18},
  number = {8},
  pages = {1093--1110},
  month = {Sep},
  year = {2005},
  doi = {10.1016/j.neunet.2005.07.009},
  url = {http://dx.doi.org/10.1016/j.neunet.2005.07.009},
  pii = {S0893-6080(05)00169-3},
  keywords = {chemoinformatics},
  pdf = {../local/Ralaivola2005Graph.pdf},
  file = {Ralaivola2005Graph.pdf:local/Ralaivola2005Graph.pdf:PDF},
  abstract = {Increased availability of large repositories of chemical compounds
	is creating new challenges and opportunities for the application
	of machine learning methods to problems in computational chemistry
	and chemical informatics. {B}ecause chemical compounds are often
	represented by the graph of their covalent bonds, machine learning
	methods in this domain must be capable of processing graphical structures
	with variable size. {H}ere, we first briefly review the literature
	on graph kernels and then introduce three new kernels ({T}animoto,
	{M}in{M}ax, {H}ybrid) based on the idea of molecular fingerprints
	and counting labeled paths of depth up to d using depth-first search
	from each possible vertex. {T}he kernels are applied to three classification
	problems to predict mutagenicity, toxicity, and anti-cancer activity
	on three publicly available data sets. {T}he kernels achieve performances
	at least comparable, and most often superior, to those previously
	reported in the literature reaching accuracies of 91.5\% on the {M}utag
	dataset, 65-67\% on the {PTC} ({P}redictive {T}oxicology {C}hallenge)
	dataset, and 72\% on the {NCI} ({N}ational {C}ancer {I}nstitute)
	dataset. {P}roperties and tradeoffs of these kernels, as well as
	other proposed kernels that leverage 1{D} or 3{D} representations
	of molecules, are briefly discussed.}
}

@article{Raliou2010Human,
  author = {Mariam Raliou and Marta Grauso and Brice Hoffmann and Claude Nespoulous
	and H{\'e}l{\`e}ne Debat and Bano Singh and Didier Trotier and Jean-Claude
	Pernollet and Jean-Pierre Montmayeur and Annick Faurion and Lo{\"\i}c
	Briand},
  title = {Human genetic polymorphisms in {T1R1} and {T1R3} affect their function},
  journal = {Chemical Senses},
  year = {In Press},
  owner = {bricehoffmann},
  timestamp = {2010.06.09}
}

@article{Ramaswamy2003molecular,
  author = {Ramaswamy, S. and Ross, K. N. and Lander, E. S. and Golub, T. R.},
  title = {A molecular signature of metastasis in primary solid tumors.},
  journal = {Nat. Genet.},
  volume = {33},
  number = {1},
  pages = {49--54},
  month = {Jan},
  year = {2003},
  doi = {10.1038/ng1060},
  url = {http://dx.doi.org/10.1038/ng1060},
  pmid = {12469122},
  pii = {ng1060},
  institution = {Whitehead Institute/MIT Center for Genome Research, One Kendall Square,
	Building 300, Cambridge, Massachusetts 02139, USA.},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Ramaswamy2003molecular.pdf},
  file = {Ramaswamy2003molecular.pdf:Ramaswamy2003molecular.pdf:PDF},
  owner = {jp},
  timestamp = {2011.09.20},
  abstract = {Metastasis is the principal event leading to death in individuals
	with cancer, yet its molecular basis is poorly understood. To explore
	the molecular differences between human primary tumors and metastases,
	we compared the gene-expression profiles of adenocarcinoma metastases
	of multiple tumor types to unmatched primary adenocarcinomas. We
	found a gene-expression signature that distinguished primary from
	metastatic adenocarcinomas. More notably, we found that a subset
	of primary tumors resembled metastatic tumors with respect to this
	gene-expression signature. We confirmed this finding by applying
	the expression signature to data on 279 primary solid tumors of diverse
	types. We found that solid tumors carrying the gene-expression signature
	were most likely to be associated with metastasis and poor clinical
	outcome (P < 0.03). These results suggest that the metastatic potential
	of human tumors is encoded in the bulk of a primary tumor, thus challenging
	the notion that metastases arise from rare cells within a primary
	tumor that have the ability to metastasize.}
}

@article{Ramaswamy2001Multiclass,
  author = {Ramaswamy, S. and Tamayo, P. and Rifkin, R. and Mukherjee, S. and
	Yeang, C. H. and Angelo, M. and Ladd, C. and Reich, M. and Latulippe,
	E. and Mesirov, J. P. and Poggio, T. and Gerald, W. and Loda, M. and
	Lander, E. S. and Golub, T. R.},
  title = {Multiclass cancer diagnosis using tumor gene expression signatures},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2001},
  volume = {98},
  pages = {15149--15154},
  number = {26},
  month = {Dec},
  abstract = {The optimal treatment of patients with cancer depends on establishing
	accurate diagnoses by using a complex combination of clinical and
	histopathological data. {I}n some instances, this task is difficult
	or impossible because of atypical clinical presentation or histopathology.
	{T}o determine whether the diagnosis of multiple common adult malignancies
	could be achieved purely by molecular classification, we subjected
	218 tumor samples, spanning 14 common tumor types, and 90 normal
	tissue samples to oligonucleotide microarray gene expression analysis.
	{T}he expression levels of 16,063 genes and expressed sequence tags
	were used to evaluate the accuracy of a multiclass classifier based
	on a support vector machine algorithm. {O}verall classification accuracy
	was 78\%, far exceeding the accuracy of random classification (9\%).
	{P}oorly differentiated cancers resulted in low-confidence predictions
	and could not be accurately classified according to their tissue
	of origin, indicating that they are molecularly distinct entities
	with dramatically different gene expression patterns compared with
	their well differentiated counterparts. {T}aken together, these results
	demonstrate the feasibility of accurate, multiclass molecular cancer
	classification and suggest a strategy for future clinical implementation
	of molecular cancer diagnostics.},
  doi = {10.1073/pnas.211566398},
  pdf = {../local/Ramaswamy2001Multiclass.pdf},
  file = {Ramaswamy2001Multiclass.pdf:local/Ramaswamy2001Multiclass.pdf:PDF},
  keywords = {biosvm microarray},
  owner = {vert},
  url = {http://dx.doi.org/10.1073/pnas.211566398}
}

@article{Rammensee1999SYFPEITHI,
  author = {H. Rammensee and J. Bachmann and N. P. Emmerich and O. A. Bachor
	and S. Stevanovi{\'c}},
  title = {{SYFPEITHI}: database for {MHC} ligands and peptide motifs.},
  journal = {Immunogenetics},
  year = {1999},
  volume = {50},
  pages = {213--219},
  number = {3--4},
  month = {Nov},
  abstract = {The first version of the major histocompatibility complex (MHC) databank
	SYFPEITHI: database for MHC ligands and peptide motifs, is now available
	to the general public. It contains a collection of MHC class I and
	class II ligands and peptide motifs of humans and other species,
	such as apes, cattle, chicken, and mouse, for example, and is continuously
	updated. All motifs currently available are accessible as individual
	entries. Searches for MHC alleles, MHC motifs, natural ligands, T-cell
	epitopes, source proteins/organisms and references are possible.
	Hyperlinks to the EMBL and PubMed databases are included. In addition,
	ligand predictions are available for a number of MHC allelic products.
	The database content is restricted to published data only.},
  keywords = {Amino Acid Motifs; Amino Acid Sequence; Animals; Databases, Factual;
	Humans; Internet; Ligands; Major Histocompatibility Complex; Molecular
	Sequence Data; Research Support, Non-U.S. Gov't; Sequence Homology,
	Amino Acid},
  owner = {jacob},
  pii = {90500213.251},
  pmid = {10602881},
  timestamp = {2006.08.30}
}

@article{Rammensee1995MHC,
  author = {Rammensee, H. G. and Friede, T. and Stevanovi{\'c}, S.},
  title = {{MHC} ligands and peptide motifs: first listing},
  journal = {Immunogenetics},
  year = {1995},
  volume = {41},
  pages = {178--228},
  number = {4},
  keywords = {immunoinformatics},
  pmid = {7890324},
  timestamp = {2007.01.25}
}

@inproceedings{Ramon2003Expressivity,
  author = {Ramon, J. and G{\"a}rtner, T.},
  title = {{E}xpressivity versus efficiency of graph kernels},
  booktitle = {{P}roceedings of the {F}irst {I}nternational {W}orkshop on {M}ining
	{G}raphs, {T}rees and {S}equences},
  year = {2003},
  editor = {Washio, T. and De Raedt, L.},
  pages = {65--74},
  keywords = {kernel-theory chemoinformatics},
  owner = {mahe},
  timestamp = {2006.07.31}
}

@comment{NOTE(review) for Rangarajan96lagrangian: the entry type is inproceedings,
 but booktitle names a journal (IEEE Trans. Neural Networks). The page range may
 belong to a conference version of this paper rather than the journal article.
 TODO: verify against the publisher record and either switch to an article entry
 with journal/volume/number, or replace booktitle with the proceedings title.}
@inproceedings{Rangarajan96lagrangian,
  author = {A. Rangarajan and E. Mjolsness},
  title = {A Lagrangian Relaxation Network for Graph Matching},
  booktitle = {IEEE Trans. Neural Networks},
  year = {1996},
  pages = {4629--4634},
  publisher = {IEEE Press}
}

@article{Rangwala2005Profile-based,
  author = {Rangwala, H. and Karypis, G.},
  title = {Profile-based direct kernels for remote homology detection and fold
	recognition.},
  journal = {Bioinformatics},
  volume = {21},
  number = {23},
  pages = {4239--4247},
  month = {Dec},
  year = {2005},
  doi = {10.1093/bioinformatics/bti687},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti687},
  pmid = {16188929},
  pii = {bti687},
  keywords = {biosvm},
  owner = {vert},
  timestamp = {2007.08.01},
  abstract = {MOTIVATION: Protein remote homology detection is a central problem
	in computational biology. Supervised learning algorithms based on
	support vector machines are currently one of the most effective methods
	for remote homology detection. The performance of these methods depends
	on how the protein sequences are modeled and on the method used to
	compute the kernel function between them. RESULTS: We introduce two
	classes of kernel functions that are constructed by combining sequence
	profiles with new and existing approaches for determining the similarity
	between pairs of protein sequences. These kernels are constructed
	directly from these explicit protein similarity measures and employ
	effective profile-to-profile scoring schemes for measuring the similarity
	between pairs of proteins. Experiments with remote homology detection
	and fold recognition problems show that these kernels are capable
	of producing results that are substantially better than those produced
	by all of the existing state-of-the-art SVM-based methods. In addition,
	the experiments show that these kernels, even when used in the absence
	of profiles, produce results that are better than those produced
	by existing non-profile-based schemes. AVAILABILITY: The programs
	for computing the various kernel functions are available on request
	from the authors.}
}

@phdthesis{Rapaport2008Introduction,
  author = {Rapaport, F.},
  title = {Introduction de la connaissance a priori dans l'{\'e}tude des puces
	{\`a} {ADN}},
  school = {Universit{\'e} Pierre et Marie Curie - Paris 6},
  year = {2008},
  owner = {jp},
  timestamp = {2010.10.12}
}

@article{Rapaport2008Classification,
  author = {Rapaport, F. and Barillot, E. and Vert, J.-P.},
  title = {Classification of array{CGH} data using fused {SVM}},
  journal = {Bioinformatics},
  volume = {24},
  number = {13},
  pages = {i375--i382},
  month = {Jul},
  year = {2008},
  doi = {10.1093/bioinformatics/btn188},
  url = {http://dx.doi.org/10.1093/bioinformatics/btn188},
  pmid = {18586737},
  pii = {btn188},
  institution = {Institut Curie, Centre de Recherche, INSERM, U900, Paris, F-75248
	France. franck.rapaport@curie.fr},
  keywords = {cgh},
  pdf = {../local/Rapaport2008Classification.pdf},
  file = {Rapaport2008Classification.pdf:Rapaport2008Classification.pdf:PDF},
  owner = {laurent},
  timestamp = {2008.10.26},
  abstract = {MOTIVATION: Array-based comparative genomic hybridization (arrayCGH)
	has recently become a popular tool to identify DNA copy number variations
	along the genome. These profiles are starting to be used as markers
	to improve prognosis or diagnosis of cancer, which implies that methods
	for automated supervised classification of arrayCGH data are needed.
	Like gene expression profiles, arrayCGH profiles are characterized
	by a large number of variables usually measured on a limited number
	of samples. However, arrayCGH profiles have a particular structure
	of correlations between variables, due to the spatial organization
	of bacterial artificial chromosomes along the genome. This suggests
	that classical classification methods, often based on the selection
	of a small number of discriminative features, may not be the most
	accurate methods and may not produce easily interpretable prediction
	rules. RESULTS: We propose a new method for supervised classification
	of arrayCGH data. The method is a variant of support vector machine
	that incorporates the biological specificities of DNA copy number
	variations along the genome as prior knowledge. The resulting classifier
	is a sparse linear classifier based on a limited number of regions
	automatically selected on the chromosomes, leading to easy interpretation
	and identification of discriminative regions of the genome. We test
	this method on three classification problems for bladder and uveal
	cancer, involving both diagnosis and prognosis. We demonstrate that
	the introduction of the new prior on the classifier leads not only
	to more accurate predictions, but also to the identification of known
	and new regions of interest in the genome. AVAILABILITY: All data
	and algorithms are publicly available.}
}

@article{Rapaport2007Classification,
  author = {Rapaport, F. and Zinovyev, A. and Dutreix, M. and Barillot, E. and
	Vert, J.-P.},
  title = {Classification of microarray data using gene networks},
  journal = {BMC Bioinformatics},
  year = {2007},
  volume = {8},
  pages = {35},
  timestamp = {2007.04.12}
}

@article{Rarey1996fast,
  author = {M. Rarey and B. Kramer and T. Lengauer and G. Klebe},
  title = {{A} fast flexible docking method using an incremental construction
	algorithm.},
  journal = {J. Mol. Biol.},
  volume = {261},
  number = {3},
  pages = {470--489},
  month = {Aug},
  year = {1996},
  doi = {10.1006/jmbi.1996.0477},
  url = {http://dx.doi.org/10.1006/jmbi.1996.0477},
  pmid = {8780787},
  pii = {S0022-2836(96)90477-5},
  keywords = {Aldehyde Reductase, Algorithms, Amiloride, Aminoimidazole Carboxamide,
	Animals, Arabinose, Automation, Binding Sites, Carbonic Anhydrases,
	Computational Biology, Computer Simulation, Concanavalin A, Crystallography,
	Databases, Drug Design, Drug Evaluation, Enzyme Inhibitors, Factual,
	Folic Acid, Folic Acid Antagonists, Fructose-Bisphosphatase, Humans,
	Internet, Ligands, Methotrexate, Models, Molecular, Non-U.S. Gov't,
	Pancreatic Elastase, Pentamidine, Pliability, Point Mutation, Preclinical,
	Protein Binding, Protein Conformation, Proteins, Reproducibility
	of Results, Research Support, Ribonucleosides, Software, Tetrahydrofolate
	Dehydrogenase, Thermolysin, Time Factors, Trypsin, X-Ray, 8780787},
  owner = {mahe},
  timestamp = {2006.09.05},
  abstract = {We present an automatic method for docking organic ligands into protein
	binding sites. The method can be used in the design process of specific
	protein ligands. It combines an appropriate model of the physico-chemical
	properties of the docked molecules with efficient methods for sampling
	the conformational space of the ligand. If the ligand is flexible,
	it can adopt a large variety of different conformations. Each such
	minimum in conformational space presents a potential candidate for
	the conformation of the ligand in the complexed state. Our docking
	method samples the conformation space of the ligand on the basis
	of a discrete model and uses a tree-search technique for placing
	the ligand incrementally into the active site. For placing the first
	fragment of the ligand into the protein, we use hashing techniques
	adapted from computer vision. The incremental construction algorithm
	is based on a greedy strategy combined with efficient methods for
	overlap detection and for the search of new interactions. We present
	results on 19 complexes of which the binding geometry has been crystallographically
	determined. All considered ligands are docked in at most three minutes
	on a current workstation. The experimentally observed binding mode
	of the ligand is reproduced with 0.5 to 1.2 A rms deviation. It is
	almost always found among the highest-ranking conformations computed.}
}

@article{Rarey1996Placement,
  author = {M. Rarey and S. Wefing and T. Lengauer},
  title = {Placement of medium-sized molecular fragments into active sites of
	proteins.},
  journal = {J Comput Aided Mol Des},
  volume = {10},
  number = {1},
  pages = {41--54},
  month = {Feb},
  year = {1996},
  pmid = {8786414},
  institution = {German National Research Center for Information Technology (GMD),
	Institute for Algorithms and Scientific Computing (SCAI), Sankt Augustin,
	Germany.},
  keywords = {Algorithms; Binding Sites; Databases, Factual; Ligands; Models, Chemical;
	Peptide Fragments, chemistry; Proteins, chemistry; Software},
  owner = {bricehoffmann},
  timestamp = {2009.02.13},
  abstract = {We present an algorithm for placing molecular fragments into the active
	site of a receptor. A molecular fragment is defined as a connected
	part of a molecule containing only complete ring systems. The algorithm
	is part of a docking tool, called FLEXX, which is currently under
	development at GMD. The overall goal is to provide means of automatically
	computing low-energy conformations of the ligand within the active
	site, with an accuracy approaching the limitations of experimental
	methods for resolving molecular structures and within a run time
	that allows for docking large sets of ligands. The methods by which
	we plan to achieve this goal are the explicit exploitation of molecular
	flexibility of the ligand and the incorporation of physicochemical
	properties of the molecules. The algorithm for fragment placement,
	which is the topic of this paper, is based on pattern recognition
	techniques and is able to predict a small set of possible positions
	of a molecular fragment with low flexibility within seconds on a
	workstation. In most cases, a placement with rms deviation below
	1.0 A with respect to the X-ray structure is found among the 10 highest
	ranking solutions, assuming that the receptor is given in the bound
	conformation.}
}

@book{Rasmussen2005Gaussian,
  title = {{Gaussian} Processes for Machine Learning},
  series = {Adaptive Computation and Machine Learning},
  publisher = {The MIT Press},
  year = {2005},
  author = {Rasmussen, Carl E. and Williams, Christopher K. I.},
  month = {December},
  abstract = {Gaussian processes (GPs) provide a principled, practical, probabilistic
	approach to learning in kernel machines. GPs have received increased
	attention in the machine-learning community over the past decade,
	and this book provides a long-needed systematic and unified treatment
	of theoretical and practical aspects of GPs in machine learning.
	The treatment is comprehensive and self-contained, targeted at researchers
	and students in machine learning and applied statistics.
 
 The book deals with the supervised-learning problem for both regression
	and classification, and includes detailed algorithms. A wide variety
	of covariance (kernel) functions are presented and their properties
	discussed. Model selection is discussed both from a Bayesian and
	a classical perspective. Many connections to other well-known techniques
	from machine learning and statistics are discussed, including support-vector
	machines, neural networks, splines, regularization networks, relevance
	vector machines and others. Theoretical issues including learning
	curves and the PAC-Bayesian framework are treated, and several approximation
	methods for learning with large datasets are discussed. The book
	contains illustrative examples and exercises, and code and datasets
	are available on the Web. Appendixes provide mathematical background
	and a discussion of Gaussian Markov processes.},
  howpublished = {Hardcover},
  isbn = {026218253X},
  keywords = {machine\_learning}
}

@article{Raudys2000How,
  author = {S. Raudys},
  title = {How good are support vector machines?},
  journal = {Neural {N}etw.},
  year = {2000},
  volume = {13},
  pages = {17--19},
  number = {1},
  month = {Jan},
  abstract = {Support vector ({SV}) machines are useful tools to classify populations
	characterized by abrupt decreases in density functions. {A}t least
	for one class of {G}aussian data model the {SV} classifier is not
	an optimal one according to a mean generalization error criterion.
	{I}n real world problems, we have neither {G}aussian populations
	nor data with sharp linear boundaries. {T}hus, the {SV} (maximal
	margin) classifiers can lose against other methods where more than
	a fixed number of supporting vectors contribute in determining the
	final weights of the classification and prediction rules. {A} good
	alternative to the linear {SV} machine is a specially trained and
	optimally stopped {SLP} in a transformed feature space obtained after
	decorrelating and scaling the multivariate data.},
  keywords = {Automated, Learning, Models, Neural Networks (Computer), Neurological,
	Pattern Recognition, 10935455},
  pii = {S0893608099000970}
}

@article{Ravdin1996computer,
  author = {Ravdin, P. M.},
  title = {A computer program to assist in making breast cancer adjuvant therapy
	decisions.},
  journal = {Semin Oncol},
  volume = {23},
  number = {1 Suppl 2},
  pages = {43--50},
  month = {Feb},
  year = {1996},
  pmid = {8614844},
  institution = {Division of Oncology, University of Texas Health Sciences Center,
	San Antonio, 78284, USA.},
  keywords = {Adult; Age Factors; Aged; Aged, 80 and over; Breast Neoplasms, drug
	therapy/mortality/surgery; Chemotherapy, Adjuvant; Decision Making;
	Female; Humans; Life Expectancy; Life Tables; Middle Aged; Proportional
	Hazards Models; Risk Factors; SEER Program; Software; Survival Rate;
	Treatment Outcome},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  timestamp = {2012.03.02},
  abstract = {This report describes a computer program designed to assist health
	care professionals in making projections of the average benefit of
	systemic adjuvant therapy for individual breast cancer patients.
	It requires as input patient age (used to make projections of natural
	mortality), an estimate of breast cancer-related mortality at 5 years
	(used to make projections of breast cancer-specific mortality), and
	the proportional risk reduction for breast cancer mortality expected
	for the adjuvant therapy (with included tables from the Early Breast
	Cancer Trialist's 1992 meta-analysis). The program uses life table
	analytical techniques to make projections of outcome in three scenarios:
	that the breast cancer never occurred, that the breast cancer patient
	received definitive local therapy but no adjuvant systemic therapy,
	and that the patient received adjuvant therapy. The outcome projections
	are given for total, natural (non-breast cancer-related), and breast
	cancer-related mortality at several time points and also of total
	remaining life expectancy. These estimates are currently widely made
	by clinicians by nonnumerical techniques. Computer-based tools can
	serve as valuable aids in physician and patient education and in
	the process of informed decision making.}
}

@article{Ravdin1995computer,
  author = {Ravdin, P. M.},
  title = {A computer based program to assist in adjuvant therapy decisions
	for individual breast cancer patients.},
  journal = {Bull Cancer},
  volume = {82 Suppl 5},
  pages = {561s--564s},
  month = {Dec},
  year = {1995},
  pmid = {8680066},
  institution = {Division of Medical Oncology, University of Texas Health Sciences
	Center, San Antonio 78284, USA.},
  keywords = {Aged; Breast Neoplasms, drug therapy/mortality; Chemotherapy, Adjuvant;
	Decision Making, Computer-Assisted; Education, Medical; Female; Humans;
	Life Tables; Middle Aged; Patient Participation; Prognosis; Software;
	Survival Rate},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  timestamp = {2012.03.02},
  abstract = {This paper describes a personal computer based tool to aid in decision
	making about whether a woman should receive adjuvant therapy for
	breast cancer. This tool can assist in engaging women with primary
	breast cancer in the discussion about: 1) her risk of breast cancer
	related mortality if she receives only local control measures, but
	no systemic adjuvant therapy, 2) how much receiving adjuvant therapy
	may reduce this risk, and 3) what the impact of receiving the adjuvant
	systemic therapy is in terms of survival. The tool utilizes life
	table analytical techniques to project outcomes after entry of patient
	age (used to calculate natural mortality rates), estimated risk of
	breast cancer related mortality (with a help tool allowing the physician
	to use estimates based on national database information), and estimate
	of the efficacy of adjuvant chemotherapy (with included tables of
	estimates based on the Early Breast Cancer Trialists' meta-analysis).
	Computer based tools can serve as valuable aids in patient and physician
	education, and the process of informed decision making.}
}

@article{Ravdin2001Computer,
  author = {Ravdin, P. M. and Siminoff, L. A. and Davis, G. J. and Mercer, M.
	B. and Hewlett, J. and Gerson, N. and Parker, H. L.},
  title = {Computer program to assist in making decisions about adjuvant therapy
	for women with early breast cancer.},
  journal = {J Clin Oncol},
  year = {2001},
  volume = {19},
  pages = {980--991},
  number = {4},
  month = {Feb},
  abstract = {The goal of the computer program Adjuvant! is to allow health professionals
	and their patients with early breast cancer to make more informed
	decisions about adjuvant therapy. Actuarial analysis was used to project
	outcomes of patients with and without adjuvant therapy based on estimates
	of prognosis largely derived from Surveillance, Epidemiology, and
	End-Results data and estimates of the efficacy of adjuvant therapy
	based on the 1998 overviews of randomized trials of adjuvant therapy.
	These estimates can be refined using the Prognostic Factor Impact
	Calculator, which uses a Bayesian method to make adjustments based
	on relative risks conferred and prevalence of positive test results. From
	the entries of patient information (age, menopausal status, comorbidity
	estimate) and tumor staging and characteristics (tumor size, number
	of positive axillary nodes, estrogen receptor status), baseline prognostic
	estimates are made. Estimates for the efficacy of endocrine therapy
	(5 years of tamoxifen) and of polychemotherapy (cyclophosphamide/methotrexate/fluorouracil-like
	regimens, or anthracycline-based therapy, or therapy based on both
	an anthracycline and a taxane) can then be used to project outcomes
	presented in both numerical and graphical formats. Outcomes for overall
	survival and disease-free survival and the improvement seen in clinical
	trials, are reasonably modeled by Adjuvant!, although an ideal validation
	for all patient subsets with all treatment options is not possible.
	Additional speculative estimates of years of remaining life expectancy
	and long-term survival curves can also be produced. Help files supply
	general information about breast cancer. The program's Internet links
	supply national treatment guidelines, cooperative group trial options,
	and other related information. The computer program Adjuvant! can
	play practical and educational roles in clinical settings.},
  institution = {San Antonio, TX 78284, USA. pravdin@swog.org},
  keywords = {Actuarial Analysis; Breast Neoplasms, mortality/therapy; Chemotherapy,
	Adjuvant; Decision Making; Female; Humans; Prognosis; Software; Survival
	Analysis},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {11181660},
  timestamp = {2012.03.02}
}

@incollection{Ravikumar2008SpAM,
  author = {Pradeep Ravikumar and Han Liu and John Lafferty and Larry Wasserman},
  title = {{SpAM}: Sparse Additive Models},
  booktitle = {Advances in Neural Information Processing Systems 20},
  publisher = {MIT Press},
  year = {2008},
  editor = {J. C. Platt and D. Koller and Y. Singer and S. Roweis},
  pages = {1201--1208},
  address = {Cambridge, MA}
}

@incollection{Ravikumar2009Model,
  author = {Pradeep Ravikumar and Garvesh Raskutti and Martin Wainwright and
	Bin Yu},
  title = {Model Selection in {Gaussian} Graphical Models: High-Dimensional Consistency
	of {$\ell_1$}-regularized {MLE}},
  booktitle = {Advances in Neural Information Processing Systems 21},
  publisher = {MIT Press},
  year = {2009},
  editor = {D. Koller and D. Schuurmans and Y. Bengio and L. Bottou},
  pages = {1329--1336}
}

@article{Ravikumar2011High-dimensional,
  author = {Ravikumar, P. and Wainwright, M. J. and Raskutti, G. and Yu, B.},
  title = {High-dimensional covariance estimation by minimizing {$\ell_1$}-penalized
	log-determinant divergence},
  journal = {Electron. J. Statist.},
  year = {2011},
  volume = {5},
  pages = {935--980},
  doi = {10.1214/11-EJS631},
  pdf = {../local/Ravikumar2011High-dimensional.pdf},
  file = {Ravikumar2011High-dimensional.pdf:Ravikumar2011High-dimensional.pdf:PDF},
  owner = {jp},
  timestamp = {2012.03.20},
  url = {http://dx.doi.org/10.1214/11-EJS631}
}

@article{Ray1957Finding,
  title = {Finding Chemical Records by Digital Computers},
  author = {Ray, L. C. and Kirsch, R. A.},
  journal = {Science},
  volume = {126},
  pages = {814},
  year = {1957}
}

@article{Raychaudhuri2000Principal,
  author = {Raychaudhuri, S. and Stuart, J. M. and Altman, R. B.},
  title = {Principal components analysis to summarize microarray experiments:
	application to sporulation time series},
  journal = {Pac. Symp. Biocomput.},
  year = {2000},
  pages = {455--466},
  abstract = {A series of microarray experiments produces observations of differential
	expression for thousands of genes across multiple conditions. It
	is often not clear whether a set of experiments are measuring fundamentally
	different gene expression states or are measuring similar states
	created through different mechanisms. It is useful, therefore, to
	define a core set of independent features for the expression states
	that allow them to be compared directly. Principal components analysis
	(PCA) is a statistical technique for determining the key variables
	in a multidimensional data set that explain the differences in the
	observations, and can be used to simplify the analysis and visualization
	of multidimensional data sets. We show that application of PCA to
	expression data (where the experimental conditions are the variables,
	and the gene expression measurements are the observations) allows
	us to summarize the ways in which gene responses vary under different
	conditions. Examination of the components also provides insight into
	the underlying factors that are measured in the experiments. We applied
	PCA to the publicly released yeast sporulation data set (Chu et al.
	1998). In that work, 7 different measurements of gene expression
	were made over time. PCA on the time-points suggests that much of
	the observed variability in the experiment can be summarized in just
	2 components--i.e. 2 variables capture most of the information. These
	components appear to represent (1) overall induction level and (2)
	change in induction level over time. We also examined the clusters
	proposed in the original paper, and show how they are manifested
	in principal component space. Our results are available on the internet
	at http://www.smi.stanford.edu/project/helix/PCArray .},
  pdf = {../local/Raychaudhuri2000Principal.pdf},
  file = {Raychaudhuri2000Principal.pdf:Raychaudhuri2000Principal.pdf:PDF},
  institution = {Stanford Medical Informatics, Stanford University, CA 94305-5479,
	USA. sxr@smi.stanford.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {10902193},
  timestamp = {2011.10.03},
  url = {http://helix-web.stanford.edu/psb00/raychaudhuri.pdf}
}

@article{Rea2000Regulation,
  author = {S. Rea and F. Eisenhaber and D. O'Carroll and B. D. Strahl and Z.
	W. Sun and M. Schmid and S. Opravil and K. Mechtler and C. P. Ponting
	and C. D. Allis and T. Jenuwein},
  title = {Regulation of chromatin structure by site-specific histone {H3} methyltransferases.},
  journal = {Nature},
  year = {2000},
  volume = {406},
  pages = {593--599},
  number = {6796},
  month = {Aug},
  abstract = {The organization of chromatin into higher-order structures influences
	chromosome function and epigenetic gene regulation. Higher-order
	chromatin has been proposed to be nucleated by the covalent modification
	of histone tails and the subsequent establishment of chromosomal
	subdomains by non-histone modifier factors. Here we show that human
	SUV39H1 and murine Suv39h1--mammalian homologues of Drosophila Su(var)3-9
	and of Schizosaccharomyces pombe clr4--encode histone H3-specific
	methyltransferases that selectively methylate lysine 9 of the amino
	terminus of histone H3 in vitro. We mapped the catalytic motif to
	the evolutionarily conserved SET domain, which requires adjacent
	cysteine-rich regions to confer histone methyltransferase activity.
	Methylation of lysine 9 interferes with phosphorylation of serine
	10, but is also influenced by pre-existing modifications in the amino
	terminus of H3. In vivo, deregulated SUV39H1 or disrupted Suv39h
	activity modulate H3 serine 10 phosphorylation in native chromatin
	and induce aberrant mitotic divisions. Our data reveal a functional
	interdependence of site-specific H3 tail modifications and suggest
	a dynamic mechanism for the regulation of higher-order chromatin.},
  doi = {10.1038/35020506},
  institution = {Research Institute of Molecular Pathology, The Vienna Biocenter,
	Austria.},
  keywords = {Amino Acid Sequence; Animals; Catalytic Domain; Chromatin, chemistry/metabolism;
	Drosophila; Hela Cells; Histone-Lysine N-Methyltransferase; Humans;
	Lysine, metabolism; Methylation; Methyltransferases, genetics/metabolism;
	Mice; Molecular Sequence Data; Phosphorylation; Protein Conformation;
	Protein Methyltransferases; Protein Structure, Tertiary; Recombinant
	Proteins, metabolism; Repressor Proteins, genetics/metabolism; Sequence
	Homology, Amino Acid; Serine, metabolism; Substrate Specificity},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {10949293},
  timestamp = {2010.11.23},
  url = {http://dx.doi.org/10.1038/35020506}
}

@article{Read1977Graph,
  author = {R. C. Read and D. G. Corneil},
  title = {The graph isomorphism disease},
  journal = {J. Graph Theor.},
  year = {1977},
  volume = {1},
  pages = {339--363},
  number = {4},
  publisher = {Wiley Periodicals}
}

@article{Reche2002Prediction,
  author = {Reche, P. A. and Glutting, J.-P. and Reinherz, E. L.},
  title = {{P}rediction of {MHC} class {I} binding peptides using profile motifs.},
  journal = {Hum. Immunol.},
  year = {2002},
  volume = {63},
  pages = {701--709},
  number = {9},
  month = {Sep},
  abstract = {Peptides that bind to a given major histocompatibility complex (MHC)
	molecule share sequence similarity. Therefore, a position specific
	scoring matrix (PSSM) or profile derived from a set of peptides known
	to bind to a specific MHC molecule would be a suitable predictor
	of whether other peptides might bind, thus anticipating possible
	T-cell epitopes within a protein. In this approach, the binding potential
	of any peptide sequence (query) to a given MHC molecule is linked
	to its similarity to a group of aligned peptides known to bind to
	that MHC, and can be obtained by comparing the query to the PSSM.
	This article describes the derivation of alignments and profiles
	from a collection of peptides known to bind a specific MHC, compatible
	with the structural and molecular basis of the peptide-MHC class
	I (MHCI) interaction. Moreover, in order to apply these profiles
	to the prediction of peptide-MHCI binding, we have developed a new
	search algorithm (RANKPEP) that ranks all possible peptides from
	an input protein using the PSSM coefficients. The predictive power
	of the method was evaluated by running RANKPEP on proteins known
	to bear MHCI K(b)- and D(b)-restricted T-cell epitopes. Analysis
	of the results indicates that > 80\% of these epitopes are among
	the top 2\% of scoring peptides. Prediction of peptide-MHC binding
	using a variety of MHCI-specific PSSMs is available on line at our
	RANKPEP web server (www.mifoundation.org/Tools/rankpep.html). In
	addition, the RANKPEP server also allows the user to enter additional
	profiles, making the server a powerful and versatile computational
	biology benchmark for the prediction of peptide-MHC binding.},
  keywords = {immunoinformatics},
  pii = {S0198885902004329},
  pmid = {12175724},
  timestamp = {2007.01.25}
}

@article{Reche2004Enhancement,
  author = {Pedro A Reche and John-Paul Glutting and Hong Zhang and Ellis L Reinherz},
  title = {Enhancement to the {RANKPEP} resource for the prediction of peptide
	binding to {MHC} molecules using profiles.},
  journal = {Immunogenetics},
  year = {2004},
  volume = {56},
  pages = {405--419},
  number = {6},
  month = {Sep},
  abstract = {We introduced previously an on-line resource, RANKPEP that uses position
	specific scoring matrices (PSSMs) or profiles for the prediction
	of peptide-MHC class I (MHCI) binding as a basis for CD8 T-cell epitope
	identification. Here, using PSSMs that are structurally consistent
	with the binding mode of MHC class II (MHCII) ligands, we have extended
	RANKPEP to prediction of peptide-MHCII binding and anticipation of
	CD4 T-cell epitopes. Currently, 88 and 50 different MHCI and MHCII
	molecules, respectively, can be targeted for peptide binding predictions
	in RANKPEP. Because appropriate processing of antigenic peptides
	must occur prior to major histocompatibility complex (MHC) binding,
	cleavage site prediction methods are important adjuncts for T-cell
	epitope discovery. Given that the C-terminus of most MHCI-restricted
	epitopes results from proteasomal cleavage, we have modeled the cleavage
	site from known MHCI-restricted epitopes using statistical language
	models. The RANKPEP server now determines whether the C-terminus
	of any predicted MHCI ligand may result from such proteasomal cleavage.
	Also implemented is a variability masking function. This feature
	focuses prediction on conserved rather than highly variable protein
	segments encoded by infectious genomes, thereby offering identification
	of invariant T-cell epitopes to thwart mutation as an immune evasion
	mechanism.},
  doi = {10.1007/s00251-004-0709-7},
  keywords = {Algorithms; Amino Acid Motifs; Antigen Presentation; Epitopes, T-Lymphocyte;
	Histocompatibility Antigens Class I; Histocompatibility Antigens
	Class II; Humans; Models, Molecular; Peptide Fragments; Research
	Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.; T-Lymphocytes},
  owner = {jacob},
  pmid = {15349703},
  timestamp = {2006.08.30},
  url = {http://dx.doi.org/10.1007/s00251-004-0709-7}
}

@article{Redfield1927Theory,
  author = {J. H. Redfield},
  title = {The theory of group-reduced distributions},
  journal = {Amer. J. Math.},
  year = {1927},
  volume = {49},
  pages = {433--455}
}

@article{Redon2006Global,
  author = {Richard Redon and Shumpei Ishikawa and Karen R Fitch and Lars Feuk
	and George H Perry and T. Daniel Andrews and Heike Fiegler and Michael
	H Shapero and Andrew R Carson and Wenwei Chen and Eun Kyung Cho and
	Stephanie Dallaire and Jennifer L Freeman and Juan R Gonz{\'a}lez and
	M{\`o}nica Gratac{\`o}s and Jing Huang and Dimitrios Kalaitzopoulos and Daisuke
	Komura and Jeffrey R MacDonald and Christian R Marshall and Rui Mei
	and Lyndal Montgomery and Kunihiro Nishimura and Kohji Okamura and
	Fan Shen and Martin J Somerville and Joelle Tchinda and Armand Valsesia
	and Cara Woodwark and Fengtang Yang and Junjun Zhang and Tatiana
	Zerjal and Jane Zhang and Lluis Armengol and Donald F Conrad and
	Xavier Estivill and Chris Tyler-Smith and Nigel P Carter and Hiroyuki
	Aburatani and Charles Lee and Keith W Jones and Stephen W Scherer
	and Matthew E Hurles},
  title = {Global variation in copy number in the human genome.},
  journal = {Nature},
  year = {2006},
  volume = {444},
  pages = {444--454},
  number = {7118},
  month = {Nov},
  abstract = {Copy number variation (CNV) of DNA sequences is functionally significant
	but has yet to be fully ascertained. We have constructed a first-generation
	CNV map of the human genome through the study of 270 individuals
	from four populations with ancestry in Europe, Africa or Asia (the
	HapMap collection). DNA from these individuals was screened for CNV
	using two complementary technologies: single-nucleotide polymorphism
	(SNP) genotyping arrays, and clone-based comparative genomic hybridization.
	A total of 1,447 copy number variable regions (CNVRs), which can
	encompass overlapping or adjacent gains or losses, covering 360 megabases
	(12\% of the genome) were identified in these populations. These
	CNVRs contained hundreds of genes, disease loci, functional elements
	and segmental duplications. Notably, the CNVRs encompassed more nucleotide
	content per genome than SNPs, underscoring the importance of CNV
	in genetic diversity and evolution. The data obtained delineate linkage
	disequilibrium patterns for many CNVs, and reveal marked variation
	in copy number among populations. We also demonstrate the utility
	of this resource for genetic disease studies.},
  doi = {10.1038/nature05329},
  institution = {The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus,
	Hinxton, Cambridge CB10 1SA, UK.},
  keywords = {Chromosome Mapping; Gene Dosage; Genetic Variation; Genetics, Population;
	Genome, Human; Genomics, methods; Genotype; Humans; Linkage Disequilibrium;
	Molecular Diagnostic Techniques; Oligonucleotide Array Sequence Analysis,
	methods; Polymorphism, Single Nucleotide},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nature05329},
  pmid = {17122850},
  timestamp = {2010.08.01},
  url = {http://dx.doi.org/10.1038/nature05329}
}

@article{Rege2004Parallel,
  author = {Kaushal Rege and Asif Ladiwala and Nihal Tugcu and Curt M Breneman
	and Steven M Cramer},
  title = {Parallel screening of selective and high-affinity displacers for
	proteins in ion-exchange systems.},
  journal = {J {C}hromatogr {A}},
  year = {2004},
  volume = {1033},
  pages = {19--28},
  number = {1},
  month = {Apr},
  abstract = {This paper employs a parallel batch screening technique for the identification
	of both selective and high-affinity displacers for a model binary
	mixture of proteins in a cation-exchange system. {A} variety of molecules
	were screened as possible displacers for the proteins ribonuclease
	{A} ({RNA}se{A}) and alpha-chymotrypsinogen {A} (alpha-chy{A}) on
	high performance {S}epharose {SP}. {T}he batch screening data for
	each protein was used to select leads for selective and high-affinity
	displacers and column experiments were carried out to evaluate the
	performance of the selected leads. {T}he data from the batch displacements
	was also employed to generate quantitative structure-efficacy relationship
	({QSER}) models based on a support vector machine regression approach.
	{T}he resulting models had high correlation coefficients and were
	able to predict the behaviour of molecules not included in the training
	set. {T}he descriptors selected in the {QSER} models for both proteins
	were examined to provide insights into factors influencing displacer
	selectivity in ion-exchange systems. {T}he results presented in this
	paper demonstrate that this parallel batch screening-{QSER} approach
	can be employed for the identification of selective and high-affinity
	displacers for protein mixtures.}
}

@article{Rehm2009CellDeathDiff,
  author = {Rehm, M. and Huber, H. J. and Hellwig, C. T. and Anguissola, S. and
	Dussmann, H. and Prehn, J. H. M.},
  title = {Dynamics of outer mitochondrial membrane permeabilization during
	apoptosis},
  journal = {Cell Death and Differentiation},
  year = {2009},
  volume = {16},
  pages = {613--623},
  issn = {1350-9047},
  keywords = {csbcbook},
  url = {http://www.nature.com/cdd/journal/v16/n4/suppinfo/cdd2008187s1.html}
}

@article{Rehm2009Dynamics,
  author = {Rehm, M. and Huber, H. J. and Hellwig, C. T. and Anguissola, S. and
	Dussmann, H. and Prehn, J. H. M.},
  title = {Dynamics of outer mitochondrial membrane permeabilization during
	apoptosis},
  journal = {Cell Death Differ.},
  year = {2009},
  volume = {16},
  pages = {613--623},
  issn = {1350-9047},
  keywords = {csbcbook},
  owner = {jp},
  timestamp = {2012.05.11},
  url = {http://www.nature.com/cdd/journal/v16/n4/suppinfo/cdd2008187s1.html}
}

@article{Reinders2008Genome,
  author = {Jon Reinders and Celine Delucinge Vivier and Gregory Theiler and
	Didier Chollet and Patrick Descombes and Jerzy Paszkowski},
  title = {Genome-wide, high-resolution {DNA} methylation profiling using bisulfite-mediated
	cytosine conversion},
  journal = {Genome Res.},
  year = {2008},
  volume = {18},
  pages = {469--476},
  keywords = {csbcbook, csbcbook-ch2}
}

@article{Remm2001Automatic,
  author = {Remm, M. and Storm, C. E. and Sonnhammer, E. L.},
  title = {Automatic clustering of orthologs and in-paralogs from pairwise species
	comparisons},
  journal = {J. Mol. Biol.},
  year = {2001},
  volume = {314},
  pages = {1041--1052},
  number = {5},
  month = {Dec},
  abstract = {Orthologs are genes in different species that originate from a single
	gene in the last common ancestor of these species. Such genes have
	often retained identical biological roles in the present-day organisms.
	It is hence important to identify orthologs for transferring functional
	information between genes in different organisms with a high degree
	of reliability. For example, orthologs of human proteins are often
	functionally characterized in model organisms. Unfortunately, orthology
	analysis between human and e.g. invertebrates is often complex because
	of large numbers of paralogs within protein families. Paralogs that
	predate the species split, which we call out-paralogs, can easily
	be confused with true orthologs. Paralogs that arose after the species
	split, which we call in-paralogs, however, are bona fide orthologs
	by definition. Orthologs and in-paralogs are typically detected with
	phylogenetic methods, but these are slow and difficult to automate.
	Automatic clustering methods based on two-way best genome-wide matches
	on the other hand, have so far not separated in-paralogs from out-paralogs
	effectively. We present a fully automatic method for finding orthologs
	and in-paralogs from two species. Ortholog clusters are seeded with
	a two-way best pairwise match, after which an algorithm for adding
	in-paralogs is applied. The method bypasses multiple alignments and
	phylogenetic trees, which can be slow and error-prone steps in classical
	ortholog detection. Still, it robustly detects complex orthologous
	relationships and assigns confidence values for both orthologs and
	in-paralogs. The program, called INPARANOID, was tested on all completely
	sequenced eukaryotic genomes. To assess the quality of INPARANOID
	results, ortholog clusters were generated from a dataset of worm
	and mammalian transmembrane proteins, and were compared to clusters
	derived by manual tree-based ortholog detection methods. This study
	led to the identification with a high degree of confidence of over
	a dozen novel worm-mammalian ortholog assignments that were previously
	undetected because of shortcomings of phylogenetic methods. A WWW
	server that allows searching for orthologs between human and several
	fully sequenced genomes is installed at http://www.cgb.ki.se/inparanoid/.
	This is the first comprehensive resource with orthologs of all fully
	sequenced eukaryotic genomes. Programs and tables of orthology assignments
	are available from the same location.},
  doi = {10.1006/jmbi.2000.5197},
  pdf = {../local/Remm2001Automatic.pdf},
  file = {Remm2001Automatic.pdf:Remm2001Automatic.pdf:PDF},
  institution = {Center for Genomics and Bioinformatics, Karolinska Institutet, S-17177
	Stockholm, Sweden.},
  owner = {jp},
  pii = {S0022-2836(00)95197-0},
  pmid = {11743721},
  timestamp = {2008.10.02},
  url = {http://dx.doi.org/10.1006/jmbi.2000.5197}
}

@article{Ren2000Genome-wide,
  author = {Ren, B. and Robert, F. and Wyrick, J. J. and Aparicio, O. and Jennings,
	E. G. and Simon, I. and Zeitlinger, J. and Schreiber, J. and Hannett,
	N. and Kanin, E. and Volkert, T. L. and Wilson, C. J. and Bell, S.
	P. and Young, R. A.},
  title = {Genome-wide location and function of {DNA} binding proteins.},
  journal = {Science},
  year = {2000},
  volume = {290},
  pages = {2306--2309},
  number = {5500},
  month = {Dec},
  abstract = {Understanding how DNA binding proteins control global gene expression
	and chromosomal maintenance requires knowledge of the chromosomal
	locations at which these proteins function in vivo. We developed
	a microarray method that reveals the genome-wide location of DNA-bound
	proteins and used this method to monitor binding of gene-specific
	transcription activators in yeast. A combination of location and
	expression profiles was used to identify genes whose expression is
	directly controlled by Gal4 and Ste12 as cells respond to changes
	in carbon source and mating pheromone, respectively. The results
	identify pathways that are coordinately regulated by each of the
	two activators and reveal previously unknown functions for Gal4 and
	Ste12. Genome-wide location analysis will facilitate investigation
	of gene regulatory networks, gene function, and genome maintenance.},
  doi = {10.1126/science.290.5500.2306},
  pdf = {../local/Ren2000Genome-wide.pdf},
  file = {Ren2000Genome-wide.pdf:Ren2000Genome-wide.pdf:PDF},
  institution = {Whitehead Institute for Biomedical Research, Nine Cambridge Center,
	Cambridge, MA 02142, USA.},
  owner = {jp},
  pii = {290/5500/2306},
  pmid = {11125145},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1126/science.290.5500.2306}
}

@article{Ren2005HIV,
  author = {J. Ren and D. K. Stammers},
  title = {{HIV} reverse transcriptase structures: designing new inhibitors
	and understanding mechanisms of drug resistance},
  journal = {Trends Pharmacol. Sci.},
  year = {2005},
  volume = {26},
  pages = {4--7},
  owner = {mahe},
  timestamp = {2006.09.07}
}

@article{Ren2006siRecords,
  author = {Ren, Y. and Gong, W. and Xu, Q. and Zheng, X. and Lin, D. and Wang,
	Y. and Li, T.},
  title = {si{R}ecords: an extensive database of mammalian si{RNA}s with efficacy
	ratings.},
  journal = {Bioinformatics},
  year = {2006},
  month = {Jan},
  abstract = {SUMMARY: Short interfering RNAs have been gaining popularity as the
	gene knock-down tool of choice by many researchers due to the clean
	nature of their workings as well as the technical simplicity and
	cost efficiency in their applications. We have constructed siRecords,
	a database of siRNAs experimentally tested by researchers with consistent
	efficacy ratings. This database will help siRNA researchers develop
	more reliable siRNA design rules; in the mean time, benefit experimental
	researchers directly by providing them with information about the
	siRNAs that have been experimentally tested against the genes of
	their interest. Currently, more than 4,100 carefully annotated siRNA
	sequences obtained from more than 1,200 published siRNA studies
	are hosted in siRecords. This database will continue to expand as
	more experimentally tested siRNAs are published. AVAILABILITY: The
	siRecords database can be accessed at http://siRecords.umn.edu/siRecords/.},
  doi = {10.1093/bioinformatics/btl026},
  keywords = {sirna},
  owner = {vert},
  pii = {btl026},
  pmid = {16443930},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl026}
}

@inproceedings{Rennie2005Fast,
  author = {Rennie, J. D. M. and Srebro, N.},
  title = {Fast maximum margin matrix factorization for collaborative prediction},
  booktitle = {Proceedings of the 22nd International Conference on Machine Learning},
  year = {2005},
  pages = {713--719},
  address = {New York, NY, USA},
  publisher = {ACM Press},
  doi = {10.1145/1102351.1102441},
  timestamp = {2007.10.22},
  url = {http://dx.doi.org/10.1145/1102351.1102441}
}

@article{Rensing2005Protein,
  author = {Stefan A Rensing and Dana Fritzowsky and Daniel Lang and Ralf Reski},
  title = {Protein encoding genes in an ancient plant: analysis of codon usage,
	retained genes and splice sites in a moss, {P}hyscomitrella patens.},
  journal = {B{MC} {G}enomics},
  year = {2005},
  volume = {6},
  pages = {43},
  number = {1},
  month = {Mar},
  abstract = {B{ACKGROUND}: {T}he moss {P}hyscomitrella patens is an emerging plant
	model system due to its high rate of homologous recombination, haploidy,
	simple body plan, physiological properties as well as phylogenetic
	position. {A}vailable {EST} data was clustered and assembled, and
	provided the basis for a genome-wide analysis of protein encoding
	genes. {RESULTS}: {W}e have clustered and assembled {P}hyscomitrella
	patens {EST} and {CDS} data in order to represent the transcriptome
	of this non-seed plant. {C}lustering of the publicly available data
	and subsequent prediction resulted in a total of 19,081 non-redundant
	{ORF}. {O}f these putative transcripts, approximately 30\% have a
	homolog in both rice and {A}rabidopsis transcriptome. {M}ore than
	130 transcripts are not present in seed plants but can be found in
	other kingdoms. {T}hese potential "retained genes" might have been
	lost during seed plant evolution. {F}unctional annotation of these
	genes reveals unequal distribution among taxonomic groups and intriguing
	putative functions such as cytotoxicity and nucleic acid repair.
	{W}hereas introns in the moss are larger on average than in the seed
	plant {A}rabidopsis thaliana, position and amount of introns are
	approximately the same. {C}ontrary to {A}rabidopsis, where {CDS}
	contain on average 44\% {G}/{C}, in {P}hyscomitrella the average
	{G}/{C} content is 50\%. {I}nterestingly, moss orthologs of {A}rabidopsis
	genes show a significant drift of codon fraction usage, towards the
	seed plant. {W}hile averaged codon bias is the same in {P}hyscomitrella
	and {A}rabidopsis, the distribution pattern is different, with 15\%
	of moss genes being unbiased. {S}pecies-specific, sensitive and selective
	splice site prediction for {P}hyscomitrella has been developed using
	a dataset of 368 donor and acceptor sites, utilizing a support vector
	machine. {T}he prediction accuracy is better than those achieved
	with tools trained on {A}rabidopsis data. {CONCLUSION}: {A}nalysis
	of the moss transcriptome displays differences in gene structure,
	codon and splice site usage in comparison with the seed plant {A}rabidopsis.
	{P}utative retained genes exhibit possible functions that might explain
	the peculiar physiological properties of mosses. {B}oth the transcriptome
	representation (including a {BLAST} and retrieval service) and splice
	site prediction have been made available on http://www.cosmoss.org,
	setting the basis for assembly and annotation of the {P}hyscomitrella
	genome, of which draft shotgun sequences will become available in
	2005.},
  doi = {10.1186/1471-2164-6-43},
  pdf = {../local/Rensing2005Protein.pdf},
  file = {Rensing2005Protein.pdf:local/Rensing2005Protein.pdf:PDF},
  keywords = {biosvm},
  pii = {1471-2164-6-43},
  url = {http://dx.doi.org/10.1186/1471-2164-6-43}
}

@article{Res2005evolution,
  author = {I. Res and I. Mihalek and O. Lichtarge},
  title = {An evolution based classifier for prediction of protein interfaces
	without using protein structures.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2496--2501},
  number = {10},
  month = {May},
  abstract = {M{OTIVATION}: {T}he number of available protein structures still lags
	far behind the number of known protein sequences. {T}his makes it
	important to predict which residues participate in protein-protein
	interactions using only sequence information. {F}ew studies have
	tackled this problem until now. {RESULTS}: {W}e applied support vector
	machines to sequences in order to generate a classification of all
	protein residues into those that are part of a protein interface
	and those that are not. {F}or the first time evolutionary information
	was used as one of the attributes and this inclusion of evolutionary
	importance rankings improves the classification. {L}eave-one-out
	cross-validation experiments show that prediction accuracy reaches
	64\%.},
  doi = {10.1093/bioinformatics/bti340},
  pdf = {../local/Res2005evolution.pdf},
  file = {Res2005evolution.pdf:local/Res2005evolution.pdf:PDF},
  keywords = {biosvm},
  pii = {bti340},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti340}
}

@phdthesis{Reyal2009Analyse,
  author = {Reyal, F.},
  title = {Analyse du profil d'expression par la technique des puces {\`a} {ADN}.
	Application \`a la caract\'erisation mol\'eculaire et \`a la d\'etermination
	du pronostic des cancers canalaires infiltrants du sein.},
  school = {Universit\'e Paris 11},
  year = {2009},
  keywords = {breastcancer, microarray},
  owner = {jp},
  timestamp = {2009.10.31}
}

@article{Reyal2008comprehensive,
  author = {Reyal, F. and van Vliet, M. H. and Armstrong, N. J. and Horlings,
	H. M. and de Visser, K. E. and Kok, M. and Teschendorff, A. E. and
	Mook, S. and van't Veer, L. and Caldas, C. and Salmon, R. J.
	and van de Vijver, M. J. and Wessels, L. F. A.},
  title = {A comprehensive analysis of prognostic signatures reveals the high
	predictive capacity of the proliferation, immune response and {RNA}
	splicing modules in breast cancer},
  journal = {Breast Cancer Res.},
  year = {2008},
  volume = {10},
  pages = {R93},
  number = {6},
  doi = {10.1186/bcr2192},
  pdf = {../local/Reyal2008comprehensive.pdf},
  file = {Reyal2008comprehensive.pdf:Reyal2008comprehensive.pdf:PDF},
  owner = {jp},
  timestamp = {2011.01.13},
  url = {http://dx.doi.org/10.1186/bcr2192}
}

@article{Reynolds2004Rational,
  author = {Reynolds, A. and Leake, D. and Boese, Q. and Scaringe, S. and Marshall,
	W. S. and Khvorova, A.},
  title = {Rational si{RNA} design for {RNA} interference.},
  journal = {Nat. {B}iotechnol.},
  year = {2004},
  volume = {22},
  pages = {326--330},
  number = {3},
  month = {Mar},
  abstract = {Short-interfering {RNA}s suppress gene expression through a highly
	regulated enzyme-mediated process called {RNA} interference ({RNA}i).
	{RNA}i involves multiple {RNA}-protein interactions characterized
	by four major steps: assembly of si{RNA} with the {RNA}-induced silencing
	complex ({RISC}), activation of the {RISC}, target recognition and
	target cleavage. {T}hese interactions may bias strand selection during
	si{RNA}-{RISC} assembly and activation, and contribute to the overall
	efficiency of {RNA}i. {T}o identify si{RNA}-specific features
	likely to contribute to efficient processing at each step, we performed
	a systematic analysis of 180 si{RNA}s targeting the m{RNA} of two
	genes. {E}ight characteristics associated with si{RNA} functionality
	were identified: low {G}/{C} content, a bias towards low internal
	stability at the sense strand 3'-terminus, lack of inverted repeats,
	and sense strand base preferences (positions 3, 10, 13 and 19). {F}urther
	analyses revealed that application of an algorithm incorporating
	all eight criteria significantly improves potent si{RNA} selection.
	{T}his highlights the utility of rational design for selecting potent
	si{RNA}s and facilitating functional gene knockdown studies.},
  doi = {10.1038/nbt936},
  pdf = {../local/Reynolds2004Rational.pdf},
  file = {Reynolds2004Rational.pdf:local/Reynolds2004Rational.pdf:PDF},
  keywords = {sirna},
  url = {http://dx.doi.org/10.1038/nbt936}
}

@article{Rhoades2002Prediction,
  author = {Matthew W Rhoades and Brenda J Reinhart and Lee P Lim and Christopher
	B Burge and Bonnie Bartel and David P Bartel},
  title = {Prediction of plant {microRNA} targets.},
  journal = {Cell},
  year = {2002},
  volume = {110},
  pages = {513--520},
  number = {4},
  month = {Aug},
  abstract = {We predict regulatory targets for 14 Arabidopsis microRNAs (miRNAs)
	by identifying mRNAs with near complementarity. Complementary sites
	within predicted targets are conserved in rice. Of the 49 predicted
	targets, 34 are members of transcription factor gene families involved
	in developmental patterning or cell differentiation. The near-perfect
	complementarity between plant miRNAs and their targets suggests that
	many plant miRNAs act similarly to small interfering RNAs and direct
	mRNA cleavage. The targeting of developmental transcription factors
	suggests that many plant miRNAs function during cellular differentiation
	to clear key regulatory transcripts from daughter cell lineages.},
  pdf = {../local/Rhoades2002Prediction.pdf},
  file = {Rhoades2002Prediction.pdf:Rhoades2002Prediction.pdf:PDF},
  institution = {Whitehead Institute for Biomedical Research, 9 Cambridge Center,
	MA 02142, USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0092867402008632},
  pmid = {12202040},
  timestamp = {2009.10.28}
}

@article{Rhodes2005Integrative,
  author      = {Rhodes, D. R. and Chinnaiyan, A. M.},
  title       = {Integrative analysis of the cancer transcriptome.},
  journal     = {Nat. Genet.},
  year        = {2005},
  month       = {Jun},
  volume      = {37 Suppl},
  pages       = {S31--S37},
  doi         = {10.1038/ng1570},
  url         = {http://dx.doi.org/10.1038/ng1570},
  pmid        = {15920528},
  pii         = {ng1570},
  abstract    = {DNA microarrays have been widely applied to the study of human cancer,
    delineating myriad molecular subtypes of cancer, many of which are
    associated with distinct biological underpinnings, disease progression
    and treatment response. These primary analyses have begun to decipher
    the molecular heterogeneity of cancer, but integrative analyses that
    evaluate cancer transcriptome data in the context of other data sources
    are often capable of extracting deeper biological insight from the
    data. Here we discuss several such integrative computational and
    analytical approaches, including meta-analysis, functional enrichment
    analysis, interactome analysis, transcriptional network analysis
    and integrative model system analysis.},
  institution = {Department of Pathology, Comprehensive Cancer Center, University
    of Michigan Medical School, Ann Arbor, Michigan 48109, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Rhodes2005Integrative.pdf},
  file        = {Rhodes2005Integrative.pdf:Rhodes2005Integrative.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.09.20}
}

@article{Rhodes2005Mining,
  author      = {Rhodes, D. R. and Kalyana-Sundaram, S. and Mahavisno, V. and Barrette,
    T. R. and Ghosh, D. and Chinnaiyan, A. M.},
  title       = {Mining for regulatory programs in the cancer transcriptome.},
  journal     = {Nat. Genet.},
  year        = {2005},
  month       = {Jun},
  volume      = {37},
  number      = {6},
  pages       = {579--583},
  doi         = {10.1038/ng1578},
  url         = {http://dx.doi.org/10.1038/ng1578},
  pmid        = {15920519},
  pii         = {ng1578},
  abstract    = {DNA microarrays have been widely applied to cancer transcriptome analysis.
    The Oncomine database contains a large collection of such data, as
    well as hundreds of derived gene-expression signatures. We studied
    the regulatory mechanisms responsible for gene deregulation in these
    cancer signatures by searching for the coordinate regulation of genes
    with common transcription factor binding sites. We found that genes
    with binding sites for the archetypal cancer transcription factor,
    E2F, were disproportionately overexpressed in a wide variety of cancers,
    whereas genes with binding sites for other transcription factors,
    such as Myc-Max, c-Rel and ATF, were disproportionately overexpressed
    in specific cancer types. These results suggest that alterations
    in pathways activating these transcription factors may be responsible
    for the observed gene deregulation and cancer pathogenesis.},
  institution = {Department of Pathology, University of Michigan Medical School, Ann
    Arbor, Michigan 48109, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Rhodes2005Mining.pdf},
  file        = {Rhodes2005Mining.pdf:Rhodes2005Mining.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.09.21}
}

@article{Rhodes2007Oncomine,
  author      = {Rhodes, Daniel R. and Kalyana-Sundaram, Shanker and Mahavisno, Vasudeva
    and Varambally, Radhika and Yu, Jianjun and Briggs, Benjamin B. and
    Barrette, Terrence R. and Anstet, Matthew J. and Kincead-Beal, Colleen
    and Kulkarni, Prakash and Varambally, Sooryanaryana and Ghosh, Debashis
    and Chinnaiyan, Arul M.},
  title       = {Oncomine 3.0: genes, pathways, and networks in a collection of 18,000
    cancer gene expression profiles.},
  journal     = {Neoplasia},
  year        = {2007},
  month       = {Feb},
  volume      = {9},
  number      = {2},
  pages       = {166--180},
  pmid        = {17356713},
  abstract    = {DNA microarrays have been widely applied to cancer transcriptome analysis;
    however, the majority of such data are not easily accessible or comparable.
    Furthermore, several important analytic approaches have been applied
    to microarray analysis; however, their application is often limited.
    To overcome these limitations, we have developed Oncomine, a bioinformatics
    initiative aimed at collecting, standardizing, analyzing, and delivering
    cancer transcriptome data to the biomedical research community. Our
    analysis has identified the genes, pathways, and networks deregulated
    across 18,000 cancer gene expression microarrays, spanning the majority
    of cancer types and subtypes. Here, we provide an update on the initiative,
    describe the database and analysis modules, and highlight several
    notable observations. Results from this comprehensive analysis are
    available at http://www.oncomine.org.},
  institution = {Department of Pathology, University of Michigan Medical School, Ann
    Arbor, MI 48109-0940, USA.},
  keywords    = {Antineoplastic Agents, pharmacology; Automatic Data Processing; Chromosome
    Mapping; Chromosomes, Human, genetics; Computational Biology, organization
    /&/ administration; Data Collection; Data Display; Data Interpretation,
    Statistical; Databases, Genetic; Drug Design; Gene Expression Profiling,
    statistics /&/ numerical data; Gene Expression Regulation, Neoplastic;
    Genes, Neoplasm; Humans; Internet; Models, Biological; Neoplasm Proteins,
    biosynthesis/chemistry/genetics; Neoplasms, classification/genetics/metabolism;
    Oligonucleotide Array Sequence Analysis; Subtraction Technique; Transcription,
    Genetic},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2012.03.10}
}

@article{Rhodes2005Probabilistic,
  author = {Rhodes, D. R. and Tomlins, S. A. and Varambally, S. and Mahavisno,
	V. and Barrette, T. and Kalyana-Sundaram, S. and Ghosh, D. and Pandey,
	A. and Chinnaiyan, A. M.},
  title = {Probabilistic model of the human protein-protein interaction network.},
  journal = {Nat Biotechnol},
  year = {2005},
  volume = {23},
  pages = {951--959},
  number = {8},
  month = {Aug},
  abstract = {A catalog of all human protein-protein interactions would provide
	scientists with a framework to study protein deregulation in complex
	diseases such as cancer. Here we demonstrate that a probabilistic
	analysis integrating model organism interactome data, protein domain
	data, genome-wide gene expression data and functional annotation
	data predicts nearly 40,000 protein-protein interactions in humans-a
	result comparable to those obtained with experimental and computational
	approaches in model organisms. We validated the accuracy of the predictive
	model on an independent test set of known interactions and also experimentally
	confirmed two predicted interactions relevant to human cancer, implicating
	uncharacterized proteins into definitive pathways. We also applied
	the human interactome network to cancer genomics data and identified
	several interaction subnetworks activated in cancer. This integrative
	analysis provides a comprehensive framework for exploring the human
	protein interaction network.},
  doi = {10.1038/nbt1103},
  pdf = {../local/Rhodes2005Probabilistic.pdf},
  file = {Rhodes2005Probabilistic.pdf:Rhodes2005Probabilistic.pdf:PDF},
  institution = {Bioinformatics Program, Department of Pathology, University of Michigan
	Medical School, Ann Arbor, Michigan 48109, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nbt1103},
  pmid = {16082366},
  timestamp = {2011.08.07},
  url = {http://dx.doi.org/10.1038/nbt1103}
}

@article{Rhodes2004Large-scale,
  author      = {Rhodes, D. R. and Yu, J. and Shanker, K. and Deshpande, N. and Varambally,
    R. and Ghosh, D. and Barrette, T. and Pandey, A. and Chinnaiyan,
    A. M.},
  title       = {Large-scale meta-analysis of cancer microarray data identifies common
    transcriptional profiles of neoplastic transformation and progression.},
  journal     = {Proc. Natl. Acad. Sci. U. S. A.},
  year        = {2004},
  month       = {Jun},
  volume      = {101},
  number      = {25},
  pages       = {9309--9314},
  doi         = {10.1073/pnas.0401994101},
  url         = {http://dx.doi.org/10.1073/pnas.0401994101},
  pmid        = {15184677},
  pii         = {0401994101},
  abstract    = {Many studies have used DNA microarrays to identify the gene expression
    signatures of human cancer, yet the critical features of these often
    unmanageably large signatures remain elusive. To address this, we
    developed a statistical method, comparative metaprofiling, which
    identifies and assesses the intersection of multiple gene expression
    signatures from a diverse collection of microarray data sets. We
    collected and analyzed 40 published cancer microarray data sets,
    comprising 38 million gene expression measurements from >3,700 cancer
    samples. From this, we characterized a common transcriptional profile
    that is universally activated in most cancer types relative to the
    normal tissues from which they arose, likely reflecting essential
    transcriptional features of neoplastic transformation. In addition,
    we characterized a transcriptional profile that is commonly activated
    in various types of undifferentiated cancer, suggesting common molecular
    mechanisms by which cancer cells progress and avoid differentiation.
    Finally, we validated these transcriptional profiles on independent
    data sets.},
  institution = {Department of Pathology, University of Michigan Medical School, Ann
    Arbor, 48109, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Rhodes2004Large-scale.pdf},
  file        = {Rhodes2004Large-scale.pdf:Rhodes2004Large-scale.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.09.20}
}

@article{Rice2005Reconstructing,
  author = {Rice, J. J. and Tu, Y. and Stolovitzky, G.},
  title = {Reconstructing biological networks using conditional correlation
	analysis.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {765--773},
  number = {6},
  month = {Mar},
  abstract = {MOTIVATION: One of the present challenges in biological research is
	the organization of the data originating from high-throughput technologies.
	One way in which this information can be organized is in the form
	of networks of influences, physical or statistical, between cellular
	components. We propose an experimental method for probing biological
	networks, analyzing the resulting data and reconstructing the network
	architecture. METHODS: We use networks of known topology consisting
	of nodes (genes), directed edges (gene-gene interactions) and a dynamics
	for the genes' mRNA concentrations in terms of the gene-gene interactions.
	We proposed a network reconstruction algorithm based on the conditional
	correlation of the mRNA equilibrium concentration between two genes
	given that one of them was knocked down. Using simulated gene expression
	data on networks of known connectivity, we investigated how the reconstruction
	error is affected by noise, network topology, size, sparseness and
	dynamic parameters. RESULTS: Errors arise from correlation between
	nodes connected through intermediate nodes (false positives) and
	when the correlation between two directly connected nodes is obscured
	by noise, non-linearity or multiple inputs to the target node (false
	negatives). Two critical components of the method are as follows:
	(1) the choice of an optimal correlation threshold for predicting
	connections and (2) the reduction of errors arising from indirect
	connections (for which a novel algorithm is proposed). With these
	improvements, we can reconstruct networks with the topology of the
	transcriptional regulatory network in Escherichia coli with a reasonably
	low error rate.},
  doi = {10.1093/bioinformatics/bti064},
  institution = {Computational Biology Center, IBM T.J. Watson Research Center, PO
	Box 218, Yorktown Heights, NY 10598, USA.},
  keywords = {Algorithms; Computer Simulation; Gene Expression Profiling; Gene Expression
	Regulation; Models, Biological; Models, Statistical; Oligonucleotide
	Array Sequence Analysis; Protein Interaction Mapping; Signal Transduction;
	Statistics as Topic; Transcription Factors},
  owner = {fantine},
  pii = {bti064},
  pmid = {15486043},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti064}
}

@article{Rice2000EMBOSS,
  author = {P. Rice and I. Longden and A. Bleasby},
  title = {{EMBOSS}: the {European Molecular Biology Open Software Suite}.},
  journal = {Trends Genet.},
  year = {2000},
  volume = {16},
  pages = {276--277},
  number = {6},
  month = {Jun},
  institution = {The Sanger Centre, Wellcome Trust Genome Campus, Hinxton, Cambridge,
	UK CB10 1SA.},
  keywords = {Internet; Molecular Biology; Sequence Alignment, methods; Software;
	User-Computer Interface},
  language = {eng},
  medline-pst = {ppublish},
  owner = {bricehoffmann},
  pii = {S0168-9525(00)02024-2},
  pmid = {10827456},
  timestamp = {2009.07.29}
}

@article{Rice2005Mining,
  author   = {Simon B Rice and Goran Nenadic and Benjamin J Stapley},
  title    = {Mining protein function from text using term-based support vector
    machines.},
  journal  = {B{MC} {B}ioinformatics},
  year     = {2005},
  volume   = {6 Suppl 1},
  pages    = {S22},
  doi      = {10.1186/1471-2105-6-S1-S22},
  url      = {http://dx.doi.org/10.1186/1471-2105-6-S1-S22},
  pii      = {1471-2105-6-S1-S22},
  abstract = {B{ACKGROUND}: {T}ext mining has spurred huge interest in the domain
    of biology. {T}he goal of the {B}io{C}re{A}t{I}v{E} exercise was
    to evaluate the performance of current text mining systems. {W}e
    participated in {T}ask 2, which addressed assigning {G}ene {O}ntology
    terms to human proteins and selecting relevant evidence from full-text
    documents. {W}e approached it as a modified form of the document
    classification task. {W}e used a supervised machine-learning approach
    (based on support vector machines) to assign protein function and
    select passages that support the assignments. {A}s classification
    features, we used a protein's co-occurring terms that were automatically
    extracted from documents. {RESULTS}: {T}he results evaluated by curators
    were modest, and quite variable for different problems: in many cases
    we have relatively good assignment of {GO} terms to proteins, but
    the selected supporting text was typically non-relevant (precision
    spanning from 3\% to 50\%). {T}he method appears to work best when
    a substantial set of relevant documents is obtained, while it works
    poorly on single documents and/or short passages. {T}he initial results
    suggest that our approach can also mine annotations from text even
    when an explicit statement relating a protein to a {GO} term is absent.
    {CONCLUSION}: {A} machine learning approach to mining protein function
    predictions from text can yield good performance only if sufficient
    training data is available, and significant amount of supporting
    data is used for prediction. {T}he most promising results are for
    combined document retrieval and {GO} term assignment, which calls
    for the integration of methods developed in {B}io{C}re{A}t{I}v{E}
    {T}ask 1 and {T}ask 2.},
  keywords = {biosvm},
  pdf      = {../local/Rice2005Mining.pdf},
  file     = {Rice2005Mining.pdf:local/Rice2005Mining.pdf:PDF}
}

@article{Riedesel2004Peptide,
  author = {Henning Riedesel and Bj{\"o}rn Kolbeck and Oliver Schmetzer and Ernst-Walter
	Knapp},
  title = {Peptide binding at class {I} major histocompatibility complex scored
	with linear functions and support vector machines.},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2004},
  volume = {15},
  pages = {198--212},
  number = {1},
  abstract = {We explore two different methods to predict the binding ability of
	nonapeptides at the class {I} major histocompatibility complex using
	a general linear scoring function that defines a separating hyperplane
	in the feature space of sequences. {I}n absence of suitable data
	on non-binding nonapeptides we generated sequences randomly from
	a selected set of proteins from the protein data bank. {T}he parameters
	of the scoring function were determined by a generalized least square
	optimization ({LSM}) and alternatively by the support vector machine
	({SVM}). {W}ith the generalized {LSM} impaired data for learning
	with a small set of binding peptides and a large set of non-binding
	peptides can be treated in a balanced way rendering {LSM} more successful
	than {SVM}, while for symmetric data sets {SVM} has a slight advantage
	compared to {LSM}.},
  pdf = {../local/Riedesel2004Peptide.pdf},
  file = {Riedesel2004Peptide.pdf:local/Riedesel2004Peptide.pdf:PDF},
  keywords = {biosvm},
  url = {http://www.jsbi.org/journal/IBSB04/IBSB04F004.html}
}

@article{Rigaut1999generic,
  author = {G. Rigaut and A. Shevchenko and B. Rutz and M. Wilm and M. Mann and
	B. S{\'e}raphin},
  title = {A generic protein purification method for protein complex characterization
	and proteome exploration.},
  journal = {Nat Biotechnol},
  year = {1999},
  volume = {17},
  pages = {1030--1032},
  number = {10},
  month = {Oct},
  abstract = {We have developed a generic procedure to purify proteins expressed
	at their natural level under native conditions using a novel tandem
	affinity purification (TAP) tag. The TAP tag allows the rapid purification
	of complexes from a relatively small number of cells without prior
	knowledge of the complex composition, activity, or function. Combined
	with mass spectrometry, the TAP strategy allows for the identification
	of proteins interacting with a given target protein. The TAP method
	has been tested in yeast but should be applicable to other cells
	or organisms.},
  doi = {10.1038/13732},
  institution = {European Molecular Biology Laboratory, Meyerhofstrasse 1, D-69117
	Heidelberg, Germany.},
  keywords = {Affinity Labels; Amino Acid Sequence; Electrophoresis, Polyacrylamide
	Gel; Methods; Molecular Sequence Data; Proteins; Proteome},
  owner = {phupe},
  pmid = {10504710},
  timestamp = {2010.09.01},
  url = {http://dx.doi.org/10.1038/13732}
}

@article{Rinaldo2009Properties,
  author = {Rinaldo, A.},
  title = {Properties and refinements of the fused lasso},
  journal = {Ann. Stat.},
  year = {2009},
  volume = {37},
  pages = {2922--2952},
  number = {5B},
  doi = {10.1214/08-AOS665},
  url = {http://dx.doi.org/10.1214/08-AOS665}
}

@article{Risau-Gusman2000Generalization,
  author = {Risau-Gusman, S. and Gordon, M. B.},
  title = {Generalization properties of finite-size polynomial support vector
	machines},
  journal = {Phys {R}ev {E} {S}tat {P}hys {P}lasmas {F}luids {R}elat {I}nterdiscip
	{T}opics},
  year = {2000},
  volume = {62},
  pages = {7092--7099},
  number = {5 Pt B},
  month = {Nov},
  abstract = {The learning properties of finite-size polynomial support vector machines
	are analyzed in the case of realizable classification tasks. {T}he
	normalization of the high-order features acts as a squeezing factor,
	introducing a strong anisotropy in the patterns distribution in feature
	space. {A}s a function of the training set size, the corresponding
	generalization error presents a crossover, more or less abrupt depending
	on the distribution's anisotropy and on the task to be learned, between
	a fast-decreasing and a slowly decreasing regime. {T}his behavior
	corresponds to the stepwise decrease found by {D}ietrich et al. [{P}hys.
	{R}ev. {L}ett. 82, 2975 (1999)] in the thermodynamic limit. {T}he
	theoretical results are in excellent agreement with the numerical
	simulations.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial
	Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding
	Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation,
	Chemistry, Child, Chromosome Aberrations, Classification, Codon,
	Colonic Neoplasms, Comparative Study, Computational Biology, Computer
	Simulation, Computer-Assisted, DNA, Data Interpretation, Databases,
	Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis,
	Discrimination Learning, Electric Conductivity, Electrophysiology,
	Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric
	Emptying, Gene Expression Profiling, Gene Expression Regulation,
	Genes, Genetic, Genetic Markers, Genetic Predisposition to Disease,
	Genomics, Hemolysins, Humans, Indians, Initiator, Ion Channels, Kinetics,
	Leukemia, Likelihood Functions, Lipid Bilayers, Logistic Models,
	Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular, Myeloid,
	Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological,
	Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution,
	North American, Nucleic Acid Conformation, Oligonucleotide Array
	Sequence Analysis, Organ Specificity, Organelles, Ovarian Neoplasms,
	Ovary, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive
	Value of Tests, Promoter Regions (Genetics), Protein Biosynthesis,
	Protein Folding, Protein Structure, Proteins, Proteome, RNA, Reproducibility
	of Results, Research Support, Saccharomyces cerevisiae, Secondary,
	Sensitivity and Specificity, Sequence Alignment, Sequence Analysis,
	Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation,
	Software, Sound Spectrography, Statistical, Stomach Diseases, T-Lymphocytes,
	Thermodynamics, Transcription, Transcription Factors, Tumor Markers,
	Type 2, U.S. Gov't, Vertebrates, 0011102066}
}

@article{Risau-Gusman2001Statistical,
  author   = {S. Risau-Gusman and M. B. Gordon},
  title    = {Statistical mechanics of learning with soft margin classifiers.},
  journal  = {Phys {R}ev {E} {S}tat {N}onlin {S}oft {M}atter {P}hys},
  year     = {2001},
  month    = {Sep},
  volume   = {64},
  number   = {3 Pt 1},
  pages    = {031907},
  abstract = {We study the typical learning properties of the recently introduced
    soft margin classifiers ({SMC}s), learning realizable and unrealizable
    tasks, with the tools of statistical mechanics. {W}e derive analytically
    the behavior of the learning curves in the regime of very large training
    sets. {W}e obtain exponential and power laws for the decay of the
    generalization error towards the asymptotic value, depending on the
    task and on general characteristics of the distribution of stabilities
    of the patterns to be learned. {T}he optimal learning curves of the
    {SMC}s, which give the minimal generalization error, are obtained
    by tuning the coefficient controlling the trade-off between the error
    and the regularization terms in the cost function. {I}f the task
    is realizable by the {SMC}, the optimal performance is better than
    that of a hard margin support vector machine and is very close to
    that of a {B}ayesian classifier.}
}

@article{Rissanen1984Universal,
  author = {Rissanen, J.},
  title = {Universal coding, information, prediction, and estimation},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1984},
  volume = {30},
  pages = {629--636},
  number = {4},
  month = {Jul},
  abstract = {A connection between universal codes and the problems of prediction
	and statistical estimation is established. {A} known lower bound
	for the mean length of universal codes is sharpened and generalized,
	and optimum universal codes constructed. {T}he bound is defined to
	give the information in strings relative to the considered class
	of processes. {T}he earlier derived minimum description length criterion
	for estimation of parameters, including their number, is given a
	fundamental information, theoretic justification by showing that
	its estimators achieve the information in the strings. {I}t is also
	shown that one cannot do prediction in {G}aussian autoregressive
	moving average ({ARMA}) processes below a bound, which is determined
	by the information in the data. },
  pdf = {../local/Rissanen1984Universal.pdf},
  file = {Rissanen1984Universal.pdf:local/Rissanen1984Universal.pdf:PDF},
  keywords = {information-theory universal-coding},
  owner = {vert}
}

@article{Rissanen1981Universal,
  author = {Rissanen, J. and Langdon, Jr., G.},
  title = {Universal modeling and coding},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1981},
  volume = {27},
  pages = {12--23},
  number = {1},
  month = {Jan},
  pdf = {../local/Rissanen1981Universal.pdf},
  file = {Rissanen1981Universal.pdf:local/Rissanen1981Universal.pdf:PDF},
  keywords = {information-theory source-coding},
  owner = {vert}
}

@article{Rissanen1992Density,
  author = {Rissanen, J. and Speed, T. P. and Yu, B.},
  title = {Density estimation by stochastic complexity},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1992},
  volume = {38},
  pages = {315--323},
  number = {2},
  month = {Mar},
  abstract = {The results by {P}. {H}all and {E}.{J}. {H}annan (1988) on optimization
	of histogram density estimators with equal bin widths by minimization
	of the stochastic complexity are extended and sharpened in two separate
	ways. {A}s the first contribution, two generalized histogram estimators
	are constructed. {T}he first has unequal bin widths which, together
	with the number of the bins, are determined by minimization of the
	stochastic complexity using dynamic programming. {T}he other estimator
	consists of a mixture of equal bin width estimators, each of which
	is defined by the associated stochastic complexity. {A}s the main
	contribution in the present work, two theorems are proved, which
	together extend the universal coding theorems to a large class of
	data generating densities. {T}he first gives an asymptotic upper
	bound for the code redundancy in the order of magnitude, achieved
	with a special predictive type of histogram estimator, which sharpens
	a related bound. {T}he second theorem states that this bound cannot
	be improved upon by any code whatsoever },
  pdf = {../local/Rissanen1992Density.pdf},
  file = {Rissanen1992Density.pdf:local/Rissanen1992Density.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Rissanen1996Fisher,
  author = {Rissanen, J. J.},
  title = {Fisher information and stochastic complexity},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1996},
  volume = {42},
  pages = {40--47},
  number = {1},
  month = {Jan},
  abstract = {By taking into account the {F}isher information and removing an inherent
	redundancy in earlier two-part codes, a sharper code length as the
	stochastic complexity and the associated universal process are derived
	for a class of parametric processes. {T}he main condition required
	is that the maximum-likelihood estimates satisfy the central limit
	theorem. {T}he same code length is also obtained from the so-called
	maximum-likelihood code },
  pdf = {../local/Rissanen1996Fisher.pdf},
  file = {Rissanen1996Fisher.pdf:local/Rissanen1996Fisher.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Rivals2007Enrichment,
  author = {Rivals, I. and Personnaz, L. and Taing, L. and Potier, M.-C.},
  title = {Enrichment or depletion of a {GO} category within a class of genes:
	which test?},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {401--407},
  number = {4},
  month = {Feb},
  abstract = {A number of available program packages determine the significant enrichments
	and/or depletions of GO categories among a class of genes of interest.
	Whereas a correct formulation of the problem leads to a single exact
	null distribution, these GO tools use a large variety of statistical
	tests whose denominations often do not clarify the underlying P-value
	computations. We review the different formulations of the problem
	and the tests they lead to: the binomial, chi2, equality of two probabilities,
	Fisher's exact and hypergeometric tests. We clarify the relationships
	existing between these tests, in particular the equivalence between
	the hypergeometric test and Fisher's exact test. We recall that the
	other tests are valid only for large samples, the test of equality
	of two probabilities and the chi2-test being equivalent. We discuss
	the appropriateness of one- and two-sided P-values, as well as some
	discreteness and conservatism issues. Supplementary data are available
	at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btl633},
  pdf = {../local/Rivals2007Enrichment.pdf},
  file = {Rivals2007Enrichment.pdf:Rivals2007Enrichment.pdf:PDF},
  institution = {Equipe de Statistique Appliqu{\'e}e, 10 rue Vauquelin, 75005 Paris, France.
	isabelle.rivals@espci.fr},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btl633},
  pmid = {17182697},
  timestamp = {2011.08.06},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl633}
}

@article{Roberts2011Identification,
  author = {Roberts, A. and Pimentel, H. and Trapnell, C. and Pachter, L.},
  title = {Identification of novel transcripts in annotated genomes using {RNA-Seq}.},
  journal = {Bioinformatics},
  year = {2011},
  volume = {27},
  pages = {2325--2329},
  number = {17},
  month = {Sep},
  abstract = {We describe a new 'reference annotation based transcript assembly'
	problem for RNA-Seq data that involves assembling novel transcripts
	in the context of an existing annotation. This problem arises in
	the analysis of expression in model organisms, where it is desirable
	to leverage existing annotations for discovering novel transcripts.
	We present an algorithm for reference annotation-based transcript
	assembly and show how it can be used to rapidly investigate novel
	transcripts revealed by RNA-Seq in comparison with a reference annotation.
	The methods described in this article are implemented in the Cufflinks
	suite of software for RNA-Seq, freely available from http://bio.math.berkeley.edu/cufflinks.
	The software is released under the BOOST license. cole@broadinstitute.org;
	lpachter@math.berkeley.edu. Supplementary data are available at Bioinformatics
	online.},
  doi = {10.1093/bioinformatics/btr355},
  pdf = {../local/Roberts2011Identification.pdf},
  file = {Roberts2011Identification.pdf:Roberts2011Identification.pdf:PDF},
  institution = {Department of Computer Science, UC Berkeley, Berkeley, CA, USA.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btr355},
  pmid = {21697122},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1093/bioinformatics/btr355}
}

@article{Roberts2011Improving,
  author      = {Roberts, A. and Trapnell, C. and Donaghey, J. and Rinn, J. L. and
    Pachter, L.},
  title       = {Improving {RNA-Seq} expression estimates by correcting for fragment
    bias.},
  journal     = {Genome Biol},
  year        = {2011},
  volume      = {12},
  number      = {3},
  pages       = {R22},
  doi         = {10.1186/gb-2011-12-3-r22},
  url         = {http://dx.doi.org/10.1186/gb-2011-12-3-r22},
  pmid        = {21410973},
  pii         = {gb-2011-12-3-r22},
  abstract    = {The biochemistry of RNA-Seq library preparation results in cDNA fragments
    that are not uniformly distributed within the transcripts they represent.
    This non-uniformity must be accounted for when estimating expression
    levels, and we show how to perform the needed corrections using a
    likelihood based approach. We find improvements in expression estimates
    as measured by correlation with independently performed qRT-PCR and
    show that correction of bias leads to improved replicability of results
    across libraries and sequencing technologies.},
  institution = {Department of Computer Science, 387 Soda Hall, UC Berkeley, Berkeley,
    CA 94720, USA.},
  keywords    = {ngs, rnaseq},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Roberts2011Improving.pdf},
  file        = {Roberts2011Improving.pdf:Roberts2011Improving.pdf:PDF},
  owner       = {jp},
  timestamp   = {2013.03.29}
}

@article{Robine2007Genome-wide,
  author = {Robine, N. and Uematsu, N. and Amiot, F. and Gidrol, X. and Barillot,
	E. and Nicolas, A. and Borde, V.},
  title = {Genome-wide redistribution of meiotic double-strand breaks in {Saccharomyces}
	cerevisiae},
  journal = {Mol. Cell. Biol.},
  year = {2007},
  volume = {27},
  pages = {1868--1880},
  number = {5},
  month = {Mar},
  abstract = {Meiotic recombination is initiated by the formation of programmed
	DNA double-strand breaks (DSBs) catalyzed by the Spo11 protein. DSBs
	are not randomly distributed along chromosomes. To better understand
	factors that control the distribution of DSBs in budding yeast, we
	have examined the genome-wide binding and cleavage properties of
	the Gal4 DNA binding domain (Gal4BD)-Spo11 fusion protein. We found
	that Gal4BD-Spo11 cleaves only a subset of its binding sites, indicating
	that the association of Spo11 with chromatin is not sufficient for
	DSB formation. In centromere-associated regions, the centromere itself
	prevents DSB cleavage by tethered Gal4BD-Spo11 since its displacement
	restores targeted DSB formation. In addition, we observed that new
	DSBs introduced by Gal4BD-Spo11 inhibit surrounding DSB formation
	over long distances (up to 60 kb), keeping constant the number of
	DSBs per chromosomal region. Together, these results demonstrate
	that the targeting of Spo11 to new chromosomal locations leads to
	both local stimulation and genome-wide redistribution of recombination
	initiation and that some chromosomal regions are inherently cold
	regardless of the presence of Spo11.},
  doi = {10.1128/MCB.02063-06},
  institution = {Institut Curie, Recombinaison et Instabilit{\'e} G{\'e}n{\'e}tique, Centre
	de Recherche, UMR7147 CNRS-Institut Curie-Universit{\'e} P. et M. Curie,
	26 rue d'Ulm, 75248 Paris Cedex 05, France. valerie.borde@curie.fr},
  owner = {jp},
  pii = {MCB.02063-06},
  pmid = {17189430},
  timestamp = {2008.12.05},
  url = {http://dx.doi.org/10.1128/MCB.02063-06}
}

@article{Robinson2000IMGT/HLAa,
  author = {J. Robinson and A. Malik and P. Parham and J. G. Bodmer and S. G.
	Marsh},
  title = {{IMGT/HLA} database---a sequence database for the human major histocompatibility
	complex.},
  journal = {Tissue Antigens},
  year = {2000},
  volume = {55},
  pages = {280--287},
  number = {3},
  month = {Mar},
  abstract = {The IMGT/HLA Database is a specialist database for sequences of the
	human major histocompatibility (MHC) system. It includes all the
	HLA sequences officially recognised and named by the WHO Nomenclature
	Committee for Factors of the HLA System. The database provides users
	with online tools and facilities for the retrieval and analysis of
	these sequences. These include allele reports, alignment tools and
	a detailed database of all source cells. The online IMGT/HLA submission
	tool allows the submission of both new and confirmatory allele sequences
	directly to the WHO Nomenclature Committee for Factors of the HLA
	System. The latest version (release 1.4.1, November 1999) contains
	1,015 HLA alleles from over 2,270 component sequences derived from
	the EMBL/GenBank/DDBJ databases. From its release in December 1998
	until December 1999 the IMGT/HLA website received approximately 100,000
	hits. The database currently focuses on the human major histocompatibility
	complex but will be used as a model system to provide specialist
	databases for the MHC sequences of other species.},
  keywords = {Base Sequence; Databases, Factual; Humans; Major Histocompatibility
	Complex; Molecular Sequence Data},
  owner = {laurent},
  pmid = {10777106},
  timestamp = {2007.01.30}
}

@article{Roche2002virtual,
  author = {Roche, O. and Trube, G. and Zuegge, J. and Pflimlin, P. and Alanine,
	A. and Schneider, G.},
  title = {{A} virtual screening method for prediction of the {HERG} potassium
	channel liability of compound libraries.},
  journal = {ChemBioChem},
  year = {2002},
  volume = {3},
  pages = {455--459},
  number = {5},
  month = {May},
  abstract = {A computer-based method has been developed for prediction of the hERG
	(human ether-{\`a}-go-go related gene) K(+)-channel affinity of low
	molecular weight compounds. hERG channel blockage is a major concern
	in drug design, as such blocking agents can cause sudden cardiac
	death. Various techniques were applied to finding appropriate molecular
	descriptors for modeling structure-activity relationships: substructure
	analysis, self-organizing maps (SOM), principal component analysis
	(PCA), partial least squares fitting (PLS), and supervised neural
	networks. The most accurate prediction system was based on an artificial
	neural network. In a validation study, 93 \% of the nonblocking agents
	and 71 \% of the hERG channel blockers were correctly classified.
	This virtual screening method can be used for general compound-library
	shaping and combinatorial library design.},
  keywords = {chemoinformatics herg},
  pmid = {12007180},
  timestamp = {2006.10.05}
}

@book{Rockafellar1997Convex,
  title = {Convex Analysis},
  publisher = {Princeton Univ. Press},
  year = {1997},
  author = {Rockafellar, R. T.}
}

@article{Rodriguez-Paredes2011Cancer,
  author = {Rodr{\'\i}guez-Paredes, Manuel and Esteller, Manel},
  title = {Cancer epigenetics reaches mainstream oncology.},
  journal = {Nat Med},
  year = {2011},
  volume = {17},
  pages = {330--339},
  number = {3},
  month = {Mar},
  abstract = {Epigenetics is one of the most promising and expanding fields in the
	current biomedical research landscape. Since the inception of epigenetics
	in the 1940s, the discoveries regarding its implications in normal
	and disease biology have not stopped, compiling a vast amount of
	knowledge in the past decade. The field has moved from just one recognized
	marker, DNA methylation, to a variety of others, including a wide
	spectrum of histone modifications. From the methodological standpoint,
	the successful initial single gene candidate approaches have been
	complemented by the current comprehensive epigenomic approaches that
	allow the interrogation of genomes to search for translational applications
	in an unbiased manner. Most important, the discovery of mutations
	in the epigenetic machinery and the approval of the first epigenetic
	drugs for the treatment of subtypes of leukemias and lymphomas has
	been an eye-opener for many biomedical scientists and clinicians.
	Herein, we will summarize the progress in the field of cancer epigenetics
	research that has reached mainstream oncology in the development
	of new biomarkers of the disease and new pharmacological strategies.},
  doi = {10.1038/nm.2305},
  institution = {Cancer Epigenetics and Biology Program, Bellvitge Biomedical Research
	Institute, L'Hospitalet, and Department of Physiological Sciences
	II, School of Medicine, University of Barcelona, Barcelona, Spain.},
  keywords = {Amino Acid Sequence; DNA Methylation; Epigenesis, Genetic; Humans;
	Molecular Sequence Data; Neoplasms, genetics/therapy; Tumor Markers,
	Biological},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nm.2305},
  pmid = {21386836},
  timestamp = {2011.06.04},
  url = {http://dx.doi.org/10.1038/nm.2305}
}

@article{Roepstorff1984Proposal,
  author    = {P. Roepstorff and J. Fohlman},
  title     = {Proposal for a common nomenclature for sequence ions in mass spectra
	of peptides.},
  journal   = {Biomed Mass Spectrom},
  year      = {1984},
  month     = {Nov},
  volume    = {11},
  number    = {11},
  pages     = {601},
  doi       = {10.1002/bms.1200111109},
  url       = {http://dx.doi.org/10.1002/bms.1200111109},
  pmid      = {6525415},
  keywords  = {Mass Spectrometry; Peptides; Terminology as Topic},
  owner     = {phupe},
  timestamp = {2010.09.03}
}

@article{Rogers1994Application,
  author = {D. Rogers and A. J. Hopfinger},
  title = {Application of Genetic Function Approximation to Quantitative Structure-Activity
	Relationships and Quantitative Structure-Property Relationships},
  journal = {J Chem Inf Comput Sci},
  year = {1994},
  volume = {34},
  pages = {854--866},
  owner = {mahe},
  timestamp = {2006.09.06}
}

@article{Rognan2007Chemogenomic,
  author = {Rognan, D.},
  title = {Chemogenomic approaches to rational drug design},
  journal = {Br. J. Pharmacol.},
  year = {2007},
  volume = {152},
  pages = {38--52},
  number = {1},
  month = {Sep},
  abstract = {Paradigms in drug design and discovery are changing at a significant
	pace. Concomitant to the sequencing of over 180 several genomes,
	the high-throughput miniaturization of chemical synthesis and biological
	evaluation of a multiple compounds on gene/protein expression and
	function opens the way to global drug-discovery approaches, no more
	focused on a single target but on an entire family of related proteins
	or on a full metabolic pathway. Chemogenomics is this emerging research
	field aimed at systematically studying the biological effect of a
	wide array of small molecular-weight ligands on a wide array of macromolecular
	targets. Since the quantity of existing data (compounds, targets
	and assays) and of produced information (gene/protein expression
	levels and binding constants) are too large for manual manipulation,
	information technologies play a crucial role in planning, analysing
	and predicting chemogenomic data. The present review will focus on
	predictive in silico chemogenomic approaches to foster rational drug
	design and derive information from the simultaneous biological evaluation
	of multiple compounds on multiple targets. State-of-the-art methods
	for navigating in either ligand or target space will be presented
	and concrete drug design applications will be mentioned. British Journal
	of Pharmacology advance online publication, 29 May 2007; doi:10.1038/sj.bjp.0707307.},
  doi = {10.1038/sj.bjp.0707307},
  owner = {laurent},
  pii = {0707307},
  pmid = {17533416},
  timestamp = {2007.07.30},
  url = {http://dx.doi.org/10.1038/sj.bjp.0707307}
}

@article{Rohlf2005J,
  author = {Rohlf, F. James},
  title = {J. {Felsenstein}, {Inferring Phylogenies}, {Sinauer Assoc.}, 2004, pp.
	xx + 664.},
  journal = {J. Classif.},
  year = {2005},
  volume = {22},
  pages = {139--142},
  number = {1},
  address = {Secaucus, NJ, USA},
  doi = {10.1007/s00357-005-0009-4},
  issn = {0176-4268},
  publisher = {Springer-Verlag New York, Inc.},
  url = {http://dx.doi.org/10.1007/s00357-005-0009-4}
}

@article{Rolland2005G-protein-coupled,
  author = {Rolland, C. and Gozalbes, R. and Nicola{\"i}, A. and Paugam, M.-F.
	and Coussy, L. and Barbosa, F. and Horvath, D. and Revah, F.},
  title = {G-protein-coupled receptor affinity prediction based on the use of
	a profiling dataset: {QSAR} design, synthesis, and experimental validation.},
  journal = {J. Med. Chem.},
  year = {2005},
  volume = {48},
  pages = {6563--6574},
  number = {21},
  month = {Oct},
  abstract = {A QSAR model accounting for "average" G-protein-coupled receptor (GPCR)
	binding was built from a large set of experimental standardized binding
	data (1939 compounds systematically tested over 40 different GPCRs)
	and applied to the design of a library of "GPCR-predicted" compounds.
	Three hundred and sixty of these compounds were randomly selected
	and tested in 21 GPCR binding assays. Positives were defined by their
	ability to inhibit by more than 70\% the binding of reference compounds
	at 10 microM. A 5.5-fold enrichment in positives was observed when
	comparing the "GPCR-predicted" compounds with 600 randomly selected
	compounds predicted as "non-GPCR" from a general collection. The
	model was efficient in predicting strongest binders, since enrichment
	was greater for higher cutoffs. Significant enrichment was also observed
	for peptidic GPCRs and receptors not included to develop the QSAR
	model, suggesting the usefulness of the model to design ligands binding
	with newly identified GPCRs, including orphan ones.},
  doi = {10.1021/jm0500673},
  keywords = {chemogenomics},
  owner = {laurent},
  pmid = {16220973},
  timestamp = {2008.01.16},
  url = {http://dx.doi.org/10.1021/jm0500673}
}

@article{Rosasco2004loss,
  author = {Rosasco, L. and De Vito, E. and Caponnetto, A. and Piana, M. and Verri,
	A.},
  title = {Are loss functions all the same?},
  journal = {Neural Computation},
  year = {2004},
  volume = {16},
  pages = {1063--1076},
  number = {5},
  publisher = {MIT Press}
}

@article{Roschke2003Karyotypic,
  author = {Anna V Roschke and Giovanni Tonon and Kristen S Gehlhaus and Nicolas
	McTyre and Kimberly J Bussey and Samir Lababidi and Dominic A Scudiero
	and John N Weinstein and Ilan R Kirsch},
  title = {Karyotypic complexity of the {NCI-60} drug-screening panel.},
  journal = {Cancer Res},
  year = {2003},
  volume = {63},
  pages = {8634--8647},
  number = {24},
  month = {Dec},
  abstract = {We used spectral karyotyping to provide a detailed analysis of karyotypic
	aberrations in the diverse group of cancer cell lines established
	by the National Cancer Institute for the purpose of anticancer drug
	discovery. Along with the karyotypic description of these cell lines
	we defined and studied karyotypic complexity and heterogeneity (metaphase-to-metaphase
	variations) based on three separate components of genomic anatomy:
	(a) ploidy; (b) numerical changes; and (c) structural rearrangements.
	A wide variation in these parameters was evident in these cell lines,
	and different association patterns between them were revealed. Analysis
	of the breakpoints and other specific features of chromosomal changes
	across the entire set of cell lines or within particular lineages
	pointed to a striking lability of centromeric regions that distinguishes
	the epithelial tumor cell lines. We have also found that balanced
	translocations are as frequent in absolute number within the cell
	lines derived from solid as from hematopoietic tumors. Important
	similarities were noticed between karyotypic changes in cancer cell
	lines and that seen in primary tumors. This dataset offers insights
	into the causes and consequences of the destabilizing events and
	chromosomal instability that may occur during tumor development and
	progression. It also provides a foundation for investigating associations
	between structural genome anatomy and cancer molecular markers and
	targets, gene expression, gene dosage, and resistance or sensitivity
	to tens of thousands of molecular compounds.},
  institution = {Genetics Branch, Center for Cancer Research, National Cancer Institute,
	Bethesda, Maryland 20889-5105, USA.},
  keywords = {Cell Line, Tumor; Chromosome Aberrations; DNA Repair, genetics; Drug
	Screening Assays, Antitumor; Humans; Neoplasms, genetics/pathology;
	Ploidies; Retinoblastoma Protein, genetics; Spectral Karyotyping;
	Translocation, Genetic; Tumor Suppressor Protein p53, genetics},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {14695175},
  timestamp = {2010.08.05}
}

@article{Rose2005Correlation,
  author = {Rose, J. R. and Turkett, Jr., W. H. and Oroian, I. C. and Laegreid,
	W. W. and Keele, J.},
  title = {Correlation of amino acid preference and mammalian viral genome type},
  journal = {Bioinformatics},
  year = {2005},
  abstract = {Motivation: {I}n the event of an outbreak of a disease caused by an
	initially unknown pathogen, the ability to characterize anonymous
	sequences prior to isolation and culturing of the pathogen will be
	helpful. {W}e show that it is possible to classify viral sequences
	by genome type (ds{DNA}, ss{DNA}, ss{RNA} positive strand, ss{RNA}
	negative strand, retroid) using amino acid distribution. {R}esults:
	{I}n this paper we describe the results of analysis of amino acid
	preference in mammalian viruses. {T}he study was carried out at the
	genome level as well as two shorter sequence levels: short (300 amino
	acids) and medium length (660 amino acids). {T}he analysis indicates
	a correlation between the viral genome types ds{DNA}, ss{DNA}, ss{RNA}
	positive strand, ss{RNA} negative strand, and retroid and amino acid
	preference. {W}e investigated three different models of amino acid
	preference. {T}he simplest amino acid preference model, 1-{AAP},
	is a normalized description of the frequency of amino acids in genomes
	of a viral genome type. {A} slightly more complex model is the ordered
	pair amino acid preference model (2-{AAP}), which characterizes genomes
	of different viral genome types by the frequency of ordered pairs
	of amino acids. {T}he most complex and accurate model is the ordered
	triple amino acid preference model (3-{AAP}), which is based on ordered
	triples of amino acids. {T}he results demonstrate that mammalian
	viral genome types differ in their amino acid preference. {A}vailability:
	{T}he tools used to format and analyze data and supplementary material
	are available at http://www.cse.sc.edu/~rose/amino{P}reference/index.html.},
  doi = {10.1093/bioinformatics/bti174},
  pdf = {../local/Rose2005Correlation.pdf},
  file = {Rose2005Correlation.pdf:local/Rose2005Correlation.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/bti174v1}
}

@article{Rosenfeld1995Flexible,
  author = {Rosenfeld, R. and Zheng, Q. and Vajda, S. and DeLisi, C.},
  title = {{F}lexible docking of peptides to class {I} major-histocompatibility-complex
	receptors.},
  journal = {Genet. Anal.},
  year = {1995},
  volume = {12},
  pages = {1--21},
  number = {1},
  month = {Mar},
  abstract = {We present a new method for docking flexible peptides to class I Major-Histocompatibility-Complex
	(MHC) receptors. Docking is performed in two steps: (a) The charged
	terminal peptide residues are located by randomly distributing multiple
	copies of each in volumes of approximately 150 A at either end of
	the binding groove, and then minimizing the system energy using a
	modified multiple-copy search algorithm. This is followed by (b)
	construction of the intervening chain using the multiple-copy bond-scaling-relaxation
	loop closure algorithm. In both steps, the copies tend to cluster
	and the size of the resulting clusters is proportional to the basin
	of attraction of the corresponding energy well. We show that native
	MHC-bound peptides have broad minima and, consequently, that misfolded,
	low-energy peptide conformations can be eliminated by restricting
	consideration to groups of peptides which cluster into broad minima.
	The accuracy of the method is assessed by comparing the predictions
	with crystallographic data for three different MHC peptide systems,
	at various degrees of stringency: (a) the extent to which we can
	determine side chain function (anchor vs. T-cell epitopes); (b) the
	extent to which we can determine the peptide-receptor orientation;
	and (c) the accuracy with which we can predict atomic coordinates.
	We find the method correct on (a) for 19 of the 22 non-Gly positions;
	the failures appearing to be a consequence of omitting solvation.
	Predictions related to (b) are also very encouraging, with the overall
	orientation of the predicted peptides being very similar to the crystal
	conformation, when measured by the hydrogen bonding pattern between
	the two. The degree of success in predicting atomic coordinates varied
	considerably, however, from 1.4 A for the HLA-A2 peptide to 2.7 A
	for the Kb peptide. The inaccuracy of the latter appears to reflect
	an incomplete target function, most likely the omission of solvation.
	The calculations thus define the current limits of accuracy in docking
	flexible peptides to Class I receptors and identify the methodological
	improvements that must be made for the next advance in accuracy.},
  keywords = {immunoinformatics},
  pmid = {7648466},
  timestamp = {2007.01.25}
}

@article{Rosipal2001Kernel,
  author = {Rosipal, R. and Trejo, L. J.},
  title = {Kernel Partial Least Squares Regression in Reproducing Kernel {Hilbert}
	Space},
  journal = {J. Mach. Learn. Res.},
  year = {2001},
  volume = {2},
  pages = {97--123},
  owner = {vert},
  timestamp = {2007.08.02}
}

@article{Ross2004Multiplexed,
  author = {Philip L Ross and Yulin N Huang and Jason N Marchese and Brian Williamson
	and Kenneth Parker and Stephen Hattan and Nikita Khainovski and Sasi
	Pillai and Subhakar Dey and Scott Daniels and Subhasish Purkayastha
	and Peter Juhasz and Stephen Martin and Michael Bartlet-Jones and
	Feng He and Allan Jacobson and Darryl J Pappin},
  title = {Multiplexed protein quantitation in {Saccharomyces} cerevisiae using
	amine-reactive isobaric tagging reagents.},
  journal = {Mol Cell Proteomics},
  year = {2004},
  volume = {3},
  pages = {1154--1169},
  number = {12},
  month = {Dec},
  abstract = {We describe here a multiplexed protein quantitation strategy that
	provides relative and absolute measurements of proteins in complex
	mixtures. At the core of this methodology is a multiplexed set of
	isobaric reagents that yield amine-derivatized peptides. The derivatized
	peptides are indistinguishable in MS, but exhibit intense low-mass
	MS/MS signature ions that support quantitation. In this study, we
	have examined the global protein expression of a wild-type yeast
	strain and the isogenic upf1Delta and xrn1Delta mutant strains that
	are defective in the nonsense-mediated mRNA decay and the general
	5' to 3' decay pathways, respectively. We also demonstrate the use
	of 4-fold multiplexing to enable relative protein measurements simultaneously
	with determination of absolute levels of a target protein using synthetic
	isobaric peptide standards. We find that inactivation of Upf1p and
	Xrn1p causes common as well as unique effects on protein expression.},
  doi = {10.1074/mcp.M400129-MCP200},
  institution = {Applied Biosystems, Framingham, MA 01701, USA.},
  keywords = {Cations; Chromatography, Ion Exchange; Chromatography, Liquid; Down-Regulation;
	Exoribonucleases; Fungal Proteins; Indicators and Reagents; Ions;
	Mass Spectrometry; Models, Chemical; Peptides; Phenotype; Proteomics;
	RNA Helicases; RNA, Messenger; Saccharomyces cerevisiae; Saccharomyces
	cerevisiae Proteins; Succinimides},
  owner = {phupe},
  pii = {M400129-MCP200},
  pmid = {15385600},
  timestamp = {2010.08.19},
  url = {http://dx.doi.org/10.1074/mcp.M400129-MCP200}
}

@article{Rosset2007Piecewise,
  author  = {S. Rosset and J. Zhu},
  title   = {Piecewise Linear Regularized Solution Paths},
  journal = {Annals of {S}tatistics},
  year    = {2007},
  volume  = {35},
  number  = {3},
  pages   = {1012--1030}
}

@article{Roth1998Finding,
  author = {Roth, F. P. and Hughes, J. D. and Estep, P. W. and Church, G. M.},
  title = {Finding {DNA} regulatory motifs within unaligned noncoding sequences
	clustered by whole-genome {mRNA} quantitation.},
  journal = {Nat. Biotechnol.},
  year = {1998},
  volume = {16},
  pages = {939--945},
  number = {10},
  month = {Oct},
  abstract = {Whole-genome mRNA quantitation can be used to identify the genes that
	are most responsive to environmental or genotypic change. By searching
	for mutually similar DNA elements among the upstream non-coding DNA
	sequences of these genes, we can identify candidate regulatory motifs
	and corresponding candidate sets of coregulated genes. We have tested
	this strategy by applying it to three extensively studied regulatory
	systems in the yeast Saccharomyces cerevisiae: galactose response,
	heat shock, and mating type. Galactose-response data yielded the
	known binding site of Gal4, and six of nine genes known to be induced
	by galactose. Heat shock data yielded the cell-cycle activation motif,
	which is known to mediate cell-cycle dependent activation, and a
	set of genes coding for all four nucleosomal proteins. Mating type
	alpha and a data yielded all of the four relevant DNA motifs and
	most of the known a- and alpha-specific genes.},
  doi = {10.1038/nbt1098-939},
  institution = {Harvard University Graduate Biophysics Program and Harvard Medical
	School Department of Genetics, Boston, MA 02115, USA.},
  issn = {1087-0156},
  keywords = {bioinformatics, genome-wide, tfs},
  url = {http://dx.doi.org/10.1038/nbt1098-939}
}

@article{Roth2004generalized,
  author = {Volker Roth},
  title = {The generalized {LASSO}.},
  journal = {{IEEE} Trans Neural Netw},
  year = {2004},
  volume = {15},
  pages = {16--28},
  number = {1},
  month = {Jan},
  abstract = {In the last few years, the support vector machine ({SVM}) method has
	motivated new interest in kernel regression techniques. {A}lthough
	the {SVM} has been shown to exhibit excellent generalization properties
	in many experiments, it suffers from several drawbacks, both of a
	theoretical and a technical nature: the absence of probabilistic
	outputs, the restriction to {M}ercer kernels, and the steep growth
	of the number of support vectors with increasing size of the training
	set. {I}n this paper, we present a different class of kernel regressors
	that effectively overcome the above problems. {W}e call this approach
	generalized {LASSO} regression. {I}t has a clear probabilistic interpretation,
	can handle learning sets that are corrupted by outliers, produces
	extremely sparse solutions, and is capable of dealing with large-scale
	problems. {F}or regression functionals which can be modeled as iteratively
	reweighted least-squares ({IRLS}) problems, we present a highly efficient
	algorithm with guaranteed global convergence. {T}his defines a unique
	framework for sparse regression models in the very rich class of
	{IRLS} models, including various types of robust regression models
	and logistic regression. {P}erformance studies for many standard
	benchmark datasets effectively demonstrate the advantages of this
	model over related approaches.},
  doi = {10.1109/TNN.2003.809398},
  pdf = {../local/Roth2004generalized.pdf},
  file = {Roth2004generalized.pdf:local/Roth2004generalized.pdf:PDF},
  keywords = {Algorithms, Bayes Theorem, Models, Neural Networks (Computer), Non-U.S.
	Gov't, Research Design, Research Support, Theoretical, 15387244},
  url = {http://dx.doi.org/10.1109/TNN.2003.809398}
}

@techreport{Roth02Thegeneralized,
  author = {Volker Roth},
  title = {The Generalized {LASSO}: a wrapper approach to gene selection for microarray
	data},
  institution = {Proceedings 14th International Conference on Automated Deduction
	(CADE-14), 252--255},
  year = {2002}
}

@inproceedings{Roth2002Thegeneralized,
  author = {Volker Roth},
  title = {The Generalized {LASSO}: a wrapper approach to gene selection for microarray
	data},
  booktitle = {Proc. {CADE-14}},
  pages = {252--255},
  year = {2002}
}

@inproceedings{Roth2008The,
  author = {Roth, V. and Fischer, B.},
  title = {The {Group-Lasso} for generalized linear models: uniqueness of solutions
	and efficient algorithms},
  booktitle = {ICML '08: Proceedings of the 25th international conference on Machine
	learning},
  year = {2008},
  pages = {848--855},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  doi = {10.1145/1390156.1390263},
  ee = {http://doi.acm.org/10.1145/1390156.1390263},
  pdf = {../local/Roth2008The.pdf},
  file = {Roth2008The.pdf:Roth2008The.pdf:PDF},
  keywords = {lasso}
}

@article{Roth2004Bayesian,
  author = {Volker Roth and Tilman Lange},
  title = {{Bayesian} class discovery in microarray datasets.},
  journal = {IEEE Trans Biomed Eng},
  year = {2004},
  volume = {51},
  pages = {707--718},
  number = {5},
  month = {May},
  abstract = {A novel approach to class discovery in gene expression datasets is
	presented. In the context of clinical diagnosis, the central goal
	of class discovery algorithms is to simultaneously find putative
	(sub-)types of diseases and to identify informative subsets of genes
	with disease-type specific expression profile. Contrary to many other
	approaches in the literature, the method presented implements a wrapper
	strategy for feature selection, in the sense that the features are
	directly selected by optimizing the discriminative power of the used
	partitioning algorithm. The usual combinatorial problems associated
	with wrapper approaches are overcome by a Bayesian inference mechanism.
	On the technical side, we present an efficient optimization algorithm
	with guaranteed local convergence property. The only free parameter
	of the optimization method is selected by a resampling-based stability
	analysis. Experiments with Leukemia and Lymphoma datasets demonstrate
	that our method is able to correctly infer partitions and corresponding
	subsets of genes which both are relevant in a biological sense. Moreover,
	the frequently observed problem of ambiguities caused by different
	but equally high-scoring partitions is successfully overcome by the
	model selection method proposed.},
  keywords = {Algorithms, Automated, Bayes Theorem, Cluster Analysis, Comparative
	Study, DNA, Databases, Gene Expression Profiling, Genetic, Genetic
	Screening, Humans, Leukemia, Models, Non-U.S. Gov't, Nucleic Acid,
	Oligonucleotide Array Sequence Analysis, Pattern Recognition, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Sequence
	Alignment, Sequence Analysis, Statistical, 15132496},
  pmid = {15132496},
  timestamp = {2006.07.27}
}

@article{Rothman2008Sparse,
  author    = {Rothman, A. J. and Bickel, P. J. and Levina, E. and Zhu, J.},
  title     = {Sparse permutation invariant covariance estimation},
  journal   = {Electron. J. Statist.},
  year      = {2008},
  volume    = {2},
  pages     = {494--515},
  doi       = {10.1214/08-EJS176},
  url       = {http://dx.doi.org/10.1214/08-EJS176},
  pdf       = {../local/Rothman2008Sparse.pdf},
  file      = {Rothman2008Sparse.pdf:Rothman2008Sparse.pdf:PDF},
  owner     = {jp},
  timestamp = {2012.03.20}
}

@article{Roweis2000Nonlinear,
  author = {Roweis, S. T. and Saul, L. K.},
  title = {Nonlinear dimensionality reduction by locally linear embedding.},
  journal = {Science},
  year = {2000},
  volume = {290},
  pages = {2323--2326},
  number = {5500},
  month = {Dec},
  abstract = {Many areas of science depend on exploratory data analysis and visualization.
	{T}he need to analyze large amounts of multivariate data raises the
	fundamental problem of dimensionality reduction: how to discover
	compact representations of high-dimensional data. {H}ere, we introduce
	locally linear embedding ({LLE}), an unsupervised learning algorithm
	that computes low-dimensional, neighborhood-preserving embeddings
	of high-dimensional inputs. {U}nlike clustering methods for local
	dimensionality reduction, {LLE} maps its inputs into a single global
	coordinate system of lower dimensionality, and its optimizations
	do not involve local minima. {B}y exploiting the local symmetries
	of linear reconstructions, {LLE} is able to learn the global structure
	of nonlinear manifolds, such as those generated by images of faces
	or documents of text.},
  doi = {10.1126/science.290.5500.2323},
  pdf = {../local/Roweis2000Nonlinear.pdf},
  file = {Roweis2000Nonlinear.pdf:local/Roweis2000Nonlinear.pdf:PDF},
  keywords = {dimred},
  pii = {290/5500/2323},
  url = {http://dx.doi.org/10.1126/science.290.5500.2323}
}

@article{Rual2005Towards,
  author = {Jean-Fran{\c{c}}ois Rual and Kavitha Venkatesan and Tong Hao and Tomoko
	Hirozane-Kishikawa and Am{\'e}lie Dricot and Ning Li and Gabriel F Berriz
	and Francis D Gibbons and Matija Dreze and Nono Ayivi-Guedehoussou
	and Niels Klitgord and Christophe Simon and Mike Boxem and Stuart
	Milstein and Jennifer Rosenberg and Debra S Goldberg and Lan V Zhang
	and Sharyl L Wong and Giovanni Franklin and Siming Li and Joanna
	S Albala and Janghoo Lim and Carlene Fraughton and Estelle Llamosas
	and Sebiha Cevik and Camille Bex and Philippe Lamesch and Robert
	S Sikorski and Jean Vandenhaute and Huda Y Zoghbi and Alex Smolyar
	and Stephanie Bosak and Reynaldo Sequerra and Lynn Doucette-Stamm
	and Michael E Cusick and David E Hill and Frederick P Roth and Marc
	Vidal},
  title = {Towards a proteome-scale map of the human protein-protein interaction
	network.},
  journal = {Nature},
  year = {2005},
  volume = {437},
  pages = {1173--1178},
  number = {7062},
  month = {Oct},
  abstract = {Systematic mapping of protein-protein interactions, or 'interactome'
	mapping, was initiated in model organisms, starting with defined
	biological processes and then expanding to the scale of the proteome.
	Although far from complete, such maps have revealed global topological
	and dynamic features of interactome networks that relate to known
	biological properties, suggesting that a human interactome map will
	provide insight into development and disease mechanisms at a systems
	level. Here we describe an initial version of a proteome-scale map
	of human binary protein-protein interactions. Using a stringent,
	high-throughput yeast two-hybrid system, we tested pairwise interactions
	among the products of approximately 8,100 currently available Gateway-cloned
	open reading frames and detected approximately 2,800 interactions.
	This data set, called CCSB-HI1, has a verification rate of approximately
	78\% as revealed by an independent co-affinity purification assay,
	and correlates significantly with other biological attributes. The
	CCSB-HI1 data set increases by approximately 70\% the set of available
	binary interactions within the tested space and reveals more than
	300 new connections to over 100 disease-associated proteins. This
	work represents an important step towards a systematic and comprehensive
	human interactome project.},
  doi = {10.1038/nature04209},
  institution = {Center for Cancer Systems Biology and Department of Cancer Biology,
	Dana-Farber Cancer Institute, Harvard Medical School, 44 Binney Street,
	Boston, Massachusetts 02115, USA.},
  keywords = {Cloning, Molecular; Humans; Open Reading Frames; Protein Binding;
	Proteome; RNA; Saccharomyces cerevisiae; Two-Hybrid System Techniques},
  owner = {phupe},
  pii = {nature04209},
  pmid = {16189514},
  timestamp = {2010.09.01},
  url = {http://dx.doi.org/10.1038/nature04209}
}

@article{Rudd2005Eclair,
  author = {Rudd, S. and Tetko, I. V.},
  title = {Eclair---a web service for unravelling species origin of sequences
	sampled from mixed host interfaces.},
  journal = {Nucleic {A}cids {R}es},
  year = {2005},
  volume = {33},
  pages = {W724--W727},
  number = {Web Server issue},
  month = {Jul},
  abstract = {The identification of the genes that participate at the biological
	interface of two species remains critical to our understanding of
	the mechanisms of disease resistance, disease susceptibility and
	symbiosis. {T}he sequencing of complementary {DNA} (c{DNA}) libraries
	prepared from the biological interface between two organisms provides
	an inexpensive way to identify the novel genes that may be expressed
	as a cause or consequence of compatible or incompatible interactions.
	{S}equence classification and annotation of species origin typically
	use an orthology-based approach and require access to large portions
	of either genome, or a close relative. {N}ovel species- or clade-specific
	sequences may have no counterpart within existing databases and remain
	ambiguous features. {H}ere we present a web-service, {E}clair, which
	utilizes support vector machines for the classification of the origin
	of expressed sequence tags stemming from mixed host c{DNA} libraries.
	{I}n addition to providing an interface for the classification of
	sequences, users are presented with the opportunity to train a model
	to suit their preferred species pair. {E}clair is freely available
	at http://eclair.btk.fi.},
  doi = {10.1093/nar/gki434},
  pdf = {../local/Rudd2005Eclair.pdf},
  file = {Rudd2005Eclair.pdf:local/Rudd2005Eclair.pdf:PDF},
  keywords = {biosvm},
  pii = {33/suppl_2/W724},
  url = {http://dx.doi.org/10.1093/nar/gki434}
}

@article{Rudin1992Nonlinear,
  author = {Rudin, L. I. and Osher, S. and Fatemi, E.},
  title = {Nonlinear total variation based noise removal algorithms},
  journal = {Physica D},
  year = {1992},
  volume = {60},
  pages = {259--268},
  abstract = {A constrained optimization type of numerical algorithm for removing
	noise from images is presented. The total variation of the image
	is minimized subject to constraints involving the statistics of the
	noise. The constraints are imposed using Lagrange multipliers. The
	solution is obtained using the gradient-projection method. This amounts
	to solving a time dependent partial differential equation on a manifold
	determined by the constraints. As $t \to \infty$ the solution converges to
	a steady state which is the denoised image. The numerical algorithm
	is simple and relatively fast. The results appear to be state-of-the-art
	for very noisy images. The method is noninvasive, yielding sharp
	edges in the image. The technique could be interpreted as a first
	step of moving each level set of the image normal to itself with
	velocity equal to the curvature of the level set divided by the
	magnitude of the gradient of the image, and a second step which
	projects the image back onto the constraint set.},
  pdf = {../local/Rudin1992Nonlinear.pdf},
  file = {Rudin1992Nonlinear.pdf:Rudin1992Nonlinear.pdf:PDF},
  keywords = {segmentation},
  owner = {jp},
  timestamp = {2010.05.31}
}

@article{Ruepp2005Assessment,
  author   = {Ruepp, S. and Boess, F. and Suter, L. and de Vera, M. C. and Steiner,
    G. and Steele, T. and Weiser, T. and Albertini, S.},
  title    = {Assessment of hepatotoxic liabilities by transcript profiling.},
  journal  = {Toxicol {A}ppl {P}harmacol},
  year     = {2005},
  month    = {Jun},
  doi      = {10.1016/j.taap.2005.05.008},
  url      = {http://dx.doi.org/10.1016/j.taap.2005.05.008},
  pii      = {S0041-008X(05)00295-4},
  keywords = {biosvm},
  pdf      = {../local/Ruepp2005Assessment.pdf},
  file     = {Ruepp2005Assessment.pdf:local/Ruepp2005Assessment.pdf:PDF},
  abstract = {Male {W}istar rats were treated with various model compounds or the
    appropriate vehicle controls in order to create a reference database
    for toxicogenomics assessment of novel compounds. {H}epatotoxic compounds
    in the database were either known hepatotoxicants or showed hepatotoxicity
    during preclinical testing. {H}istopathology and clinical chemistry
    data were used to anchor the transcript profiles to an established
    endpoint (steatosis, cholestasis, direct acting, peroxisomal proliferation
    or nontoxic/control). {T}hese reference data were analyzed using
    a supervised learning method (support vector machines, {SVM}) to
    generate classification rules. {T}his predictive model was subsequently
    used to assess compounds with regard to a potential hepatotoxic liability.
    {A} steatotic and a non-hepatotoxic 5{HT}(6) receptor antagonist
    compound from the same series were successfully discriminated by
    this toxicogenomics model. {A}dditionally, an example is shown where
    a hepatotoxic liability was correctly recognized in the absence of
    pathological findings. {I}n vitro experiments and a dog study confirmed
    the correctness of the toxicogenomics alert. {A}nother interesting
    observation was that transcript profiles indicate toxicologically
    relevant changes at an earlier timepoint than routinely used methods.
    {T}ogether, these results support the useful application of toxicogenomics
    in raising alerts for adverse effects and generating mechanistic
    hypotheses that can be followed up by confirmatory experiments.}
}

@article{Rumble2009SHRiMP,
  author      = {Rumble, S. M. and Lacroute, P. and Dalca, A. V. and Fiume, M. and
    Sidow, A. and Brudno, M.},
  title       = {{SHRiMP}: accurate mapping of short color-space reads.},
  journal     = {PLoS Comput. Biol.},
  year        = {2009},
  month       = {May},
  volume      = {5},
  number      = {5},
  pages       = {e1000386},
  doi         = {10.1371/journal.pcbi.1000386},
  url         = {http://dx.doi.org/10.1371/journal.pcbi.1000386},
  pmid        = {19461883},
  institution = {Department of Computer Science, University of Toronto, Toronto, Ontario,
    Canada.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2011.10.27},
  abstract    = {The development of Next Generation Sequencing technologies, capable
    of sequencing hundreds of millions of short reads (25-70 bp each)
    in a single run, is opening the door to population genomic studies
    of non-model species. In this paper we present SHRiMP - the SHort
    Read Mapping Package: a set of algorithms and methods to map short
    reads to a genome, even in the presence of a large amount of polymorphism.
    Our method is based upon a fast read mapping technique, separate
    thorough alignment methods for regular letter-space as well as AB
    SOLiD (color-space) reads, and a statistical model for false positive
    hits. We use SHRiMP to map reads from a newly sequenced Ciona savignyi
    individual to the reference genome. We demonstrate that SHRiMP can
    accurately map reads to this highly polymorphic genome, while confirming
    high heterozygosity of C. savignyi in this second individual. SHRiMP
    is freely available at http://compbio.cs.toronto.edu/shrimp.}
}

@article{Rumelhart1986Learning,
  author    = {Rumelhart, D. E. and Hinton, G. E. and Williams, R. J.},
  title     = {Learning representations by back-propagating errors},
  journal   = {Nature},
  volume    = {323},
  pages     = {533--536},
  year      = {1986},
  file      = {Rumelhart1986Learning.pdf:Rumelhart1986Learning.pdf:PDF},
  pdf       = {../local/Rumelhart1986Learning.pdf},
  owner     = {jp},
  timestamp = {2012.12.13}
}

@article{Rusk2008Primer,
  author = {Nicole Rusk and Veronique Kiermer},
  title = {Primer: Sequencing---the next generation},
  journal = {Nat. Methods},
  year = {2008},
  volume = {5},
  pages = {15},
  keywords = {csbcbook, csbcbook-ch2}
}

@article{Russell1992Multiple,
  author    = {R. B. Russell and G. J. Barton},
  title     = {Multiple protein sequence alignment from tertiary structure comparison:
    assignment of global and residue confidence levels.},
  journal   = {Proteins},
  year      = {1992},
  month     = {Oct},
  volume    = {14},
  number    = {2},
  pages     = {309--323},
  doi       = {10.1002/prot.340140216},
  url       = {http://dx.doi.org/10.1002/prot.340140216},
  pmid      = {1409577},
  keywords  = {Algorithms; Amino Acid Sequence; Animals; Confidence Intervals; Globins;
    Humans; Molecular Sequence Data; Protein Structure, Tertiary; Sequence
    Alignment; Sequence Homology, Amino Acid; Serine Endopeptidases;
    Software},
  owner     = {laurent},
  timestamp = {2008.01.15},
  abstract  = {An algorithm is presented for the accurate and rapid generation of
    multiple protein sequence alignments from tertiary structure comparisons.
    A preliminary multiple sequence alignment is performed using sequence
    information, which then determines an initial superposition of the
    structures. A structure comparison algorithm is applied to all pairs
    of proteins in the superimposed set and a similarity tree calculated.
    Multiple sequence alignments are then generated by following the
    tree from the branches to the root. At each branchpoint of the tree,
    a structure-based sequence alignment and coordinate transformations
    are output, with the multiple alignment of all structures output
    at the root. The algorithm encoded in STAMP (STructural Alignment
    of Multiple Proteins) is shown to give alignments in good agreement
    with published structural accounts within the dehydrogenase fold
    domains, globins, and serine proteinases. In order to reduce the
    need for visual verification, two similarity indices are introduced
    to determine the quality of each generated structural alignment.
    Sc quantifies the global structural similarity between pairs or groups
    of proteins, whereas Pij' provides a normalized measure of the confidence
    in the alignment of each residue. STAMP alignments have the quality
    of each alignment characterized by Sc and Pij' values and thus provide
    a reproducible resource for studies of residue conservation within
    structural motifs.}
}

@incollection{Ratsch2004Accurate,
  author = {R{\"a}tsch, G. and Sonnenburg, S.},
  title = {Accurate splice site detection for {Caenorhabditis} elegans},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  pages = {277--298},
  abstract = {During the past three years, the support vector machine learning algorithm
	has been extensively applied within the field of computational biology.
	{T}he algorithm has been used to detect patterns within and among
	biological sequences, to classify genes and patients based upon gene
	expression profiles, and has recently been applied to several new
	biological problems. {T}his chapter reviews the state of the art
	with respect to {SVM} applications in computational biology.},
  keywords = {biosvm},
  owner = {vert}
}

@article{Raetsch2005RASE,
  author = {G. R{\"a}tsch and S. Sonnenburg and B. Sch{\"o}lkopf},
  title = {{RASE}: recognition of alternatively spliced exons in {C.} elegans.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {i369--i377},
  number = {Suppl. 1},
  month = {Jun},
  abstract = {M{OTIVATION}: {E}ukaryotic pre-m{RNA}s are spliced to form mature
	m{RNA}. {P}re-m{RNA} alternative splicing greatly increases the complexity
	of gene expression. {E}stimates show that more than half of the human
	genes and at least one-third of the genes of less complex organisms,
	such as nematodes or flies, are alternatively spliced. {I}n this
	work, we consider one major form of alternative splicing, namely
	the exclusion of exons from the transcript. {I}t has been shown that
	alternatively spliced exons have certain properties that distinguish
	them from constitutively spliced exons. {A}lthough most recent computational
	studies on alternative splicing apply only to exons which are conserved
	among two species, our method only uses information that is available
	to the splicing machinery, i.e. the {DNA} sequence itself. {W}e employ
	advanced machine learning techniques in order to answer the following
	two questions: (1) {I}s a certain exon alternatively spliced? (2)
	{H}ow can we identify yet unidentified exons within known introns?
	{RESULTS}: {W}e designed a support vector machine ({SVM}) kernel
	well suited for the task of classifying sequences with motifs having
	positional preferences. {I}n order to solve the task (1), we combine
	the kernel with additional local sequence information, such as lengths
	of the exon and the flanking introns. {T}he resulting {SVM}-based
	classifier achieves a true positive rate of 48.5\% at a false positive
	rate of 1\%. {B}y scanning over single {EST} confirmed exons we identified
	215 potential alternatively spliced exons. {F}or 10 randomly selected
	such exons we successfully performed biological verification experiments
	and confirmed three novel alternatively spliced exons. {T}o answer
	question (2), we additionally used {SVM}-based predictions to recognize
	acceptor and donor splice sites. {C}ombined with the above mentioned
	features we were able to identify 85.2\% of skipped exons within
	known introns at a false positive rate of 1\%. {AVAILABILITY}: {D}atasets,
	model selection results, our predictions and additional experimental
	results are available at http://www.fml.tuebingen.mpg.de/~raetsch/{RASE}
	{CONTACT}: {G}unnar.{R}aetsch@tuebingen.mpg.de {SUPPLEMENTARY} {INFORMATION}:
	http://www.fml.tuebingen.mpg.de/raetsch/{RASE}.},
  doi = {10.1093/bioinformatics/bti1053},
  pdf = {../local/Raetsch2005RASE.pdf},
  file = {Raetsch2005RASE.pdf:local/Raetsch2005RASE.pdf:PDF},
  keywords = {biosvm},
  pii = {21/suppl_1/i369},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1053}
}

@article{Roegnvaldsson2004Why,
  author = {Thorsteinn R{\"o}gnvaldsson and Liwen You},
  title = {Why neural networks should not be used for {HIV}-1 protease cleavage
	site prediction.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1702--1709},
  number = {11},
  month = {Jul},
  abstract = {S{UMMARY}: {S}everal papers have been published where nonlinear machine
	learning algorithms, e.g. artificial neural networks, support vector
	machines and decision trees, have been used to model the specificity
	of the {HIV}-1 protease and extract specificity rules. {W}e show
	that the dataset used in these studies is linearly separable and
	that it is a misuse of nonlinear classifiers to apply them to this
	problem. {T}he best solution on this dataset is achieved using a
	linear classifier like the simple perceptron or the linear support
	vector machine, and it is straightforward to extract rules from these
	linear models. {W}e identify key residues in peptides that are efficiently
	cleaved by the {HIV}-1 protease and list the most prominent rules,
	relating them to experimental results for the {HIV}-1 protease. {MOTIVATION}:
	{U}nderstanding {HIV}-1 protease specificity is important when designing
	{HIV} inhibitors and several different machine learning algorithms
	have been applied to the problem. {H}owever, little progress has
	been made in understanding the specificity because nonlinear and
	overly complex models have been used. {RESULTS}: {W}e show that the
	problem is much easier than what has previously been reported and
	that linear classifiers like the simple perceptron or linear support
	vector machines are at least as good predictors as nonlinear algorithms.
	{W}e also show how sets of specificity rules can be generated from
	the resulting linear classifiers. {AVAILABILITY}: {T}he datasets
	used are available at http://www.hh.se/staff/bioinf/},
  doi = {10.1093/bioinformatics/bth144},
  pdf = {../local/Roegnvaldsson2004Why.pdf},
  file = {Roegnvaldsson2004Why.pdf:local/Roegnvaldsson2004Why.pdf:PDF},
  keywords = {biosvm},
  pii = {bth144},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth144}
}

@article{Rotzschke1992Peptide,
  author    = {R{\"o}tzschke, O. and Falk, K. and Stevanovi{\'c}, S. and Jung, G.
    and Rammensee, H. C.},
  title     = {{P}eptide motifs of closely related {HLA} class {I} molecules encompass
    substantial differences.},
  journal   = {Eur. J. Immunol.},
  year      = {1992},
  month     = {Sep},
  volume    = {22},
  number    = {9},
  pages     = {2453--2456},
  pmid      = {1516632},
  keywords  = {immunoinformatics},
  timestamp = {2007.01.25},
  abstract  = {The peptides presented by major histocompatibility complex class I
    molecules adhere to strict rules concerning peptide length and occupancy
    by certain amino acid residues at anchor positions. Peptides presented
    by HLA-A*0201 molecules, for example, are generally nonapeptides
    requiring Leu or Met at position 2 and an aliphatic residue, predominantly
    Val, at position 9. A closely related molecule, HLA-A*0205, differing
    from the former at four amino acid residues, has a related but substantially
    different peptide motif. A*0205-presented peptides are still nonapeptides,
    and position 9 is still aliphatic, although it is preferentially
    occupied by Leu instead of Val. Position 2 not only allows aliphatic
    residues but also polar ones. Occupancy at position 6, considered
    as an auxiliary anchor in A*0201, as well as non-anchor residues
    at positions 3, 4, and 8 are relatively well conserved between the
    two peptide motifs. Thus, although a number of the T cell epitopes
    presented by the two HLA-A2 forms is expected to be identical, a
    considerable number of epitopes should be different.}
}

@article{Roesch2005Chemotaxonomic,
  author = {Petra R{\"o}sch and Michaela Harz and Michael Schmitt and Klaus-Dieter
	Peschke and Olaf Ronneberger and Hans Burkhardt and Hans-Walter Motzkus
	and Markus Lankers and Stefan Hofer and Hans Thiele and J{\"u}rgen Popp},
  title = {Chemotaxonomic identification of single bacteria by micro-{R}aman
	spectroscopy: application to clean-room-relevant biological contaminations.},
  journal = {Appl {E}nviron {M}icrobiol},
  year = {2005},
  volume = {71},
  pages = {1626--1637},
  number = {3},
  month = {Mar},
  abstract = {Microorganisms, such as bacteria, which might be present as contamination
	inside an industrial food or pharmaceutical clean room process need
	to be identified on short time scales in order to minimize possible
	health hazards as well as production downtimes causing financial
	deficits. {H}ere we describe the first results of single-particle
	micro-{R}aman measurements in combination with a classification method,
	the so-called support vector machine technique, allowing for a fast,
	reliable, and nondestructive online identification method for single
	bacteria.},
  doi = {10.1128/AEM.71.3.1626-1637.2005},
  pdf = {../local/Roesch2005Chemotaxonomic.pdf},
  file = {Roesch2005Chemotaxonomic.pdf:local/Roesch2005Chemotaxonomic.pdf:PDF},
  pii = {71/3/1626},
  url = {http://dx.doi.org/10.1128/AEM.71.3.1626-1637.2005}
}

@article{Sachidanandam2001Map,
  author = {R. Sachidanandam and D. Weissman and S. C. Schmidt and J. M. Kakol
	and L. D. Stein and G. Marth and S. Sherry and J. C. Mullikin and
	B. J. Mortimore and D. L. Willey and S. E. Hunt and C. G. Cole and
	P. C. Coggill and C. M. Rice and Z. Ning and J. Rogers and D. R.
	Bentley and P. Y. Kwok and E. R. Mardis and R. T. Yeh and B. Schultz
	and L. Cook and R. Davenport and M. Dante and L. Fulton and L. Hillier
	and R. H. Waterston and J. D. McPherson and B. Gilman and S. Schaffner
	and W. J. Van Etten and D. Reich and J. Higgins and M. J. Daly and
	B. Blumenstiel and J. Baldwin and N. Stange-Thomann and M. C. Zody
	and L. Linton and E. S. Lander and D. Altshuler and
	{International SNP Map Working Group}},
  title = {A map of human genome sequence variation containing 1.42 million
	single nucleotide polymorphisms.},
  journal = {Nature},
  year = {2001},
  volume = {409},
  pages = {928--933},
  number = {6822},
  month = {Feb},
  abstract = {We describe a map of 1.42 million single nucleotide polymorphisms
	(SNPs) distributed throughout the human genome, providing an average
	density on available sequence of one SNP every 1.9 kilobases. These
	SNPs were primarily discovered by two projects: The SNP Consortium
	and the analysis of clone overlaps by the International Human Genome
	Sequencing Consortium. The map integrates all publicly available
	SNPs with described genes and other genomic features. We estimate
	that 60,000 SNPs fall within exon (coding and untranslated regions),
	and 85\% of exons are within 5 kb of the nearest SNP. Nucleotide
	diversity varies greatly across the genome, in a manner broadly consistent
	with a standard population genetic model of human history. This high-density
	SNP map provides a public resource for defining haplotype variation
	across the genome, and should help to identify biomedically important
	genes for diagnosis and therapy.},
  institution = {Cold Spring Harbor, New York 11724, USA.},
  keywords = {Chromosome Mapping; Genetic Variation; Genetics, Medical; Genetics,
	Population; Genome, Human; Humans; Nucleotides; Polymorphism, Single
	Nucleotide},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {11237013},
  timestamp = {2010.08.01}
}

@article{Sadik2004Detection,
  author = {Omowunmi Sadik and Walker H Land and Adam K Wanekaya and Michiko
	Uematsu and Mark J Embrechts and Lut Wong and Dale Leibensperger
	and Alex Volykin},
  title = {Detection and classification of organophosphate nerve agent simulants
	using support vector machines with multiarray sensors.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {499--507},
  number = {2},
  abstract = {The need for rapid and accurate detection systems is expanding and
	the utilization of cross-reactive sensor arrays to detect chemical
	warfare agents in conjunction with novel computational techniques
	may prove to be a potential solution to this challenge. {W}e have
	investigated the detection, prediction, and classification of various
	organophosphate ({OP}) nerve agent simulants using sensor arrays
	with a novel learning scheme known as support vector machines ({SVM}s).
	{T}he {OP}s tested include parathion, malathion, dichlorvos, trichlorfon,
	paraoxon, and diazinon. {A} new data reduction software program was
	written in {MATLAB} {V}. 6.1 to extract steady-state and kinetic
	data from the sensor arrays. {T}he program also creates training
	sets by mixing and randomly sorting any combination of data categories
	into both positive and negative cases. {T}he resulting signals were
	fed into {SVM} software for "pairwise" and "one" vs all classification.
	{E}xperimental results for this new paradigm show a significant increase
	in classification accuracy when compared to artificial neural networks
	({ANN}s). {T}hree kernels, the {S}2000, the polynomial, and the {G}aussian
	radial basis function ({RBF}), were tested and compared to the {ANN}.
	{T}he following measures of performance were considered in the pairwise
	classification: receiver operating curve ({ROC}) {A}z indices, specificities,
	and positive predictive values ({PPV}s). {T}he {ROC} {A}z values,
	specifities, and {PPV}s increases ranged from 5\% to 25\%, 108\%
	to 204\%, and 13\% to 54\%, respectively, in all {OP} pairs studied
	when compared to the {ANN} baseline. {D}ichlorvos, trichlorfon, and
	paraoxon were perfectly predicted. {P}ositive prediction for malathion
	was 95\%.},
  doi = {10.1021/ci034220i},
  pdf = {../local/Sadik2004Detection.pdf},
  file = {Sadik2004Detection.pdf:local/Sadik2004Detection.pdf:PDF},
  keywords = {Algorithms, Ambergris, Combinatorial Chemistry Techniques, Models,
	Molecular, Molecular Conformation, Odors, P.H.S., Perfume, Predictive
	Value of Tests, Quantitative Structure-Activity Relationship, Research
	Support, U.S. Gov't, 15032529},
  url = {http://dx.doi.org/10.1021/ci034220i}
}

@article{Saeh2005Lead,
  author = {Saeh, J. and Lyne, P. and Takasaki, B. and Cosgrove, D.},
  title = {Lead hopping using {SVM} and 3{D} pharmacophore fingerprints.},
  journal = {J {C}hem {I}nf {M}odel},
  year = {2005},
  volume = {45},
  pages = {1122--1133},
  number = {4},
  month = {Jul},
  abstract = {The combination of 3{D} pharmacophore fingerprints and the support
	vector machine classification algorithm has been used to generate
	robust models that are able to classify compounds as active or inactive
	in a number of {G}-protein-coupled receptor assays. {T}he models
	have been tested against progressively more challenging validation
	sets where steps are taken to ensure that compounds in the validation
	set are chemically and structurally distinct from the training set.
	{I}n the most challenging example, we simulate a lead-hopping experiment
	by excluding an entire class of compounds (defined by a core substructure)
	from the training set. {T}he left-out active compounds comprised
	approximately 40\% of the actives. {T}he model trained on the remaining
	compounds is able to recall 75\% of the actives from the "new" lead
	series while correctly classifying >99\% of the 5000 inactives included
	in the validation set.},
  doi = {10.1021/ci049732r},
  pdf = {../local/Saeh2005Lead.pdf},
  file = {Saeh2005Lead.pdf:local/Saeh2005Lead.pdf:PDF},
  keywords = {biosvm, chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci049732r}
}

@article{Saetrom2004Predicting,
  author = {Saetrom, P.},
  title = {Predicting the efficacy of short oligonucleotides in antisense and
	{RNA}i experiments with boosted genetic programming},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {3055--3063},
  number = {17},
  abstract = {Motivation: {B}oth small interfering {RNA}s (si{RNA}s) and antisense
	oligonucleotides can selectively block gene expression. {A}lthough
	the two methods rely on different cellular mechanisms, these methods
	share the common property that not all oligonucleotides (oligos)
	are equally effective. {T}hat is, if m{RNA} target sites are picked
	at random, many of the antisense or si{RNA} oligos will not be effective.
	{A}lgorithms that can reliably predict the efficacy of candidate
	oligos can greatly reduce the cost of knockdown experiments, but
	previous attempts to predict the efficacy of antisense oligos have
	had limited success. {M}achine learning has not previously been used
	to predict si{RNA} efficacy. {R}esults: {W}e develop a genetic programming
	based prediction system that shows promising results on both antisense
	and si{RNA} efficacy prediction. {W}e train and evaluate our system
	on a previously published database of antisense efficacies and our
	own database of si{RNA} efficacies collected from the literature.
	{T}he best models gave an overall correlation between predicted and
	observed efficacy of 0.46 on both antisense and si{RNA} data. {A}s
	a comparison, the best correlations of support vector machine classifiers
	trained on the same data were 0.40 and 0.30, respectively. {A}vailability:
	{T}he prediction system uses proprietary hardware and is available
	for both commercial and strategic academic collaborations. {T}he
	si{RNA} database is available upon request.},
  doi = {10.1093/bioinformatics/bth364},
  pdf = {../local/Saetrom2004Predicting.pdf},
  file = {Saetrom2004Predicting.pdf:local/Saetrom2004Predicting.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/17/3055}
}

@article{Saetrom2004comparison,
  author = {Saetrom, P. and Sn{\o}ve, O.},
  title = {A comparison of si{RNA} efficacy predictors.},
  journal = {Biochem. {B}iophys. {R}es. {C}ommun.},
  year = {2004},
  volume = {321},
  pages = {247--253},
  number = {1},
  month = {Aug},
  abstract = {Short interfering {RNA} (si{RNA}) efficacy prediction algorithms aim
	to increase the probability of selecting target sites that are applicable
	for gene silencing by {RNA} interference. {M}any algorithms have
	been published recently, and they base their predictions on such
	different features as duplex stability, sequence characteristics,
	m{RNA} secondary structure, and target site uniqueness. {W}e compare
	the performance of the algorithms on a collection of publicly available
	si{RNA}s. {F}irst, we show that our regularized genetic programming
	algorithm {GP}boost appears to have a higher and more stable performance
	than other algorithms on the collected datasets. {S}econd, several
	algorithms gave close to random classification on unseen data, and
	only {GP}boost and three other algorithms have a reasonably high
	and stable performance on all parts of the dataset. {T}hird, the
	results indicate that the si{RNA}s' sequence is sufficient input
	to si{RNA} efficacy algorithms, and that other features that have
	been suggested to be important may be indirectly captured by the
	sequence.},
  doi = {10.1016/j.bbrc.2004.06.116},
  keywords = {sirna},
  pii = {S0006-291X(04)01394-4},
  url = {http://dx.doi.org/10.1016/j.bbrc.2004.06.116}
}

@article{Saeys2004Feature,
  author = {Saeys, Y. and Degroeve, S. and Aeyels, D. and Rouz{\'e}, P. and Van
	de Peer, Y.},
  title = {Feature selection for splice site prediction: {A} new method using
	{EDA}-based feature ranking},
  journal = {B{MC} {B}ioinformatics},
  year = {2004},
  volume = {5},
  pages = {64},
  abstract = {Background {T}he identification of relevant biological features in
	large and complex datasets is an important step towards gaining insight
	in the processes underlying the data. {O}ther advantages of feature
	selection include the ability of the classification system to attain
	good or even better solutions using a restricted subset of features,
	and a faster classification. {T}hus, robust methods for fast feature
	selection are of key importance in extracting knowledge from complex
	biological data. {R}esults {I}n this paper we present a novel method
	for feature subset selection applied to splice site prediction, based
	on estimation of distribution algorithms, a more general framework
	of genetic algorithms. {F}rom the estimated distribution of the algorithm,
	a feature ranking is derived. {A}fterwards this ranking is used to
	iteratively discard features. {W}e apply this technique to the problem
	of splice site prediction, and show how it can be used to gain insight
	into the underlying biological process of splicing. {C}onclusion
	{W}e show that this technique proves to be more robust than the traditional
	use of estimation of distribution algorithms for feature selection:
	instead of returning a single best subset of features (as they normally
	do) this method provides a dynamical view of the feature selection
	process, like the traditional sequential wrapper methods. {H}owever,
	the method is faster than the traditional techniques, and scales
	better to datasets described by a large number of features.},
  doi = {10.1186/1471-2105-5-64},
  pdf = {../local/Saeys2004Feature.pdf},
  file = {Saeys2004Feature.pdf:local/Saeys2004Feature.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@article{Saeys2003Fast,
  author = {Saeys, Y. and Degroeve, S. and Aeyels, D. and Van de Peer, Y. and
	Rouz{\'e}, P.},
  title = {Fast feature selection using a simple estimation of distribution
	algorithm: a case study on splice site prediction},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {ii179--ii188},
  number = {Suppl. 2},
  abstract = {Motivation: {F}eature subset selection is an important preprocessing
	step for classification. {I}n biology, where structures or processes
	are described by a large number of features, the elimination of irrelevant
	and redundant information in a reasonable amount of time has a number
	of advantages. {I}t enables the classification system to achieve
	good or even better solutions with a restricted subset of features,
	allows for a faster classification, and it helps the human expert
	focus on a relevant subset of features, hence providing useful biological
	knowledge. {R}esults: {W}e present a heuristic method based on {E}stimation
	of {D}istribution {A}lgorithms to select relevant subsets of features
	for splice site prediction in {A}rabidopsis thaliana. {W}e show that
	this method performs a fast detection of relevant feature subsets
	using the technique of constrained feature subsets. {C}ompared to
	the traditional greedy methods the gain in speed can be up to one
	order of magnitude, with results being comparable or even better
	than the greedy methods. {T}his makes it a very practical solution
	for classification tasks that can be solved using a relatively small
	amount of discriminative features (or feature dependencies), but
	where the initial set of potential discriminative features is rather
	large. {K}eywords: {M}achine {L}earning, {F}eature {S}ubset {S}election,
	{E}stimation of {D}istribution {A}lgorithms, {S}plice {S}ite {P}rediction.
	{C}ontact: yvsae@gengenp.rug.ac.be},
  pdf = {../local/Saeys2003Fast.pdf},
  file = {Saeys2003Fast.pdf:local/Saeys2003Fast.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/suppl_2/ii179}
}

@article{Saeys2007review,
  author = {Saeys, Y. and Inza, I. and Larra{\~n}aga, P.},
  title = {A review of feature selection techniques in bioinformatics},
  journal = {Bioinformatics},
  publisher = {Oxford Univ Press},
  year = {2007},
  volume = {23},
  number = {19},
  pages = {2507--2517}
}

@inproceedings{Saigo2006linear,
  author = {Saigo, H. and Kadowaki, T. and Tsuda, K.},
  title = {A linear programming approach for molecular {QSAR} analysis},
  editor = {G{\"a}rtner, T. and Garriga, G. C. and Meinl, T.},
  booktitle = {International workshop on mining and learning with graphs (MLG)},
  year = {2006},
  pages = {85--96},
  file = {Saigo2006linear.pdf:Saigo2006linear.pdf:PDF},
  pdf = {../local/Saigo2006linear.pdf},
  timestamp = {2009.09.27},
  owner = {jp}
}

@article{Saigo2009gBoost,
  author = {Saigo, H. and Nowozin, S. and Kadowaki, T. and Kudo, T. and Tsuda,
	K.},
  title = {{gBoost}: a mathematical programming approach to graph classification
	and regression},
  journal = {Mach. Learn.},
  year = {2009},
  volume = {75},
  number = {1},
  pages = {69--89},
  doi = {10.1007/s10994-008-5089-z},
  url = {http://dx.doi.org/10.1007/s10994-008-5089-z},
  file = {Saigo2009gBoost.pdf:Saigo2009gBoost.pdf:PDF},
  pdf = {../local/Saigo2009gBoost.pdf},
  timestamp = {2009.09.27},
  owner = {jp}
}

@article{Saigo2004Protein,
  author = {Saigo, H. and Vert, J.-P. and Ueda, N. and Akutsu, T.},
  title = {Protein homology detection using string alignment kernels},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1682--1689},
  number = {11},
  abstract = {Motivation: {R}emote homology detection between protein sequences
	is a central problem in computational biology. {D}iscriminative methods
	involving support vector machines ({SVM}s) are currently the most
	effective methods for the problem of superfamily recognition in the
	{S}tructural {C}lassification {O}f {P}roteins ({SCOP}) database.
	{T}he performance of {SVM}s depends critically on the kernel function
	used to quantify the similarity between sequences. {R}esults: {W}e
	propose new kernels for strings adapted to biological sequences,
	which we call local alignment kernels. {T}hese kernels measure the
	similarity between two sequences by summing up scores obtained from
	local alignments with gaps of the sequences. {W}hen tested in combination
	with {SVM} on their ability to recognize {SCOP} superfamilies on
	a benchmark dataset, the new kernels outperform state-of-the-art
	methods for remote homology detection. {A}vailability: {S}oftware
	and data available upon request.},
  pdf = {../local/Saigo2004Protein.pdf},
  file = {Saigo2004Protein.pdf:local/Saigo2004Protein.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/11/1682}
}

@article{Saito1985ETL9b,
  author = {T. Saito and H. Yamada and K. Yamamoto},
  title = {On the data base {ETL9B} of handprinted characters in {JIS} {Chinese} characters
	and its analysis},
  journal = {IEICE Trans},
  year = {1985},
  volume = {68}
}

@book{Saitoh1988Theory,
  author = {S. Saitoh},
  title = {Theory of reproducing {K}ernels and its applications},
  publisher = {Longman Scientific \& Technical},
  address = {Harlow, UK},
  year = {1988},
  subject = {kernel}
}

@article{Salgado2006RegulonDB,
  author = {Salgado, H. and Gama-Castro, S. and Peralta-Gil, M. and D{\'i}az-Peredo,
	E. and S{\'a}nchez-Solano, F. and Santos-Zavaleta, A. and Mart{\'i}nez-Flores,
	I. and Jim{\'e}nez-Jacinto, V. and Bonavides-Mart{\'i}nez, C. and
	Segura-Salazar, J. and Mart{\'i}nez-Antonio, A. and Collado-Vides,
	J.},
  title = {{RegulonDB} (version 5.0): {Escherichia coli K-12} transcriptional
	regulatory network, operon organization, and growth conditions.},
  journal = {Nucleic Acids Res.},
  year = {2006},
  volume = {34},
  pages = {D394--D397},
  number = {Database issue},
  month = {Jan},
  abstract = {RegulonDB is the internationally recognized reference database of
	Escherichia coli K-12 offering curated knowledge of the regulatory
	network and operon organization. It is currently the largest electronically-encoded
	database of the regulatory network of any free-living organism. We
	present here the recently launched RegulonDB version 5.0 radically
	different in content, interface design and capabilities. Continuous
	curation of original scientific literature provides the evidence
	behind every single object and feature. This knowledge is complemented
	with comprehensive computational predictions across the complete
	genome. Literature-based and predicted data are clearly distinguished
	in the database. Starting with this version, RegulonDB public releases
	are synchronized with those of EcoCyc since our curation supports
	both databases. The complex biology of regulation is simplified in
	a navigation scheme based on three major streams: genes, operons
	and regulons. Regulatory knowledge is directly available in every
	navigation step. Displays combine graphic and textual information
	and are organized allowing different levels of detail and biological
	context. This knowledge is the backbone of an integrated system for
	the graphic display of the network, graphic and tabular microarray
	comparisons with curated and predicted objects, as well as predictions
	across bacterial genomes, and predicted networks of functionally
	related gene products. Access RegulonDB at http://regulondb.ccg.unam.mx.},
  doi = {10.1093/nar/gkj156},
  institution = {Program of Computational Genomics, Centro de Ciencias Gen{\'o}micas,
	Universidad Nacional Aut{\'o}noma de M{\'e}xico, A.P. 565-A. Cuernavaca,
	Morelos 62100, Mexico.},
  keywords = {Databases, Genetic; Escherichia coli K12; Gene Expression Regulation,
	Bacterial; Genome, Bacterial; Internet; Operon; Regulon; Software;
	Transcription, Genetic; User-Computer Interface},
  owner = {fantine},
  pii = {34/suppl_1/D394},
  pmid = {16381895},
  timestamp = {2008.02.04},
  url = {http://dx.doi.org/10.1093/nar/gkj156}
}

@article{Salim2003Combination,
  author = {N. Salim and J. Holliday and P. Willett},
  title = {{C}ombination of fingerprint-based similarity coefficients using
	data fusion.},
  journal = {J Chem Inf Comput Sci},
  year = {2003},
  volume = {43},
  number = {2},
  pages = {435--442},
  doi = {10.1021/ci025596j},
  pmid = {12653506},
  url = {http://dx.doi.org/10.1021/ci025596j},
  abstract = {Many different types of similarity coefficients have been described
	in the literature. Since different coefficients take into account
	different characteristics when assessing the degree of similarity
	between molecules, it is reasonable to combine them to further optimize
	the measures of similarity between molecules. This paper describes
	experiments in which data fusion is used to combine several binary
	similarity coefficients to get an overall estimate of similarity
	for searching databases of bioactive molecules. The results show
	that search performances can be improved by combining coefficients
	with little extra computational cost. However, there is no single
	combination which gives a consistently high performance for all search
	types.},
  keywords = {80 and over, Acid-Base Imbalance, Acute, Acute Disease, Adolescent,
	Adult, African Americans, Aged, Anemia, Animals, Anti-HIV Agents,
	Anti-Infective Agents, Antibiotics, Antibodies, Antineoplastic, Antineoplastic
	Agents, Antineoplastic Combined Chemotherapy Protocols, Antitubercular
	Agents, Aorta, Asparaginase, Autoimmune, B-Cell, Bangladesh, Bicarbonates,
	Biological Markers, Blood Glucose, California, Camptothecin, Cellulitis,
	Chorionic Gonadotropin, Chronic Disease, Ciprofloxacin, Clinical
	Protocols, Colorectal Neoplasms, Combination, Comparative Study,
	Daunorubicin, Decision Trees, Dexamethasone, Diabetes Mellitus, Dideoxynucleosides,
	Directly Observed Therapy, Disease Transmission, Drug Administration
	Schedule, Drug Resistance, Drug Therapy, English Abstract, Female,
	Fluorouracil, Follow-Up Studies, Glucose Tolerance Test, Glucosephosphate
	Dehydrogenase, Glyburide, HIV Infections, HIV-1, Health Planning,
	Health Resources, Helminth, Hemolysis, Hemolytic, Hormonal, Hospital
	Mortality, Human, Humans, Hypoglycemic Agents, Immunoglobulin M,
	In Vitro, Incidence, Indinavir, Insulin, Intensive Care Units, Interstitial,
	Lactates, Leucovorin, Leukemia, Male, Maternal Age, Middle Aged,
	Motor Activity, Multidrug-Resistant, Mutation, Nephritis, Non-U.S.
	Gov't, Organoplatinum Compounds, Pennsylvania, Phytotherapy, Plant
	Extracts, Plant Leaves, Population Dynamics, Potassium Channels,
	Prednisone, Pregnancy, Pregnancy Outcome, Prenatal, Prenatal Care,
	Progesterone, Prognosis, Prospective Studies, Pulmonary, Rabbits,
	Randomized Controlled Trials, Rats, Research Support, Retrospective
	Studies, Risk Assessment, Scalp Dermatoses, Schistosomiasis japonica,
	Severity of Illness Index, Spondylarthropathies, Streptozocin, Survival
	Rate, Trauma Centers, Trauma Severity Indices, Tubal, Tuberculosis,
	Type 2, Ultrasonography, Vertical, Vincristine, Viral, Viral Load,
	Wistar, Wounds and Injuries, Ziziphus, beta Subunit, 12653506},
  timestamp = {2006.09.01},
  owner = {mahe}
}

@article{Salomon2006Predicting,
  author = {Salomon, J. and Flower, D. R.},
  title = {{P}redicting {C}lass {II} {MHC}-{P}eptide binding: a kernel based
	approach using similarity scores.},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {501},
  doi = {10.1186/1471-2105-7-501},
  pii = {1471-2105-7-501},
  pmid = {17105666},
  url = {http://dx.doi.org/10.1186/1471-2105-7-501},
  abstract = {BACKGROUND: Modelling the interaction between potentially antigenic
	peptides and Major Histocompatibility Complex (MHC) molecules is
	a key step in identifying potential T-cell epitopes. For Class II
	MHC alleles, the binding groove is open at both ends, causing ambiguity
	in the positional alignment between the groove and peptide, as well
	as creating uncertainty as to what parts of the peptide interact
	with the MHC. Moreover, the antigenic peptides have variable lengths,
	making naive modelling methods difficult to apply. This paper introduces
	a kernel method that can handle variable length peptides effectively
	by quantifying similarities between peptide sequences and integrating
	these into the kernel. RESULTS: The kernel approach presented here
	shows increased prediction accuracy with a significantly higher number
	of true positives and negatives on multiple MHC class II alleles,
	when testing data sets from MHCPEP 1, MCHBN 2, and MHCBench 3. Evaluation
	by cross validation, when segregating binders and non-binders, produced
	an average of 0.824 AROC for the MHCBench data sets (up from 0.756),
	and an average of 0.96 AROC for multiple alleles of the MHCPEP database.
	CONCLUSION: The method improves performance over existing state-of-the-art
	methods of MHC class II peptide binding predictions by using a custom,
	knowledge-based representation of peptides. Similarity scores, in
	contrast to a fixed-length, pocket-specific representation of amino
	acids, provide a flexible and powerful way of modelling MHC binding,
	and can easily be applied to other dynamic sequence problems.},
  keywords = {Amino Acid, Binding Sites, Computational Biology, Databases, Epitope
	Mapping, Genetic, HLA-A Antigens, HLA-DR Antigens, Histocompatibility
	Antigens Class II, Humans, Peptides, Protein, Protein Binding, Protein
	Conformation, ROC Curve, Reproducibility of Results, Sequence Alignment,
	Sequence Analysis, Sequence Homology, 17105666},
  timestamp = {2007.01.25}
}

@article{Sample2002Using,
  author = {Pamela A Sample and Michael H Goldbaum and Kwokleung Chan and Catherine
	Boden and Te-Won Lee and Christiana Vasile and Andreas G Boehm and
	Terrence Sejnowski and Chris A Johnson and Robert N Weinreb},
  title = {Using machine learning classifiers to identify glaucomatous change
	earlier in standard visual fields.},
  journal = {Invest {O}phthalmol {V}is {S}ci},
  year = {2002},
  volume = {43},
  pages = {2660--2665},
  number = {8},
  month = {Aug},
  abstract = {P{URPOSE}: {T}o compare the ability of several machine learning classifiers
	to predict development of abnormal fields at follow-up in ocular
	hypertensive ({OHT}) eyes that had normal visual fields in baseline
	examination. {METHODS}: {T}he visual fields of 114 eyes of 114 patients
	with {OHT} with four or more visual field tests with standard automated
	perimetry over three or more years and for whom stereophotographs
	were available were assessed. {T}he mean (+/-{SD}) number of visual
	field tests was 7.89 +/- 3.04. {T}he mean number of years covered
	(+/-{SD}) was 5.92 +/- 2.34 (range, 2.81-11.77). {F}ields were classified
	as normal or abnormal based on {S}tatpac-like methods ({H}umphrey
	{I}nstruments, {D}ublin, {CA}) and by several machine learning classifiers.
	{T}he machine learning classifiers were two types of support vector
	machine ({SVM}), a mixture of {G}aussian ({M}o{G}) classifier, a
	constrained {M}o{G}, and a mixture of generalized {G}aussian ({MGG}).
	{S}pecificity was set to 96\% for all classifiers, using data from
	94 normal eyes evaluated longitudinally. {S}pecificity cutoffs required
	confirmation of abnormality. {RESULTS}: {T}hirty-two percent (36/114)
	of the eyes converted to abnormal fields during follow-up based on
	the {S}tatpac-like methods. {A}ll 36 were identified by at least
	one machine classifier. {I}n nearly all cases, the machine learning
	classifiers predicted the confirmed abnormality, on average, 3.92
	+/- 0.55 years earlier than traditional {S}tatpac-like methods. {CONCLUSIONS}:
	{M}achine learning classifiers can learn complex patterns and trends
	in data and adapt to create a decision surface without the constraints
	imposed by statistical classifiers. {T}his adaptation allowed the
	machine learning classifiers to identify abnormality in visual field
	converts much earlier than the traditional methods.}
}

@article{Sanchez-Carbayo2003Gene,
  author = {Marta Sanchez-Carbayo and Nicholas D Socci and Juan Jose Lozano and
	Wentian Li and Elizabeth Charytonowicz and Thomas J Belbin and Michael
	B Prystowsky and Angel R Ortiz and Geoffrey Childs and Carlos Cordon-Cardo},
  title = {Gene discovery in bladder cancer progression using c{DNA} microarrays.},
  journal = {Am. {J}. {P}athol.},
  year = {2003},
  volume = {163},
  pages = {505--516},
  number = {2},
  month = {Aug},
  abstract = {To identify gene expression changes along progression of bladder cancer,
	we compared the expression profiles of early-stage and advanced bladder
	tumors using c{DNA} microarrays containing 17,842 known genes and
	expressed sequence tags. {T}he application of bootstrapping techniques
	to hierarchical clustering segregated early-stage and invasive transitional
	carcinomas into two main clusters. {M}ultidimensional analysis confirmed
	these clusters and more importantly, it separated carcinoma in situ
	from papillary superficial lesions and subgroups within early-stage
	and invasive tumors displaying different overall survival. {A}dditionally,
	it recognized early-stage tumors showing gene profiles similar to
	invasive disease. {D}ifferent techniques including standard t-test,
	single-gene logistic regression, and support vector machine algorithms
	were applied to identify relevant genes involved in bladder cancer
	progression. {C}ytokeratin 20, neuropilin-2, p21, and p33{ING}1 were
	selected among the top ranked molecular targets differentially expressed
	and validated by immunohistochemistry using tissue microarrays (n
	= 173). {T}heir expression patterns were significantly associated
	with pathological stage, tumor grade, and altered retinoblastoma
	({RB}) expression. {M}oreover, p33{ING}1 expression levels were significantly
	associated with overall survival. {A}nalysis of the annotation of
	the most significant genes revealed the relevance of critical genes
	and pathways during bladder cancer progression, including the overexpression
	of oncogenic genes such as {DEK} in superficial tumors or immune
	response genes such as {C}d86 antigen in invasive disease. {G}ene
	profiling successfully classified bladder tumors based on their progression
	and clinical outcome. {T}he present study has identified molecular
	biomarkers of potential clinical significance and critical molecular
	targets associated with bladder cancer progression.},
  pdf = {../local/Sanchez-Carbayo2003Gene.pdf},
  file = {Sanchez-Carbayo2003Gene.pdf:local/Sanchez-Carbayo2003Gene.pdf:PDF},
  keywords = {biosvm},
  url = {http://ajp.amjpathol.org/cgi/content/abstract/163/2/505}
}

@article{Sanguinetti2006hERG,
  author = {Sanguinetti, M. C. and Tristani-Firouzi, M.},
  title = {h{ERG} potassium channels and cardiac arrhythmia.},
  journal = {Nature},
  year = {2006},
  volume = {440},
  pages = {463--469},
  number = {7083},
  month = {Mar},
  abstract = {hERG potassium channels are essential for normal electrical activity
	in the heart. Inherited mutations in the HERG gene cause long QT
	syndrome, a disorder that predisposes individuals to life-threatening
	arrhythmias. Arrhythmia can also be induced by a blockage of hERG
	channels by a surprisingly diverse group of drugs. This side effect
	is a common reason for drug failure in preclinical safety trials.
	Insights gained from the crystal structures of other potassium channels
	have helped our understanding of the block of hERG channels and the
	mechanisms of gating.},
  doi = {10.1038/nature04710},
  pdf = {../local/Sanguinetti2006hERG.pdf},
  file = {Sanguinetti2006hERG.pdf:local/Sanguinetti2006hERG.pdf:PDF},
  keywords = {herg},
  pii = {nature04710},
  pmid = {16554806},
  timestamp = {2006.10.05},
  url = {http://dx.doi.org/10.1038/nature04710}
}

@article{Sanguinetti2005Predicting,
  author = {Sanguinetti, M. C. and Mitcheson, J. S.},
  title = {{P}redicting drug-h{ERG} channel interactions that cause acquired
	long {QT} syndrome.},
  journal = {Trends Pharmacol. Sci.},
  year = {2005},
  month = {Mar},
  volume = {26},
  number = {3},
  pages = {119--124},
  doi = {10.1016/j.tips.2005.01.003},
  pii = {S0165-6147(05)00004-0},
  pmid = {15749156},
  url = {http://dx.doi.org/10.1016/j.tips.2005.01.003},
  abstract = {Avoiding drug-induced cardiac arrhythmia is recognized as a major
	hurdle in the successful development of new drugs. The most common
	problem is acquired long QT syndrome caused by drugs that block human
	ether-a-go-go-related-gene (hERG) K(+) channels, delay cardiac repolarization
	and increase the risk of torsades de pointes arrhythmia (TdP). Not
	all hERG channel blockers induce TdP because they can also modulate
	other channels that counteract the hERG channel-mediated effect.
	However, hERG channel blockade is an important indicator of potential
	pro-arrhythmic liability. The molecular determinants of hERG channel
	blockade have been defined using a site-directed mutagenesis approach.
	Combined with pharmacophore models, knowledge of the drug-binding
	site of hERG channels will facilitate in silico design efforts to
	discover drugs that are devoid of this rare, but potentially lethal,
	side-effect.},
  keywords = {herg},
  timestamp = {2006.10.05}
}

@inproceedings{Sanjiv2003Discriminative,
  author = {Kumar, S. and Hebert, M.},
  title = {Discriminative {R}andom {F}ields: {A} {D}iscriminative {F}ramework
	for {C}ontextual {I}nteraction in {C}lassification},
  booktitle = {Proceedings of the {N}inth {IEEE} {I}nternational {C}onference on
	{C}omputer {V}ision},
  year = {2003},
  pages = {1150},
  publisher = {IEEE Computer Society},
  abstract = {In this work we present {D}iscriminative {R}andom {F}ields ({DRF}s),
	a discriminative framework for the classification of image regions
	by incorporating neighborhood interactions in the labels as well as
	the observed data. {T}he discriminative random fields offer several
	advantages over the conventional {M}arkov {R}andom {F}ield ({MRF})
	framework. {F}irst, the {DRF}s allow to relax the strong assumption
	of conditional independence of the observed data generally used in the
	{MRF} framework for tractability. {T}his assumption is too restrictive
	for a large number of applications in vision. {S}econd, the {DRF}s
	derive their classification power by exploiting the probabilistic
	discriminative models instead of the generative models used in the
	{MRF} framework. {F}inally, all the parameters in the {DRF} model
	are estimated simultaneously from the training data unlike the {MRF}
	framework where likelihood parameters are usually learned separately from
	the field parameters. {W}e illustrate the advantages of the {DRF}s
	over the {MRF} framework in an application of man-made structure detection
	in natural images taken from the {C}orel database.},
  owner = {vert}
}

@article{Sarda2005pSLIP,
  author = {Deepak Sarda and Gek Huey Chua and Kuo-Bin Li and Arun Krishnan},
  title = {p{SLIP}: {SVM} based protein subcellular localization prediction
	using multiple physicochemical properties.},
  journal = {B{MC} {B}ioinformatics},
  year = {2005},
  month = {Jun},
  volume = {6},
  number = {1},
  pages = {152},
  doi = {10.1186/1471-2105-6-152},
  pii = {1471-2105-6-152},
  url = {http://dx.doi.org/10.1186/1471-2105-6-152},
  abstract = {B{ACKGROUND}: {P}rotein subcellular localization is an important determinant
	of protein function and hence, reliable methods for prediction of
	localization are needed. {A} number of prediction algorithms have
	been developed based on amino acid compositions or on the {N}-terminal
	characteristics (signal peptides) of proteins. {H}owever, such approaches
	lead to a loss of contextual information. {M}oreover, where information
	about the physicochemical properties of amino acids has been used,
	the methods employed to exploit that information are less than optimal
	and could use the information more effectively. {RESULTS}: {I}n this
	paper, we propose a new algorithm called p{SLIP} which uses {S}upport
	{V}ector {M}achines ({SVM}s) in conjunction with multiple physicochemical
	properties of amino acids to predict protein subcellular localization
	in eukaryotes across six different locations, namely, chloroplast,
	cytoplasmic, extracellular, mitochondrial, nuclear and plasma membrane.
	{T}he algorithm was applied to the dataset provided by {P}ark and
	{K}anehisa and we obtained prediction accuracies for the different
	classes ranging from 87.7\%-97.0\% with an overall accuracy of 93.1\%.
	{CONCLUSIONS}: {T}his study presents a physicochemical property based
	protein localization prediction algorithm. {U}nlike other algorithms,
	contextual information is preserved by dividing the protein sequences
	into clusters. {T}he prediction accuracy shows an improvement over
	other algorithms based on various types of amino acid composition
	(single, pair and gapped pair). {W}e have also implemented a web
	server to predict protein localization across the six classes (available
	at http://pslip.bii.a-star.edu.sg).},
  keywords = {biosvm},
  file = {Sarda2005pSLIP.pdf:local/Sarda2005pSLIP.pdf:PDF},
  pdf = {../local/Sarda2005pSLIP.pdf}
}

@article{Sassi2005automated,
  author = {Alexander P Sassi and Frank Andel and Hans-Marcus L Bitter and Michael
	P S Brown and Robert G Chapman and Jeraldine Espiritu and Alfred
	C Greenquist and Isabelle Guyon and Mariana Horchi-Alegre and Kathy
	L Stults and Ann Wainright and Jonathan C Heller and John T Stults},
  title = {An automated, sheathless capillary electrophoresis-mass spectrometry
	platform for discovery of biomarkers in human serum.},
  journal = {Electrophoresis},
  year = {2005},
  volume = {26},
  pages = {1500--1512},
  number = {7-8},
  month = {Apr},
  abstract = {A capillary electrophoresis-mass spectrometry ({CE}-{MS}) method has
	been developed to perform routine, automated analysis of low-molecular-weight
	peptides in human serum. {T}he method incorporates transient isotachophoresis
	for in-line preconcentration and a sheathless electrospray interface.
	{T}o evaluate the performance of the method and demonstrate the utility
	of the approach, an experiment was designed in which peptides were
	added to sera from individuals at each of two different concentrations,
	artificially creating two groups of samples. {T}he {CE}-{MS} data
	from the serum samples were divided into separate training and test
	sets. {A} pattern-recognition/feature-selection algorithm based on
	support vector machines was used to select the mass-to-charge (m/z)
	values from the training set data that distinguished the two groups
	of samples from each other. {T}he added peptides were identified
	correctly as the distinguishing features, and pattern recognition
	based on these peptides was used to assign each sample in the independent
	test set to its respective group. {A} twofold difference in peptide
	concentration could be detected with statistical significance (p-value
	< 0.0001). {T}he accuracy of the assignment was 95\%, demonstrating
	the utility of this technique for the discovery of patterns of biomarkers
	in serum.},
  doi = {10.1002/elps.200410127},
  pdf = {../local/Sassi2005automated.pdf},
  file = {Sassi2005automated.pdf:local/Sassi2005automated.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under
	Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain
	Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic
	Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic,
	Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis,
	Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male,
	Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical
	Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence,
	P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15765480},
  url = {http://dx.doi.org/10.1002/elps.200410127}
}

@article{Satzinger2008Theodor,
  author = {Helga Satzinger},
  title = {{T}heodor and {M}arcella {B}overi: chromosomes and cytoplasm in heredity
	and development},
  journal = {Nat. Rev. Genet.},
  year = {2008},
  volume = {9},
  pages = {231--238},
  keywords = {csbcbook}
}

@article{Saul2003Think,
  author = {Saul, L. K. and Roweis, S. T.},
  title = {Think {G}lobally, {F}it {L}ocally: {U}nsupervised {L}earning of {L}ow
	{D}imensional {M}anifolds},
  journal = {J. {M}ach. {L}earn. {R}es.},
  year = {2003},
  volume = {4},
  pages = {119--155},
  abstract = {The problem of dimensionality reduction arises in many fields of information
	processing, including machine learning, data compression, scientific
	visualization, pattern recognition, and neural computation. {H}ere
	we describe locally linear embedding ({LLE}), an unsupervised learning
	algorithm that computes low dimensional, neighborhood preserving
	embeddings of high dimensional data. {T}he data, assumed to be sampled
	from an underlying manifold, are mapped into a single global coordinate
	system of lower dimensionality. {T}he mapping is derived from the
	symmetries of locally linear reconstructions, and the actual computation
	of the embedding reduces to a sparse eigenvalue problem. {N}otably,
	the optimizations in {LLE}---though capable of generating highly
	nonlinear embeddings---are simple to implement, and they do not involve
	local minima. {I}n this paper, we describe the implementation of
	the algorithm in detail and discuss several extensions that enhance
	its performance. {W}e present results of the algorithm applied to
	data sampled from known manifolds, as well as to collections of images
	of faces, lips, and handwritten digits. {T}hese examples are used
	to provide extensive illustrations of the algorithm's performance---both
	successes and failures---and to relate the algorithm to previous
	and ongoing work in nonlinear dimensionality reduction.},
  pdf = {../local/saul03a.pdf},
  file = {saul03a.pdf:http\://www.jmlr.org/papers/volume4/saul03a/saul03a.pdf:PDF},
  keywords = {dimred},
  url = {http://www.jmlr.org/papers/v4/saul03a.html}
}

@inproceedings{Saupe20013D,
  author = {Saupe, D. and Vranic, D. V.},
  title = {{3D} Model Retrieval with Spherical Harmonics and Moments},
  booktitle = {Proceedings of the 23rd DAGM-Symposium on Pattern Recognition},
  year = {2001},
  pages = {392--397},
  address = {London, UK},
  publisher = {Springer-Verlag},
  isbn = {3-540-42596-9}
}

@article{Sawyers2008cancer,
  author = {Sawyers, C. L.},
  title = {The cancer biomarker problem.},
  journal = {Nature},
  year = {2008},
  volume = {452},
  pages = {548--552},
  number = {7187},
  month = apr,
  abstract = {Genomic technologies offer the promise of a comprehensive understanding
	of cancer. These technologies are being used to characterize tumours
	at the molecular level, and several clinical successes have shown
	that such information can guide the design of drugs targeted to a
	relevant molecule. One of the main barriers to further progress is
	identifying the biological indicators, or biomarkers, of cancer that
	predict who will benefit from a particular targeted therapy.},
  doi = {10.1038/nature06913},
  pdf = {../local/Sawyers2008cancer.pdf},
  file = {Sawyers2008cancer.pdf:Sawyers2008cancer.pdf:PDF},
  institution = {Howard Hughes Medical Institute, Human Oncology and Pathogenesis
	Program, Memorial Sloan-Kettering Cancer Center, 1275 York Avenue,
	New York, New York 10065, USA.},
  keywords = {csbcbook-ch3, csbcbook},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature06913},
  pmid = {18385728},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1038/nature06913}
}

@article{Saxena2003Comparison,
  author = {Saxena, A. K. and Prathipati, P.},
  title = {Comparison of {MLR}, {PLS} and {GA-MLR} in {QSAR} analysis},
  journal = {SAR. QSAR. Environ. Res.},
  year = {2003},
  volume = {14},
  pages = {433--445},
  owner = {mahe},
  timestamp = {2006.09.07}
}

@article{Scacheri2004Short,
  author = {Scacheri, P. C. and Rozenblatt-Rosen, O. and Caplen, N. J. and Wolfsberg,
	T. G. and Umayam, L. and Lee, J. C. and Hughes, C. M. and Shanmugam,
	K. S. and Bhattacharjee, A. and Meyerson, M. and Collins, F. S.},
  title = {Short interfering {RNA}s can induce unexpected and divergent changes
	in the levels of untargeted proteins in mammalian cells.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2004},
  volume = {101},
  pages = {1892--1897},
  number = {7},
  month = feb,
  abstract = {R{NA} interference ({RNA}i) mediated by short interfering {RNA}s (si{RNA}s)
	is a widely used method to analyze gene function. {T}o use {RNA}i
	knockdown accurately to infer gene function, it is essential to determine
	the specificity of si{RNA}-mediated {RNA}i. {W}e have assessed the
	specificity of 10 different si{RNA}s corresponding to the {MEN}1
	gene by examining the expression of two additional genes, {TP}53
	(p53) and {CDKN}1{A} (p21), which are considered functionally unrelated
	to menin but are sensitive markers of cell state. {MEN}1 {RNA} and
	corresponding protein levels were all reduced after si{RNA} transfection
	of {H}e{L}a cells, although the degree of inhibition mediated by
	individual si{RNA}s varied. {U}nexpectedly, we observed dramatic
	and significant changes in protein levels of p53 and p21 that were
	unrelated to silencing of the target gene. {T}he modulations in p53
	and p21 levels were not abolished on titration of the si{RNA}s, and
	similar results were obtained in three other cell lines; in none
	of the cell lines tested did we see an effect on the protein levels
	of actin. {T}hese data suggest that si{RNA}s can induce nonspecific
	effects on protein levels that are si{RNA} sequence dependent but
	that these effects may be difficult to detect until genes central
	to a pivotal cellular response, such as p53 and p21, are studied.
	{W}e find no evidence that activation of the double-stranded {RNA}-triggered
	{IFN}-associated antiviral pathways accounts for these effects, but
	we speculate that partial complementary sequence matches to off-target
	genes may result in a micro-{RNA}-like inhibition of translation.},
  doi = {10.1073/pnas.0308698100},
  keywords = {sirna},
  pii = {0308698100},
  url = {http://dx.doi.org/10.1073/pnas.0308698100}
}

@article{Schachtner2008Knowledge-based,
  author = {Schachtner, R. and Lutter, D. and Knollm{\"u}ller, P. and Tom{\'e}, A. M.
	and Theis, F. J. and Schmitz, G. and Stetter, M. and G{\'o}mez Vilda, P.
	and Lang, E. W.},
  title = {Knowledge-based gene expression classification via matrix factorization.},
  journal = {Bioinformatics},
  year = {2008},
  volume = {24},
  pages = {1688--1697},
  number = {15},
  month = aug,
  abstract = {MOTIVATION: Modern machine learning methods based on matrix decomposition
	techniques, like independent component analysis (ICA) or non-negative
	matrix factorization (NMF), provide new and efficient analysis tools
	which are currently explored to analyze gene expression profiles.
	These exploratory feature extraction techniques yield expression
	modes (ICA) or metagenes (NMF). These extracted features are considered
	indicative of underlying regulatory processes. They can as well be
	applied to the classification of gene expression datasets by grouping
	samples into different categories for diagnostic purposes or group
	genes into functional categories for further investigation of related
	metabolic pathways and regulatory networks. RESULTS: In this study
	we focus on unsupervised matrix factorization techniques and apply
	ICA and sparse NMF to microarray datasets. The latter monitor the
	gene expression levels of human peripheral blood cells during differentiation
	from monocytes to macrophages. We show that these tools are able
	to identify relevant signatures in the deduced component matrices
	and extract informative sets of marker genes from these gene expression
	profiles. The methods rely on the joint discriminative power of a
	set of marker genes rather than on single marker genes. With these
	sets of marker genes, corroborated by leave-one-out or random forest
	cross-validation, the datasets could easily be classified into related
	diagnostic categories. The latter correspond to either monocytes
	versus macrophages or healthy vs Niemann Pick C disease patients.},
  doi = {10.1093/bioinformatics/btn245},
  institution = {CIML/Biophysics, University of Regensburg, D-93040 Regensburg, Germany.},
  keywords = {Algorithms; Artificial Intelligence; Gene Expression Profiling; Oligonucleotide
	Array Sequence Analysis; Pattern Recognition, Automated},
  owner = {laurent},
  pii = {btn245},
  pmid = {18535085},
  timestamp = {2008.10.26},
  url = {http://dx.doi.org/10.1093/bioinformatics/btn245}
}

@article{Schaffter2011GeneNetWeaver,
  author = {Schaffter, T. and Marbach, D. and Floreano, D.},
  title = {{GeneNetWeaver}: in silico benchmark generation and performance profiling
	of network inference methods},
  journal = {Bioinformatics},
  year = {2011},
  volume = {27},
  pages = {2263--2270},
  number = {16},
  abstract = {Motivation: Over the last decade, numerous methods have been developed
	for inference of regulatory networks from gene expression data. However,
	accurate and systematic evaluation of these methods is hampered by
	the difficulty of constructing adequate benchmarks and the lack of
	tools for a differentiated analysis of network predictions on such
	benchmarks. Results: Here, we describe a novel and comprehensive method
	for in silico benchmark generation and performance profiling of network
	inference methods available to the community as an open-source software
	called GeneNetWeaver (GNW). In addition to the generation of detailed
	dynamical models of gene regulatory networks to be used as benchmarks,
	GNW provides a network motif analysis that reveals systematic prediction
	errors, thereby indicating potential ways of improving inference
	methods. The accuracy of network inference methods is evaluated using
	standard metrics such as precision-recall and receiver operating
	characteristic curves. We show how GNW can be used to assess the
	performance and identify the strengths and weaknesses of six inference
	methods. Furthermore, we used GNW to provide the international Dialogue
	for Reverse Engineering Assessments and Methods (DREAM) competition
	with three network inference challenges (DREAM3, DREAM4 and DREAM5). Availability:
	GNW is available at http://gnw.sourceforge.net along with its Java
	source code, user manual and supporting data. Supplementary information:
	Supplementary data are available at Bioinformatics online. Contact:
	dario.floreano@epfl.ch},
  doi = {10.1093/bioinformatics/btr373},
  eprint = {http://bioinformatics.oxfordjournals.org/content/27/16/2263.full.pdf+html},
  url = {http://bioinformatics.oxfordjournals.org/content/27/16/2263.abstract}
}

@article{Schalon2008Simple,
  author = {Schalon, C. and Surgand, J.-S. and Kellenberger, E. and Rognan, D.},
  title = {A simple and fuzzy method to align and compare druggable ligand-binding
	sites.},
  journal = {Proteins},
  year = {2008},
  volume = {71},
  pages = {1755--1778},
  number = {4},
  month = jun,
  abstract = {A novel method to measure distances between druggable protein cavities
	is presented. Starting from user-defined ligand binding sites, eight
	topological and physicochemical properties are projected from cavity-lining
	protein residues to an 80 triangle-discretised sphere placed at the
	centre of the binding site, thus defining a cavity fingerprint. Representing
	binding site properties onto a discretised sphere presents many advantages:
	(i) a normalised distance between binding sites of different sizes
	may be easily derived by summing up the normalised differences between
	the 8 computed descriptors; (ii) a structural alignment of two proteins
	is simply done by systematically rotating/translating one mobile
	sphere around one immobile reference; (iii) a certain degree of fuzziness
	in the comparison is reached by projecting global amino acid properties
	(e.g., charge, size, functional groups count, distance to the site
	centre) independently of local rotameric/tautomeric states of cavity-lining
	residues. The method was implemented in a new program (SiteAlign)
	and tested in a number of various scenarios: measuring the distance
	between 376 related active site pairs, computing the cross-similarity
	of members of a protein family, predicting the targets of ligands
	with various promiscuity levels. The proposed method is robust enough
	to detect local similarity among active sites of different sizes,
	to discriminate between protein subfamilies and to recover the known
	targets of promiscuous ligands by virtual screening.},
  doi = {10.1002/prot.21858},
  institution = {Bioinformatics of the Drug, Institut Gilbert Laustriat, CNRS UMR
	7175-LC1, 74 route du Rhin, F-67400 Illkirch.},
  keywords = {Algorithms; Amino Acid Sequence; Binding Sites, drug effects; Drug
	Design; Hydrogen Bonding; Ligands; Protein Binding; Sequence Alignment;
	Structure-Activity Relationship},
  owner = {bricehoffmann},
  pmid = {18175308},
  timestamp = {2009.02.13},
  url = {http://dx.doi.org/10.1002/prot.21858}
}

@inproceedings{Schellewald2001Evaluation,
  author = {Schellewald, C. and Roth, S. and Schn{\"o}rr, C.},
  title = {Evaluation of Convex Optimization Techniques for the Weighted Graph-Matching
	Problem in Computer Vision},
  booktitle = {Proceedings of the 23rd DAGM-Symposium on Pattern Recognition},
  year = {2001},
  pages = {361--368},
  address = {London, UK},
  publisher = {Springer-Verlag},
  isbn = {3-540-42596-9}
}

@inproceedings{Schellewald2005Probabilistic,
  author = {Schellewald, C. and Schn{\"o}rr, C.},
  title = {Probabilistic Subgraph Matching Based on Convex Relaxation},
  booktitle = {EMMCVPR05},
  year = {2005},
  pages = {171--186},
  bibsource = {http://www.visionbib.com/bibliography/match558.html#TT46972}
}

@article{Schena1995Quantitative,
  author = {Schena, M. and Shalon, D. and Davis, R. W. and Brown, P. O.},
  title = {Quantitative monitoring of gene expression patterns with a complementary
	{DNA} microarray.},
  journal = {Science},
  year = {1995},
  volume = {270},
  pages = {467--470},
  number = {5235},
  month = oct,
  abstract = {A high-capacity system was developed to monitor the expression of
	many genes in parallel. Microarrays prepared by high-speed robotic
	printing of complementary DNAs on glass were used for quantitative
	expression measurements of the corresponding genes. Because of the
	small format and high density of the arrays, hybridization volumes
	of 2 microliters could be used that enabled detection of rare transcripts
	in probe mixtures derived from 2 micrograms of total cellular messenger
	RNA. Differential expression measurements of 45 Arabidopsis genes
	were made by means of simultaneous, two-color fluorescence hybridization.},
  doi = {10.1126/science.270.5235.467},
  pdf = {../local/Schena1995Quantitative.pdf},
  file = {Schena1995Quantitative.pdf:Schena1995Quantitative.pdf:PDF},
  institution = {Department of Biochemistry, Beckman Center, Stanford University Medical
	Center, CA 94305, USA.},
  keywords = {microarray},
  owner = {jp},
  pmid = {7569999},
  timestamp = {2009.02.08},
  url = {http://dx.doi.org/10.1126/science.270.5235.467}
}

@article{Schmidt1976Fast,
  author = {Schmidt, D. C. and Druffel, L. E.},
  title = {A Fast Backtracking Algorithm to Test Directed Graphs for Isomorphism
	Using Distance Matrices},
  journal = {J. ACM},
  year = {1976},
  volume = {23},
  pages = {433--445},
  number = {3},
  address = {New York, NY, USA},
  doi = {10.1145/321958.321963},
  issn = {0004-5411},
  publisher = {ACM}
}

@article{Schmitt2002New,
  author = {Schmitt, Stefan and Kuhn, Daniel and Klebe, Gerhard},
  title = {A new method to detect related function among proteins independent
	of sequence and fold homology.},
  journal = {J. Mol. Biol.},
  year = {2002},
  volume = {323},
  pages = {387--406},
  number = {2},
  month = oct,
  abstract = {A new method has been developed to detect functional relationships
	among proteins independent of a given sequence or fold homology.
	It is based on the idea that protein function is intimately related
	to the recognition and subsequent response to the binding of a substrate
	or an endogenous ligand in a well-characterized binding pocket. Thus,
	recognition of similar ligands, supposedly linked to similar function,
	requires conserved recognition features exposed in terms of common
	physicochemical interaction properties via the functional groups
	of the residues flanking a particular binding cavity. Following a
	technique commonly used in the comparison of small molecule ligands,
	generic pseudocenters coding for possible interaction properties
	were assigned for a large sample set of cavities extracted from the
	entire PDB and stored in the database Cavbase. Using a particular
	query cavity a series of related cavities of decreasing similarity
	is detected based on a clique detection algorithm. The detected similarity
	is ranked according to property-based surface patches shared in common
	by the different clique solutions. The approach either retrieves
	protein cavities accommodating the same (e.g. co-factors) or closely
	related ligands or it extracts proteins exhibiting similar function
	in terms of a related catalytic mechanism. Finally the new method
	has strong potential to suggest alternative molecular skeletons in
	de novo design. The retrieval of molecular building blocks accommodated
	in a particular sub-pocket that shares similarity with the pocket
	in a protein studied by drug design can inspire the discovery of
	novel ligands.},
  institution = {Inst. of Pharmaceutical Chemistry, Univ. of Marburg, Marbacher Weg
	6, D-35032, Marburg, Germany.},
  keywords = {Algorithms; Binding Sites; Databases, Protein; Models, Molecular;
	Molecular Structure; Protein Binding; Protein Folding; Protein Structure,
	Tertiary; Proteins, chemistry/metabolism; Reproducibility of Results},
  owner = {bricehoffmann},
  pii = {S0022283602008112},
  pmid = {12381328},
  timestamp = {2009.02.13}
}

@article{Schneider2004Advances,
  author = {Schneider, Gisbert and Fechner, Uli},
  title = {Advances in the prediction of protein targeting signals.},
  journal = {Proteomics},
  year = {2004},
  volume = {4},
  pages = {1571--1580},
  number = {6},
  month = jun,
  doi = {10.1002/pmic.200300786},
  pdf = {../local/Schneider2004Advances.pdf},
  file = {Schneider2004Advances.pdf:local/Schneider2004Advances.pdf:PDF},
  url = {http://dx.doi.org/10.1002/pmic.200300786}
}

@article{Schneider1998Artificial,
  author = {Schneider, G. and Wrede, P.},
  title = {Artificial neural networks for computer-based molecular design.},
  journal = {Prog Biophys Mol Biol},
  year = {1998},
  volume = {70},
  pages = {175--222},
  number = {3},
  abstract = {The theory of artificial neural networks is briefly reviewed focusing
	on supervised and unsupervised techniques which have great impact
	on current chemical applications. An introduction to molecular descriptors
	and representation schemes is given. In addition, worked examples
	of recent advances in this field are highlighted and pioneering publications
	are discussed. Applications of several types of artificial neural
	networks to compound classification, modelling of structure-activity
	relationships, biological target identification, and feature extraction
	from biopolymers are presented and compared to other techniques.
	Advantages and limitations of neural networks for computer-aided
	molecular design and sequence analysis are discussed.},
  keywords = {Algorithms, Amino Acid Sequence, Amino Acids, Animals, Artificial
	Intelligence, Automated, Bacterial, Bacterial Proteins, Bicuculline,
	Binding Sites, Biological, Biological Availability, Blood Proteins,
	Blood-Brain Barrier, Cation Transport Proteins, Cats, Cell Membrane
	Permeability, Chemical, Chemistry, Cluster Analysis, Combinatorial
	Chemistry Techniques, Comparative Study, Computational Biology, Computer
	Simulation, Computer Systems, Computer-Aided Design, Computer-Assisted,
	Computing Methodologies, DNA-Binding Proteins, Databases, Dogs, Drug
	Design, Electric Stimulation, Electromyography, Enzyme Inhibitors,
	Ether-A-Go-Go Potassium Channels, Excitatory Amino Acid Antagonists,
	Factual, False Positive Reactions, Forecasting, Forelimb, GABA Antagonists,
	Gene Expression Profiling, Genome, Glutamic Acid, Humans, Hydrogen
	Bonding, Image Enhancement, Image Interpretation, Image Processing,
	Information Storage and Retrieval, Iontophoresis, Kynurenic Acid,
	Least-Squares Analysis, Linear Models, Liver, Markov Chains, Metabolic
	Clearance Rate, Metalloendopeptidases, Microelectrodes, Models, Molecular,
	Molecular Conformation, Molecular Sequence Data, Molecular Structure,
	Motor Cortex, Movement, Multivariate Analysis, Nerve Net, Neural
	Networks (Computer), Neuropeptides, Non-U.S. Gov't, Nonlinear Dynamics,
	Pattern Recognition, Pharmaceutical, Pharmaceutical Preparations,
	Pharmacokinetics, Phylogeny, Potassium Channels, Predictive Value
	of Tests, Protein Interaction Mapping, Protein Sorting Signals, Protein
	Structure, Proteins, Rats, Reproducibility of Results, Research Support,
	Sensitivity and Specificity, Sequence Alignment, Sequence Analysis,
	Shoulder, Signal Processing, Software, Statistical, Stereotaxic Techniques,
	Structure-Activity Relationship, Terminology, Tertiary, Trans-Activators,
	Voltage-Gated, Zinc},
  owner = {mahe},
  pii = {S0079610798000261},
  pmid = {9830312},
  timestamp = {2006.09.06}
}

@article{Schoenberg1938Metric,
  author    = {Schoenberg, I. J.},
  title     = {Metric spaces and positive definite functions},
  journal   = {Trans. Am. Math. Soc.},
  volume    = {44},
  number    = {3},
  pages     = {522--536},
  year      = {1938},
  file      = {Schoenberg1938Metric.pdf:Schoenberg1938Metric.pdf:PDF},
  pdf       = {../local/Schoenberg1938Metric.pdf},
  owner     = {jp},
  timestamp = {2011.05.15}
}

@article{Schones2008Genome,
  author = {Schones, Dustin E. and Zhao, Keji},
  title = {Genome-wide approaches to studying chromatin modifications},
  journal = {Nat. Rev. Genet.},
  year = {2008},
  volume = {9},
  pages = {179--191},
  keywords = {csbcbook, csbcbook-ch2}
}

@book{Schrodinger1944Vie,
  title = {Qu'est-ce que la vie?},
  publisher = {Christian Bourgois Editeur},
  year = {1944},
  author = {Schr{\"o}dinger, Erwin},
  pages = {242 p},
  note = {French edition, 1986},
  keywords = {csbcbook},
  opteditor = {Christian Bourgois}
}

@article{Schubert2005Local,
  author = {Schubert, S. and Gr{\"u}nweller, A. and Erdmann, V. A. and Kurreck,
	J.},
  title = {Local {RNA} target structure influences {siRNA} efficacy: systematic
	analysis of intentionally designed binding regions.},
  journal = {J. Mol. Biol.},
  year = {2005},
  volume = {348},
  pages = {883--893},
  number = {4},
  month = may,
  abstract = {Contradictory reports in the literature have emphasised either the
	sequence of small interfering RNAs (siRNA) or the structure of their
	target molecules to be the major determinant of the efficiency of
	RNA interference (RNAi) approaches. In the present study, we analyse
	systematically the contributions of these parameters to siRNA activity
	by using deliberately designed mRNA constructs. The siRNA target
	sites were included in well-defined structural elements rendering
	them either highly accessible or completely involved in stable base-pairing.
	Furthermore, complementary sequence elements and various hairpins
	with different stem lengths and designs were used as target sites.
	Only one of the strands of the siRNA duplex was found to be capable
	of silencing via its respective target site, indicating that thermodynamic
	characteristics intrinsic to the siRNA strands are a basic determinant
	of siRNA activity. A significant obstruction of gene silencing by
	the same siRNA, however, was observed to be caused by structural
	features of the substrate RNA. Bioinformatic analysis of the mRNA
	structures suggests a direct correlation between the extent of gene-knockdown
	and the local free energy in the target region. Our findings indicate
	that, although a favourable siRNA sequence is a necessary prerequisite
	for efficient RNAi, complex target structures may limit the applicability
	even of carefully chosen siRNAs.},
  doi = {10.1016/j.jmb.2005.03.011},
  keywords = {sirna},
  owner = {vert},
  pii = {S0022-2836(05)00269-X},
  pmid = {15843020},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1016/j.jmb.2005.03.011}
}

@article{Schueler-Furman2000Structure-based,
  author = {Schueler-Furman, O. and Altuvia, Y. and Sette, A. and Margalit, H.},
  title = {Structure-based prediction of binding peptides to {MHC} class {I}
	molecules: application to a broad range of {MHC} alleles.},
  journal = {Protein Sci.},
  year = {2000},
  volume = {9},
  pages = {1838--1846},
  number = {9},
  month = sep,
  abstract = {Specific binding of antigenic peptides to major histocompatibility
	complex (MHC) class I molecules is a prerequisite for their recognition
	by cytotoxic T-cells. Prediction of MHC-binding peptides must therefore
	be incorporated in any predictive algorithm attempting to identify
	immunodominant T-cell epitopes, based on the amino acid sequence
	of the protein antigen. Development of predictive algorithms based
	on experimental binding data requires experimental testing of a very
	large number of peptides. A complementary approach relies on the
	structural conservation observed in crystallographically solved peptide-MHC
	complexes. By this approach, the peptide structure in the MHC groove
	is used as a template upon which peptide candidates are threaded,
	and their compatibility to bind is evaluated by statistical pairwise
	potentials. Our original algorithm based on this approach used the
	pairwise potential table of Miyazawa and Jernigan (Miyazawa S, Jernigan
	RL, 1996, J Mol Biol 256:623-644) and succeeded to correctly identify
	good binders only for MHC molecules with hydrophobic binding pockets,
	probably because of the high emphasis of hydrophobic interactions
	in this table. A recently developed pairwise potential table by Betancourt
	and Thirumalai (Betancourt MR, Thirumalai D, 1999, Protein Sci 8:361-369)
	that is based on the Miyazawa and Jernigan table describes the hydrophilic
	interactions more appropriately. In this paper, we demonstrate how
	the use of this table, together with a new definition of MHC contact
	residues by which only residues that contribute exclusively to sequence
	specific binding are included, allows the development of an improved
	algorithm that can be applied to a wide range of MHC class I alleles.},
  keywords = {immunoinformatics},
  pmid = {11045629},
  timestamp = {2007.01.25}
}

@article{Schuffenhauer2003Similarity,
  author    = {Schuffenhauer, A. and Floersheim, P. and Acklin, P. and Jacoby, E.},
  title     = {Similarity metrics for ligands reflecting the similarity of the target
	proteins},
  journal   = {J. Chem. Inf. Comput. Sci.},
  volume    = {43},
  number    = {2},
  pages     = {391--405},
  year      = {2003},
  doi       = {10.1021/ci025569t},
  url       = {http://dx.doi.org/10.1021/ci025569t},
  pmid      = {12653501},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.07.16},
  abstract  = {In this study we evaluate how far the scope of similarity searching
	can be extended to identify not only ligands binding to the same
	target as the reference ligand(s) but also ligands of other homologous
	targets without initially known ligands. This "homology-based similarity
	searching" requires molecular representations reflecting the ability
	of a molecule to interact with target proteins. The Similog keys,
	which are introduced here as a new molecular representation, were
	designed to fulfill such requirements. They are based only on the
	molecular constitution and are counts of atom triplets. Each triplet
	is characterized by the graph distances and the types of its atoms.
	The atom-typing scheme classifies each atom by its function as H-bond
	donor or acceptor and by its electronegativity and bulkiness. In
	this study the Similog keys are investigated in retrospective in
	silico screening experiments and compared with other conformation
	independent molecular representations. Studied were molecules of
	the MDDR database for which the activity data was augmented by standardized
	target classification information from public protein classification
	databases. The MDDR molecule set was split randomly into two halves.
	The first half formed the candidate set. Ligands of four targets
	(dopamine D2 receptor, opioid delta-receptor, factor Xa serine protease,
	and progesterone receptor) were taken from the second half to form
	the respective reference sets. Different similarity calculation methods
	are used to rank the molecules of the candidate set by their similarity
	to each of the four reference sets. The accumulated counts of molecules
	binding to the reference target and groups of targets with decreasing
	homology to it were examined as a function of the similarity rank
	for each reference set and similarity method. In summary, similarity
	searching based on Unity 2D-fingerprints or Similog keys are found
	to be equally effective in the identification of molecules binding
	to the same target as the reference set. However, the application
	of the Similog keys is more effective in comparison with the other
	investigated methods in the identification of ligands binding to
	any target belonging to the same family as the reference target.
	We attribute this superiority to the fact that the Similog keys provide
	a generalization of the chemical elements and that the keys are counted
	instead of merely noting their presence or absence in a binary form.
	The second most effective molecular representation are the occurrence
	counts of the public ISIS key fragments, which like the Similog method,
	incorporates key counting as well as a generalization of the chemical
	elements. The results obtained suggest that ligands for a new target
	can be identified by the following three-step procedure: 1. Select
	at least one target with known ligands which is homologous to the
	new target. 2. Combine the known ligands of the selected target(s)
	to a reference set. 3. Search candidate ligands for the new targets
	by their similarity to the reference set using the Similog method.
	This clearly enlarges the scope of similarity searching from the
	classical application for a single target to the identification of
	candidate ligands for whole target families and is expected to be
	of key utility for further systematic chemogenomics exploration of
	previously well explored target families.}
}

@article{Schuffenhauer2002ontology,
  author = {Schuffenhauer, A. and Zimmermann, J. and Stoop, R. and van der Vyver,
	J. J. and Lecchini, S. and Jacoby, E.},
  title = {An ontology for pharmaceutical ligands and its application for in
	silico screening and library design},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2002},
  volume = {42},
  pages = {947--955},
  number = {4},
  abstract = {Annotation efforts in biosciences have focused in past years mainly
	on the annotation of genomic sequences. Only very limited effort
	has been put into annotation schemes for pharmaceutical ligands.
	Here we propose annotation schemes for the ligands of four major
	target classes, enzymes, G protein-coupled receptors (GPCRs), nuclear
	receptors (NRs), and ligand-gated ion channels (LGICs), and outline
	their usage for in silico screening and combinatorial library design.
	The proposed schemes cover ligand functionality and hierarchical
	levels of target classification. The classification schemes are based
	on those established by the EC, GPCRDB, NuclearDB, and LGICDB. The
	ligands of the MDL Drug Data Report (MDDR) database serve as a reference
	data set of known pharmacologically active compounds. All ligands
	were annotated according to the schemes when attribution was possible
	based on the activity classification provided by the reference database.
	The purpose of the ligand-target classification schemes is to allow
	annotation-based searching of the ligand database. In addition, the
	biological sequence information of the target is directly linkable
	to the ligand, hereby allowing sequence similarity-based identification
	of ligands of next homologous receptors. Ligands of specified levels
	can easily be retrieved to serve as comprehensive reference sets
	for cheminformatics-based similarity searches and for design of target
	class focused compound libraries. Retrospective in silico screening
	experiments within the MDDR01.1 database, searching for structures
	binding to dopamine D2, all dopamine receptors and all amine-binding
	class A GPCRs using known dopamine D2 binding compounds as a reference
	set, have shown that such reference sets are in particular useful
	for the identification of ligands binding to receptors closely related
	to the reference system. The potential for ligand identification
	drops with increasing phylogenetic distance. The analysis of the
	focus of a tertiary amine based combinatorial library compared to
	known amine binding class A GPCRs, peptide binding class A GPCRs,
	and LGIC ligands constitutes a second application scenario which
	illustrates how the focus of a combinatorial library can be treated
	quantitatively. The provided annotation schemes, which bridge chem-
	and bioinformatics by linking ligands to sequences, are expected
	to be of key utility for further systematic chemogenomics exploration
	of previously well explored target families.},
  doi = {10.1021/ci010385k},
  keywords = {chemogenomics},
  owner = {laurent},
  pii = {ci010385k},
  pmid = {12132896},
  timestamp = {2008.07.16},
  url = {http://dx.doi.org/10.1021/ci010385k}
}

@article{Schumacher2006Microarray,
  author = {Schumacher, A. and Kapranov, P. and Kaminsky, Z. and Flanagan, J.
	and Assadzadeh, A. and Yau, P. and Virtanen, C. and Winegarden, N.
	and Cheng, J. and Gingeras, T. and Petronis, A.},
  title = {Microarray-based {DNA} methylation profiling: technology and
	applications},
  journal = {Nucleic Acids Res.},
  volume = {34},
  pages = {528--542},
  year = {2006},
  keywords = {csbcbook, csbcbook-ch2}
}

@article{Schuster2000general,
  author = {Schuster, S. and Fell, D. A. and Dandekar, T.},
  title = {A general definition of metabolic pathways useful for systematic
	organization and analysis of complex metabolic networks.},
  journal = {Nat Biotechnol},
  year = {2000},
  volume = {18},
  pages = {326--332},
  number = {3},
  month = {Mar},
  abstract = {A set of linear pathways often does not capture the full range of
	behaviors of a metabolic network. The concept of 'elementary flux
	modes' provides a mathematical tool to define and comprehensively
	describe all metabolic routes that are both stoichiometrically and
	thermodynamically feasible for a group of enzymes. We have used this
	concept to analyze the interplay between the pentose phosphate pathway
	(PPP) and glycolysis. The set of elementary modes for this system
	involves conventional glycolysis, a futile cycle, all the modes of
	PPP function described in biochemistry textbooks, and additional
	modes that are a priori equally entitled to pathway status. Applications
	include maximizing product yield in amino acid and antibiotic synthesis,
	reconstruction and consistency checks of metabolism from genome data,
	analysis of enzyme deficiencies, and drug target identification in
	metabolic networks.},
  doi = {10.1038/73786},
  pdf = {../local/Schuster2000general.pdf},
  file = {Schuster2000general.pdf:Schuster2000general.pdf:PDF},
  institution = {Department of Bioinformatics, Max Delbr{\"u}ck Center for Molecular Medicine,
	D-13092 Berlin-Buch, Germany.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {10700151},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1038/73786}
}

@article{Schuster1994elementary,
  author = {Schuster, S. and Hilgetag, C.},
  title = {On elementary flux modes in biochemical reaction systems at steady
	state},
  journal = {J. Biol. Syst.},
  volume = {2},
  number = {2},
  pages = {165--182},
  year = {1994},
  doi = {10.1142/S0218339094000131},
  url = {http://dx.doi.org/10.1142/S0218339094000131},
  pdf = {../local/Schuster1994elementary.pdf},
  file = {Schuster1994elementary.pdf:Schuster1994elementary.pdf:PDF},
  owner = {jp},
  timestamp = {2011.11.30}
}

@article{Schuster2002Reaction,
  author = {Schuster, S. and Hilgetag, C. and Woods, J. H. and Fell, D. A.},
  title = {Reaction routes in biochemical reaction systems: algebraic properties,
	validated calculation procedure and example from nucleotide metabolism.},
  journal = {J Math Biol},
  year = {2002},
  volume = {45},
  pages = {153--181},
  number = {2},
  month = {Aug},
  abstract = {Elementary flux modes (direct reaction routes) are minimal sets of
	enzymes that can operate at steady state, with all irreversible reactions
	used in the appropriate direction. They can be interpreted as component
	pathways of a (bio)chemical reaction network. Here, two different
	definitions of elementary modes are given and their equivalence is
	proved. Several algebraic properties of elementary modes are then
	presented and proved. This concerns, amongst other features, the
	minimal number of enzymes of the network not used in an elementary
	mode and the situations where irreversible reactions are replaced
	by reversible ones. Based on these properties, a refined algorithm
	is presented, and it is formally proved that this algorithm will
	exclusively generate all the elementary flux modes of an arbitrary
	network containing reversible or irreversible reactions or both.
	The algorithm is illustrated by a biochemical example relevant in
	nucleotide metabolism. The computer implementation in two different
	programming languages is discussed.},
  doi = {10.1007/s002850200143},
  pdf = {../local/Schuster2002Reaction.pdf},
  file = {Schuster2002Reaction.pdf:Schuster2002Reaction.pdf:PDF},
  institution = {Department of Bioinformatics, Max Delbr{\"u}ck Centre for Molecular Medicine,
	D-13092 Berlin-Buch, Germany. stschust@mdc-berlin.de},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {12181603},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1007/s002850200143}
}

@article{Schuster2007Next,
  author = {Schuster, S. C.},
  title = {Next-generation sequencing transforms today's biology},
  journal = {Nat. Methods},
  year = {2008},
  volume = {5},
  pages = {16--18},
  number = {1},
  doi = {10.1038/nmeth1156},
  url = {http://dx.doi.org/10.1038/nmeth1156}
}

@article{Schwarz2003Asymmetry,
  author = {Schwarz, D. S. and Hutvagner, G. and Du, T. and Xu, Z. and Aronin,
	N. and Zamore, P. D.},
  title = {Asymmetry in the assembly of the {RNAi} enzyme complex},
  journal = {Cell},
  year = {2003},
  volume = {115},
  pages = {199--208},
  number = {2},
  month = {Oct},
  abstract = {A key step in {RNA} interference ({RNA}i) is assembly of the {RISC},
	the protein-si{RNA} complex that mediates target {RNA} cleavage.
	{H}ere, we show that the two strands of an si{RNA} duplex are not
	equally eligible for assembly into {RISC}. {R}ather, both the absolute
	and relative stabilities of the base pairs at the 5' ends of the
	two si{RNA} strands determine the degree to which each strand participates
	in the {RNA}i pathway. si{RNA} duplexes can be functionally asymmetric,
	with only one of the two strands able to trigger {RNA}i. {A}symmetry
	is the hallmark of a related class of small, single-stranded, noncoding
	{RNA}s, micro{RNA}s (mi{RNA}s). {W}e suggest that single-stranded
	mi{RNA}s are initially generated as si{RNA}-like duplexes whose structures
	predestine one strand to enter the {RISC} and the other strand to
	be destroyed. {T}hus, the common step of {RISC} assembly is an unexpected
	source of asymmetry for both si{RNA} function and mi{RNA} biogenesis.},
  doi = {10.1016/S0092-8674(03)00759-1},
  pdf = {../local/Schwarz2003Asymmetry.pdf},
  file = {Schwarz2003Asymmetry.pdf:local/Schwarz2003Asymmetry.pdf:PDF},
  keywords = {sirna},
  url = {http://dx.doi.org/10.1016/S0092-8674(03)00759-1}
}

@article{Schwarz1978Estimating,
  author = {Schwarz, G.},
  title = {Estimating the dimension of a model},
  journal = {Annals of Statistics},
  year = {1978},
  volume = {6},
  pages = {461--464},
  number = {2},
  doi = {10.1214/aos/1176344136},
  url = {http://dx.doi.org/10.1214/aos/1176344136}
}

@article{Schwender2004pilot,
  author = {Schwender, Holger and Zucknick, Manuela and Ickstadt, Katja and
	Bolt, Hermann M. and {GENICA network}},
  title = {A pilot study on the application of statistical classification procedures
	to molecular epidemiological data.},
  journal = {Toxicol {L}ett},
  year = {2004},
  volume = {151},
  pages = {291--299},
  number = {1},
  month = {Jun},
  abstract = {The development of new statistical methods for use in molecular epidemiology
	comprises the building and application of appropriate classification
	rules. {T}he aim of this study was to assess various classification
	methods that can potentially handle genetic interactions. {A} data
	set comprising genotypes at 25 single nucleotide polymorphic ({SNP})
	loci from 518 breast cancer cases and 586 age-matched population-based
	controls from the {GENICA} study was used to built a classification
	rule with the discrimination methods {SVM} (support vector machine),
	{CART} (classification and regression tree), {B}agging, {R}andom
	{F}orest, {L}ogit{B}oost and k nearest neighbours (k{NN}). {A} blind
	pilot analysis of the genotypic data set was a first approach to
	obtain an impression of the statistical structure of the data. {F}urthermore,
	this analysis was performed to explore classification methods that
	may be applied to molecular-epidemiological evaluation. {T}he results
	showed that all blindly applied classification methods had a slightly
	smaller misclassification rate than a random classification. {T}he
	findings, nevertheless, suggest that {SNP} data might be useful for
	the classification of individuals into categories of high or low
	risk of diseases.},
  keywords = {biosvm}
}

@inproceedings{Scholkopf1995Extracting,
  author = {Sch{\"o}lkopf, B. and Burges, C. and Vapnik, V.},
  title = {Extracting support data for a given task},
  booktitle = {Proceedings of the First International Conference on Knowledge Discovery
	\& Data Mining},
  year = {1995},
  editor = {Fayyad, U. M. and Uthurusamy, R.},
  pages = {252--257},
  address = {Menlo Park, CA},
  publisher = {AAAI Press},
  owner = {mahe},
  timestamp = {2006.09.13}
}

@inproceedings{Scholkopf1996Incorporating,
  author = {Sch{\"o}lkopf, B. and Burges, C. and Vapnik, V.},
  title = {Incorporating invariances in support vector learning machines},
  booktitle = {ICANN 96: Proceedings of the 1996 International Conference on Artificial
	Neural Networks},
  year = {1996},
  editor = {von der Malsburg, C. and von Seelen, W. and Vorbr{\"u}ggen, J. C.
	and Sendhoff, B.},
  pages = {47--52},
  address = {London, UK},
  publisher = {Springer-Verlag},
  pdf = {../local/Scholkopf1996Incorporating.pdf},
  file = {Scholkopf1996Incorporating.pdf:Scholkopf1996Incorporating.pdf:PDF},
  owner = {jp},
  timestamp = {2008.12.22}
}

@inproceedings{Scholkopf2001Generalized,
  author = {Sch{\"o}lkopf, B. and Herbrich, R. and Smola, A. J.},
  title = {A Generalized Representer Theorem},
  booktitle = {Proceedings of the 14th Annual Conference on Computational Learning
	Theory},
  year = {2001},
  volume = {2111},
  series = {Lecture Notes in Computer Science},
  pages = {416--426},
  address = {Berlin / Heidelberg},
  publisher = {Springer},
  doi = {10.1007/3-540-44581-1_27}
}

@article{Scholkopf2001Estimating,
  author = {Sch{\"o}lkopf, B. and Platt, J. C. and Shawe-Taylor, J. and Smola,
	A. J. and Williamson, R. C.},
  title = {Estimating the support of a high-dimensional distribution},
  journal = {Neural Comput.},
  year = {2001},
  volume = {13},
  pages = {1443--1471},
  number = {7},
  doi = {10.1162/089976601750264965},
  pdf = {../local/Scholkopf2001Estimating.pdf},
  file = {Scholkopf2001Estimating.pdf:local/Scholkopf2001Estimating.pdf:PDF}
}

@incollection{Schoelkopf1999Kernel,
  author = {Sch{\"o}lkopf, B. and Smola, A. J. and M{\"u}ller, K.-R.},
  title = {Kernel principal component analysis},
  booktitle = {Advances in {Kernel} {Methods} -- {Support} {Vector} {Learning}},
  publisher = {MIT Press},
  year = {1999},
  editor = {Sch{\"o}lkopf, B. and Burges, C. and Smola, A.},
  pages = {327--352},
  pdf = {../local/scho99.pdf},
  file = {scho99.pdf:local/scho99.pdf:PDF},
  subject = {kernel}
}

@book{Scholkopf2002Learning,
  author = {Sch{\"o}lkopf, B. and Smola, A. J.},
  title = {Learning with {Kernels}: {Support} {Vector} {Machines}, {Regularization},
	{Optimization}, and {Beyond}},
  publisher = {MIT Press},
  address = {Cambridge, MA},
  year = {2002},
  url = {http://www.learning-with-kernels.org},
  subject = {kernel}
}

@book{Schoelkopf2004Kernel,
  title = {Kernel {Methods} in {Computational} {Biology}},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  address = {Cambridge, MA},
  keywords = {biosvm},
  owner = {vert}
}

@inproceedings{Schoelkopf2002Kernel,
  author = {Sch{\"o}lkopf, B. and Weston, J. and Eskin, E. and Leslie, C. and
	Noble, W. S.},
  title = {A {Kernel} {Approach} for {Learning} from {Almost} {Orthogonal} {Patterns}},
  booktitle = {Proceedings of {ECML} 2002},
  year = {2002},
  pdf = {../local/scho02.pdf},
  file = {scho02.pdf:local/scho02.pdf:PDF},
  subject = {biokernel},
  url = {http://www.cs.columbia.edu/~cleslie/papers/domdiag.pdf}
}

@inproceedings{Scholkopf2000Support,
  author = {Sch{\"o}lkopf, B. and Williamson, R. and Smola, A. and Shawe-Taylor,
	J. and Platt, J.},
  title = {Support {Vector} {Method} for {Novelty} {Detection}},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2000},
  editor = {Solla, S. A. and Leen, T. K. and M{\"u}ller, K.-R.},
  volume = {12},
  pages = {582--588},
  publisher = {MIT Press},
  pdf = {../local/scho99.pdf},
  file = {scho99.pdf:local/scho99.pdf:PDF},
  subject = {kernel},
  url = {http://citeseer.nj.nec.com/400144.html}
}

@inproceedings{Scott2009Novelty,
  author = {Scott, C. and Blanchard, G.},
  title = {Novelty detection: Unlabeled data definitely help},
  booktitle = {Proceedings of the Twelfth International Conference on Artificial
	Intelligence and Statistics (AISTATS) 2009},
  editor = {van Dyk, D. and Welling, M.},
  volume = {5},
  pages = {464--471},
  publisher = {JMLR: W\&CP 5},
  address = {Clearwater Beach, Florida},
  year = {2009},
  url = {http://jmlr.csail.mit.edu/proceedings/papers/v5/scott09a.html},
  abstract = {In machine learning, one formulation of the novelty detection problem
	is to build a detector based on a training sample consisting of only
	nominal data. The standard (inductive) approach to this problem has
	been to declare novelties where the nominal density is low, which
	reduces the problem to density level set estimation. In this paper,
	we consider the setting where an unlabeled and possibly contaminated
	sample is also available at learning time. We argue that novelty
	detection is naturally solved by a general reduction to a binary
	classification problem. In particular, a detector with a desired
	false positive rate can be achieved through a reduction to Neyman-Pearson
	classification. Unlike the inductive approach, our approach yields
	detectors that are optimal (e.g., statistically consistent) regardless
	of the distribution on novelties. Therefore, in novelty detection,
	unlabeled data have a substantial impact on the theoretical properties
	of the decision rule.},
  pdf = {../local/Scott2009Novelty.pdf},
  file = {Scott2009Novelty.pdf:Scott2009Novelty.pdf:PDF},
  keywords = {PUlearning},
  owner = {jp},
  timestamp = {2010.01.31}
}

@article{Scott2005Neyman,
  author = {Scott, C. and Nowak, R.},
  title = {A {Neyman-Pearson} approach to statistical learning},
  journal = {IEEE Trans. Inf. Theory},
  year = {2005},
  volume = {51},
  pages = {3806--3819},
  number = {11},
  abstract = {The Neyman-Pearson (NP) approach to hypothesis testing is useful
	in situations where different types of error have different consequences
	or a priori probabilities are unknown. For any $\alpha > 0$, the
	NP lemma specifies the most powerful test of size $\alpha$, but
	assumes the distributions for each hypothesis are known or (in some
	cases) the likelihood ratio is monotonic in an unknown parameter.
	This paper investigates an extension of NP theory to situations in
	which one has no knowledge of the underlying distributions except
	for a collection of independent and identically distributed (i.i.d.)
	training examples from each hypothesis. Building on a "fundamental
	lemma" of Cannon et al., we demonstrate that several concepts from
	statistical learning theory have counterparts in the NP context.
	Specifically, we consider constrained versions of empirical risk
	minimization (NP-ERM) and structural risk minimization (NP-SRM),
	and prove performance guarantees for both. General conditions are
	given under which NP-SRM leads to strong universal consistency. We
	also apply NP-SRM to (dyadic) decision trees to derive rates of convergence.
	Finally, we present explicit algorithms to implement NP-SRM for histograms
	and dyadic decision trees.},
  citeulike-article-id = {600676},
  citeulike-linkout-0 = {http://dx.doi.org/10.1109/TIT.2005.856955},
  citeulike-linkout-1 = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=1522642},
  doi = {10.1109/TIT.2005.856955},
  keywords = {neyman, pearson},
  owner = {fantinemordelet},
  posted-at = {2008-11-02 10:16:52},
  priority = {2},
  timestamp = {2013.01.11},
  url = {http://dx.doi.org/10.1109/TIT.2005.856955}
}

@article{Scott2005Identifying,
  author = {Scott, M. S. and Perkins, T. and Bunnell, S. and Pepin, F. and Thomas,
	D. Y. and Hallett, M.},
  title = {Identifying regulatory subnetworks for a set of genes},
  journal = {Mol. Cell. Proteomics},
  volume = {4},
  number = {5},
  pages = {683--692},
  month = {May},
  year = {2005},
  doi = {10.1074/mcp.M400110-MCP200},
  url = {http://dx.doi.org/10.1074/mcp.M400110-MCP200},
  pii = {M400110-MCP200},
  pmid = {15722371},
  abstract = {High throughput genomic/proteomic strategies, such as microarray studies,
	drug screens, and genetic screens, often produce a list of genes
	that are believed to be important for one or more reasons. Unfortunately
	it is often difficult to discern meaningful biological relationships
	from such lists. This study presents a new bioinformatic approach
	that can be used to identify regulatory subnetworks for lists of
	significant genes or proteins. We demonstrate the utility of this
	approach using an interaction network for yeast constructed from
	BIND, TRANSFAC, SCPD, and chromatin immunoprecipitation (ChIP)-Chip
	data bases and lists of genes from well known metabolic pathways
	or differential expression experiments. The approach accurately rediscovers
	known regulatory elements of the heat shock response as well as the
	gluconeogenesis, galactose, glycolysis, and glucose fermentation
	pathways in yeast. We also find evidence supporting a previous conjecture
	that approximately half of the enzymes in a metabolic pathway are
	transcriptionally co-regulated. Finally we demonstrate a previously
	unknown connection between GAL80 and the diauxic shift in yeast.},
  institution = {McGill Centre for Bioinformatics, 3775 University Street, Montreal
	H3A 2B4, Canada.},
  language = {eng},
  medline-pst = {ppublish},
  pdf = {../local/Scott2005Identifying.pdf},
  file = {Scott2005Identifying.pdf:Scott2005Identifying.pdf:PDF},
  owner = {jp},
  timestamp = {2011.09.26}
}

@article{Scott99thegromos,
  author = {Scott, W. R. P. and H{\"u}nenberger, P. H. and Tironi, I. G. and
	Mark, A. E. and Billeter, S. R. and Fennen, J. and Torda, A. E. and
	Huber, T. and Kr{\"u}ger, P. and van Gunsteren, W. F.},
  title = {The {GROMOS} biomolecular simulation program package},
  journal = {J. Phys. Chem. A},
  year = {1999},
  volume = {103},
  pages = {3596--3607},
  number = {19},
  doi = {10.1021/jp984217f}
}

@techreport{Scovel2004Fast,
  author = {Scovel, C. and Steinwart, I.},
  title = {Fast {Rates} for {Support} {Vector} {Machines}},
  year = {2004},
  institution = {Los Alamos National Laboratory}
}

@inproceedings{Sebag1997Tractable,
  author = {Sebag, M. and Rouveirol, C.},
  title = {Tractable {Induction} and {Classification} in {First-Order} {Logic}
	via {Stochastic} {Matching}.},
  booktitle = {Proceedings of the 15th {International} {Joint} {Conference} on {Artificial}
	{Intelligence}},
  year = {1997},
  pages = {888--893},
  publisher = {Morgan Kaufmann},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@article{Sebat2007Major,
  author = {Sebat, Jonathan},
  title = {Major changes in our {DNA} lead to major changes in our thinking.},
  journal = {Nat. Genet.},
  year = {2007},
  volume = {39},
  pages = {S3--S5},
  number = {7 Suppl},
  month = {Jul},
  abstract = {Variability in the human genome has far exceeded expectations. In
	the course of the past three years, we have learned that much of
	our naturally occurring genetic variation consists of large-scale
	differences in genome structure, including copy-number variants (CNVs)
	and balanced rearrangements such as inversions. Recent studies have
	begun to reveal that structural variants are an important contributor
	to disease risk; however, structural variants as a class may not
	conform well to expectations of current methods for gene mapping.
	New approaches are needed to understand the contribution of structural
	variants to disease.},
  doi = {10.1038/ng2095},
  institution = {Cold Spring Harbor Laboratory, One Bungtown Road, Cold Spring Harbor,
	New York 11724, USA. sebat@cshl.edu},
  keywords = {DNA; Gene Dosage; Gene Rearrangement; Genetic Diseases, Inborn;
	Genetic Variation; Genome, Human; Humans},
  owner = {jp},
  pii = {ng2095},
  pmid = {17597778},
  timestamp = {2009.02.08},
  url = {http://dx.doi.org/10.1038/ng2095}
}

@article{Sebat2004Large-scale,
  author = {Sebat, J. and Lakshmi, B. and Troge, J. and Alexander, J. and Young,
	J. and Lundin, P. and M{\aa}n{\'e}r, S. and Massa, H. and Walker, M. and
	Chi, M. and Navin, N. and Lucito, R. and Healy, J. and Hicks, J.
	and Ye, K. and Reiner, A. and Gilliam, T. C. and Trask, B. and Patterson,
	N. and Zetterberg, A. and Wigler, M.},
  title = {Large-scale copy number polymorphism in the human genome},
  journal = {Science},
  year = {2004},
  volume = {305},
  pages = {525--528},
  number = {5683},
  month = {Jul},
  abstract = {The extent to which large duplications and deletions contribute to
	human genetic variation and diversity is unknown. Here, we show that
	large-scale copy number polymorphisms (CNPs) (about 100 kilobases
	and greater) contribute substantially to genomic variation between
	normal humans. Representational oligonucleotide microarray analysis
	of 20 individuals revealed a total of 221 copy number differences
	representing 76 unique CNPs. On average, individuals differed by
	11 CNPs, and the average length of a CNP interval was 465 kilobases.
	We observed copy number variation of 70 different genes within CNP
	intervals, including genes involved in neurological function, regulation
	of cell growth, regulation of metabolism, and several genes known
	to be associated with disease.},
  doi = {10.1126/science.1098918},
  pdf = {../local/Sebat2004Large-scale.pdf},
  file = {Sebat2004Large-scale.pdf:Sebat2004Large-scale.pdf:PDF},
  institution = {Cold Spring Harbor Laboratory, Cold Spring Harbor, NY 11724, USA.},
  keywords = {cgh},
  owner = {jp},
  pii = {305/5683/525},
  pmid = {15273396},
  timestamp = {2009.02.08},
  url = {http://dx.doi.org/10.1126/science.1098918}
}

@article{Seeger2004Gaussian,
  author = {Seeger, Matthias},
  title = {Gaussian processes for machine learning.},
  journal = {Int {J} {N}eural {S}yst},
  year = {2004},
  volume = {14},
  pages = {69--106},
  number = {2},
  month = {Apr},
  abstract = {Gaussian processes ({GP}s) are natural generalisations of multivariate
	{G}aussian random variables to infinite (countably or continuous)
	index sets. {GP}s have been applied in a large number of fields to
	a diverse range of ends, and very many deep theoretical analyses
	of various properties are available. {T}his paper gives an introduction
	to {G}aussian processes on a fairly elementary level with special
	emphasis on characteristics relevant in machine learning. {I}t draws
	explicit connections to branches such as spline smoothing models
	and support vector machines in which similar ideas have been investigated.
	{G}aussian process models are routinely used to solve hard machine
	learning problems. {T}hey are attractive because of their flexible
	non-parametric nature and computational simplicity. {T}reated within
	a {B}ayesian framework, very powerful statistical methods can be
	implemented which offer valid estimates of uncertainties in our predictions
	and generic model selection procedures cast as nonlinear optimization
	problems. {T}heir main drawback of heavy computational scaling has
	recently been alleviated by the introduction of generic sparse approximations.
	{T}he mathematical literature on {GP}s is large and often uses deep
	concepts which are not required to fully understand most machine
	learning applications. {I}n this tutorial paper, we aim to present
	characteristics of {GP}s relevant to machine learning and to show
	up precise connections to other "kernel machines" popular in the
	community. {O}ur focus is on a simple presentation, but references
	to more detailed sources are provided.},
  doi = {10.1142/S0129065704001899},
  keywords = {Algorithms, Amino Acids, Antibodies, Artificial Intelligence, Astrocytoma,
	Automated, Bayes Theorem, Biological, Biopsy, Brain, Brain Mapping,
	Brain Neoplasms, Calibration, Comparative Study, Computational Biology,
	Computer-Assisted, Computing Methodologies, Cysteine, Cystine, Dysplastic
	Nevus Syndrome, Electrodes, Electroencephalography, Entropy, Eosine
	Yellowish-(YS), Evoked Potentials, Female, Gene Expression Profiling,
	Hematoxylin, Horseradish Peroxidase, Humans, Image Interpretation,
	Image Processing, Imagery (Psychotherapy), Imagination, Laterality,
	Linear Models, Male, Melanoma, Models, Monoclonal, Movement, Neoplasms,
	Neural Networks (Computer), Neuropeptides, Non-P.H.S., Non-U.S. Gov't,
	Nonparametric, Normal Distribution, P.H.S., Pattern Recognition,
	Perception, Principal Component Analysis, Protein, Protein Array
	Analysis, Protein Interaction Mapping, Proteins, Regression Analysis,
	Research Support, Sensitivity and Specificity, Sequence Alignment,
	Sequence Analysis, Skin Neoplasms, Software, Statistical,
	Statistics, Tumor Markers, U.S. Gov't, User-Computer Interface, World
	Health Organization},
  pii = {S0129065704001899},
  pmid = {15112367}
}

@inproceedings{Seeger2002Covariance,
  author = {Seeger, M.},
  title = {Covariance {Kernels} from {Bayesian} {Generative} {Models}},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2002},
  volume = {14},
  pages = {905--912},
  pdf = {../local/nips2001.pdf},
  file = {nips2001.pdf:http\://www.cs.berkeley.edu/~mseeger/papers/nips2001.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://www.cs.berkeley.edu/~mseeger/papers/nips2001.pdf}
}

@article{Segal2005From,
  author = {Segal, E. and Friedman, N. and Kaminski, N. and Regev, A. and Koller,
	D.},
  title = {From signatures to models: understanding cancer using microarrays},
  journal = {Nat {G}enet},
  year = {2005},
  volume = {37},
  pages = {S38--S45},
  number = {6 Suppl},
  abstract = {Genomics has the potential to revolutionize the diagnosis and management
	of cancer by offering an unprecedented comprehensive view of the
	molecular underpinnings of pathology. {C}omputational analysis is
	essential to transform the masses of generated data into a mechanistic
	understanding of disease. {H}ere we review current research aimed
	at uncovering the modular organization and function of transcriptional
	networks and responses in cancer. {W}e first describe how methods
	that analyze biological processes in terms of higher-level modules
	can identify robust signatures of disease mechanisms. {W}e then discuss
	methods that aim to identify the regulatory mechanisms underlying
	these modules and processes. {F}inally, we show how comparative analysis,
	combining human data with model organisms, can lead to more robust
	findings. {W}e conclude by discussing the challenges of generalizing
	these methods from cells to tissues and the opportunities they offer
	to improve cancer diagnosis and management.},
  doi = {10.1038/ng1561},
  pdf = {../local/Segal2005From.pdf},
  file = {Segal2005From.pdf:Segal2005From.pdf:PDF},
  keywords = {microarray},
  url = {http://dx.doi.org/10.1038/ng1561}
}

@article{Segal2004module,
  author = {Segal, E. and Friedman, N. and Koller, D. and Regev, A.},
  title = {A module map showing conditional activity of expression modules in
	cancer.},
  journal = {Nat. {G}enet.},
  volume = {36},
  number = {10},
  pages = {1090--1098},
  month = {Oct},
  year = {2004},
  doi = {10.1038/ng1434},
  url = {http://dx.doi.org/10.1038/ng1434},
  pii = {ng1434},
  pmid = {15448693},
  abstract = {D{NA} microarrays are widely used to study changes in gene expression
	in tumors, but such studies are typically system-specific and do
	not address the commonalities and variations between different types
	of tumor. {H}ere we present an integrated analysis of 1,975 published
	microarrays spanning 22 tumor types. {W}e describe expression profiles
	in different tumors in terms of the behavior of modules, sets of
	genes that act in concert to carry out a specific function. {U}sing
	a simple unified analysis, we extract modules and characterize gene-expression
	profiles in tumors as a combination of activated and deactivated
	modules. {A}ctivation of some modules is specific to particular types
	of tumor; for example, a growth-inhibitory module is specifically
	repressed in acute lymphoblastic leukemias and may underlie the deregulated
	proliferation in these cancers. {O}ther modules are shared across
	a diverse set of clinical conditions, suggestive of common tumor
	progression mechanisms. {F}or example, the bone osteoblastic module
	spans a variety of tumor types and includes both secreted growth
	factors and their receptors. {O}ur findings suggest that there is
	a single mechanism for both primary tumor proliferation and metastasis
	to bone. {O}ur analysis presents multiple research directions for
	diagnostic, prognostic and therapeutic studies.},
  pdf = {../local/Segal2004module.pdf},
  file = {Segal2004module.pdf:local/Segal2004module.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  timestamp = {2006.01.18}
}

@article{Segal2004modulea,
  author = {Segal, E. and Friedman, N. and Koller, D. and Regev, A.},
  title = {A module map showing conditional activity of expression modules in
	cancer},
  journal = {Nat {G}enet},
  year = {2004},
  volume = {36},
  pages = {1090--1098},
  number = {10},
  abstract = {D{NA} microarrays are widely used to study changes in gene expression
	in tumors, but such studies are typically system-specific and do
	not address the commonalities and variations between different types
	of tumor. {H}ere we present an integrated analysis of 1,975 published
	microarrays spanning 22 tumor types. {W}e describe expression profiles
	in different tumors in terms of the behavior of modules, sets of
	genes that act in concert to carry out a specific function. {U}sing
	a simple unified analysis, we extract modules and characterize gene-expression
	profiles in tumors as a combination of activated and deactivated
	modules. {A}ctivation of some modules is specific to particular types
	of tumor; for example, a growth-inhibitory module is specifically
	repressed in acute lymphoblastic leukemias and may underlie the deregulated
	proliferation in these cancers. {O}ther modules are shared across
	a diverse set of clinical conditions, suggestive of common tumor
	progression mechanisms. {F}or example, the bone osteoblastic module
	spans a variety of tumor types and includes both secreted growth
	factors and their receptors. {O}ur findings suggest that there is
	a single mechanism for both primary tumor proliferation and metastasis
	to bone. {O}ur analysis presents multiple research directions for
	diagnostic, prognostic and therapeutic studies.},
  doi = {10.1038/ng1434},
  pdf = {../local/Segal2004modulea.pdf},
  file = {Segal2004modulea.pdf:Segal2004modulea.pdf:PDF},
  url = {http://dx.doi.org/10.1038/ng1434}
}

@article{Segal2003Module,
  author = {Segal, E. and Shapira, M. and Regev, A. and Pe'er, D. and Botstein,
	D. and Koller, D. and Friedman, N.},
  title = {Module networks: identifying regulatory modules and their condition-specific
	regulators from gene expression data.},
  journal = {Nat. {G}enet.},
  volume = {34},
  number = {2},
  pages = {166--176},
  month = {Jun},
  year = {2003},
  doi = {10.1038/ng1165},
  url = {http://dx.doi.org/10.1038/ng1165},
  pii = {ng1165},
  pmid = {12740579},
  abstract = {Much of a cell's activity is organized as a network of interacting
	modules: sets of genes coregulated to respond to different conditions.
	{W}e present a probabilistic method for identifying regulatory modules
	from gene expression data. {O}ur procedure identifies modules of
	coregulated genes, their regulators and the conditions under which
	regulation occurs, generating testable hypotheses in the form 'regulator
	{X} regulates module {Y} under conditions {W}'. {W}e applied the
	method to a {S}accharomyces cerevisiae expression data set, showing
	its ability to identify functionally coherent modules and their correct
	regulators. {W}e present microarray experiments supporting three
	novel predictions, suggesting regulatory roles for previously uncharacterized
	proteins.},
  pdf = {../local/Segal2003Module.pdf},
  file = {Segal2003Module.pdf:Segal2003Module.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  timestamp = {2006.01.18}
}

@article{Segal2003Regression,
  author = {Segal, M. R. and Dahlquist, K. D. and Conklin, B. R.},
  title = {Regression approaches for microarray data analysis.},
  journal = {J. {C}omput. {B}iol.},
  year = {2003},
  volume = {10},
  pages = {961--980},
  number = {6},
  abstract = {A variety of new procedures have been devised to handle the two-sample
	comparison (e.g., tumor versus normal tissue) of gene expression
	values as measured with microarrays. {S}uch new methods are required
	in part because of some defining characteristics of microarray-based
	studies: (i) the very large number of genes contributing expression
	measures which far exceeds the number of samples (observations) available
	and (ii) the fact that by virtue of pathway/network relationships,
	the gene expression measures tend to be highly correlated. {T}hese
	concerns are exacerbated in the regression setting, where the objective
	is to relate gene expression, simultaneously for multiple genes,
	to some external outcome or phenotype. {C}orrespondingly, several
	methods have been recently proposed for addressing these issues.
	{W}e briefly critique some of these methods prior to a detailed evaluation
	of gene harvesting. {T}his reveals that gene harvesting, without
	additional constraints, can yield artifactual solutions. {R}esults
	obtained employing such constraints motivate the use of regularized
	regression procedures such as the lasso, least angle regression,
	and support vector machines. {M}odel selection and solution multiplicity
	issues are also discussed. {T}he methods are evaluated using a microarray-based
	study of cardiomyopathy in transgenic mice.},
  doi = {10.1089/106652703322756177},
  pdf = {../local/Segal2003Regression.pdf},
  file = {Segal2003Regression.pdf:local/Segal2003Regression.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Segal2003Classificationa,
  author = {Segal, N. H. and Pavlidis, P. and Antonescu, C. R. and Maki, R. G.
	and Noble, W. S. and DeSantis, D. and Woodruff, J. M. and Lewis,
	J. J. and Brennan, M. F. and Houghton, A. N. and Cordon-Cardo, C.},
  title = {Classification and {S}ubtype {P}rediction of {A}dult {S}oft {T}issue
	{S}arcoma by {F}unctional {G}enomics},
  journal = {Am. {J}. {P}athol.},
  year = {2003},
  volume = {163},
  pages = {691--700},
  number = {2},
  month = {Aug},
  abstract = {Adult soft tissue sarcomas are a heterogeneous group of tumors, including
	well-described subtypes by histological and genotypic criteria, and
	pleomorphic tumors typically characterized by non-recurrent genetic
	aberrations and karyotypic heterogeneity. {T}he latter pose a diagnostic
	challenge, even to experienced pathologists. {W}e proposed that gene
	expression profiling in soft tissue sarcoma would identify a genomic-based
	classification scheme that is useful in diagnosis. {RNA} samples
	from 51 pathologically confirmed cases, representing nine different
	histological subtypes of adult soft tissue sarcoma, were examined
	using the {A}ffymetrix {U}95{A} {G}ene{C}hip. {S}tatistical tests
	were performed on experimental groups identified by cluster analysis,
	to find discriminating genes that could subsequently be applied in
	a support vector machine algorithm. {S}ynovial sarcomas, round-cell/myxoid
	liposarcomas, clear-cell sarcomas and gastrointestinal stromal tumors
	displayed remarkably distinct and homogenous gene expression profiles.
	{P}leomorphic tumors were heterogeneous. {N}otably, a subset of malignant
	fibrous histiocytomas, a controversial histological subtype, was identified
	as a distinct genomic group. {T}he support vector machine algorithm
	supported a genomic basis for diagnosis, with both high sensitivity
	and specificity. {I}n conclusion, we showed gene expression profiling
	to be useful in classification and diagnosis, providing insights
	into pathogenesis and pointing to potential new therapeutic targets
	of soft tissue sarcoma.},
  pdf = {../local/Segal2003Classificationa.pdf},
  file = {Segal2003Classificationa.pdf:local/Segal2003Classificationa.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://ajp.amjpathol.org/cgi/content/abstract/163/2/691}
}

@article{Segal2003Classification,
  author = {Segal, N. H. and Pavlidis, P. and Noble, W. S. and Antonescu, C.
	R. and Viale, A. and Wesley, U. V. and Busam, K. and Gallardo, H.
	and DeSantis, D. and Brennan, M. F. and Cordon-Cardo, C. and Wolchok,
	J. D. and Houghton, A. N.},
  title = {Classification of {C}lear-{C}ell {S}arcoma as a {S}ubtype of {M}elanoma
	by {G}enomic {P}rofiling},
  journal = {J. {C}lin. {O}ncol.},
  year = {2003},
  volume = {21},
  pages = {1775--1781},
  number = {9},
  month = {May},
  abstract = {Purpose: {T}o develop a genome-based classification scheme for clear-cell
	sarcoma ({CCS}), also known as melanoma of soft parts ({MSP}), which
	would have implications for diagnosis and treatment. {T}his tumor
	displays characteristic features of soft tissue sarcoma ({STS}),
	including deep soft tissue primary location and a characteristic
	translocation, t(12;22)(q13;q12), involving {EWS} and {ATF}1 genes.
	{CCS}/{MSP} also has typical melanoma features, including immunoreactivity
	for {S}100 and {HMB}45, pigmentation, {MITF}-{M} expression, and
	a propensity for regional lymph node metastases. {M}aterials and
	{M}ethods: {RNA} samples from 21 cell lines and 60 pathologically
	confirmed cases of {STS}, melanoma, and {CCS}/{MSP} were examined
	using the {U}95{A} {G}ene{C}hip ({A}ffymetrix, {S}anta {C}lara, {CA}).
	{H}ierarchical cluster analysis, principal component analysis, and
	support vector machine ({SVM}) analysis exploited genomic correlations
	within the data to classify {CCS}/{MSP}. {R}esults: {U}nsupervised
	analyses demonstrated a clear distinction between {STS} and melanoma
	and, furthermore, showed that {CCS}/{MSP} cluster with the melanomas
	as a distinct group. {A} supervised {SVM} learning approach further
	validated this finding and provided a user-independent approach to
	diagnosis. {G}enes of interest that discriminate {CCS}/{MSP} included
	those encoding melanocyte differentiation antigens, {MITF}, {SOX}10,
	{ERBB}3, and {FGFR}1. {C}onclusion: {G}ene expression profiles support
	the classification of {CCS}/{MSP} as a distinct genomic subtype of
	melanoma. {A}nalysis of these gene profiles using the {SVM} may be
	an important diagnostic tool. {G}enomic analysis identified potential
	targets for the development of therapeutic strategies in the treatment
	of this disease.},
  doi = {10.1200/JCO.2003.10.108},
  pdf = {../local/Segal2003Classification.pdf},
  file = {Segal2003Classification.pdf:local/Segal2003Classification.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1200/JCO.2003.10.108}
}

@article{Segre2002Analysis,
  author = {Segr{\`e}, D. and Vitkup, D. and Church, G. M.},
  title = {Analysis of optimality in natural and perturbed metabolic networks.},
  journal = {Proc Natl Acad Sci U S A},
  year = {2002},
  month = {Nov},
  volume = {99},
  number = {23},
  pages = {15112--15117},
  doi = {10.1073/pnas.232349399},
  url = {http://dx.doi.org/10.1073/pnas.232349399},
  pmid = {12415116},
  pii = {232349399},
  institution = {Lipper Center for Computational Genetics and Department of Genetics,
	Harvard Medical School, Boston, MA 02115, USA.},
  language = {eng},
  medline-pst = {ppublish},
  abstract = {An important goal of whole-cell computational modeling is to integrate
	detailed biochemical information with biological intuition to produce
	testable predictions. Based on the premise that prokaryotes such
	as Escherichia coli have maximized their growth performance along
	evolution, flux balance analysis (FBA) predicts metabolic flux distributions
	at steady state by using linear programming. Corroborating earlier
	results, we show that recent intracellular flux data for wild-type
	E. coli JM101 display excellent agreement with FBA predictions. Although
	the assumption of optimality for a wild-type bacterium is justifiable,
	the same argument may not be valid for genetically engineered knockouts
	or other bacterial strains that were not exposed to long-term evolutionary
	pressure. We address this point by introducing the method of minimization
	of metabolic adjustment (MOMA), whereby we test the hypothesis that
	knockout metabolic fluxes undergo a minimal redistribution with respect
	to the flux configuration of the wild type. MOMA employs quadratic
	programming to identify a point in flux space, which is closest to
	the wild-type point, compatibly with the gene deletion constraint.
	Comparing MOMA and FBA predictions to experimental flux data for
	E. coli pyruvate kinase mutant PB25, we find that MOMA displays a
	significantly higher correlation than FBA. Our method is further
	supported by experimental data for E. coli knockout growth rates.
	It can therefore be used for predicting the behavior of perturbed
	metabolic networks, whose growth performance is in general suboptimal.
	MOMA and its possible future extensions may be useful in understanding
	the evolutionary optimization of metabolism.},
  pdf = {../local/Segre2002Analysis.pdf},
  file = {Segre2002Analysis.pdf:Segre2002Analysis.pdf:PDF},
  owner = {jp},
  timestamp = {2013.01.25}
}

@article{Seike2005Proteomic,
  author = {Seike, M. and Kondo, T. and Fujii, K. and Okano, T. and Yamada, T.
	and Matsuno, Y. and Gemma, A. and Kudoh, S. and Hirohashi, S.},
  title = {Proteomic signatures for histological types of lung cancer.},
  journal = {Proteomics},
  year = {2005},
  month = {Jul},
  doi = {10.1002/pmic.200401166},
  url = {http://dx.doi.org/10.1002/pmic.200401166},
  abstract = {We performed proteomic studies on lung cancer cells to elucidate the
	mechanisms that determine histological phenotype. {T}hirty lung cancer
	cell lines with three different histological backgrounds (squamous
	cell carcinoma, small cell lung carcinoma and adenocarcinoma) were
	subjected to two-dimensional difference gel electrophoresis (2-{D}
	{DIGE}) and grouped by multivariate analyses on the basis of their
	protein expression profiles. 2-{D} {DIGE} achieves more accurate
	quantification of protein expression by using highly sensitive fluorescence
	dyes to label the cysteine residues of proteins prior to two-dimensional
	polyacrylamide gel electrophoresis. {W}e found that hierarchical
	clustering analysis and principal component analysis divided the
	cell lines according to their original histology. {S}pot ranking
	analysis using a support vector machine algorithm and unsupervised
	classification methods identified 32 protein spots essential for
	the classification. {T}he proteins corresponding to the spots were
	identified by mass spectrometry. {N}ext, lung cancer cells isolated
	from tumor tissue by laser microdissection were classified on the
	basis of the expression pattern of these 32 protein spots. {B}ased
	on the expression profile of the 32 spots, the isolated cancer cells
	were categorized into three histological groups: the squamous cell
	carcinoma group, the adenocarcinoma group, and a group of carcinomas
	with other histological types. {I}n conclusion, our results demonstrate
	the utility of quantitative proteomic analysis for molecular diagnosis
	and classification of lung cancer cells.},
  keywords = {biosvm proteomics},
  pdf = {../local/Seike2005Proteomic.pdf},
  file = {Seike2005Proteomic.pdf:local/Seike2005Proteomic.pdf:PDF}
}

@article{Selinger2000RNA,
  author = {Douglas W. Selinger and Kevin J. Cheung and Rui Mei and Erik M. Johansson
	and Craig S. Richmond and Frederick R. Blattner and David J. Lockhart
	and George M. Church},
  title = {{RNA} expression analysis using a 30 base pair resolution {E}scherichia
	coli genome array},
  journal = {Nat. {B}iotechnol.},
  year = {2000},
  volume = {18},
  pages = {1262--1268},
  number = {12},
  pdf = {../local/seli00.pdf},
  file = {seli00.pdf:local/seli00.pdf:PDF},
  subject = {microarray},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nbt/journal/v18/n12/full/nbt1200_1262.html&filetype=PDF}
}

@article{Semizarov2003Specificity,
  author = {Semizarov, D. and Frost, L. and Sarthy, A. and Kroeger, P. and Halbert,
	D. N. and Fesik, S. W.},
  title = {Specificity of short interfering {RNA} determined through gene expression
	signatures.},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2003},
  volume = {100},
  pages = {6347--6352},
  number = {11},
  month = {May},
  abstract = {Short interfering {RNA} (si{RNA}) is widely used for studying gene
	function and holds great promise as a tool for validating drug targets
	and treating disease. {A} critical assumption in these applications
	is that the effect of si{RNA} on cells is specific, i.e., limited
	to the specific knockdown of the target gene. {I}n this article,
	we characterize the specificity of si{RNA} by applying gene expression
	profiling. {S}everal si{RNA}s were designed against different regions
	of the same target gene for three different targets. {T}heir effects
	on cells were compared by using {DNA} microarrays to generate gene
	expression signatures. {W}hen the si{RNA} design and transfection
	conditions were optimized, the signatures for different si{RNA}s
	against the same target were shown to correlate very closely, whereas
	the signatures for different genes revealed no correlation. {T}hese
	results indicate that si{RNA} is a highly specific tool for targeted
	gene knockdown, establishing si{RNA}-mediated gene silencing as a
	reliable approach for large-scale screening of gene function and
	drug target validation.},
  doi = {10.1073/pnas.1131959100},
  keywords = {sirna},
  pii = {1131959100},
  url = {http://dx.doi.org/10.1073/pnas.1131959100}
}

@article{Sen2004Predicting,
  author = {Sen, T. Z. and Kloczkowski, A. and Jernigan, R. L. and Yan, C. and
	Honavar, V. and Ho, K. M. and Wang, C. Z. and Ihm, Y. and Cao, H. and
	Gu, X. and Dobbs, D.},
  title = {Predicting binding sites of hydrolase-inhibitor complexes by combining
	several methods.},
  journal = {B{MC} {B}ioinformatics},
  year = {2004},
  volume = {5},
  pages = {205},
  abstract = {Background {P}rotein-protein interactions play a critical role in
	protein function. {C}ompletion of many genomes is being followed
	rapidly by major efforts to identify interacting protein pairs experimentally
	in order to decipher the networks of interacting, coordinated-in-action
	proteins. {I}dentification of protein-protein interaction sites and
	detection of specific amino acids that contribute to the specificity
	and the strength of protein interactions is an important problem
	with broad applications ranging from rational drug design to the
	analysis of metabolic and signal transduction networks. {R}esults
	{I}n order to increase the power of predictive methods for protein-protein
	interaction sites, we have developed a consensus methodology for
	combining four different methods. {T}hese approaches include: data
	mining using {S}upport {V}ector {M}achines, threading through protein
	structures, prediction of conserved residues on the protein surface
	by analysis of phylogenetic trees, and the {C}onservatism of {C}onservatism
	method of {M}irny and {S}hakhnovich. {R}esults obtained on a dataset
	of hydrolase-inhibitor complexes demonstrate that the combination
	of all four methods yield improved predictions over the individual
	methods. {C}onclusions {W}e developed a consensus method for predicting
	protein-protein interface residues by combining sequence and structure-based
	methods. {T}he success of our consensus approach suggests that similar
	methodologies can be developed to improve prediction accuracies for
	other bioinformatic problems.},
  doi = {10.1186/1471-2105-5-205},
  pdf = {../local/Sen2004Predicting.pdf},
  file = {Sen2004Predicting.pdf:local/Sen2004Predicting.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Senawongse2005Predicting,
  author = {Pasak Senawongse and Andrew R Dalby and Zheng Rong Yang},
  title = {Predicting the phosphorylation sites using hidden {M}arkov models and
	machine learning methods.},
  journal = {J {C}hem {I}nf {M}odel},
  year = {2005},
  volume = {45},
  pages = {1147--1152},
  number = {4},
  abstract = {Accurately predicting phosphorylation sites in proteins is an important
	issue in postgenomics, for which how to efficiently extract the most
	predictive features from amino acid sequences for modeling is still
	challenging. {A}lthough both the distributed encoding method and
	the bio-basis function method work well, they still have some limits
	in use. {T}he distributed encoding method is unable to code the biological
	content in sequences efficiently, whereas the bio-basis function
	method is a nonparametric method, which is often computationally
	expensive. {A}s hidden {M}arkov models ({HMM}s) can be used to generate
	one model for one cluster of aligned protein sequences, the aim in
	this study is to use {HMM}s to extract features from amino acid sequences,
	where sequence clusters are determined using available biological
	knowledge. {I}n this novel method, {HMM}s are first constructed using
	functional sequences only. {B}oth functional and nonfunctional training
	sequences are then inputted into the trained {HMM}s to generate functional
	and nonfunctional feature vectors. {F}rom this, a machine learning
	algorithm is used to construct a classifier based on these feature
	vectors. {I}t is found in this work that (1) this method provides
	much better prediction accuracy than the use of {HMM}s only for prediction,
	and (2) the support vector machines ({SVM}s) algorithm outperforms
	decision trees and neural network algorithms when they are constructed
	on the features extracted using the trained {HMM}s.},
  doi = {10.1021/ci050047+},
  pdf = {../local/Senawongse2005Predicting.pdf},
  file = {Senawongse2005Predicting.pdf:local/Senawongse2005Predicting.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci050047+}
}

@article{Seol2001Skp1,
  author = {J. H. Seol and A. Shevchenko and A. Shevchenko and R. J. Deshaies},
  title = {Skp1 forms multiple protein complexes, including {RAVE}, a regulator
	of {V}-{ATP}ase assembly.},
  journal = {Nat {C}ell {B}iol},
  year = {2001},
  volume = {3},
  pages = {384--391},
  number = {4},
  month = {Apr},
  abstract = {S{CF} ubiquitin ligases are composed of {S}kp1, {C}dc53, {H}rt1 and
	one member of a large family of substrate receptors known as {F}-box
	proteins ({FBP}s). {H}ere we report the identification, using sequential
	rounds of epitope tagging, affinity purification and mass spectrometry,
	of 16 {S}kp1 and {C}dc53-associated proteins in budding yeast, including
	all components of {SCF}, 9 {FBP}s, {Y}jr033 ({R}av1) and {Y}dr202
	({R}av2). {R}av1, {R}av2 and {S}kp1 form a complex that we have named
	'regulator of the ({H}+)-{ATP}ase of the vacuolar and endosomal membranes'
	({RAVE}), which associates with the {V}1 domain of the vacuolar membrane
	({H}+)-{ATP}ase ({V}-{ATP}ase). {V}-{ATP}ases are conserved throughout
	eukaryotes, and have been implicated in tumour metastasis and multidrug
	resistance, and here we show that {RAVE} promotes glucose-triggered
	assembly of the {V}-{ATP}ase holoenzyme. {P}revious systematic genome-wide
	two-hybrid screens yielded 17 proteins that interact with {S}kp1
	and {C}dc53, only 3 of which overlap with those reported here. {T}hus,
	our results provide a distinct view of the interactions that link
	proteins into a comprehensive cellular network.},
  doi = {10.1038/35070067},
  pdf = {../local/Seol2001Skp1.pdf},
  file = {Seol2001Skp1.pdf:local/Seol2001Skp1.pdf:PDF},
  keywords = {Affinity, Affinity Labels, Amino Acid Sequence, Animals, Cell Cycle
	Proteins, Cells, Chromatography, Cloning, Comparative Study, Cullin
	Proteins, Cultured, Cytoplasm, DNA, DNA Damage, DNA Repair, Electrospray
	Ionization, Fungal, Fungal Proteins, Gene Targeting, Genetic, Glucose,
	Holoenzymes, Humans, Macromolecular Substances, Mass, Matrix-Assisted
	Laser Desorption-Ionization, Mitosis, Molecular, Molecular Sequence
	Data, Non-P.H.S., Non-U.S. Gov't, P.H.S., Phosphoric Monoester Hydrolases,
	Protein Binding, Protein Interaction Mapping, Protein Kinases, Proteome,
	Proteomics, Proton-Translocating ATPases, Recombinant Fusion Proteins,
	Research Support, Ribonucleoproteins, Ribosomes, S-Phase Kinase-Associated
	Proteins, Saccharomyces cerevisiae, Saccharomyces cerevisiae Proteins,
	Sensitivity and Specificity, Sequence Alignment, Signal Transduction,
	Species Specificity, Spectrometry, Spectrum Analysis, Transcription,
	U.S. Gov't, Vacuolar Proton-Translocating ATPases, 11283612},
  owner = {vert},
  pii = {35070067},
  url = {http://dx.doi.org/10.1038/35070067}
}

@article{Serra2003Development,
  author = {Serra, J. R. and Thompson, E. D. and Jurs, P. C.},
  title = {Development of binary classification of structural chromosome aberrations
	for a diverse set of organic compounds from molecular structure},
  journal = {Chem. {R}es. {T}oxicol.},
  year = {2003},
  volume = {16},
  pages = {153--163},
  number = {2},
  abstract = {Classification models are generated to predict in vitro cytogenetic
	results for a diverse set of 383 organic compounds. {B}oth k-nearest
	neighbor and support vector machine models are developed. {T}hey
	are based on calculated molecular structure descriptors. {E}ndpoints
	used are the labels clastogenic or nonclastogenic according to an
	in vitro chromosomal aberration assay with {C}hinese hamster lung
	cells. {C}ompounds that were tested with both a 24 and 48 h exposure
	are included. {E}ach compound is represented by calculated molecular
	structure descriptors encoding the topological, electronic, geometrical,
	or polar surface area aspects of the structure. {S}ubsets of informative
	descriptors are identified with genetic algorithm feature selection
	coupled to the appropriate classification algorithm. {T}he overall
	classification success rate for a k-nearest neighbor classifier built
	with just six topological descriptors is 81.2% for the training set
	and 86.5% for an external prediction set. {T}he overall classification
	success rate for a three-descriptor support vector machine model
	is 99.7% for the training set, 92.1% for the cross-validation set,
	and 83.8% for an external prediction set.},
  doi = {10.1021/tx020077w},
  pdf = {../local/Serra2003Development.pdf},
  file = {Serra2003Development.pdf:local/Serra2003Development.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1021/tx020077w}
}

@article{Sette2001HLA,
  author = {Sette, A. and Chesnut, R. and Fikes, J.},
  title = {{HLA} expression in cancer: implications for {T} cell-based immunotherapy},
  journal = {Immunogenetics},
  year = {2001},
  volume = {53},
  pages = {255--263},
  number = {4},
  abstract = {HLA class I expression is altered in a significant fraction of the
	tumor types reviewed here, reflecting either immune pressure or,
	simply, the accumulation of pathological changes and alterations.
	However, in all tumor types analyzed, a majority of the tumors express
	HLA class I, with a general tendency for the more severe alterations
	to be found in later-stage and less differentiated tumors. These
	results are encouraging for the development of specific immunotherapies,
	especially considering that (1) the relatively low sensitivity of
	immunohistochemical techniques might underestimate HLA expression
	in tumors, (2) class I expression can be induced in tumor cells as
	a result of local inflammation and lymphokine release, and (3) class
	I-negative cells would be predicted to be sensitive to lysis by natural
	killer cells.},
  keywords = {immunoinformatics},
  pmid = {11491528},
  timestamp = {2007.01.25}
}

@article{Sette1999Nine,
  author = {A. Sette and J. Sidney},
  title = {Nine major {HLA} class {I} supertypes account for the vast preponderance
	of {HLA-A} and -{B} polymorphism.},
  journal = {Immunogenetics},
  year = {1999},
  month = {Nov},
  volume = {50},
  number = {3-4},
  pages = {201--212},
  pmid = {10602880},
  pii = {90500201.251},
  abstract = {Herein, we review the epitope approach to vaccine development, and
	discuss how knowledge of HLA supertypes might be used as a tool in
	the development of such vaccines. After reviewing the main structural
	features of the A2-, A3-, B7-, and B44- supertype alleles, and biological
	data demonstrating their immunological relevance, we analyze the
	frequency at which these supertype alleles are expressed in various
	ethnicities and discuss the relevance of those observations to vaccine
	development. Next, the existence of five new supertypes (A1, A24,
	B27, B58, and B62) is reported. As a result, it is possible to account
	for the predominance of all known HLA class I with only nine main
	functional binding specificities. The practical implications of this
	finding, as well as its relevance to understanding the functional
	implication of MHC polymorphism in humans, are discussed.},
  keywords = {Alleles; Amino Acid Sequence; Animals; Epitopes; HLA-A Antigens; HLA-B
	Antigens; Histocompatibility Antigens Class I; Humans; Molecular
	Sequence Data; Polymorphism, Genetic},
  owner = {laurent},
  timestamp = {2007.01.05}
}

@article{Sette1998HLA,
  author = {A. Sette and J. Sidney},
  title = {{HLA} supertypes and supermotifs: a functional perspective on {HLA}
	polymorphism.},
  journal = {Curr. Opin. Immunol.},
  year = {1998},
  month = {Aug},
  volume = {10},
  number = {4},
  pages = {478--482},
  pmid = {9722926},
  pii = {S0952-7915(98)80124-6},
  abstract = {A large fraction of HLA class I, and possibly class II, molecules
	can be classified into relatively few supertypes, characterized by
	overlapping peptide-binding repertoires and consensus B- and F-pocket
	structures. Cross-binding peptides are frequently recognized by specific
	T cells in the course of natural disease processes and in the context
	of multiple HLA molecules, validating the concept of HLA supertypes
	at the functional level.},
  keywords = {Animals; Communicable Diseases; Evolution, Molecular; HLA Antigens;
	HLA-A2 Antigen; HLA-A3 Antigen; Humans; Neoplasms; Polymorphism,
	Genetic},
  owner = {laurent},
  timestamp = {2007.01.05}
}

@article{Sette1994relationship,
  author = {A. Sette and A. Vitiello and B. Reherman and P. Fowler and R. Nayersina
	and W. M. Kast and C. J. Melief and C. Oseroff and L. Yuan and J.
	Ruppert and J. Sidney and M. F. del Guercio and S. Southwood and
	R. T. Kubo and R. W. Chesnut and H. M. Grey and F. V. Chisari},
  title = {The relationship between class {I} binding affinity and immunogenicity
	of potential cytotoxic {T} cell epitopes.},
  journal = {J. Immunol.},
  year = {1994},
  volume = {153},
  pages = {5586--5592},
  number = {12},
  month = {Dec},
  abstract = {The relationship between binding affinity for HLA class I molecules
	and immunogenicity of discrete peptide epitopes has been analyzed
	in two different experimental approaches. In the first approach,
	the immunogenicity of potential epitopes ranging in MHC binding affinity
	over a 10,000-fold range was analyzed in HLA-A*0201 transgenic mice.
	In the second approach, the antigenicity of approximately 100 different
	hepatitis B virus (HBV)-derived potential epitopes, all carrying
	A*0201 binding motifs, was assessed by using PBL of acute hepatitis
	patients. In both cases, it was found that an affinity threshold
	of approximately 500 nM (preferably 50 nM or less) apparently determines
	the capacity of a peptide epitope to elicit a CTL response. These
	data correlate well with class I binding affinity measurements of
	either naturally processed peptides or previously described T cell
	epitopes. Taken together, these data have important implications
	for the selection of epitopes for peptide-based vaccines, and also
	formally demonstrate the crucial role of determinant selection in
	the shaping of T cell responses. Because in most (but not all) cases,
	high affinity peptides seem to be immunogenic, our data also suggest
	that holes in the functional T cell repertoire, if they exist, may
	be relatively rare.},
  keywords = {Amino Acid Sequence; Animals; Cell Line; Cytotoxicity Tests, Immunologic;
	Epitopes; HLA-A Antigens; Hepatitis B; Hepatitis B Antigens; Humans;
	Mice; Mice, Transgenic; Molecular Sequence Data; Peptides; Protein
	Binding; T-Lymphocytes, Cytotoxic},
  owner = {laurent},
  pmid = {7527444},
  timestamp = {2007.07.12}
}

@article{Shabalina2006Computational,
  author = {Shabalina, S. and Spiridonov, A. and Ogurtsov, A.},
  title = {{C}omputational models with thermodynamic and composition features
	improve si{RNA} design.},
  journal = {BMC Bioinformatics},
  year = {2006},
  month = {Feb},
  volume = {7},
  number = {1},
  pages = {65},
  doi = {10.1186/1471-2105-7-65},
  url = {http://dx.doi.org/10.1186/1471-2105-7-65},
  pmid = {16472402},
  pii = {1471-2105-7-65},
  abstract = {ABSTRACT: BACKGROUND: Small interfering RNAs (siRNAs) have become
	an important tool in cell and molecular biology. Reliable design
	of siRNA molecules is essential for the needs of large functional
	genomics projects. RESULTS: To improve the design of efficient siRNA
	molecules, we performed a comparative, thermodynamic and correlation
	analysis on a heterogeneous set of 653 siRNAs collected from the
	literature. We used this training set to select siRNA features and
	optimize computational models. We identified 18 parameters that correlate
	significantly with silencing efficiency. Some of these parameters
	characterize only the siRNA sequence, while others involve the whole
	mRNA. Most importantly, we derived an siRNA position-dependent consensus,
	and optimized the free-energy difference of the 5' and 3' terminal
	dinucleotides of the siRNA antisense strand. The position-dependent
	consensus is based on correlation and t-test analyses of the training
	set, and accounts for both significantly preferred and avoided nucleotides
	in all sequence positions. On the training set, the two parameters'
	correlation with silencing efficiency was 0.5 and 0.36, respectively.
	Among other features, a dinucleotide content index and the frequency
	of potential targets for siRNA in the mRNA added predictive power
	to our model (R = 0.55). We showed that our model is effective for
	predicting the efficiency of siRNAs at different concentrations.
	We optimized a neural network model on our training set using three
	parameters characterizing the siRNA sequence, and predicted efficiencies
	for the test siRNA dataset recently published by Novartis. On this
	validation set, the correlation coefficient between predicted and
	observed efficiency was 0.75. Using the same model, we performed
	a transcriptome-wide analysis of optimal siRNA targets for 22,600
	human mRNAs. CONCLUSIONS: We demonstrated that the properties of
	the siRNAs themselves are essential for efficient RNA interference.
	The 5' ends of antisense strands of efficient siRNAs are U-rich and
	possess a content similarity to the pyrimidine-rich oligonucleotides
	interacting with the polypurine RNA tracks that are recognized by
	RNase H. The advantage of our method over similar methods is the
	small number of parameters. As a result, our method requires a much
	smaller training set to produce consistent results. Other mRNA features,
	though expensive to compute, can slightly improve our model.},
  keywords = {sirna},
  pdf = {../local/Shabalina2006Computational.pdf},
  file = {Shabalina2006Computational.pdf:local/Shabalina2006Computational.pdf:PDF},
  owner = {vert},
  timestamp = {2006.03.28}
}

@article{Shacham2004PREDICT,
  author = {Shacham, S. and Marantz, Y. and Bar-Haim, S. and Kalid, O. and
	Warshaviak, D. and Avisar, N. and Inbal, B. and Heifetz, A. and
	Fichman, M. and Topf, M. and Naor, Z. and Noiman, S. and Becker, O. M.},
  title = {{PREDICT} modeling and in-silico screening for {G}-protein
	coupled receptors.},
  journal = {Proteins},
  year = {2004},
  month = {Oct},
  volume = {57},
  number = {1},
  pages = {51--86},
  abstract = {G-protein coupled receptors (GPCRs) are a major group of drug targets
	for which only one x-ray structure is known (the nondrugable rhodopsin),
	limiting the application of structure-based drug discovery to GPCRs.
	In this paper we present the details of PREDICT, a new algorithmic
	approach for modeling the 3D structure of GPCRs without relying on
	homology to rhodopsin. PREDICT, which focuses on the transmembrane
	domain of GPCRs, starts from the primary sequence of the receptor,
	simultaneously optimizing multiple 'decoy' conformations of the protein
	in order to find its most stable structure, culminating in a virtual
	receptor-ligand complex. In this paper we present a comprehensive
	analysis of three PREDICT models for the dopamine D2, neurokinin
	NK1, and neuropeptide Y Y1 receptors. A shorter discussion of the
	CCR3 receptor model is also included. All models were found to be
	in good agreement with a large body of experimental data. The quality
	of the PREDICT models, at least for drug discovery purposes, was
	evaluated by their successful utilization in in-silico screening.
	Virtual screening using all three PREDICT models yielded enrichment
	factors 9-fold to 44-fold better than random screening. Namely, the
	PREDICT models can be used to identify active small-molecule ligands
	embedded in large compound libraries with an efficiency comparable
	to that obtained using crystal structures for non-GPCR targets.},
  doi = {10.1002/prot.20195},
  url = {http://dx.doi.org/10.1002/prot.20195},
  pmid = {15326594},
  keywords = {chemogenomics},
  owner = {laurent},
  timestamp = {2008.03.27}
}

@article{Shadforth2005Protein,
  author = {Ian Shadforth and Daniel Crowther and Conrad Bessant},
  title = {Protein and peptide identification algorithms using {MS} for use in
	high-throughput, automated pipelines.},
  journal = {Proteomics},
  year = {2005},
  volume = {5},
  pages = {4082--4095},
  number = {16},
  month = {Nov},
  abstract = {Current proteomics experiments can generate vast quantities of data
	very quickly, but this has not been matched by data analysis capabilities.
	Although there have been a number of recent reviews covering various
	aspects of peptide and protein identification methods using MS, comparisons
	of which methods are either the most appropriate for, or the most
	effective at, their proposed tasks are not readily available. As
	the need for high-throughput, automated peptide and protein identification
	systems increases, the creators of such pipelines need to be able
	to choose algorithms that are going to perform well both in terms
	of accuracy and computational efficiency. This article therefore
	provides a review of the currently available core algorithms for
	PMF, database searching using MS/MS, sequence tag searches and de
	novo sequencing. We also assess the relative performances of a number
	of these algorithms. As there is limited reporting of such information
	in the literature, we conclude that there is a need for the adoption
	of a system of standardised reporting on the performance of new peptide
	and protein identification algorithms, based upon freely available
	datasets. We go on to present our initial suggestions for the format
	and content of these datasets.},
  doi = {10.1002/pmic.200402091},
  institution = {Cranfield Centre for Bioinformatics and IT, Cranfield University,
	Silsoe, UK.},
  keywords = {Algorithms; Alternative Splicing; Databases, Protein; Peptides; Polymorphism,
	Genetic; Proteins; Proteomics; Sequence Analysis; Software; Spectrometry,
	Mass, Matrix-Assisted Laser Desorption-Ionization},
  owner = {phupe},
  pmid = {16196103},
  timestamp = {2010.08.19},
  url = {http://dx.doi.org/10.1002/pmic.200402091}
}

@article{Shah2008SVM-HUSTLE,
  author = {Shah, A. R. and Oehmen, C. S. and Webb-Robertson, B.-J.},
  title = {{SVM-HUSTLE}---an iterative semi-supervised machine learning approach
	for pairwise protein remote homology detection.},
  journal = {Bioinformatics},
  year = {2008},
  volume = {24},
  pages = {783--790},
  number = {6},
  month = {Mar},
  abstract = {MOTIVATION: As the amount of biological sequence data continues to
	grow exponentially we face the increasing challenge of assigning
	function to this enormous molecular 'parts list'. The most popular
	approaches to this challenge make use of the simplifying assumption
	that similar functional molecules, or proteins, sometimes have similar
	composition, or sequence. However, these algorithms often fail to
	identify remote homologs (proteins with similar function but dissimilar
	sequence) which often are a significant fraction of the total homolog
	collection for a given sequence. We introduce a Support Vector Machine
	(SVM)-based tool to detect homology using semi-supervised iterative
	learning (SVM-HUSTLE) that identifies significantly more remote homologs
	than current state-of-the-art sequence or cluster-based methods.
	As opposed to building profiles or position specific scoring matrices,
	SVM-HUSTLE builds an SVM classifier for a query sequence by training
	on a collection of representative high-confidence training sets,
	recruits additional sequences and assigns a statistical measure of
	homology between a pair of sequences. SVM-HUSTLE combines principles
	of semi-supervised learning theory with statistical sampling to create
	many concurrent classifiers to iteratively detect and refine, on-the-fly,
	patterns indicating homology. RESULTS: When compared against existing
	methods for identifying protein homologs (BLAST, PSI-BLAST, COMPASS,
	PROF\_SIM, RANKPROP and their variants) on two different benchmark
	datasets SVM-HUSTLE significantly outperforms each of the above methods
	using the most stringent ROC(1) statistic with P-values less than
	1e-20. SVM-HUSTLE also yields results comparable to HHSearch but
	at a substantially reduced computational cost since we do not require
	the construction of HMMs. AVAILABILITY: The software executable to
	run SVM-HUSTLE can be downloaded from http://www.sysbio.org/sysbio/networkbio/svm\_hustle},
  doi = {10.1093/bioinformatics/btn028},
  pdf = {../local/Shah2008SVM-HUSTLE.pdf},
  file = {Shah2008SVM-HUSTLE.pdf:Shah2008SVM-HUSTLE.pdf:PDF},
  institution = {Scientific Data Management and Computational Biology and Bioinformatics,
	Pacific Northwest National Laboratory, Richland, WA, USA.},
  keywords = {PUlearning},
  owner = {mordelet},
  pii = {btn028},
  pmid = {18245127},
  timestamp = {2010.01.26},
  url = {http://dx.doi.org/10.1093/bioinformatics/btn028}
}

@article{Shah2007Modeling,
  author = {Shah, S. P. and Lam, W. L. and Ng, R. T. and Murphy, K. P.},
  title = {Modeling recurrent {DNA} copy number alterations in array {CGH} data},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {i450--i458},
  number = {13},
  pdf = {../local/Shah2007Modeling.pdf},
  file = {Shah2007Modeling.pdf:Shah2007Modeling.pdf:PDF},
  owner = {jp},
  publisher = {Oxford Univ Press},
  timestamp = {2010.01.11}
}

@article{Shah2004Fingerprint,
  author = {Shesha Shah and P. S. Sastry},
  title = {Fingerprint classification using a feedback-based line detector.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2004},
  volume = {34},
  pages = {85--94},
  number = {1},
  month = {Feb},
  abstract = {We present a fingerprint classification algorithm in this paper. {T}his
	algorithm classifies a fingerprint image into one of the five classes:
	{A}rch, {L}eft loop, {R}ight loop, {W}horl, and {T}ented arch. {W}e
	use a new low-dimensional feature vector obtained from the output
	of a novel oriented line detector presented here. {O}ur line detector
	is a co-operative dynamical system that gives oriented lines and
	preserves multiple orientations at points where differently oriented
	lines meet. {O}ur feature extraction process is based on characterizing
	the distribution of orientations around the fingerprint. {W}e discuss
	three different classifiers: support vector machines, nearest-neighbor
	classifier, and neural network classifier. {W}e present results obtained
	on a {N}ational {I}nstitute of {S}tandards and {T}echnology ({NIST})
	fingerprint database and compare with other published results on
	{NIST} databases. {A}ll our classifiers perform equally well, and
	this suggests that our novel line detection and feature extraction
	process indeed captures all the crucial information needed for classification
	in this problem.}
}

@article{Shah2009Mutational,
  author = {Shah, S. P. and Morin, R. D. and Khattra, J. and Prentice, L. and
	Pugh, T. and Burleigh, A. and Delaney, A. and Gelmon, K. and Guliany,
	R. and Senz, J. and Steidl, C. and Holt, R. A. and Jones, S. and
	Sun, M. and Leung, G. and Moore, R. and Severson, T. and Taylor,
	G. A. and Teschendorff, A. E. and Tse, K. and Turashvili, G. and
	Varhol, R. and Warren, R. L. and Watson, P. and Zhao, Y. and Caldas,
	C. and Huntsman, D. and Hirst, M. and Marra, M. A. and Aparicio,
	S.},
  title = {Mutational evolution in a lobular breast tumour profiled at single
	nucleotide resolution},
  journal = {Nature},
  year = {2009},
  volume = {461},
  pages = {809--813},
  number = {7265},
  month = {Oct},
  abstract = {Recent advances in next generation sequencing have made it possible
	to precisely characterize all somatic coding mutations that occur
	during the development and progression of individual cancers. Here
	we used these approaches to sequence the genomes (>43-fold coverage)
	and transcriptomes of an oestrogen-receptor-alpha-positive metastatic
	lobular breast cancer at depth. We found 32 somatic non-synonymous
	coding mutations present in the metastasis, and measured the frequency
	of these somatic mutations in DNA from the primary tumour of the
	same patient, which arose 9 years earlier. Five of the 32 mutations
	(in ABCB11, HAUS3, SLC24A4, SNX4 and PALB2) were prevalent in the
	DNA of the primary tumour removed at diagnosis 9 years earlier, six
	(in KIF1C, USP28, MYH8, MORC1, KIAA1468 and RNASEH2A) were present
	at lower frequencies (1-13\%), 19 were not detected in the primary
	tumour, and two were undetermined. The combined analysis of genome
	and transcriptome data revealed two new RNA-editing events that recode
	the amino acid sequence of SRP9 and COG3. Taken together, our data
	show that single nucleotide mutational heterogeneity can be a property
	of low or intermediate grade primary breast cancers and that significant
	evolution can occur with disease progression.},
  doi = {10.1038/nature08489},
  pdf = {../local/Shah2009Mutational.pdf},
  file = {Shah2009Mutational.pdf:Shah2009Mutational.pdf:PDF},
  institution = {Molecular Oncology, BC Cancer Agency, 675 West 10th Avenue, Vancouver
	V5Z 1L3, Canada.},
  keywords = {ngs},
  owner = {jp},
  pii = {nature08489},
  pmid = {19812674},
  timestamp = {2009.10.12},
  url = {http://dx.doi.org/10.1038/nature08489}
}

@article{Shann2008Genome,
  author = {Shann, Y. J. and Cheng, C. and Chiao, C. H. and Chen, D. T. and Li,
	P. H. and Hsu, M. T.},
  title = {Genome-Wide Mapping and Characterization of Hypomethylated Sites
	in Human Tissues and Breast Cancer Cell Lines},
  journal = {Genome Res.},
  year = {2008},
  volume = {18},
  pages = {791--801},
  keywords = {csbcbook}
}

@article{Shannon2003Cytoscape,
  author = {Shannon, P. and Markiel, A. and Ozier, O. and Baliga, N. S. and
	Wang, J. T. and Ramage, D. and Amin, N. and Schwikowski, B. and
	Ideker, T.},
  title = {{Cytoscape}: a software environment for integrated models of
	biomolecular interaction networks.},
  journal = {Genome Res.},
  year = {2003},
  month = {Nov},
  volume = {13},
  number = {11},
  pages = {2498--2504},
  abstract = {Cytoscape is an open source software project for integrating biomolecular
	interaction networks with high-throughput expression data and other
	molecular states into a unified conceptual framework. Although applicable
	to any system of molecular components and interactions, Cytoscape
	is most powerful when used in conjunction with large databases of
	protein-protein, protein-DNA, and genetic interactions that are increasingly
	available for humans and model organisms. Cytoscape's software Core
	provides basic functionality to layout and query the network; to
	visually integrate the network with expression profiles, phenotypes,
	and other molecular states; and to link the network to databases
	of functional annotations. The Core is extensible through a straightforward
	plug-in architecture, allowing rapid development of additional computational
	analyses and features. Several case studies of Cytoscape plug-ins
	are surveyed, including a search for interaction pathways correlating
	with changes in gene expression, a study of protein complexes involved
	in cellular recovery to DNA damage, inference of a combined physical/functional
	interaction network for Halobacterium, and an interface to detailed
	stochastic/kinetic gene regulatory models.},
  doi = {10.1101/gr.1239303},
  url = {http://dx.doi.org/10.1101/gr.1239303},
  pii = {13/11/2498},
  pmid = {14597658},
  timestamp = {2008.02.11}
}

@article{Shannon2003Analyzing,
  author = {William Shannon and Robert Culverhouse and Jill Duncan},
  title = {Analyzing microarray data using cluster analysis.},
  journal = {Pharmacogenomics},
  year = {2003},
  volume = {4},
  pages = {41--52},
  number = {1},
  month = {Jan},
  abstract = {As pharmacogenetics researchers gather more detailed and complex data
	on gene polymorphisms that effect drug metabolizing enzymes, drug
	target receptors and drug transporters, they will need access to
	advanced statistical tools to mine that data. {T}hese tools include
	approaches from classical biostatistics, such as logistic regression
	or linear discriminant analysis, and supervised learning methods
	from computer science, such as support vector machines and artificial
	neural networks. {I}n this review, we present an overview of another
	class of models, cluster analysis, which will likely be less familiar
	to pharmacogenetics researchers. {C}luster analysis is used to analyze
	data that is not a priori known to contain any specific subgroups.
	{T}he goal is to use the data itself to identify meaningful or informative
	subgroups. {S}pecifically, we will focus on demonstrating the use
	of distance-based methods of hierarchical clustering to analyze gene
	expression data.},
  keywords = {Algorithms, Automated, Base Pair Mismatch, Base Pairing, Base Sequence,
	Biosensing Techniques, Cluster Analysis, Comparative Study, Computer-Assisted,
	DNA, Gene Expression Profiling, Gene Expression Regulation, Genes,
	Hemolysins, Humans, Markov Chains, Messenger, Molecular Probe Techniques,
	Molecular Sequence Data, Nanotechnology, Neoplastic, Neural Networks
	(Computer), Non-U.S. Gov't, Nucleic Acid Conformation, Oligonucleotide
	Array Sequence Analysis, Pattern Recognition, Quality Control, RNA,
	Research Support, Signal Processing, Stomach Neoplasms},
  pmid = {12517285}
}

@article{Shapiro1992Feature,
  author = {L. S. Shapiro and M. Brady},
  title = {Feature-based correspondence: an eigenvector approach},
  journal = {Image Vision Comput.},
  year = {1992},
  volume = {10},
  pages = {283--288},
  number = {5},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  doi = {10.1016/0262-8856(92)90043-3},
  ee = {http://dx.doi.org/10.1016/0262-8856(92)90043-3},
  url = {http://dx.doi.org/10.1016/0262-8856(92)90043-3}
}

@article{Sharan2005motif-based,
  author = {R. Sharan and E. W. Myers},
  title = {A motif-based framework for recognizing sequence families.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {i387--i393},
  number = {Suppl 1},
  month = {Jun},
  abstract = {M{OTIVATION}: {M}any signals in biological sequences are based on
	the presence or absence of base signals and their spatial combinations.
	{O}ne of the best known examples of this is the signal identifying
	a core promoter-the site at which the basal transcription machinery
	starts the transcription of a gene. {O}ur goal is a fully automatic
	pattern recognition system for a family of sequences, which simultaneously
	discovers the base signals, their spatial relationships and a classifier
	based upon them. {RESULTS}: {I}n this paper we present a general
	method for characterizing a set of sequences by their recurrent motifs.
	{O}ur approach relies on novel probabilistic models for {DNA} binding
	sites and modules of binding sites, on algorithms to study them from
	the data and on a support vector machine that uses the models studied
	to classify a set of sequences. {W}e demonstrate the applicability
	of our approach to diverse instances, ranging from families of promoter
	sequences to a dataset of intronic sequences flanking alternatively
	spliced exons. {O}n a core promoter dataset our results are comparable
	with the state-of-the-art {M}c{P}romoter. {O}n a dataset of alternatively
	spliced exons we outperform a previous approach. {W}e also achieve
	high success rates in recognizing cell cycle regulated genes. {T}hese
	results demonstrate that a fully automatic pattern recognition algorithm
	can meet or exceed the performance of hand-crafted approaches. {AVAILABILITY}:
	{T}he software and datasets are available from the authors upon request.
	{CONTACT}: roded@tau.ac.il.},
  doi = {10.1093/bioinformatics/bti1002},
  pdf = {../local/Sharan2005motif-based.pdf},
  file = {Sharan2005motif-based.pdf:local/Sharan2005motif-based.pdf:PDF},
  keywords = {biosvm},
  pii = {21/suppl_1/i387},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1002}
}

@article{Sharan2005Conserved,
  author = {Sharan, R. and Suthram, S. and Kelley, R. M. and Kuhn, T. and McCuine,
	S. and Uetz, P. and Sittler, T. and Karp, R. M. and Ideker, T.},
  title = {Conserved patterns of protein interaction in multiple species.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2005},
  volume = {102},
  pages = {1974--1979},
  number = {6},
  month = {Feb},
  abstract = {To elucidate cellular machinery on a global scale, we performed a
	multiple comparison of the recently available protein-protein interaction
	networks of Caenorhabditis elegans, Drosophila melanogaster, and
	Saccharomyces cerevisiae. This comparison integrated protein interaction
	and sequence information to reveal 71 network regions that were conserved
	across all three species and many exclusive to the metazoans. We
	used this conservation, and found statistically significant support
	for 4,645 previously undescribed protein functions and 2,609 previously
	undescribed protein interactions. We tested 60 interaction predictions
	for yeast by two-hybrid analysis, confirming approximately half of
	these. Significantly, many of the predicted functions and interactions
	would not have been identified from sequence similarity alone, demonstrating
	that network comparisons provide essential biological information
	beyond what is gleaned from the genome.},
  doi = {10.1073/pnas.0409522102},
  pdf = {../local/Sharan2005Conserved.pdf},
  file = {Sharan2005Conserved.pdf:local/Sharan2005Conserved.pdf:PDF},
  institution = {Computer Science Division, University of California, Berkeley, CA
	94704, USA.},
  owner = {jp},
  pii = {0409522102},
  pmid = {15687504},
  timestamp = {2008.10.02},
  url = {http://dx.doi.org/10.1073/pnas.0409522102}
}

@book{Shawe-Taylor2004Kernel,
  author = {Shawe-Taylor, J. and Cristianini, N.},
  title = {Kernel {Methods} for {Pattern} {Analysis}},
  publisher = {Cambridge University Press},
  address = {New York, NY, USA},
  year = {2004},
  owner = {vert}
}

@article{Shawe-Taylor2002On,
  author = {J. Shawe-Taylor and N. Cristianini},
  title = {On the {G}eneralization of {S}oft {M}argin {A}lgorithms},
  journal = {I{EEE} {T}ransactions on {I}nformation {T}heory},
  year = {2002},
  volume = {48},
  pages = {2721--2735},
  number = {10},
  month = {Oct},
  doi = {10.1109/TIT.2002.802647},
  pdf = {../local/Shawe-Taylor2002On.pdf},
  file = {Shawe-Taylor2002On.pdf:local/Shawe-Taylor2002On.pdf:PDF},
  url = {http://dx.doi.org/10.1109/TIT.2002.802647}
}

@inproceedings{She2003Frequent-subsequence-based,
  author = {She, R. and Chen, F. and Wang, K. and Ester, M. and Gardy, J. L. and
	Brinkman, F. S. L.},
  title = {Frequent-subsequence-based prediction of outer membrane proteins},
  booktitle = {K{DD} '03: {P}roceedings of the ninth {ACM} {SIGKDD} international
	conference on {K}nowledge discovery and data mining},
  year = {2003},
  pages = {436--445},
  publisher = {ACM Press},
  abstract = {A number of medically important disease-causing bacteria (collectively
	called {G}ram-negative bacteria) are noted for the extra "outer"
	membrane that surrounds their cell. {P}roteins resident in this membrane
	(outer membrane proteins, or {OMP}s) are of primary research interest
	for antibiotic and vaccine drug design as they are on the surface
	of the bacteria and so are the most accessible targets to develop
	new drugs against. {W}ith the development of genome sequencing technology
	and bioinformatics, biologists can now deduce all the proteins that
	are likely produced in a given bacteria and have attempted to classify
	where proteins are located in a bacterial cell. {H}owever such protein
	localization programs are currently least accurate when predicting
	{OMP}s, and so there is a current need for the development of a better
	{OMP} classifier. {D}ata mining research suggests that the use of
	frequent patterns has good performance in aiding the development
	of accurate and efficient classification algorithms. {I}n this paper,
	we present two methods to identify {OMP}s based on frequent subsequences
	and test them on all {G}ram-negative bacterial proteins whose localizations
	have been determined by biological experiments. {O}ne classifier
	follows an association rule approach, while the other is based on
	support vector machines ({SVM}s). {W}e compare the proposed methods
	with the state-of-the-art methods in the biological domain. {T}he
	results demonstrate that our methods are better both in terms of
	accurately identifying {OMP}s and providing biological insights that
	increase our understanding of the structures and functions of these
	important proteins.},
  doi = {10.1145/956750.956800},
  pdf = {../local/She2003Frequent-subsequence-based.pdf},
  file = {She2003Frequent-subsequence-based.pdf:local/She2003Frequent-subsequence-based.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Sheinerman2003Sequence,
  author = {Felix B. Sheinerman and Bissan Al-Lazikani and Barry Honig},
  title = {Sequence, structure and energetic determinants of phosphopeptide
	selectivity of {SH2} domains.},
  journal = {J. Mol. Biol.},
  year = {2003},
  volume = {334},
  pages = {823--841},
  number = {4},
  month = {Dec},
  abstract = {Here, we present an approach for the prediction of binding preferences
	of members of a large protein family for which structural information
	for a number of family members bound to a substrate is available.
	The approach involves a number of steps. First, an accurate multiple
	alignment of sequences of all members of a protein family is constructed
	on the basis of a multiple structural superposition of family members
	with known structure. Second, the methods of continuum electrostatics
	are used to characterize the energetic contribution of each residue
	in a protein to the binding of its substrate. Residues that make
	a significant contribution are mapped onto the protein sequence and
	are used to define a "binding site signature" for the complex being
	considered. Third, sequences whose structures have not been determined
	are checked to see if they have binding-site signatures similar to
	one of the known complexes. Predictions of binding affinity to a
	given substrate are based on similarities in binding-site signature.
	An important component of the approach is the introduction of a context-specific
	substitution matrix suitable for comparison of binding-site residues. The
	methods are applied to the prediction of phosphopeptide selectivity
	of SH2 domains. To this end, the energetic roles of all protein residues
	in 17 different complexes of SH2 domains with their cognate targets
	are analyzed. The total number of residues that make significant
	contributions to binding is found to vary from nine to 19 in different
	complexes. These energetically important residues are found to contribute
	to binding through a variety of mechanisms, involving both electrostatic
	and hydrophobic interactions. Binding-site signatures are found to
	involve residues in different positions in SH2 sequences, some of
	them as far as 9~{\AA} away from a bound peptide. Surprisingly, similarities
	in the signatures of different domains do not correlate with whole-domain
	sequence identities unless the latter is greater than 50\%. An extensive
	comparison with the optimal binding motifs determined by peptide
	library experiments, as well as other experimental data indicate
	that the similarity in binding preferences of different SH2 domains
	can be deduced on the basis of their binding-site signatures. The
	analysis provides a rationale for the empirically derived classification
	of SH2 domains described by Songyang \& Cantley, in that proteins
	in the same group are found to have similar residues at positions
	important for binding. Confident predictions of binding preference
	can be made for about 85\% of SH2 domain sequences found in SWISSPROT.
	The approach described in this work is quite general and can, in
	principle, be used to analyze binding preferences of members of large
	protein families for which structural information for a number of
	family members is available. It also offers a strategy for predicting
	cross-reactivity of compounds designed to bind to a particular target,
	for example in structure-based drug design.},
  keywords = {Amino Acid Sequence; Binding Sites; Molecular Sequence Data; Peptide
	Library; Phosphopeptides; Protein Binding; Sequence Alignment; Substrate
	Specificity; src Homology Domains},
  owner = {laurent},
  pii = {S0022283603012373},
  pmid = {14636606},
  timestamp = {2007.01.03}
}

@article{Sheinerman2005High,
  author = {Sheinerman, Felix B. and Giraud, Elie and Laoui, Abdelazize},
  title = {High affinity targets of protein kinase inhibitors have similar residues
	at the positions energetically important for binding.},
  journal = {J. Mol. Biol.},
  year = {2005},
  volume = {352},
  pages = {1134--1156},
  number = {5},
  month = {Oct},
  abstract = {Inhibition of protein kinase activity is a focus of intense drug discovery
	efforts in several therapeutic areas. Major challenges facing the
	field include understanding of the factors determining the selectivity
	of kinase inhibitors and the development of compounds with the desired
	selectivity profile. Here, we report the analysis of sequence variability
	among high and low affinity targets of eight different small molecule
	kinase inhibitors (BIRB796, Tarceva, NU6102, Gleevec, SB203580, balanol,
	H89, PP1). It is observed that all high affinity targets of each
	inhibitor are found among a relatively small number of kinases, which
	have similar residues at the specific positions important for binding.
	The findings are highly statistically significant, and allow one
	to exclude the majority of kinases in a genome from a list of likely
	targets for an inhibitor. The findings have implications for the
	design of novel inhibitors with a desired selectivity profile (e.g.
	targeted at multiple kinases), the discovery of new targets for kinase
	inhibitor drugs, comparative analysis of different in vivo models,
	and the design of "a-la-carte" chemical libraries tailored for individual
	kinases.},
  doi = {10.1016/j.jmb.2005.07.074},
  keywords = {Amino Acid Sequence; Amino Acids; Binding Sites; Electrostatics; Humans;
	Ligands; Molecular Sequence Data; Piperazines; Protein Binding; Protein
	Kinase Inhibitors; Protein Kinases; Pyrazoles; Pyrimidines; Sequence
	Alignment; Thermodynamics},
  owner = {laurent},
  pii = {S0022-2836(05)00900-9},
  pmid = {16139843},
  timestamp = {2007.01.03},
  url = {http://dx.doi.org/10.1016/j.jmb.2005.07.074}
}

@article{Shen2005[Detection,
  author = {Li Shen and Jie Yang and Yue Zhou},
  title = {Detection of {PVC}s with support vector machine},
  journal = {Sheng {W}u {Y}i {X}ue {G}ong {C}heng {X}ue {Z}a {Z}hi},
  year = {2005},
  volume = {22},
  pages = {78--81},
  number = {1},
  month = {Feb},
  abstract = {The classification of heart beats is the foundation for automated arrhythmia
	monitoring devices. {S}upport vector machines ({SVM}s) have meant
	a great advance in solving classification or pattern recognition.
	{T}his study describes {SVM} for the identification of premature
	ventricular contractions ({PVC}s) in surface {ECG}s. {F}eatures for
	the classification task are extracted by analyzing the heart rate,
	morphology and wavelet energy of the heart beats from a single lead.
	{T}he performance of different {SVM}s is evaluated on the {MIT}-{BIH}
	arrhythmia database following the association for the advancement
	of medical instrumentation ({AAMI}) recommendations.},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under
	Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain
	Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic
	Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic,
	Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis,
	Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male,
	Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical
	Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence,
	P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	Tomography, U.S. Gov't, Visual Fields, beta-Lactamases},
  pmid = {15762121}
}

@article{Shen2008Pathway,
  author = {Shen, R. and Chinnaiyan, A. M. and Ghosh, D.},
  title = {Pathway analysis reveals functional convergence of gene
	expression profiles in breast cancer},
  journal = {BMC Medical Genomics},
  year = {2008},
  volume = {1},
  number = {1},
  pages = {28},
  doi = {10.1186/1755-8794-1-28},
  url = {http://dx.doi.org/10.1186/1755-8794-1-28},
  pdf = {../local/Shen2008Pathway.pdf},
  file = {Shen2008Pathway.pdf:Shen2008Pathway.pdf:PDF},
  owner = {jp},
  timestamp = {2011.01.13}
}

@inproceedings{Sherashidze2009Efficient,
  author = {Shervashidze, N. and Vishwanathan, S. V. N. and Petri, T. H. and
	Mehlhorn, K. and Borgwardt, K. M.},
  title = {Efficient Graphlet Kernels for Large Graph Comparison},
  booktitle = {12th International Conference on Artificial Intelligence and Statistics
	(AISTATS)},
  year = {2009},
  pages = {488--495},
  address = {Clearwater Beach, Florida USA},
  publisher = {Society for Artificial Intelligence and Statistics},
  pdf = {../local/Sherashidze2009Efficient.pdf},
  file = {Sherashidze2009Efficient.pdf:Sherashidze2009Efficient.pdf:PDF},
  owner = {jp},
  timestamp = {2009.09.27}
}

@article{Sherlock2001Stanford,
  author = {G. Sherlock and T. Hernandez-Boussard and A. Kasarskis and G. Binkley
	and J. C. Matese and S. S. Dwight and M. Kaloper and S. Weng and H.
	Jin and C. A. Ball and M. B. Eisen and P. T. Spellman},
  title = {The {S}tanford {M}icroarray {D}atabase},
  journal = {Nucleic {A}cids {R}es.},
  year = {2001},
  volume = {29},
  pages = {152--155},
  number = {1},
  month = {Jan},
  pdf = {../local/sher01.pdf},
  file = {sher01.pdf:local/sher01.pdf:PDF},
  subject = {microarray},
  url = {http://genome-www5.Stanford.EDU/MicroArray/SMD/SMD.pdf}
}

@article{Sherr2004Principles,
  author = {Charles J. Sherr},
  title = {Principles of Tumor Suppression},
  journal = {Cell},
  year = {2004},
  volume = {116},
  pages = {235--246},
  number = {2},
  keywords = {csbcbook}
}

@article{Sherry2001dbSNP,
  author = {S. T. Sherry and M. H. Ward and M. Kholodov and J. Baker and L. Phan
	and E. M. Smigielski and K. Sirotkin},
  title = {{dbSNP}: the {NCBI} database of genetic variation.},
  journal = {Nucleic Acids Res},
  year = {2001},
  volume = {29},
  pages = {308--311},
  number = {1},
  month = {Jan},
  abstract = {In response to a need for a general catalog of genome variation to
	address the large-scale sampling designs required by association
	studies, gene mapping and evolutionary biology, the National Center
	for Biotechnology Information (NCBI) has established the dbSNP database
	[S.T.Sherry, M.Ward and K. Sirotkin (1999) Genome Res., 9, 677-679].
	Submissions to dbSNP will be integrated with other sources of information
	at NCBI such as GenBank, PubMed, LocusLink and the Human Genome Project
	data. The complete contents of dbSNP are available to the public
	at website: http://www.ncbi.nlm.nih.gov/SNP. The complete contents
	of dbSNP can also be downloaded in multiple formats via anonymous
	FTP at ftp://ncbi.nlm.nih.gov/snp/.},
  institution = {National Center for Biotechnology Information, National Library of
	Medicine, National Institutes of Health, Bethesda, MD, 20894, USA.
	sherry@ncbi.nlm.nih.gov},
  keywords = {Animals; Biotechnology; Databases, Factual; Genetic Variation; Humans;
	Information Services; Internet; National Institutes of Health (U.S.);
	National Library of Medicine (U.S.); Polymorphism, Single Nucleotide,
	genetics; United States},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {11125122},
  timestamp = {2010.08.01}
}

@article{Shevade2003simple,
  author = {Shevade, S. K. and Keerthi, S. S.},
  title = {A simple and efficient algorithm for gene selection using sparse
	logistic regression},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {2246--2253},
  number = {17},
  publisher = {Oxford Univ Press}
}

@article{Shi2005Sensitivity,
  author = {D. Shi and D. S. Yeung and J. Gao},
  title = {Sensitivity analysis applied to the construction of radial
	basis function networks.},
  journal = {Neural {N}etw},
  year = {2005},
  month = {Jun},
  doi = {10.1016/j.neunet.2005.02.006},
  url = {http://dx.doi.org/10.1016/j.neunet.2005.02.006},
  pii = {S0893-6080(05)00054-7},
  abstract = {Conventionally, a radial basis function ({RBF}) network is constructed
	by obtaining cluster centers of basis function by maximum likelihood
	learning. {T}his paper proposes a novel learning algorithm for the
	construction of radial basis function using sensitivity analysis.
	{I}n training, the number of hidden neurons and the centers of their
	radial basis functions are determined by the maximization of the
	output's sensitivity to the training data. {I}n classification, the
	minimal number of such hidden neurons with the maximal sensitivity
	will be the most generalizable to unknown data. {O}ur experimental
	results show that our proposed sensitivity-based {RBF} classifier
	outperforms the conventional {RBF}s and is as accurate as support
	vector machine ({SVM}). {H}ence, sensitivity analysis is expected
	to be a new alternative way to the construction of {RBF} networks.}
}

@article{Shi2005Building,
  author = {Lei Shi and Fabien Campagne},
  title = {Building a protein name dictionary from full text: a machine
	learning term extraction approach.},
  journal = {B{MC} {B}ioinformatics},
  year = {2005},
  volume = {6},
  number = {1},
  pages = {88},
  month = {Apr},
  doi = {10.1186/1471-2105-6-88},
  url = {http://dx.doi.org/10.1186/1471-2105-6-88},
  pii = {1471-2105-6-88},
  keywords = {biosvm},
  pdf = {../local/Shi2005Building.pdf},
  file = {Shi2005Building.pdf:local/Shi2005Building.pdf:PDF},
  abstract = {B{ACKGROUND}: {T}he majority of information in the biological literature
	resides in full text articles, instead of abstracts. {Y}et, abstracts
	remain the focus of many publicly available literature data mining
	tools. {M}ost literature mining tools rely on pre-existing lexicons
	of biological names, often extracted from curated gene or protein
	databases. {T}his is a limitation, because such databases have low
	coverage of the many name variants which are used to refer to biological
	entities in the literature. {RESULTS}: {W}e present an approach to
	recognize named entities in full text. {T}he approach collects high
	frequency terms in an article, and uses support vector machines ({SVM})
	to identify biological entity names. {I}t is also computationally
	efficient and robust to noise commonly found in full text material.
	{W}e use the method to create a protein name dictionary from a set
	of 80,528 full text articles. {O}nly 8.3\% of the names in this dictionary
	match {S}wiss{P}rot description lines. {W}e assess the quality of
	the dictionary by studying its protein name recognition performance
	in full text. {CONCLUSION}: {T}his dictionary term lookup method
	compares favourably to other published methods, supporting the significance
	of our direct extraction approach. {T}he method is strong in recognizing
	name variants not found in {S}wiss{P}rot.}
}

@article{Shi2010MicroArray,
  author = {Leming Shi and Gregory Campbell and Wendell D Jones and Fabien Campagne
	and Zhining Wen and Stephen J Walker and Zhenqiang Su and Tzu-Ming
	Chu and Federico M Goodsaid and Lajos Pusztai and John D Shaughnessy
	and Andr{\'e} Oberthuer and Russell S Thomas and Richard S Paules and
	Mark Fielden and Bart Barlogie and Weijie Chen and Pan Du and Matthias
	Fischer and Cesare Furlanello and Brandon D Gallas and Xijin Ge and
	Dalila B Megherbi and W. Fraser Symmans and May D Wang and John Zhang
	and Hans Bitter and Benedikt Brors and Pierre R Bushel and Max Bylesjo
	and Minjun Chen and Jie Cheng and Jing Cheng and Jeff Chou and Timothy
	S Davison and Mauro Delorenzi and Youping Deng and Viswanath Devanarayan
	and David J Dix and Joaquin Dopazo and Kevin C Dorff and Fathi Elloumi
	and Jianqing Fan and Shicai Fan and Xiaohui Fan and Hong Fang and
	Nina Gonzaludo and Kenneth R Hess and Huixiao Hong and Jun Huan and
	Rafael A Irizarry and Richard Judson and Dilafruz Juraeva and Samir
	Lababidi and Christophe G Lambert and Li Li and Yanen Li and Zhen
	Li and Simon M Lin and Guozhen Liu and Edward K Lobenhofer and Jun
	Luo and Wen Luo and Matthew N McCall and Yuri Nikolsky and Gene A
	Pennello and Roger G Perkins and Reena Philip and Vlad Popovici and
	Nathan D Price and Feng Qian and Andreas Scherer and Tieliu Shi and
	Weiwei Shi and Jaeyun Sung and Danielle Thierry-Mieg and Jean Thierry-Mieg
	and Venkata Thodima and Johan Trygg and Lakshmi Vishnuvajjala and
	Sue Jane Wang and Jianping Wu and Yichao Wu and Qian Xie and Waleed
	A Yousef and Liang Zhang and Xuegong Zhang and Sheng Zhong and Yiming
	Zhou and Sheng Zhu and Dhivya Arasappan and Wenjun Bao and Anne Bergstrom
	Lucas and Frank Berthold and Richard J Brennan and Andreas Buness
	and Jennifer G Catalano and Chang Chang and Rong Chen and Yiyu Cheng
	and Jian Cui and Wendy Czika and Francesca Demichelis and Xutao Deng
	and Damir Dosymbekov and Roland Eils and Yang Feng and Jennifer Fostel
	and Stephanie Fulmer-Smentek and James C Fuscoe and Laurent Gatto
	and Weigong Ge and Darlene R Goldstein and Li Guo and Donald N Halbert
	and Jing Han and Stephen C Harris and Christos Hatzis and Damir Herman
	and Jianping Huang and Roderick V Jensen and Rui Jiang and Charles
	D Johnson and Giuseppe Jurman and Yvonne Kahlert and Sadik A Khuder
	and Matthias Kohl and Jianying Li and Li Li and Menglong Li and Quan-Zhen
	Li and Shao Li and Zhiguang Li and Jie Liu and Ying Liu and Zhichao
	Liu and Lu Meng and Manuel Madera and Francisco Martinez-Murillo
	and Ignacio Medina and Joseph Meehan and Kelci Miclaus and Richard
	A Moffitt and David Montaner and Piali Mukherjee and George J Mulligan
	and Padraic Neville and Tatiana Nikolskaya and Baitang Ning and Grier
	P Page and Joel Parker and R. Mitchell Parry and Xuejun Peng and
	Ron L Peterson and John H Phan and Brian Quanz and Yi Ren and Samantha
	Riccadonna and Alan H Roter and Frank W Samuelson and Martin M Schumacher
	and Joseph D Shambaugh and Qiang Shi and Richard Shippy and Shengzhu
	Si and Aaron Smalter and Christos Sotiriou and Mat Soukup and Frank
	Staedtler and Guido Steiner and Todd H Stokes and Qinglan Sun and
	Pei-Yi Tan and Rong Tang and Zivana Tezak and Brett Thorn and Marina
	Tsyganova and Yaron Turpaz and Silvia C Vega and Roberto Visintainer
	and Juergen von Frese and Charles Wang and Eric Wang and Junwei Wang
	and Wei Wang and Frank Westermann and James C Willey and Matthew
	Woods and Shujian Wu and Nianqing Xiao and Joshua Xu and Lei Xu and
	Lun Yang and Xiao Zeng and Jialu Zhang and Li Zhang and Min Zhang
	and Chen Zhao and Raj K Puri and Uwe Scherf and Weida Tong and Russell
	D Wolfinger and {MAQC Consortium}},
  title = {The {MicroArray Quality Control} ({MAQC})-{II} study of common practices
	for the development and validation of microarray-based predictive
	models.},
  journal = {Nat Biotechnol},
  year = {2010},
  volume = {28},
  pages = {827--838},
  number = {8},
  month = {Aug},
  abstract = {Gene expression data from microarrays are being applied to predict
	preclinical and clinical endpoints, but the reliability of these
	predictions has not been established. In the MAQC-II project, 36
	independent teams analyzed six microarray data sets to generate predictive
	models for classifying a sample with respect to one of 13 endpoints
	indicative of lung or liver toxicity in rodents, or of breast cancer,
	multiple myeloma or neuroblastoma in humans. In total, >30,000 models
	were built using many combinations of analytical methods. The teams
	generated predictive models without knowing the biological meaning
	of some of the endpoints and, to mimic clinical reality, tested the
	models on data that had not been used for training. We found that
	model performance depended largely on the endpoint and team proficiency
	and that different approaches generated models of similar performance.
	The conclusions and recommendations from MAQC-II should be useful
	for regulatory agencies, study committees and independent investigators
	that evaluate methods for global gene expression analysis.},
  institution = {National Center for Toxicological Research, US Food and Drug Administration,
	Jefferson, Arkansas, USA.},
  keywords = {Animals; Breast Neoplasms, diagnosis/genetics; Disease Models, Animal;
	Female; Gene Expression Profiling, methods/standards; Guidelines
	as Topic; Humans; Liver Diseases, etiology/genetics/pathology; Lung
	Diseases, etiology/genetics/pathology; Multiple Myeloma, diagnosis/genetics;
	Neoplasms, diagnosis/genetics/mortality; Neuroblastoma, diagnosis/genetics;
	Oligonucleotide Array Sequence Analysis, methods/standards; Predictive
	Value of Tests; Quality Control; Rats; Survival Analysis},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pmid = {20676074},
  timestamp = {2011.04.08}
}

@article{Shi2010Functional,
  author = {Shi, W. and Bessarabova, M. and Dosymbekov, D. and Dezso, Z. and
	Nikolskaya, T. and Dudoladova, M. and Serebryiskaya, T. and Bugrim,
	A. and Gyuryanov, A. and Brennan, R. J. and Shah, R. and Dopazo,
	J. and Chen, M. and Deng, Y. and Shi, T. and Jurman, G. and Furlanello,
	C. and Thomas, R. S. and Corton, J. C. and Tong, W. and Shi, L. and
	Nikolsky, Y.},
  title = {Functional analysis of multiple genomic signatures demonstrates that
	classification algorithms choose phenotype-related genes},
  journal = {Pharmacogenomics J.},
  year = {2010},
  volume = {10},
  pages = {310--323},
  number = {4},
  doi = {10.1038/tpj.2010.35},
  pdf = {../local/Shi2010Functional.pdf},
  file = {Shi2010Functional.pdf:Shi2010Functional.pdf:PDF},
  owner = {jp},
  timestamp = {2011.01.13},
  url = {http://dx.doi.org/10.1038/tpj.2010.35}
}

@article{Shields1993Universal,
  author = {Shields, P. C.},
  title = {Universal redundancy rates do not exist},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1993},
  volume = {39},
  pages = {520--524},
  number = {2},
  month = {Mar},
  abstract = {The expected redundancy per symbol of an n-block prefix code $C_n$
	on a source $\mu$ measures how far the code is from being optimal for
	that source. {T}he existence of sequences of codes with expected
	redundancy per symbol of {O}((log n)/n) for `nice' classes of sources,
	such as {M}arkov sources of a given order, is well known. {I}t is
	shown that some restriction on the class of processes is necessary
	in order to obtain such redundancy bounds, for there is no universal
	redundancy rate for any sequence of prefix codes on the class of
	all ergodic sources.},
  pdf = {../local/Shields1993Universal.pdf},
  file = {Shields1993Universal.pdf:local/Shields1993Universal.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Shilton2005Incremental,
  author = {Alistair Shilton and M. Palaniswami and Daniel Ralph and Ah Chung
	Tsoi},
  title = {Incremental training of support vector machines.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2005},
  volume = {16},
  pages = {114--131},
  number = {1},
  month = {Jan},
  abstract = {We propose a new algorithm for the incremental training of support
	vector machines ({SVM}s) that is suitable for problems of sequentially
	arriving data and fast constraint parameter variation. {O}ur method
	involves using a "warm-start" algorithm for the training of {SVM}s,
	which allows us to take advantage of the natural incremental properties
	of the standard active set approach to linearly constrained optimization
	problems. {I}ncremental training involves quickly retraining a support
	vector machine after adding a small number of additional training
	vectors to the training set of an existing (trained) support vector
	machine. {S}imilarly, the problem of fast constraint parameter variation
	involves quickly retraining an existing support vector machine using
	the same training set but different constraint parameters. {I}n both
	cases, we demonstrate the computational superiority of incremental
	training over the usual batch retraining method.}
}

@article{Shimodaira2000Improving,
  author = {Shimodaira, H.},
  title = {Improving predictive inference under covariate shift by weighting
	the log-likelihood function},
  journal = {Journal of Statistical Planning and Inference},
  year = {2000},
  volume = {90},
  pages = {227--244},
  number = {2},
  month = {Oct},
  abstract = {A class of predictive densities is derived by weighting the observed
	samples in maximizing the log-likelihood function. This approach
	is effective in cases such as sample surveys or design of experiments,
	where the observed covariate follows a different distribution than
	that in the whole population. Under misspecification of the parametric
	model, the optimal choice of the weight function is asymptotically
	shown to be the ratio of the density function of the covariate in
	the population to that in the observations. This is the pseudo-maximum
	likelihood estimation of sample surveys. The optimality is defined
	by the expected Kullback--Leibler loss, and the optimal weight
	is obtained by considering the importance sampling identity. Under
	correct specification of the model, however, the ordinary maximum
	likelihood estimate (i.e. the uniform weight) is shown to be optimal
	asymptotically. For moderate sample size, the situation is in between
	the two extreme cases, and the weight function is selected by minimizing
	a variant of the information criterion derived as an estimate of
	the expected loss. The method is also applied to a weighted version
	of the Bayesian predictive density. Numerical examples as well as
	Monte-Carlo simulations are shown for polynomial regression. A connection
	with the robust parametric estimation is discussed.},
  doi = {10.1016/S0378-3758(00)00115-4},
  issn = {03783758},
  keywords = {domain-adaptation},
  url = {http://dx.doi.org/10.1016/S0378-3758(00)00115-4}
}

@inproceedings{Shimodaira2001Dynamic,
  author = {Shimodaira, H. and Noma, K.-I. and Nakai, M. and Sagayama, S.},
  title = {Dynamic time-alignment kernel in support vector machine},
  booktitle = {Adv. Neural. Inform. Process Syst.},
  year = {2001},
  pages = {921--928},
  timestamp = {2006.07.12}
}

@inproceedings{Shin2011Partitionable,
  author = {Shin, K.},
  title = {Partitionable Kernels for Mapping Kernels},
  booktitle = {Proceedings of the 11th IEEE International Conference on Data Mining,
	ICDM 2011, Vancouver, BC, Canada, December 11--14, 2011},
  year = {2011},
  editor = {Cook, D. J. and Pei, J. and Wang, W. and Za{\"\i}ane, O. R. and Wu,
	X.},
  pages = {645--654},
  doi = {10.1109/ICDM.2011.115},
  pdf = {../local/Shin2011Partitionable.pdf},
  file = {Shin2011Partitionable.pdf:Shin2011Partitionable.pdf:PDF},
  owner = {jp},
  timestamp = {2012.10.22},
  url = {http://dx.doi.org/10.1109/ICDM.2011.115}
}

@article{Shipp2002Diffuse,
  author = {Shipp, M. A. and Ross, K. N. and Tamayo, P. and Weng, A. P. and Kutok,
	J. L. and Aguiar, R. C. T. and Gaasenbeek, M. and Angelo, M. and
	Reich, M. and Pinkus, G. A. and Ray, T. S. and Koval, M. A. and Last,
	K. W. and Norton, A. and Lister, T. A. and Mesirov, J. and Neuberg,
	D. S. and Lander, E. S. and Aster, J. C. and Golub, T. R.},
  title = {Diffuse large {B}-cell lymphoma outcome prediction by gene-expression
	profiling and supervised machine learning},
  journal = {Nat. {M}ed.},
  year = {2002},
  volume = {8},
  pages = {68--74},
  number = {1},
  abstract = {Diffuse large {B}-cell lymphoma ({DLBCL}), the most common lymphoid
	malignancy in adults, is curable in less than 50\% of patients. {P}rognostic
	models based on pre-treatment characteristics, such as the {I}nternational
	{P}rognostic {I}ndex ({IPI}), are currently used to predict outcome
	in {DLBCL}. {H}owever, clinical outcome models identify neither the
	molecular basis of clinical heterogeneity, nor specific therapeutic
	targets. {W}e analyzed the expression of 6,817 genes in diagnostic
	tumor specimens from {DLBCL} patients who received cyclophosphamide,
	adriamycin, vincristine and prednisone ({CHOP})-based chemotherapy,
	and applied a supervised learning prediction method to identify cured
	versus fatal or refractory disease. {T}he algorithm classified two
	categories of patients with very different five-year overall survival
	rates (70\% versus 12\%). {T}he model also effectively delineated patients
	within specific {IPI} risk categories who were likely to be cured
	or to die of their disease. {G}enes implicated in {DLBCL} outcome
	included some that regulate responses to {B}-cell--receptor signaling,
	critical serine/threonine phosphorylation pathways and apoptosis.
	{O}ur data indicate that supervised learning classification techniques
	can predict outcome in {DLBCL} and identify rational targets for
	intervention.},
  doi = {10.1038/nm0102-68},
  pdf = {../local/Shipp2002Diffuse.pdf},
  file = {Shipp2002Diffuse.pdf:local/Shipp2002Diffuse.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@article{Shivakumar2009Structural,
  author = {Pavithra Shivakumar and Michael Krauthammer},
  title = {Structural similarity assessment for drug sensitivity prediction
	in cancer.},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {S17},
  number = {Suppl 9},
  abstract = {BACKGROUND: The ability to predict drug sensitivity in cancer is one
	of the exciting promises of pharmacogenomic research. Several groups
	have demonstrated the ability to predict drug sensitivity by integrating
	chemo-sensitivity data and associated gene expression measurements
	from large anti-cancer drug screens such as NCI-60. The general approach
	is based on comparing gene expression measurements from sensitive
	and resistant cancer cell lines and deriving drug sensitivity profiles
	consisting of lists of genes whose expression is predictive of response
	to a drug. Importantly, it has been shown that such profiles are
	generic and can be applied to cancer cell lines that are not part
	of the anti-cancer screen. However, one limitation is that the profiles
	can not be generated for untested drugs (i.e., drugs that are not
	part of an anti-cancer drug screen). In this work, we propose using
	an existing drug sensitivity profile for drug A as a substitute for
	an untested drug B given high structural similarities between drugs
	A and B. RESULTS: We first show that structural similarity between
	pairs of compounds in the NCI-60 dataset highly correlates with the
	similarity between their activities across the cancer cell lines.
	This result shows that structurally similar drugs can be expected
	to have a similar effect on cancer cell lines. We next set out to
	test our hypothesis that we can use existing drug sensitivity profiles
	as substitute profiles for untested drugs. In a cross-validation
	experiment, we found that the use of substitute profiles is possible
	without a significant loss of prediction accuracy if the substitute
	profile was generated from a compound with high structural similarity
	to the untested compound. CONCLUSION: Anti-cancer drug screens are
	a valuable resource for generating omics-based drug sensitivity profiles.
	We show that it is possible to extend the usefulness of existing
	screens to untested drugs by deriving substitute sensitivity profiles
	from structurally similar drugs part of the screen.},
  doi = {10.1186/1471-2105-10-S9-S17},
  pdf = {../local/Shivakumar2009Structural.pdf},
  file = {Shivakumar2009Structural.pdf:Shivakumar2009Structural.pdf:PDF},
  institution = {Department of Pathology, Yale University School of Medicine, New
	Haven, CT, USA. pavithra.shivakumar@yale.edu},
  keywords = {chemogenomics},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-10-S9-S17},
  pmid = {19761571},
  timestamp = {2009.10.30},
  url = {http://dx.doi.org/10.1186/1471-2105-10-S9-S17}
}

@article{Shock2007Whole-genome,
  author = {Shock, J. L. and Fischer, K. F. and DeRisi, J. L.},
  title = {Whole-genome analysis of {mRNA} decay in {Plasmodium} falciparum reveals
	a global lengthening of {mRNA} half-life during the intra-erythrocytic
	development cycle.},
  journal = {Genome Biol.},
  year = {2007},
  volume = {8},
  pages = {R134},
  number = {7},
  abstract = {BACKGROUND: The rate of mRNA decay is an essential element of post-transcriptional
	regulation in all organisms. Previously, studies in several organisms
	found that the specific half-life of each mRNA is precisely related
	to its physiologic role, and plays an important role in determining
	levels of gene expression. RESULTS: We used a genome-wide approach
	to characterize mRNA decay in Plasmodium falciparum. We found that,
	globally, rates of mRNA decay increase dramatically during the asexual
	intra-erythrocytic developmental cycle. During the ring stage of
	the cycle, the average mRNA half-life was 9.5 min, but this was extended
	to an average of 65 min during the late schizont stage of development.
	Thus, a major determinant of mRNA decay rate appears to be linked
	to the stage of intra-erythrocytic development. Furthermore, we found
	specific variations in decay patterns superimposed upon the dominant
	trend of progressive half-life lengthening. These variations in decay
	pattern were frequently enriched for genes with specific cellular
	functions or processes. CONCLUSION: Elucidation of Plasmodium mRNA
	decay rates provides a key element for deciphering mechanisms of
	genetic control in this parasite, by complementing and extending
	previous mRNA abundance studies. Our results indicate that progressive
	stage-dependent decreases in mRNA decay rate function are a major
	determinant of mRNA accumulation during the schizont stage of intra-erythrocytic
	development. This type of genome-wide change in mRNA decay rate has
	not been observed in any other organism to date, and indicates that
	post-transcriptional regulation may be the dominant mechanism of
	gene regulation in P. falciparum.},
  doi = {10.1186/gb-2007-8-7-r134},
  institution = {Department of Biochemistry and Biophysics, University of California
	San Francisco, 1700 4th Street, San Francisco, California 94158-2330,
	USA.},
  keywords = {plasmodium},
  owner = {jp},
  pii = {gb-2007-8-7-r134},
  pmid = {17612404},
  timestamp = {2009.01.21},
  url = {http://dx.doi.org/10.1186/gb-2007-8-7-r134}
}

@article{Shoeb2004Patient-specific,
  author = {Ali Shoeb and Herman Edwards and Jack Connolly and Blaise Bourgeois
	and S. Ted Treves and John Guttag},
  title = {Patient-specific seizure onset detection.},
  journal = {Epilepsy {B}ehav},
  year = {2004},
  volume = {5},
  pages = {483--498},
  number = {4},
  month = {Aug},
  abstract = {This article presents an automated, patient-specific method for the
	detection of epileptic seizure onset from noninvasive electroencephalography.
	{W}e adopt a patient-specific approach to exploit the consistency
	of an individual patient's seizure and nonseizure electroencephalograms.
	{O}ur method uses a wavelet decomposition to construct a feature
	vector that captures the morphology and spatial distribution of an
	electroencephalographic epoch, and then determines whether that vector
	is representative of a patient's seizure or nonseizure electroencephalogram
	using the support vector machine classification algorithm. {O}ur
	completely automated method was tested on noninvasive electroencephalograms
	from 36 pediatric subjects suffering from a variety of seizure types.
	{I}t detected 131 of 139 seizure events within 8.0+/-3.2 seconds
	of electrographic onset, and declared 15 false detections in 60 hours
	of clinical electroencephalography. {O}ur patient-specific method
	can be used to initiate delay-sensitive clinical procedures following
	seizure onset, for example, the injection of a functional imaging
	radiotracer.},
  doi = {10.1016/j.yebeh.2004.05.005},
  pdf = {../local/Shoeb2004Patient-specific.pdf},
  file = {Shoeb2004Patient-specific.pdf:local/Shoeb2004Patient-specific.pdf:PDF},
  keywords = {Algorithms, Comparative Study, Computational Biology, Computer-Assisted,
	Databases, Diagnosis, Drug Resistance, Electroencephalography, Epilepsy,
	Forecasting, Genetic, Genotype, HIV Protease Inhibitors, HIV-1, Humans,
	Information Management, Information Storage and Retrieval, Kinetics,
	Linear Models, Microbial Sensitivity Tests, Models, Monitoring, Non-U.S.
	Gov't, P.H.S., Periodicals, Physiologic, Point Mutation, Pyrimidinones,
	Reaction Time, Research Support, Reverse Transcriptase Inhibitors,
	Signal Processing, Theoretical, Time Factors, U.S. Gov't, Viral,
	15256184},
  pii = {S1525505004001593},
  url = {http://dx.doi.org/10.1016/j.yebeh.2004.05.005}
}

@article{Shoval2010Cell,
  author = {Oren Shoval and Uri Alon},
  title = {{SnapShot}: network motifs.},
  journal = {Cell},
  year = {2010},
  volume = {143},
  pages = {326--326.e1},
  number = {2},
  month = {Oct},
  doi = {10.1016/j.cell.2010.09.050},
  institution = {Department of Molecular Cell Biology, Weizmann Institute of Science,
	Rehovot 76100, Israel.},
  keywords = {Animals; Feedback; Gene Regulatory Networks; Humans; Signal Transduction},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pii = {S0092-8674(10)01136-0},
  pmid = {20946989},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1016/j.cell.2010.09.050}
}

@article{Shulman2004Uniform,
  author = {Shulman, N. and Feder, M.},
  title = {The {U}niform {D}istribution as a {U}niversal {P}rior},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {2004},
  volume = {50},
  pages = {1356--1362},
  number = {6},
  month = {Jun},
  abstract = {In this correspondence, we discuss the properties of the uniform prior
	as a universal prior, i.e., a prior that induces a mutual information
	that is simultaneously close to the capacity for all channels. {W}e
	determine bounds on the amount of the mutual information loss in
	using the uniform prior instead of the capacity-achieving prior.
	{S}pecifically, for the class of binary input channels with any output
	alphabet, we show that the ${Z}$-channel has the minimal mutual information
	with uniform prior, out of all channels with a given capacity. {F}rom
	this, we conclude that the degradation of the mutual information
	with respect to the capacity is at most 0.011 bit, and as was shown
	previously, at most 6\%. {A} related result is that the capacity-achieving
	prior, for any channel, is not far from uniform. {S}ome of these
	results are extended to channels with nonbinary input.},
  pdf = {../local/Shulman2004Uniform.pdf},
  file = {Shulman2004Uniform.pdf:local/Shulman2004Uniform.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Shulman-Peleg2005SiteEngines,
  author = {Alexandra Shulman-Peleg and Ruth Nussinov and Haim J Wolfson},
  title = {{SiteEngines}: recognition and comparison of binding sites and protein-protein
	interfaces.},
  journal = {Nucleic Acids Res},
  year = {2005},
  volume = {33},
  pages = {W337--W341},
  number = {Web Server issue},
  month = {Jul},
  abstract = {Protein surface regions with similar physicochemical properties and
	shapes may perform similar functions and bind similar binding partners.
	Here we present two web servers and software packages for recognition
	of the similarity of binding sites and interfaces. Both methods recognize
	local geometrical and physicochemical similarity, which can be present
	even in the absence of overall sequence or fold similarity. The first
	method, SiteEngine (http://bioinfo3d.cs.tau.ac.il/SiteEngine), receives
	as an input two protein structures and searches the complete surface
	of one protein for regions similar to the binding site of the other.
	The second, Interface-to-Interface (I2I)-SiteEngine (http://bioinfo3d.cs.tau.ac.il/I2I-SiteEngine),
	compares protein-protein interfaces, which are regions of interaction
	between two protein molecules. It receives as an input two structures
	of protein-protein complexes, extracts the interfaces and finds the
	three-dimensional transformation that maximizes the similarity between
	two pairs of interacting binding sites. The output of both servers
	consists of a superimposition in PDB file format and a list of physicochemical
	properties shared by the compared entities. The methods are highly
	efficient and the freely available software packages are suitable
	for large-scale database searches of the entire PDB.},
  doi = {10.1093/nar/gki482},
  institution = {School of Computer Science, Raymond and Beverly Sackler Faculty of
	Exact Sciences, Tel Aviv University, Tel Aviv 69978, Israel. shulmana@tau.ac.il},
  keywords = {Amino Acids, chemistry; Binding Sites; Internet; Multiprotein Complexes,
	chemistry/metabolism; Protein Conformation; Protein Interaction Mapping,
	methods; Software; User-Computer Interface},
  language = {eng},
  medline-pst = {ppublish},
  owner = {bricehoffmann},
  pii = {33/suppl_2/W337},
  pmid = {15980484},
  timestamp = {2009.11.12},
  url = {http://dx.doi.org/10.1093/nar/gki482}
}

@article{Shulman-Peleg2004Recognition,
  author = {Alexandra Shulman-Peleg and Ruth Nussinov and Haim J Wolfson},
  title = {Recognition of functional sites in protein structures.},
  journal = {J Mol Biol},
  year = {2004},
  month = {Jun},
  volume = {339},
  number = {3},
  pages = {607--633},
  doi = {10.1016/j.jmb.2004.04.012},
  url = {http://dx.doi.org/10.1016/j.jmb.2004.04.012},
  pmid = {15147845},
  pii = {S0022283604004139},
  institution = {School of Computer Science, Tel Aviv University, Tel Aviv 69978,
	Israel.},
  keywords = {Algorithms; Catalytic Domain; Hydrogen Bonding; Models, Molecular;
	Protein Conformation; Proteins, chemistry},
  language = {eng},
  medline-pst = {ppublish},
  abstract = {Recognition of regions on the surface of one protein, that are similar
	to a binding site of another is crucial for the prediction of molecular
	interactions and for functional classifications. We first describe
	a novel method, SiteEngine, that assumes no sequence or fold similarities
	and is able to recognize proteins that have similar binding sites
	and may perform similar functions. We achieve high efficiency and
	speed by introducing a low-resolution surface representation via
	chemically important surface points, by hashing triangles of physico-chemical
	properties and by application of hierarchical scoring schemes for
	a thorough exploration of global and local similarities. We proceed
	to rigorously apply this method to functional site recognition in
	three possible ways: first, we search a given functional site on
	a large set of complete protein structures. Second, a potential functional
	site on a protein of interest is compared with known binding sites,
	to recognize similar features. Third, a complete protein structure
	is searched for the presence of an a priori unknown functional site,
	similar to known sites. Our method is robust and efficient enough
	to allow computationally demanding applications such as the first
	and the third. From the biological standpoint, the first application
	may identify secondary binding sites of drugs that may lead to side-effects.
	The third application finds new potential sites on the protein that
	may provide targets for drug design. Each of the three applications
	may aid in assigning a function and in classification of binding
	patterns. We highlight the advantages and disadvantages of each type
	of search, provide examples of large-scale searches of the entire
	Protein Data Base and make functional predictions.},
  owner = {bricehoffmann},
  timestamp = {2009.11.12}
}

@article{Shulman2008MultiBind,
  author = {Shulman-Peleg, A. and Shatsky, M. and Nussinov, R. and Wolfson, H. J.},
  title = {{MultiBind} and {MAPPIS}: webservers for multiple alignment of protein
	{3D}-binding sites and their interactions.},
  journal = {Nucleic Acids Res.},
  year = {2008},
  volume = {36},
  pages = {260--264},
  month = {May},
  abstract = {Analysis of protein-ligand complexes and recognition of spatially
	conserved physico-chemical properties is important for the prediction
	of binding and function. Here, we present two webservers for multiple
	alignment and recognition of binding patterns shared by a set of
	protein structures. The first webserver, MultiBind (http://bioinfo3d.cs.tau.ac.il/MultiBind),
	performs multiple alignment of protein binding sites. It recognizes
	the common spatial chemical binding patterns even in the absence
	of similarity of the sequences or the folds of the compared proteins.
	The input to the MultiBind server is a set of protein-binding sites
	defined by interactions with small molecules. The output is a detailed
	list of the shared physico-chemical binding site properties. The
	second webserver, MAPPIS (http://bioinfo3d.cs.tau.ac.il/MAPPIS),
	aims to analyze protein-protein interactions. It performs multiple
	alignment of protein-protein interfaces (PPIs), which are regions
	of interaction between two protein molecules. MAPPIS recognizes the
	spatially conserved physico-chemical interactions, which often involve
	energetically important hot-spot residues that are crucial for protein-protein
	associations. The input to the MAPPIS server is a set of protein-protein
	complexes. The output is a detailed list of the shared interaction
	properties of the interfaces.},
  citeulike-article-id = {2882716},
  doi = {10.1093/nar/gkn185},
  institution = {School of Computer Science, Raymond and Beverly Sackler Faculty of
	Exact Sciences, Tel Aviv University, Tel Aviv 69978, Israel, Physical
	Biosciences Division, Berkeley National Lab, California, CA, USA,
	Sackler Inst. of Molecular Medicine, Sackler Faculty of Medicine,
	Tel Aviv University, Tel Aviv, Israel and Basic Research Program,
	SAIC-Frederick, Inc., Laboratory of Experimental and Computational
	Biology, NCI-Frederick, Bldg 469, Rm 151, Frederick, MD 21702, USA.},
  issn = {1362-4962},
  keywords = {binding-site},
  posted-at = {2008-06-11 14:24:40},
  priority = {2},
  url = {http://dx.doi.org/10.1093/nar/gkn185}
}

@article{Sidney1996Definition,
  author = {J. Sidney and H. M. Grey and S. Southwood and E. Celis and P. A.
	Wentworth and M. F. del Guercio and R. T. Kubo and R. W. Chesnut
	and A. Sette},
  title = {Definition of an {HLA-A3}-like supermotif demonstrates the overlapping
	peptide-binding repertoires of common {HLA} molecules.},
  journal = {Hum Immunol},
  year = {1996},
  month = {Feb},
  volume = {45},
  number = {2},
  pages = {79--93},
  pmid = {8882405},
  pii = {0198-8859(95)00173-5},
  keywords = {Alleles; Amino Acid Sequence; Cell Line, Transformed; Cross Reactions;
	HLA Antigens; HLA-A3 Antigen; HLA-B Antigens; Haplotypes; Humans;
	Molecular Sequence Data; Peptide Fragments; Protein Binding; Structure-Activity
	Relationship},
  abstract = {An HLA-A3-like supertype (minimally comprised of products from the
	HLA class I alleles A3, A11, A31, A*3301, and A*6801) has been defined
	on the basis of (a) structural similarities in the antigen-binding
	groove, (b) shared main anchor peptide-binding motifs, (c) the identification
	of peptides cross-reacting with most or all of these molecules, and
	(d) the definition of an A3-like supermotif that efficiently predicts
	highly cross-reactive peptides. Detailed secondary anchor maps for
	A3, A11, A31, A*3301, and A*6801 are also described. The biologic
	relevance of the A3-like supertype is indicated by the fact that
	high frequencies of the A3-like supertype alleles are conserved in
	all major ethnic groups. Because A3-like supertype alleles are found
	in most major HLA evolutionary lineages, possibly a reflection of
	common ancestry, the A3-like supermotif might in fact represent a
	primeval human HLA class I peptide-binding specificity. It is also
	possible that these phenomena might be related to optimal exploitation
	of the peptide specificity by human TAP molecules. The grouping of
	HLA alleles into supertypes on the basis of their overlapping peptide-binding
	repertoires represents an alternative to serologic or phylogenetic
	classification.},
  owner = {laurent},
  timestamp = {2007.01.05}
}

@article{Sidney1995Several,
  author = {J. Sidney and M. F. del Guercio and S. Southwood and V. H. Engelhard
	and E. Appella and H. G. Rammensee and K. Falk and O. R{\"o}tzschke
	and M. Takiguchi and R. T. Kubo},
  title = {Several {HLA} alleles share overlapping peptide specificities.},
  journal = {J. Immunol.},
  year = {1995},
  volume = {154},
  pages = {247--259},
  number = {1},
  month = {Jan},
  abstract = {Herein we describe the establishment of assays to measure peptide
	binding to purified HLA-B*0701, -B*0801, -B*2705, -B*3501-03, -B*5401,
	-Cw*0401, -Cw*0602, and -Cw*0702 molecules. The binding of known
	peptide epitopes or naturally processed peptides correlates well
	with HLA restriction or origin, underscoring the immunologic relevance
	of these assays. Analysis of the sequences of various HLA class I
	alleles suggested that alleles with peptide motifs characterized
	by proline in position 2 and aromatic or hydrophobic residues at
	their C-terminus shared key consensus residues at positions 9, 63,
	66, 67, and 70 (B pocket) and residue 116 (F pocket). Prediction
	of the peptide-binding specificity of HLA-B*5401, on the basis of
	this consensus B and F pocket structure, verified this hypothesis
	and suggested that a relatively large family of HLA-B alleles (which
	we have defined as the HLA-B7-like supertype) may significantly overlap
	in peptide binding specificity. Availability of quantitative binding
	assays allowed verification that, indeed, many (25\%) of the peptide
	ligands carrying proline in position 2 and hydrophobic/aromatic residues
	at the C-terminus (the B7-like supermotif) were capable of binding
	at least three of five HLA-B7-like supertype alleles. Identification
	of epitopes carrying the B7-like supermotif and binding to a family
	of alleles represented in over 40\% of individuals from all major
	ethnic groups may be of considerable use in the design of peptide
	vaccines.},
  keywords = {Alleles; Amino Acid Sequence; Cell Line, Transformed; Consensus Sequence;
	Epitopes; Genes, MHC Class I; HLA-B Antigens; HLA-C Antigens; Humans;
	Molecular Sequence Data; Peptide Fragments; Protein Binding; Protein
	Structure, Tertiary; Structure-Activity Relationship; Substrate Specificity},
  owner = {laurent},
  pmid = {7527812},
  timestamp = {2007.01.05}
}

@article{Siepen2003Beta,
  author = {Siepen, J. A. and Radford, S. E. and Westhead, D. R.},
  title = {Beta {E}dge strands in protein structure prediction and aggregation},
  journal = {Protein {S}ci.},
  year = {2003},
  volume = {12},
  pages = {2348--2359},
  number = {10},
  abstract = {It is well established that recognition between exposed edges of {beta}-sheets
	is an important mode of protein-protein interaction and can have
	pathological consequences; for instance, it has been linked to the
	aggregation of proteins into a fibrillar structure, which is associated
	with a number of predominantly neurodegenerative disorders. {A} number
	of protective mechanisms have evolved in the edge strands of {beta}-sheets,
	preventing the aggregation and insolubility of most natural {beta}-sheet
	proteins. {S}uch mechanisms are unfavorable in the interior of a
	{beta}-sheet. {T}he problem of distinguishing edge strands from central
	strands based on sequence information alone is important in predicting
	residues and mutations likely to be involved in aggregation, and
	is also a first step in predicting folding topology. {H}ere we report
	support vector machine ({SVM}) and decision tree methods developed
	to classify edge strands from central strands in a representative
	set of protein domains. {I}nterestingly, rules generated by the decision
	tree method are in close agreement with our knowledge of protein
	structure and are potentially useful in a number of different biological
	applications. {W}hen trained on strands from proteins of known structure,
	using structure-based ({D}ictionary of {S}econdary {S}tructure in
	{P}roteins) strand assignments, both methods achieved mean cross-validated,
	prediction accuracies of $\sim$78\%. {T}hese accuracies were reduced when
	strand assignments from secondary structure prediction were used.
	{F}urther investigation of this effect revealed that it could be
	explained by a significant reduction in the accuracy of standard
	secondary structure prediction methods for edge strands, in comparison
	with central strands.},
  doi = {10.1110/ps.03234503},
  pdf = {../local/Siepen2003beta.pdf},
  file = {Siepen2003beta.pdf:local/Siepen2003beta.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.proteinscience.org/cgi/content/abstract/12/10/2348}
}

@book{Lee1995Mouse,
  author = {Silver, L. M.},
  title = {Mouse Genetics: Concepts and Applications},
  publisher = {Oxford University Press},
  year = {1995},
  owner = {fantine},
  timestamp = {2010.10.20},
  url = {http://www.informatics.jax.org/silver/}
}

@article{Silverman1982On,
  author = {Silverman, B. W.},
  title = {On the {E}stimation of a {P}robability {D}ensity {F}unction by the
	{M}aximum {P}enalized {L}ikelihood {M}ethod},
  journal = {Ann. {S}tat.},
  year = {1982},
  volume = {10},
  pages = {795--810},
  number = {3},
  pdf = {../local/Silverman1982On.pdf},
  file = {Silverman1982On.pdf:local/Silverman1982On.pdf:PDF},
  url = {http://links.jstor.org/sici?sici=0090-5364%28198209%2910%3A3%3C795%3AOTEOAP%3E2.0.CO%3B2-S}
}

@inproceedings{Simard1992Tangent,
  author = {Simard, P. and Victorri, B. and LeCun, Y. and Denker, J. S.},
  title = {Tangent Prop - A Formalism for Specifying Selected Invariances in
	an Adaptive Network},
  booktitle = {Adv. Neural. Inform. Process Syst. 4},
  year = {1992},
  editor = {Moody, J. E. and Hanson, S. J. and Lippmann, R.},
  pages = {895--903},
  publisher = {Morgan Kaufmann},
  pdf = {../local/Simard1992Tangent.pdf},
  file = {Simard1992Tangent.pdf:Simard1992Tangent.pdf:PDF},
  owner = {jp},
  timestamp = {2008.12.22}
}

@article{Simon2008Lost,
  author = {Simon, R.},
  title = {Lost in Translation: Problems and Pitfalls in Translating Laboratory
	Observations to Clinical Utility},
  journal = {European Journal of Cancer},
  year = {2008},
  volume = {44},
  pages = {2707},
  number = {18},
  publisher = {NIH Public Access}
}

@article{Simon2003Pitfalls,
  author = {Simon, R. and Radmacher, M. D. and Dobbin, K. and McShane, L. M.},
  title = {Pitfalls in the use of {DNA} microarray data for diagnostic and prognostic
	classification},
  journal = {Journal of the National Cancer Institute},
  year = {2003},
  volume = {95},
  pages = {14--18},
  number = {1},
  publisher = {Oxford University Press}
}

@article{Simonis2007evaluation,
  author = {Marieke Simonis and Jurgen Kooren and Wouter de Laat},
  title = {An evaluation of {3C}-based methods to capture {DNA} interactions.},
  journal = {Nat Methods},
  year = {2007},
  volume = {4},
  pages = {895--901},
  number = {11},
  month = {Nov},
  abstract = {The shape of the genome is thought to play an important part in the
	coordination of transcription and other DNA-metabolic processes.
	Chromosome conformation capture (3C) technology allows us to analyze
	the folding of chromatin in the native cellular state at a resolution
	beyond that provided by current microscopy techniques. It has been
	used, for example, to demonstrate that regulatory DNA elements communicate
	with distant target genes through direct physical interactions that
	loop out the intervening chromatin fiber. Here we discuss the intricacies
	of 3C and new 3C-based methods including the 4C, 5C and ChIP-loop
	assay.},
  doi = {10.1038/nmeth1114},
  institution = {Department of Cell Biology and Genetics, Erasmus MC, PO Box 2040,
	3000 CA, Rotterdam.},
  keywords = {Animals; Chromatin Immunoprecipitation, methods; Chromatin, chemistry/metabolism;
	DNA Ligases, chemistry/metabolism; DNA Restriction Enzymes, chemistry/metabolism;
	DNA, chemistry/genetics/metabolism; Formaldehyde, chemistry; Genetic
	Techniques; Humans; Reproducibility of Results},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nmeth1114},
  pmid = {17971780},
  timestamp = {2010.08.11},
  url = {http://dx.doi.org/10.1038/nmeth1114}
}

@article{Sinden2004proteomic,
  author = {R. E. Sinden},
  title = {A proteomic analysis of malaria biology: integration of old literature
	and new technologies.},
  journal = {Int. J. Parasitol.},
  year = {2004},
  month = {Dec},
  volume = {34},
  number = {13-14},
  pages = {1441--1450},
  doi = {10.1016/j.ijpara.2004.10.005},
  url = {http://dx.doi.org/10.1016/j.ijpara.2004.10.005},
  pmid = {15582521},
  pii = {S0020-7519(04)00210-3},
  keywords = {plasmodium},
  abstract = {The genomic revolution has brought a new vitality into research on
	Plasmodium, its insect and vertebrate hosts. At the cellular level
	nowhere is the impact greater than in the analysis of protein expression
	and the 'assembly' of the supramolecular machines that together comprise
	the functional cell. The repetitive phases of invasion and replication
	that typify the malaria life cycle, together with the unique phase
	of sexual differentiation provide a powerful platform on which to
	investigate the 'molecular machines' that underpin parasite strategy
	and stage-specific functions. This approach is illustrated here in
	an analysis of the ookinete of Plasmodium berghei. Such analyses
	are useful only if conducted with a secure understanding of parasite
	biology. The importance of carefully searching the older literature
	to reach this understanding cannot be over-emphasised. When viewed
	together, the old and new data can give rapid and penetrating insights
	into what some might now term the 'Systems-Biology' of Plasmodium.},
  pdf = {../local/Sinden2004proteomic.pdf},
  file = {Sinden2004proteomic.pdf:local/Sinden2004proteomic.pdf:PDF},
  timestamp = {2006.04.13}
}

@techreport{Sindhwani2004Manifold,
  author = {Sindhwani, V. and Niyogi, P. and Belkin, M.},
  title = {Manifold {R}egularization: {A} {G}eometric {F}ramework for {L}earning
	from {E}xamples},
  year = {2004},
  institution = {The University of Chicago},
  number = {TR-2004-06},
  owner = {vert}
}

@article{Sindhwani2004Feature,
  author = {Vikas Sindhwani and Subrata Rakshit and Dipti Deodhare and Deniz
	Erdogmus and Jose C Principe and Partha Niyogi},
  title = {Feature selection in {MLP}s and {SVM}s based on maximum output information.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2004},
  volume = {15},
  pages = {937--948},
  number = {4},
  month = {Jul},
  abstract = {This paper presents feature selection algorithms for multilayer perceptrons
	({MLP}s) and multiclass support vector machines ({SVM}s), using mutual
	information between class labels and classifier outputs, as an objective
	function. {T}his objective function involves inexpensive computation
	of information measures only on discrete variables; provides immunity
	to prior class probabilities; and brackets the probability of error
	of the classifier. {T}he maximum output information ({MOI}) algorithms
	employ this function for feature subset selection by greedy elimination
	and directed search. {T}he output of the {MOI} algorithms is a feature
	subset of user-defined size and an associated trained classifier
	({MLP}/{SVM}). {T}hese algorithms compare favorably with a number
	of other methods in terms of performance on various artificial and
	real-world data sets.}
}

@article{Singer2002Universal,
  author = {Singer, A. C. and Kozat, S. S. and Feder, M.},
  title = {Universal linear least squares prediction: upper and lower bounds},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {2002},
  volume = {48},
  pages = {2354--2362},
  number = {8},
  month = {Aug},
  doi = {10.1109/TIT.2002.800489},
  pdf = {../local/Singer2002Universal.pdf},
  file = {Singer2002Universal.pdf:local/Singer2002Universal.pdf:PDF},
  owner = {vert},
  url = {http://dx.doi.org/10.1109/TIT.2002.800489}
}

@article{Singh2008Global,
  author = {Singh, R. and Xu, J. and Berger, B.},
  title = {Global alignment of multiple protein interaction networks with application
	to functional orthology detection.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2008},
  month = {Sep},
  volume = {105},
  number = {35},
  pages = {12763--12768},
  doi = {10.1073/pnas.0806627105},
  url = {http://dx.doi.org/10.1073/pnas.0806627105},
  pmid = {18725631},
  pii = {0806627105},
  institution = {Computer Science and Artificial Intelligence Laboratory, Massachusetts
	Institute of Technology, Cambridge, MA 02139, USA.},
  abstract = {Protein-protein interactions (PPIs) and their networks play a central
	role in all biological processes. Akin to the complete sequencing
	of genomes and their comparative analysis, complete descriptions
	of interactomes and their comparative analysis is fundamental to
	a deeper understanding of biological processes. A first step in such
	an analysis is to align two or more PPI networks. Here, we introduce
	an algorithm, IsoRank, for global alignment of multiple PPI networks.
	The guiding intuition here is that a protein in one PPI network is
	a good match for a protein in another network if their respective
	sequences and neighborhood topologies are a good match. We encode
	this intuition as an eigenvalue problem in a manner analogous to
	Google's PageRank method. Using IsoRank, we compute a global alignment
	of the Saccharomyces cerevisiae, Drosophila melanogaster, Caenorhabditis
	elegans, Mus musculus, and Homo sapiens PPI networks. We demonstrate
	that incorporating PPI data in ortholog prediction results in improvements
	over existing sequence-only approaches and over predictions from
	local alignments of the yeast and fly networks. Previous methods
	have been effective at identifying conserved, localized network patterns
	across pairs of networks. This work takes the further step of performing
	a global alignment of multiple PPI networks. It simultaneously uses
	sequence similarity and network data and, unlike previous approaches,
	explicitly models the tradeoff inherent in combining them. We expect
	IsoRank-with its simultaneous handling of node similarity and network
	similarity-to be applicable across many scientific domains.},
  pdf = {../local/Singh2008Global.pdf},
  file = {Singh2008Global.pdf:local/Singh2008Global.pdf:PDF},
  owner = {jp},
  timestamp = {2008.10.02}
}

@inproceedings{Singh2007Pairwise,
  author = {R. Singh and J. Xu and B. Berger},
  title = {Pairwise Global Alignment of Protein Interaction Networks By Matching
	Neighborhood Topology},
  booktitle = {Proceedings of the 11th International Conference on Research
	in Computational Molecular Biology ({RECOMB})},
  year = {2007},
  owner = {michael},
  timestamp = {2008.10.02}
}

@article{Sjoelander2004Phylogenomic,
  author = {Sj{\"o}lander, K.},
  title = {Phylogenomic inference of protein molecular function: advances and
	challenges},
  journal = {Bioinformatics},
  year = {2004},
  month = {Jan},
  volume = {20},
  number = {2},
  pages = {170--179},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/20/2/170},
  pmid = {14734307},
  institution = {Berkeley Phylogenomics Group, Department of Bioengineering, University
	of California, 473 Evans Hall 1762, Berkeley, CA 94720-1762, USA.
	kimmen@uclink.berkeley.edu},
  abstract = {MOTIVATION: Protein families evolve a multiplicity of functions through
	gene duplication, speciation and other processes. As a number of
	studies have shown, standard methods of protein function prediction
	produce systematic errors on these data. Phylogenomic analysis--combining
	phylogenetic tree construction, integration of experimental data
	and differentiation of orthologs and paralogs--has been proposed
	to address these errors and improve the accuracy of functional classification.
	The explicit integration of structure prediction and analysis in
	this framework, which we call structural phylogenomics, provides
	additional insights into protein superfamily evolution. RESULTS:
	Results of protein functional classification using phylogenomic analysis
	show fewer expected false positives overall than when pairwise methods
	of functional classification are employed. We present an overview
	of the motivations and fundamental principles of phylogenomic analysis,
	new methods developed for the key tasks, benchmark datasets for these
	tasks (when available) and suggest procedures to increase accuracy.
	We also discuss some of the methods used in the Celera Genomics high-throughput
	phylogenomic classification of the human genome. AVAILABILITY: Software
	tools from the Berkeley Phylogenomics Group are available at http://phylogenomics.berkeley.edu},
  pdf = {../local/Sjoelander2004Phylogenomic.pdf},
  file = {Sjoelander2004Phylogenomic.pdf:local/Sjoelander2004Phylogenomic.pdf:PDF},
  owner = {jp},
  timestamp = {2008.10.02}
}

@article{Slanina2000Random,
  author = {Slanina, F. and Kotrla, M.},
  title = {Random networks created by biological evolution},
  journal = {Phys. {R}ev. {E}},
  year = {2000},
  volume = {62},
  pages = {6170--6177},
  number = {5},
  pdf = {../local/slan00.pdf},
  file = {slan00.pdf:local/slan00.pdf:PDF},
  subject = {bionet},
  url = {http://ojps.aip.org/getabs/servlet/GetabsServlet?prog=normal&id=PLEEE8000062000005006170000001&idtype=cvips&gifs=yes}
}

@article{Slonim2002From,
  author = {Slonim, D. K.},
  title = {From patterns to pathways: gene expression data analysis comes of
	age},
  journal = {Nat. Genet.},
  year = {2002},
  month = {Dec},
  volume = {32 Suppl},
  pages = {502--508},
  doi = {10.1038/ng1033},
  url = {http://dx.doi.org/10.1038/ng1033},
  pmid = {12454645},
  pii = {ng1033},
  institution = {Department of Genomics, Wyeth Research, 35 Cambridge Park Drive,
	Cambridge, Massachusetts 02140, USA. dslonim@wyeth.com},
  language = {eng},
  medline-pst = {ppublish},
  abstract = {Many different biological questions are routinely studied using transcriptional
	profiling on microarrays. A wide range of approaches are available
	for gleaning insights from the data obtained from such experiments.
	The appropriate choice of data-analysis technique depends both on
	the data and on the goals of the experiment. This review summarizes
	some of the common themes in microarray data analysis, including
	detection of differential expression, clustering, and predicting
	sample characteristics. Several approaches to each problem, and their
	relative merits, are discussed and key areas for additional research
	highlighted.},
  pdf = {../local/Slonim2002From.pdf},
  file = {Slonim2002From.pdf:Slonim2002From.pdf:PDF},
  owner = {jp},
  timestamp = {2011.10.03}
}

@article{Smale2003Estimating,
  author = {Smale, S. and Zhou, D.},
  title = {Estimating the approximation error in learning theory},
  journal = {Analysis and {A}pplications},
  volume = {1},
  number = {1},
  year = {2003},
  owner = {jeanphilippevert},
  pdf = {../local/Smale2003Estimating.pdf},
  file = {Smale2003Estimating.pdf:local/Smale2003Estimating.pdf:PDF}
}

@inproceedings{Smalter2009Feature,
  author = {Aaron Smalter and Jun Huan and Gerald Lushington},
  title = {Feature Selection in the Tensor Product Feature Space},
  booktitle = {{IEEE} International Conference on Data Mining},
  year = {2009},
  pages = {1004--1009},
  address = {Los Alamitos, CA, USA},
  publisher = {IEEE Computer Society},
  doi = {10.1109/ICDM.2009.101},
  issn = {1550-4786},
  url = {http://doi.ieeecomputersociety.org/10.1109/ICDM.2009.101}
}

@article{Smart2008Cascading,
  author = {Smart, A. G. and Amaral, L. A. N. and Ottino, J. M.},
  title = {Cascading failure and robustness in metabolic networks},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2008},
  month = {Sep},
  volume = {105},
  number = {36},
  pages = {13223--13228},
  doi = {10.1073/pnas.0803571105},
  url = {http://dx.doi.org/10.1073/pnas.0803571105},
  pmid = {18765805},
  pii = {0803571105},
  institution = {Department of Chemical and Biological Engineering, Northwestern University,
	2145 Sheridan Road, Evanston, IL 60208, USA. a-smart@u.northwestern.edu},
  language = {eng},
  medline-pst = {ppublish},
  abstract = {We investigate the relationship between structure and robustness in
	the metabolic networks of Escherichia coli, Methanosarcina barkeri,
	Staphylococcus aureus, and Saccharomyces cerevisiae, using a cascading
	failure model based on a topological flux balance criterion. We find
	that, compared to appropriate null models, the metabolic networks
	are exceptionally robust. Furthermore, by decomposing each network
	into rigid clusters and branched metabolites, we demonstrate that
	the enhanced robustness is related to the organization of branched
	metabolites, as rigid cluster formations in the metabolic networks
	appear to be consistent with null model behavior. Finally, we show
	that cascading in the metabolic networks can be described as a percolation
	process.},
  pdf = {../local/Smart2008Cascading.pdf},
  file = {Smart2008Cascading.pdf:Smart2008Cascading.pdf:PDF},
  owner = {jp},
  timestamp = {2011.11.29}
}

@inproceedings{Smeaton2006Evaluation,
  author = {Smeaton, A. F. and Over, P. and Kraaij, W.},
  title = {Evaluation campaigns and {TRECVid}},
  booktitle = {{MIR} '06: {P}roceedings of the 8th {ACM} {I}nternational {W}orkshop
	on {M}ultimedia {I}nformation {R}etrieval},
  year = {2006},
  pages = {321--330},
  address = {New York, NY, USA},
  publisher = {ACM Press},
  doi = {10.1145/1178677.1178722},
  timestamp = {2008.07.29},
  url = {http://dx.doi.org/10.1145/1178677.1178722}
}

@article{Smith2004Towards,
  author = {P. A. Smith and M. J. Sorich and L. S C Low and R. A. McKinnon and
	J. O. Miners},
  title = {Towards integrated {ADME} prediction: past, present and future directions
	for modelling metabolism by {UDP}-glucuronosyltransferases.},
  journal = {J {M}ol {G}raph {M}odel},
  year = {2004},
  volume = {22},
  pages = {507--517},
  number = {6},
  month = {Jul},
  abstract = {Undesirable absorption, distribution, metabolism, excretion ({ADME})
	properties are the cause of many drug development failures and this
	has led to the need to identify such problems earlier in the development
	process. {T}his review highlights computational (in silico) approaches
	that have been used to identify the characteristics of ligands influencing
	molecular recognition and/or metabolism by the drug-metabolising
	enzyme {UDP}-glucuronosyltransferase ({UGT}). {C}urrent studies applying
	pharmacophore elucidation, 2{D}-quantitative structure metabolism
	relationships (2{D}-{QSMR}), 3{D}-quantitative structure metabolism
	relationships (3{D}-{QSMR}), and non-linear pattern recognition techniques
	such as artificial neural networks and support vector machines for
	modelling metabolism by {UGT} are reported. {A}n assessment of the
	utility of in silico approaches for the qualitative and quantitative
	prediction of drug glucuronidation parameters highlights the benefit
	of using multiple pharmacophores and also non-linear techniques for
	classification. {S}ome of the challenges facing the development of
	generalisable models for predicting metabolism by {UGT}, including
	the need for screening of more diverse structures, are also outlined.},
  doi = {10.1016/j.jmgm.2004.03.011},
  pdf = {../local/Smith2004Towards.pdf},
  file = {Smith2004Towards.pdf:local/Smith2004Towards.pdf:PDF},
  keywords = {Algorithms, Animals, Antisense, Artificial Intelligence, Astrocytoma,
	Automated, Autonomic Nervous System, Brain, Brain Neoplasms, Cell
	Line, Cerebral Cortex, Child, Cluster Analysis, Cognition, Comparative
	Study, Computational Biology, Computer Simulation, Computer-Assisted,
	DNA Fingerprinting, Databases, Diagnosis, Discriminant Analysis,
	Drug Design, Drug Evaluation, Electroencephalography, Emotions, Event-Related
	Potentials, Evoked Potentials, Factual, Fluorescence, Fuzzy Logic,
	Gene Silencing, Gene Targeting, Genetic, Glucuronosyltransferase,
	Hand, Hela Cells, Humans, Imaging, Intracellular Space, Magnetic
	Resonance Spectroscopy, Male, Meningeal Neoplasms, Meningioma, Microscopy,
	Models, Molecular Structure, Monitoring, Motor, Neoplasm Metastasis,
	Neoplasms, Neural Networks (Computer), Non-U.S. Gov't, Oligonucleotides,
	P.H.S., P300, Pattern Recognition, Peptides, Pharmaceutical Preparations,
	Physiologic, Preclinical, Predictive Value of Tests, Preschool, Prognosis,
	Protein Interaction Mapping, Protein Structure, Proteins, Proteomics,
	Quantitative Structure-Activity Relationship, Quaternary, RNA, RNA
	Interference, Recognition (Psychology), Reproducibility of Results,
	Research Support, Sensitivity and Specificity, Signal Processing,
	Small Interfering, Software, Thionucleotides, Three-Dimensional,
	Tumor, U.S. Gov't, User-Computer Interface, Word Processing, 15182810},
  pii = {S1093326304000269},
  url = {http://dx.doi.org/10.1016/j.jmgm.2004.03.011}
}

@article{Smith1981Identification,
  author = {T. Smith and M. Waterman},
  title = {Identification of common molecular subsequences.},
  journal = {J. {M}ol. {B}iol.},
  year = {1981},
  volume = {147},
  pages = {195--197}
}

@inproceedings{Smola2003Kernels,
  author = {Smola, A. and Kondor, R.},
  title = {Kernels and {R}egularization on {G}raphs.},
  booktitle = {Proceedings of 16th {A}nnual {C}onference on {C}omputational {L}earning
	{T}heory},
  year = {2003},
  editor = {Sch{\"o}lkopf, B. and Warmuth, M. K.},
  pages = {144--158},
  publisher = {Springer-Verlag},
  citeseerurl = {http://citeseer.ist.psu.edu/smola03kernels.html},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@article{Smola1998connection,
  author = {Smola, A. J. and Sch{\"o}lkopf, B. and M{\"u}ller, K.-R.},
  title = {The connection between regularization operators and support vector
	kernels},
  journal = {Neural {N}etworks},
  year = {1998},
  volume = {11},
  pages = {637--649},
  number = {4},
  doi = {10.1016/S0893-6080(98)00032-X},
  pdf = {../local/Smola1998connection.pdf},
  file = {Smola1998connection.pdf:local/Smola1998connection.pdf:PDF},
  url = {http://dx.doi.org/10.1016/S0893-6080(98)00032-X}
}

@article{Smoot2011Cytoscape,
  author = {Smoot, M. E. and Ono, K. and Ruscheinski, J. and Wang, P. L. and Ideker,
	T.},
  title = {Cytoscape 2.8: new features for data integration and network visualization},
  journal = {Bioinformatics},
  year = {2011},
  volume = {27},
  pages = {431--432},
  number = {3},
  publisher = {Oxford Univ Press}
}

@incollection{Smyth2005Bioinformatics,
  author = {Smyth, G. K.},
  title = {Limma: linear model for microarray data},
  booktitle = {Bioinformatics and Computational Biology Solutions using {R} and
	{B}ioconductor},
  editor = {Gentleman, R. and Carey, V. and Dudoit, S. and Irizarry, R. and Huber,
	W.},
  pages = {397--420},
  publisher = {Springer},
  address = {New York},
  year = {2005},
  pdf = {../local/Smyth2005Bioinformatics.pdf},
  file = {Smyth2005Bioinformatics.pdf:local/Smyth2005Bioinformatics.pdf:PDF},
  timestamp = {2007.09.19}
}

@article{Smyth2004Linear,
  author      = {Smyth, G. K.},
  title       = {Linear models and empirical {B}ayes methods for assessing differential
	expression in microarray experiments.},
  journal     = {Stat. Appl. Genet. Mol. Biol.},
  year        = {2004},
  volume      = {3},
  pages       = {Article3},
  abstract    = {The problem of identifying differentially expressed genes in designed
	microarray experiments is considered. Lonnstedt and Speed (2002)
	derived an expression for the posterior odds of differential expression
	in a replicated two-color experiment using a simple hierarchical
	parametric model. The purpose of this paper is to develop the hierarchical
	model of Lonnstedt and Speed (2002) into a practical approach for
	general microarray experiments with arbitrary numbers of treatments
	and RNA samples. The model is reset in the context of general linear
	models with arbitrary coefficients and contrasts of interest. The
	approach applies equally well to both single channel and two color
	microarray experiments. Consistent, closed form estimators are derived
	for the hyperparameters in the model. The estimators proposed have
	robust behavior even for small numbers of arrays and allow for incomplete
	data arising from spot filtering or spot quality weights. The posterior
	odds statistic is reformulated in terms of a moderated t-statistic
	in which posterior residual standard deviations are used in place
	of ordinary standard deviations. The empirical Bayes approach is
	equivalent to shrinkage of the estimated sample variances towards
	a pooled estimate, resulting in far more stable inference when the
	number of arrays is small. The use of moderated t-statistics has
	the advantage over the posterior odds that the number of hyperparameters
	which need to estimated is reduced; in particular, knowledge of the
	non-null prior for the fold changes are not required. The moderated
	t-statistic is shown to follow a t-distribution with augmented degrees
	of freedom. The moderated t inferential approach extends to accommodate
	tests of composite null hypotheses through the use of moderated F-statistics.
	The performance of the methods is demonstrated in a simulation study.
	Results are presented for two publicly available data sets.},
  doi         = {10.2202/1544-6115.1027},
  pdf         = {../local/Smyth2004Linear.pdf},
  file        = {Smyth2004Linear.pdf:Smyth2004Linear.pdf:PDF},
  institution = {Walter and Eliza Hall Institute. smyth@wehi.edu.au},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pmid        = {16646809},
  timestamp   = {2012.01.15},
  url         = {http://dx.doi.org/10.2202/1544-6115.1027}
}

@article{Smyth2003Normalization,
  author    = {Smyth, G. K. and Speed, T. P.},
  title     = {Normalization of {cDNA} microarray data},
  journal   = {Methods},
  year      = {2003},
  volume    = {31},
  pages     = {265--273},
  timestamp = {2007.09.19}
}

@article{Snijder2009Population,
  author = {Berend Snijder and Raphael Sacher and Pauli R{\"a}m{\"o} and Eva-Maria Damm
	and Prisca Liberali and Lucas Pelkmans},
  title = {Population context determines cell-to-cell variability in endocytosis
	and virus infection.},
  journal = {Nature},
  year = {2009},
  volume = {461},
  pages = {520--523},
  number = {7263},
  month = sep,
  abstract = {Single-cell heterogeneity in cell populations arises from a combination
	of intrinsic and extrinsic factors. This heterogeneity has been measured
	for gene transcription, phosphorylation, cell morphology and drug
	perturbations, and used to explain various aspects of cellular physiology.
	In all cases, however, the causes of heterogeneity were not studied.
	Here we analyse, for the first time, the heterogeneous patterns of
	related cellular activities, namely virus infection, endocytosis
	and membrane lipid composition in adherent human cells. We reveal
	correlations with specific cellular states that are defined by the
	population context of a cell, and we derive probabilistic models
	that can explain and predict most cellular heterogeneity of these
	activities, solely on the basis of each cell's population context.
	We find that accounting for population-determined heterogeneity is
	essential for interpreting differences between the activity levels
	of cell populations. Finally, we reveal that synergy between two
	molecular components, focal adhesion kinase and the sphingolipid
	GM1, enhances the population-determined pattern of simian virus 40
	(SV40) infection. Our findings provide an explanation for the origin
	of heterogeneity patterns of cellular activities in adherent cell
	populations.},
  doi = {10.1038/nature08282},
  pdf = {../local/Snijder2009Population.pdf},
  file = {Snijder2009Population.pdf:Snijder2009Population.pdf:PDF},
  institution = {Institute of Molecular Systems Biology, ETH Zurich (Swiss Federal
	Institute of Technology), Wolfgang Pauli-Strasse 16, CH-8093 Zurich,
	Switzerland.},
  keywords = {highcontentscreening},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature08282},
  pmid = {19710653},
  timestamp = {2009.10.30},
  url = {http://dx.doi.org/10.1038/nature08282}
}

@article{Snoeve2004Designing,
  author = {Sn{\o}ve, O. and Nedland, M. and Fjeldstad, S. H. and Humberset, H.
	and Birkeland, O. R. and Gr{\"o}nfeld, T. and Saetrom, P.},
  title = {Designing effective si{RNA}s with off-target control.},
  journal = {Biochem. {B}iophys. {R}es. {C}ommun.},
  year = {2004},
  volume = {325},
  pages = {769--773},
  number = {3},
  month = dec,
  abstract = {Successful gene silencing by {RNA} interference requires a potent
	and specific depletion of the target m{RNA}. {T}arget candidates
	must be chosen so that their corresponding short interfering {RNA}s
	are likely to be effective against that target and unlikely to accidentally
	silence other transcripts due to sequence similarity. {W}e show that
	both effective and unique targets exist in mouse, fruit fly, and
	worm, and present a new design tool that enables users to make the
	trade-off between efficacy and uniqueness. {T}he tool lists all targets
	with partial sequence similarity to the primary target to highlight
	candidates for negative controls.},
  doi = {10.1016/j.bbrc.2004.10.097},
  keywords = {sirna},
  pii = {S0006-291X(04)02391-5},
  url = {http://dx.doi.org/10.1016/j.bbrc.2004.10.097}
}

@article{Soares2009Identifying,
  author = {Soares, H. D. and Chen, Y. and Sabbagh, M. and Roher, A. and Rohrer,
	A. and Schrijvers, E. and Breteler, M.},
  title = {Identifying early markers of {Alzheimer's} disease using quantitative
	multiplex proteomic immunoassay panels},
  journal = {Ann. N. Y. Acad. Sci.},
  year = {2009},
  volume = {1180},
  pages = {56--67},
  month = oct,
  abstract = {Alzheimer's disease (AD) is a debilitating neurodegenerative disorder
	with incidence expected to increase four-fold over the next decade.
	Extensive research efforts are focused upon identifying new treatments,
	and early diagnosis is considered key to successful intervention.
	Although imaging and cerebrospinal fluid biomarkers have shown promise
	in identifying patients in very early stages of the disease, more
	noninvasive cost-effective tools have remained elusive. Recent studies
	have reported that an 18-analyte multiplexed plasma panel can differentiate
	AD from controls suggesting plasma-based screening tools for early
	AD diagnosis exists. The current study tested the reproducibility
	of a subset of the original 18-analyte panel using a bead-based multiplex
	technology. Preliminary results suggest diagnostic accuracy using
	the subset was 61\%. Multivariate analysis of an 89-analyte multivariate
	panel yielded a diagnostic accuracy of 70\% suggesting a plasma-based
	AD signature that may be a useful screening tool.},
  doi = {10.1111/j.1749-6632.2009.05066.x},
  pdf = {../local/Soares2009Identifying.pdf},
  file = {Soares2009Identifying.pdf:Soares2009Identifying.pdf:PDF},
  institution = {Pfizer Global Research and Development, Groton, Connecticut, USA.
	holly.d.soares@pfizer.com},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {NYAS5066},
  pmid = {19906261},
  timestamp = {2011.03.14},
  url = {http://dx.doi.org/10.1111/j.1749-6632.2009.05066.x}
}

@article{Sohler2004New,
  author = {Sohler, F. and Hanisch, D. and Zimmer, R.},
  title = {New methods for joint analysis of biological networks and expression
	data},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1517--1521},
  number = {10},
  month = jul,
  abstract = {SUMMARY: Biological networks, such as protein interaction, regulatory
	or metabolic networks, derived from public databases, biological
	experiments or text mining can be useful for the analysis of high-throughput
	experimental data. We present two algorithms embedded in the ToPNet
	application that show promising performance in analyzing expression
	data in the context of such networks. First, the Significant Area
	Search algorithm detects subnetworks consisting of significantly
	regulated genes. These subnetworks often provide hints on which biological
	processes are affected in the measured conditions. Second, Pathway
	Queries allow detection of networks including molecules that are
	not necessarily significantly regulated, such as transcription factors
	or signaling proteins. Moreover, using these queries, the user can
	formulate biological hypotheses and check their validity with respect
	to experimental data. All resulting networks and pathways can be
	explored further using the interactive analysis tools provided by
	ToPNet program.},
  doi = {10.1093/bioinformatics/bth112},
  pdf = {../local/Sohler2004New.pdf},
  file = {Sohler2004New.pdf:Sohler2004New.pdf:PDF},
  institution = {Institut f{\"u}r Informatik, Ludwig-Maximilians-Universit{\"a}t M{\"u}nchen,
	Amalienstrasse 17, 80333 M{\"u}nchen, Germany. florian.sohler@ifi.lmu.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {20/10/1517},
  pmid = {15231545},
  timestamp = {2011.09.26},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth112}
}

@article{Solinas1997Matrix,
  author = {S. Solinas-Toldo and S. Lampel and S. Stilgenbauer and J. Nickolenko
	and A. Benner and H. Dohner and T. Cremer and P. Lichter},
  title = {Matrix-based comparative genomic hybridization: Biochips to screen
	for genomic imbalances},
  journal = {Genes Chromosomes Cancer},
  year = {1997},
  volume = {20},
  pages = {399--407},
  keywords = {csbcbook, csbcbook-ch2}
}

@techreport{Sole2001Model,
  author = {Sol{\'e}, R. V. and Pastor-Satorras, R. and Smith, E. D. and Kepler,
	T.},
  title = {A {M}odel of {L}arge-{S}cale {P}roteome {E}volution},
  institution = {Santa Fe Institute},
  year = {2001},
  type = {Working Paper},
  number = {01-08-041},
  pdf = {../local/sole01.pdf},
  file = {sole01.pdf:local/sole01.pdf:PDF},
  subject = {bionetprot},
  url = {http://www.santafe.edu/sfi/publications/Abstracts/01-08-041abs.html}
}

@article{Son2005Database,
  author = {Son, C. G. and Bilke, S. and Davis, S. and Greer, B. T. and Wei, J. S.
	and Whiteford, C. C. and Chen, Q.-R. and Cenacchi, N. and Khan, J.},
  title = {Database of m{RNA} gene expression profiles of multiple human organs},
  journal = {Genome Res.},
  year = {2005},
  volume = {15},
  pages = {443--450},
  number = {3},
  month = mar,
  abstract = {Genome-wide expression profiling of normal tissue may facilitate our
	understanding of the etiology of diseased organs and augment the
	development of new targeted therapeutics. Here, we have developed
	a high-density gene expression database of 18,927 unique genes for
	158 normal human samples from 19 different organs of 30 different
	individuals using DNA microarrays. We report four main findings.
	First, despite very diverse sample parameters (e.g., age, ethnicity,
	sex, and postmortem interval), the expression profiles belonging
	to the same organs cluster together, demonstrating internal stability
	of the database. Second, the gene expression profiles reflect major
	organ-specific functions on the molecular level, indicating consistency
	of our database with known biology. Third, we demonstrate that any
	small (i.e., n approximately 100), randomly selected subset of genes
	can approximately reproduce the hierarchical clustering of the full
	data set, suggesting that the observed differential expression of
	>90\% of the probed genes is of biological origin. Fourth, we demonstrate
	a potential application of this database to cancer research by identifying
	19 tumor-specific genes in neuroblastoma. The selected genes are
	relatively underexpressed in all of the organs examined and belong
	to therapeutically relevant pathways, making them potential novel
	diagnostic markers and targets for therapy. We expect this database
	will be of utility for developing rationally designed molecularly
	targeted therapeutics in diseases such as cancer, as well as for
	exploring the functions of genes.},
  doi = {10.1101/gr.3124505},
  institution = {Advanced Technology Center, Oncogenomics Section, Pediatric Oncology
	Branch, National Cancer Institute, National Institutes of Health,
	Gaithersburg, Maryland 20877, USA.},
  keywords = {Cluster Analysis; Databases, Nucleic Acid; Gene Expression Profiling;
	Humans; Oligonucleotide Array Sequence Analysis; Organ Specificity;
	Principal Component Analysis; RNA, Messenger},
  owner = {mordelet},
  pii = {15/3/443},
  pmid = {15741514},
  timestamp = {2010.11.02},
  url = {http://dx.doi.org/10.1101/gr.3124505}
}

@article{Soneson2010Integrative,
  author = {Soneson, Charlotte and Lilljebj{\"o}rn, Henrik and Fioretos, Thoas and
	Fontes, Magnus},
  title = {Integrative analysis of gene expression and copy number alterations
	using canonical correlation analysis.},
  journal = {BMC Bioinformatics},
  year = {2010},
  volume = {11},
  pages = {191},
  abstract = {With the rapid development of new genetic measurement methods, several
	types of genetic alterations can be quantified in a high-throughput
	manner. While the initial focus has been on investigating each data
	set separately, there is an increasing interest in studying the correlation
	structure between two or more data sets. Multivariate methods based
	on Canonical Correlation Analysis (CCA) have been proposed for integrating
	paired genetic data sets. The high dimensionality of microarray data
	imposes computational difficulties, which have been addressed for
	instance by studying the covariance structure of the data, or by
	reducing the number of variables prior to applying the CCA. In this
	work, we propose a new method for analyzing high-dimensional paired
	genetic data sets, which mainly emphasizes the correlation structure
	and still permits efficient application to very large data sets.
	The method is implemented by translating a regularized CCA to its
	dual form, where the computational complexity depends mainly on the
	number of samples instead of the number of variables. The optimal
	regularization parameters are chosen by cross-validation. We apply
	the regularized dual CCA, as well as a classical CCA preceded by
	a dimension-reducing Principal Components Analysis (PCA), to a paired
	data set of gene expression changes and copy number alterations in
	leukemia. Using the correlation-maximizing methods, regularized dual
	CCA and PCA+CCA, we show that without pre-selection of known disease-relevant
	genes, and without using information about clinical class membership,
	an exploratory analysis singles out two patient groups, corresponding
	to well-known leukemia subtypes. Furthermore, the variables showing
	the highest relevance to the extracted features agree with previous
	biological knowledge concerning copy number alterations and gene
	expression changes in these subtypes. Finally, the correlation-maximizing
	methods are shown to yield results which are more biologically interpretable
	than those resulting from a covariance-maximizing method, and provide
	different insight compared to when each variable set is studied separately
	using PCA. We conclude that regularized dual CCA as well as PCA+CCA
	are useful methods for exploratory analysis of paired genetic data
	sets, and can be efficiently implemented also when the number of
	variables is very large.},
  doi = {10.1186/1471-2105-11-191},
  institution = {Centre for Mathematical Sciences, Lund University, Box 118, SE-221
	00 Lund, Sweden. lottas@maths.lth.se},
  keywords = {Algorithms; Databases, Genetic; Gene Dosage; Gene Expression; Gene
	Expression Profiling, methods; Genomics, methods; Humans; Leukemia,
	classification/genetics; Principal Component Analysis},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-11-191},
  pmid = {20398334},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1186/1471-2105-11-191}
}

@article{Song2002Prediction,
  author = {Minghu Song and Curt M Breneman and Jinbo Bi and N. Sukumar and Kristin
	P Bennett and Steven Cramer and Nihal Tugcu},
  title = {Prediction of protein retention times in anion-exchange chromatography
	systems using support vector regression.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2002},
  volume = {42},
  pages = {1347--1357},
  number = {6},
  abstract = {Quantitative {S}tructure-{R}etention {R}elationship ({QSRR}) models
	are developed for the prediction of protein retention times in anion-exchange
	chromatography systems. {T}opological, subdivided surface area, and
	{TAE} ({T}ransferable {A}tom {E}quivalent) electron-density-based
	descriptors are computed directly for a set of proteins using molecular
	connectivity patterns and crystal structure geometries. {A} novel
	algorithm based on {S}upport {V}ector {M}achine ({SVM}) regression
	has been employed to obtain predictive {QSRR} models using a two-step
	computational strategy. {I}n the first step, a sparse linear {SVM}
	was utilized as a feature selection procedure to remove irrelevant
	or redundant information. {S}ubsequently, the selected features were
	used to produce an ensemble of nonlinear {SVM} regression models
	that were combined using bootstrap aggregation (bagging) techniques,
	where various combinations of training and validation data sets were
	selected from the pool of available data. {A} visualization scheme
	(star plots) was used to display the relative importance of each
	selected descriptor in the final set of "bagged" models. {O}nce these
	predictive models have been validated, they can be used as an automated
	prediction tool for virtual high-throughput screening ({VHTS}).},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Classification,
	Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted,
	Cystadenoma, DNA, Decision Making, Diagnosis, Differential, Drug,
	Drug Design, Electrostatics, Eukaryotic Cells, Feasibility Studies,
	Female, Gene Expression, Gene Expression Profiling, Gene Expression
	Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans,
	Internet, Ion Exchange, Leukemia, Ligands, Likelihood Functions,
	Logistic Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains,
	Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques,
	Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic,
	Neural Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S.
	Gov't, Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer
	Variation, Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms,
	P.H.S., Pattern Recognition, Probability, Protein Binding, Protein
	Conformation, Proteins, Quality Control, Quantum Theory, RNA, RNA
	Splicing, Receptors, Reference Values, Regression Analysis, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Sequence
	Analysis, Signal Processing, Software, Statistical, Stomach Neoplasms,
	Thermodynamics, Transcription, Tumor Markers, U.S. Gov't, 12444731},
  pii = {ci025580t}
}

@article{Song2006Development,
  author    = {Song, M. and Clark, M.},
  title     = {{D}evelopment and evaluation of an in silico model for h{ERG} binding.},
  journal   = {J. Chem. Inf. Model.},
  year      = {2006},
  volume    = {46},
  pages     = {392--400},
  number    = {1},
  abstract  = {It has been recognized that drug-induced QT prolongation is related
	to blockage of the human ether-a-go-go-related gene (hERG) ion channel.
	Therefore, it is prudent to evaluate the hERG binding of active compounds
	in early stages of drug discovery. In silico approaches provide an
	economic and quick method to screen for potential hERG liability.
	A diverse set of 90 compounds with hERG IC(50) inhibition data was
	collected from literature references. Fragment-based QSAR descriptors
	and three different statistical methods, support vector regression,
	partial least squares, and random forests, were employed to construct
	QSAR models for hERG binding affinity. Important fragment descriptors
	relevant to hERG binding affinity were identified through an efficient
	feature selection method based on sparse linear support vector regression.
	The support vector regression predictive model built upon selected
	fragment descriptors outperforms the other two statistical methods
	in this study, resulting in an r(2) of 0.912 and 0.848 for the training
	and testing data sets, respectively. The support vector regression
	model was applied to predict hERG binding affinities of 20 in-house
	compounds belonging to three different series. The model predicted
	the relative binding affinity well for two out of three compound
	series. The hierarchical clustering and dendrogram results show that
	the compound series with the best prediction has much higher structural
	similarity and more neighbors of training compounds than the other
	two compound series, demonstrating the predictive scope of the model.
	The combination of a QSAR model and postprocessing analysis, such
	as clustering and visualization, provides a way to assess the confidence
	level of QSAR prediction results on the basis of similarity to the
	training set.},
  doi       = {10.1021/ci050308f},
  pdf       = {../local/Song2006Development.pdf},
  file      = {Song2006Development.pdf:Song2006Development.pdf:PDF},
  keywords  = {chemoinformatics herg},
  pmid      = {16426073},
  timestamp = {2006.10.06},
  url       = {http://dx.doi.org/10.1021/ci050308f}
}

@article{Song2004Comparison,
  author = {Xiaowei Song and Arnold Mitnitski and Jafna Cox and Kenneth Rockwood},
  title = {Comparison of machine learning techniques with classical statistical
	models in predicting health outcomes.},
  journal = {Medinfo},
  year = {2004},
  volume = {11},
  pages = {736--740},
  number = {Pt 1},
  abstract = {Several machine learning techniques (multilayer and single layer perceptron,
	logistic regression, least square linear separation and support vector
	machines) are applied to calculate the risk of death from two biomedical
	data sets, one from patient care records, and another from a population
	survey. {E}ach dataset contained multiple sources of information:
	history of related symptoms and other illnesses, physical examination
	findings, laboratory tests, medications (patient records dataset),
	health attitudes, and disabilities in activities of daily living
	(survey dataset). {E}ach technique showed very good mortality prediction
	in the acute patients data sample ({AUC} up to 0.89) and fair prediction
	accuracy for six year mortality ({AUC} from 0.70 to 0.76) in individuals
	from epidemiological database surveys. {T}he results suggest that
	the nature of data is of primary importance rather than the learning
	technique. {H}owever, the consistently superior performance of the
	artificial neural network (multi-layer perceptron) indicates that
	nonlinear relationships (which cannot be discerned by linear separation
	techniques) can provide additional improvement in correctly predicting
	health outcomes.},
  keywords = {Aged, Air, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts,
	Artificial Intelligence, Atrial, Automated, Canada, Carotid Stenosis,
	Cerebrovascular Accident, Cerebrovascular Circulation, Comparative
	Study, Computer-Assisted, Cysteine, Decision Trees, Dementia, Diagnosis,
	Disulfides, Doppler, Embolism, Expert Systems, Extramural, Factor
	Analysis, Female, Gene Expression, Gene Expression Profiling, Health
	Status, Heart Septal Defects, Humans, Intracranial Embolism, Male,
	Models, Molecular, Myocardial Infarction, N.I.H., Neoplasms, Neural
	Networks (Computer), Non-U.S. Gov't, Oligonucleotide Array Sequence
	Analysis, Oxidation-Reduction, P.H.S., Pattern Recognition, Prognosis,
	Protein Binding, Protein Folding, Proteins, ROC Curve, Research Support,
	Sensitivity and Specificity, Software, Statistical, Transcranial,
	Treatment Outcome, U.S. Gov't, Ultrasonography, 15360910},
  pii = {D040004933}
}

@article{Sonnenburg2006Large,
  author = {Sonnenburg, S{\"o}ren and R{\"a}tsch, Gunnar and Sch{\"a}fer, Christin
	and Sch{\"o}lkopf, Bernhard},
  title = {Large Scale Multiple Kernel Learning},
  journal = {J. Mach. Learn. Res.},
  year = {2006},
  volume = {7},
  pages = {1531--1565},
  address = {Cambridge, MA, USA},
  issn = {1533-7928},
  publisher = {MIT Press}
}

@inproceedings{Sonnenburg2002New,
  author = {Sonnenburg, S. and R{\"a}tsch, G. and Jagota, A. and M{\"u}ller,
	K.-R.},
  title = {New methods for splice-site recognition},
  booktitle = {Proc. {I}nternational conference on artificial {N}eural {N}etworks
	-- {ICANN}'02},
  year = {2002},
  editor = {Dorronsoro, J. R.},
  volume = {2415},
  series = {LNCS},
  pages = {329--336},
  publisher = {Springer Berlin},
  pdf = {../local/Sonnenburg2002New.pdf},
  file = {Sonnenburg2002New.pdf:local/Sonnenburg2002New.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Sonnhammer1997Pfam,
  author = {Sonnhammer, E. L. and Eddy, S. R. and Durbin, R.},
  title = {{Pfam}: a comprehensive database of protein domain families based on
	seed alignments.},
  journal = {Proteins},
  year = {1997},
  volume = {28},
  pages = {405--420},
  number = {3},
  month = jul,
  abstract = {Databases of multiple sequence alignments are a valuable aid to protein
	sequence classification and analysis. One of the main challenges
	when constructing such a database is to simultaneously satisfy the
	conflicting demands of completeness on the one hand and quality of
	alignment and domain definitions on the other. The latter properties
	are best dealt with by manual approaches, whereas completeness in
	practice is only amenable to automatic methods. Herein we present
	a database based on hidden Markov model profiles (HMMs), which combines
	high quality and completeness. Our database, Pfam, consists of parts
	A and B. Pfam-A is curated and contains well-characterized protein
	domain families with high quality alignments, which are maintained
	by using manually checked seed alignments and HMMs to find and align
	all members. Pfam-B contains sequence families that were generated
	automatically by applying the Domainer algorithm to cluster and align
	the remaining protein sequences after removal of Pfam-A domains.
	By using Pfam, a large number of previously unannotated proteins
	from the Caenorhabditis elegans genome project were classified. We
	have also identified many novel family memberships in known proteins,
	including new kazal, Fibronectin type III, and response regulator
	receiver domains. Pfam-A families have permanent accession numbers
	and form a library of HMMs available for searching and automatic
	annotation of new protein sequences.},
  institution = {Sanger Centre, Wellcome Trust Genome Campus, Hinxton, Cambridge,
	United Kingdom.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {3.0.CO;2-L},
  pmid = {9223186},
  timestamp = {2010.02.21}
}

@article{Sorich2004Rapid,
  author = {Michael J Sorich and Ross A McKinnon and John O Miners and David
	A Winkler and Paul A Smith},
  title = {Rapid prediction of chemical metabolism by human {UDP}-glucuronosyltransferase
	isoforms using quantum chemical descriptors derived with the electronegativity
	equalization method.},
  journal = {J {M}ed {C}hem},
  year = {2004},
  volume = {47},
  pages = {5311--5317},
  number = {21},
  month = oct,
  abstract = {This study aimed to evaluate in silico models based on quantum chemical
	({QC}) descriptors derived using the electronegativity equalization
	method ({EEM}) and to assess the use of {QC} properties to predict
	chemical metabolism by human {UDP}-glucuronosyltransferase ({UGT})
	isoforms. {V}arious {EEM}-derived {QC} molecular descriptors were
	calculated for known {UGT} substrates and nonsubstrates. {C}lassification
	models were developed using support vector machine and partial least
	squares discriminant analysis. {I}n general, the most predictive
	models were generated with the support vector machine. {C}ombining
	{QC} and 2{D} descriptors (from previous work) using a consensus
	approach resulted in a statistically significant improvement in predictivity
	(to 84\%) over both the {QC} and 2{D} models and the other methods
	of combining the descriptors. {EEM}-derived {QC} descriptors were
	shown to be both highly predictive and computationally efficient.
	{I}t is likely that {EEM}-derived {QC} properties will be generally
	useful for predicting {ADMET} and physicochemical properties during
	drug discovery.},
  doi = {10.1021/jm0495529},
  pdf = {../local/Sorich2004Rapid.pdf},
  file = {Sorich2004Rapid.pdf:local/Sorich2004Rapid.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/jm0495529}
}

@article{Sorich2003Comparison,
  author = {M. J. Sorich and J. O. Miners and R. A. McKinnon and D. A. Winkler
	and F. R. Burden and P. A. Smith},
  title = {Comparison of linear and nonlinear classification algorithms for
	the prediction of drug and chemical metabolism by human {UDP}-glucuronosyltransferase
	isoforms.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2003},
  volume = {43},
  pages = {2019--2024},
  number = {6},
  abstract = {Partial least squares discriminant analysis ({PLSDA}), {B}ayesian
	regularized artificial neural network ({BRANN}), and support vector
	machine ({SVM}) methodologies were compared by their ability to classify
	substrates and nonsubstrates of 12 isoforms of human {UDP}-glucuronosyltransferase
	({UGT}), an enzyme ``superfamily'' involved in the metabolism of drugs,
	nondrug xenobiotics, and endogenous compounds. {S}imple two-dimensional
	descriptors were used to capture chemical information. {F}or each
	data set, 70\% of the data were used for training, and the remainder
	were used to assess the generalization performance. {I}n general,
	the {SVM} methodology was able to produce models with the best predictive
	performance, followed by {BRANN} and then {PLSDA}. {H}owever, a small
	number of data sets showed either equivalent or better predictability
	using {PLSDA}, which may indicate relatively linear relationships
	in these data sets. {A}ll {SVM} models showed predictive ability
	(>60\% of test set predicted correctly) and five out of the 12 test
	sets showed excellent prediction (>80\% prediction accuracy). {T}hese
	models represent the first use of pattern recognition methods to
	discriminate between substrates and nonsubstrates of human drug metabolizing
	enzymes and the first thorough assessment of three classification
	algorithms using multiple metabolic data sets.},
  doi = {10.1021/ci034108k},
  pdf = {../local/Sorich2003Comparison.pdf},
  file = {Sorich2003Comparison.pdf:local/Sorich2003Comparison.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci034108k}
}

@article{Sotiriou2003Breast,
  author      = {Sotiriou, C. and Neo, S.-Y. and McShane, L. M. and Korn, E. L. and
    Long, P. M. and Jazaeri, A. and Martiat, P. and Fox, S. B. and Harris,
    A. L. and Liu, E. T.},
  title       = {Breast cancer classification and prognosis based on gene expression
    profiles from a population-based study.},
  journal     = {Proc. Natl. Acad. Sci. U. S. A.},
  year        = {2003},
  month       = {Sep},
  volume      = {100},
  number      = {18},
  pages       = {10393--10398},
  doi         = {10.1073/pnas.1732912100},
  url         = {http://dx.doi.org/10.1073/pnas.1732912100},
  pmid        = {12917485},
  pii         = {1732912100},
  institution = {Division of Clinical Sciences, National Cancer Institute, Advanced
    Technology Center, 8717 Grovemont Circle, Gaithersburg, MD 20877,
    USA.},
  language    = {eng},
  medline-pst = {ppublish},
  keywords    = {breastcancer},
  pdf         = {../local/Sotiriou2003Breast.pdf},
  file        = {Sotiriou2003Breast.pdf:Sotiriou2003Breast.pdf:PDF},
  owner       = {jp},
  timestamp   = {2010.07.02},
  abstract    = {Comprehensive gene expression patterns generated from cDNA microarrays
    were correlated with detailed clinico-pathological characteristics
    and clinical outcome in an unselected group of 99 node-negative and
    node-positive breast cancer patients. Gene expression patterns were
    found to be strongly associated with estrogen receptor (ER) status
    and moderately associated with grade, but not associated with menopausal
    status, nodal status, or tumor size. Hierarchical cluster analysis
    segregated the tumors into two main groups based on their ER status,
    which correlated well with basal and luminal characteristics. Cox
    proportional hazards regression analysis identified 16 genes that
    were significantly associated with relapse-free survival at a stringent
    significance level of 0.001 to account for multiple comparisons.
    Of 231 genes previously reported by others [van't Veer, L. J., et
    al. (2002) Nature 415, 530-536] as being associated with survival,
    93 probe elements overlapped with the set of 7,650 probe elements
    represented on the arrays used in this study. Hierarchical cluster
    analysis based on the set of 93 probe elements segregated our population
    into two distinct subgroups with different relapse-free survival
    (P < 0.03). The number of these 93 probe elements showing significant
    univariate association with relapse-free survival (P < 0.05) in the
    present study was 14, representing 11 unique genes. Genes involved
    in cell cycle, DNA replication, and chromosomal stability were consistently
    elevated in the various poor prognostic groups. In addition, glutathione
    S-transferase M3 emerged as an important survival marker in both
    studies. When taken together with other array studies, our results
    highlight the consistent biological and clinical associations with
    gene expression profiles.}
}

@article{Sotiriou2009Gene-Expression,
  author    = {Sotiriou, C. and Pusztai, L.},
  title     = {Gene-Expression Signatures in Breast Cancer},
  journal   = {N. Engl. J. Med.},
  year      = {2009},
  volume    = {360},
  number    = {8},
  pages     = {790--800},
  doi       = {10.1056/NEJMra0801289},
  url       = {http://dx.doi.org/10.1056/NEJMra0801289},
  pdf       = {../local/Sotiriou2009Gene-Expression.pdf},
  file      = {Sotiriou2009Gene-Expression.pdf:Sotiriou2009Gene-Expression.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.01.14}
}

@article{Sotiriou2006Gene,
  author = {Christos Sotiriou and Pratyaksha Wirapati and Sherene Loi and Adrian
	Harris and Steve Fox and Johanna Smeds and Hans Nordgren and Pierre
	Farmer and Viviane Praz and Benjamin Haibe-Kains and Christine Desmedt
	and Denis Larsimont and Fatima Cardoso and Hans Peterse and Dimitry
	Nuyten and Marc Buyse and van de Vijver, Marc J and Jonas Bergh and
	Martine Piccart and Mauro Delorenzi},
  title = {Gene expression profiling in breast cancer: understanding the molecular
	basis of histologic grade to improve prognosis.},
  journal = {J Natl Cancer Inst},
  year = {2006},
  volume = {98},
  pages = {262--272},
  number = {4},
  month = {Feb},
  abstract = {Histologic grade in breast cancer provides clinically important prognostic
	information. However, 30\%-60\% of tumors are classified as histologic
	grade 2. This grade is associated with an intermediate risk of recurrence
	and is thus not informative for clinical decision making. We examined
	whether histologic grade was associated with gene expression profiles
	of breast cancers and whether such profiles could be used to improve
	histologic grading. We analyzed microarray data from 189 invasive
	breast carcinomas and from three published gene expression datasets
	from breast carcinomas. We identified differentially expressed genes
	in a training set of 64 estrogen receptor (ER)-positive tumor samples
	by comparing expression profiles between histologic grade 3 tumors
	and histologic grade 1 tumors and used the expression of these genes
	to define the gene expression grade index. Data from 597 independent
	tumors were used to evaluate the association between relapse-free
	survival and the gene expression grade index in a Kaplan-Meier analysis.
	All statistical tests were two-sided. We identified 97 genes in our
	training set that were associated with histologic grade; most of
	these genes were involved in cell cycle regulation and proliferation.
	In validation datasets, the gene expression grade index was strongly
	associated with histologic grade 1 and 3 status; however, among histologic
	grade 2 tumors, the index spanned the values for histologic grade
	1-3 tumors. Among patients with histologic grade 2 tumors, a high
	gene expression grade index was associated with a higher risk of
	recurrence than a low gene expression grade index (hazard ratio =
	3.61, 95\% confidence interval = 2.25 to 5.78; P < .001, log-rank
	test). Gene expression grade index appeared to reclassify patients
	with histologic grade 2 tumors into two groups with high versus low
	risks of recurrence. This approach may improve the accuracy of tumor
	grading and thus its prognostic value.},
  doi = {10.1093/jnci/djj052},
  institution = {Functional Genomics and Translational Research Unit, Universit{\'e} Libre
	de Bruxelles, Brussels, Belgium. christos.sotiriou@bordet.be},
  keywords = {Breast Neoplasms, chemistry/genetics/pathology; Cell Cycle; Cell Proliferation;
	Disease-Free Survival; Female; Gene Expression Profiling; Gene Expression
	Regulation, Neoplastic; Humans; Lymphatic Metastasis; Mathematical
	Computing; Middle Aged; Multivariate Analysis; Oligonucleotide Array
	Sequence Analysis; Prognosis; Proportional Hazards Models; Receptors,
	Estrogen, analysis; Risk Factors},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {98/4/262},
  pmid = {16478745},
  timestamp = {2011.06.01},
  url = {http://dx.doi.org/10.1093/jnci/djj052}
}

@article{Southern1999Molecular,
  author = {Edwin Southern and Kalim Mir and Mikhail Shchepinov},
  title = {Molecular interactions on microarrays},
  journal = {Nat. Genet.},
  year = {1999},
  volume = {21},
  pages = {5--9},
  keywords = {csbcbook, csbcbook-ch2}
}

@article{Southern1975Detection,
  author  = {Southern, E. M.},
  title   = {Detection of specific sequences among {DNA} fragments separated by
    gel electrophoresis},
  journal = {J. {M}ol. {B}iol.},
  volume  = {98},
  pages   = {503--517},
  year    = {1975}
}

@article{Spellman1998Comprehensive,
  author = {Spellman, P. T. and Sherlock, G. and Zhang, M. Q. and Iyer, V. R. and
	Anders, K. and Eisen, M. B. and Brown, P. O. and Botstein, D. and Futcher,
	B.},
  title = {Comprehensive {I}dentification of {C}ell {C}ycle-regulated {G}enes
	of the {Y}east {S}accharomyces cerevisiae by {M}icroarray {H}ybridization},
  journal = {Mol. {B}iol. {C}ell},
  year = {1998},
  volume = {9},
  pages = {3273--3297},
  number = {12},
  pdf = {../local/spel98.pdf},
  file = {spel98.pdf:local/spel98.pdf:PDF},
  subject = {microarray},
  url = {http://www.molbiolcell.org/cgi/reprint/9/12/3273.pdf}
}

@article{Spencer2009Non-genetic,
  author = {Sabrina L Spencer and Suzanne Gaudet and John G Albeck and John M
	Burke and Peter K Sorger},
  title = {Non-genetic origins of cell-to-cell variability in {TRAIL}-induced
	apoptosis.},
  journal = {Nature},
  year = {2009},
  volume = {459},
  pages = {428--432},
  number = {7245},
  month = {May},
  abstract = {In microorganisms, noise in gene expression gives rise to cell-to-cell
	variability in protein concentrations. In mammalian cells, protein
	levels also vary and individual cells differ widely in their responsiveness
	to uniform physiological stimuli. In the case of apoptosis mediated
	by TRAIL (tumour necrosis factor (TNF)-related apoptosis-inducing
	ligand) it is common for some cells in a clonal population to die
	while others survive---a striking divergence in cell fate. Among cells
	that die, the time between TRAIL exposure and caspase activation
	is highly variable. Here we image sister cells expressing reporters
	of caspase activation and mitochondrial outer membrane permeabilization
	after exposure to TRAIL. We show that naturally occurring differences
	in the levels or states of proteins regulating receptor-mediated
	apoptosis are the primary causes of cell-to-cell variability in the
	timing and probability of death in human cell lines. Protein state
	is transmitted from mother to daughter, giving rise to transient
	heritability in fate, but protein synthesis promotes rapid divergence
	so that sister cells soon become no more similar to each other than
	pairs of cells chosen at random. Our results have implications for
	understanding 'fractional killing' of tumour cells after exposure
	to chemotherapy, and for variability in mammalian signal transduction
	in general.},
  doi = {10.1038/nature08012},
  institution = {Center for Cell Decision Processes, Department of Systems Biology,
	Harvard Medical School, Boston, Massachusetts 02115, USA.},
  keywords = {highcontentscreening},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature08012},
  pmid = {19363473},
  timestamp = {2009.10.30},
  url = {http://dx.doi.org/10.1038/nature08012}
}

@article{Speybroeck2002From,
  author = {{Van Speybroeck}, L.},
  title = {From Epigenesis to Epigenetics: The Case of {C. H. Waddington}},
  journal = {Annals of the New York Academy of Sciences},
  year = {2002},
  volume = {981},
  pages = {61--81},
  keywords = {csbcbook}
}

@article{Spyrou2009BayesPeak,
  author = {Christiana Spyrou and Rory Stark and Andy G Lynch and Simon Tavar{\'e}},
  title = {{BayesPeak}: {Bayesian} analysis of {ChIP-seq} data.},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {299},
  abstract = {BACKGROUND: High-throughput sequencing technology has become popular
	and widely used to study protein and DNA interactions. Chromatin
	immunoprecipitation, followed by sequencing of the resulting samples,
	produces large amounts of data that can be used to map genomic features
	such as transcription factor binding sites and histone modifications.
	METHODS: Our proposed statistical algorithm, BayesPeak, uses a fully
	Bayesian hidden Markov model to detect enriched locations in the
	genome. The structure accommodates the natural features of the Solexa/Illumina
	sequencing data and allows for overdispersion in the abundance of
	reads in different regions. Moreover, a control sample can be incorporated
	in the analysis to account for experimental and sequence biases.
	Markov chain Monte Carlo algorithms are applied to estimate the posterior
	distributions of the model parameters, and posterior probabilities
	are used to detect the sites of interest. CONCLUSION: We have presented
	a flexible approach for identifying peaks from ChIP-seq reads, suitable
	for use on both transcription factor binding and histone modification
	data. Our method estimates probabilities of enrichment that can be
	used in downstream analysis. The method is assessed using experimentally
	verified data and is shown to provide high-confidence calls with
	low false positive rates.},
  doi = {10.1186/1471-2105-10-299},
  pdf = {../local/Spyrou2009BayesPeak.pdf},
  file = {Spyrou2009BayesPeak.pdf:Spyrou2009BayesPeak.pdf:PDF},
  institution = {Statistical Laboratory, Centre for Mathematical Sciences, Wilberforce
	Road, Cambridge, UK. C.Spyrou@statslab.cam.ac.uk},
  keywords = {ngs},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-10-299},
  pmid = {19772557},
  timestamp = {2009.10.29},
  url = {http://dx.doi.org/10.1186/1471-2105-10-299}
}

@inproceedings{Srebro2003Weighted,
  author    = {Srebro, N. and Jaakkola, T.},
  title     = {Weighted Low-Rank Approximations},
  editor    = {Fawcett, T. and Mishra, N.},
  booktitle = {Proceedings of the Twentieth International Conference on Machine
    Learning},
  publisher = {AAAI Press},
  pages     = {720--727},
  year      = {2003},
  timestamp = {2007.10.22}
}

@inproceedings{Srebro2005Maximum,
  author = {N. Srebro and J. D. M. Rennie and T. S. Jaakkola},
  title = {Maximum-Margin Matrix Factorization},
  booktitle = {Adv. Neural. Inform. Process Syst. 17},
  year = {2005},
  editor = {L. K. Saul and Y. Weiss and L. Bottou},
  pages = {1329--1336},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  timestamp = {2006.05.30}
}

@inproceedings{Srebro2005Rank,
  author = {Nathan Srebro and Adi Shraibman},
  title = {Rank, Trace-Norm and Max-Norm.},
  booktitle = {COLT},
  year = {2005},
  pages = {545--560},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  doi = {10.1007/11503415_37},
  ee = {http://dx.doi.org/10.1007/11503415_37},
  url = {http://dx.doi.org/10.1007/11503415_37}
}

@article{Srebrow2006Connection,
  author = {A. Srebrow and A. R. Kornblihtt},
  title = {The connection between splicing and cancer},
  journal = {J. Cell Sci.},
  year = {2006},
  volume = {119},
  pages = {2635--2641},
  keywords = {csbcbook}
}

@inproceedings{Sriphaew2009Cool,
  author = {Sriphaew, K. and Takamura, H. and Okumura, M.},
  title = {Cool Blog Classification from Positive and Unlabeled Examples},
  booktitle = {Proceedings of the 13th Pacific-Asia Conference on Advances in Knowledge
	Discovery and Data Mining},
  year = {2009},
  editor = {Theeramunkong, T. and Kijsirikul, B. and Cercone, N. and Ho, T-B.},
  series = {PAKDD '09},
  pages = {62--73},
  address = {Berlin, Heidelberg},
  publisher = {Springer-Verlag},
  abstract = {We address the problem of cool blog classification using only positive
	and unlabeled examples. We propose an algorithm, called PUB, that
	exploits the information of unlabeled data together with the positive
	examples to predict whether the unseen blogs are cool or not. The
	algorithm uses the weighting technique to assign a weight to each
	unlabeled example which is assumed to be negative in the training
	set, and the bagging technique to obtain several weak classifiers,
	each of which is learned on a small training set generated by randomly
	sampling some positive examples and some unlabeled examples, which
	are assumed to be negative. Each of the weak classifiers must achieve
	admissible performance measure evaluated based on the whole labeled
	positive examples or has the best performance measure within iteration
	limit. The majority voting function on all weak classifiers is employed
	to predict the class of a test instance. The experimental results
	show that PUB can correctly predict the classes of unseen blogs where
	this situation cannot be handled by the traditional learning from
	positive and negative examples. The results also show that PUB outperforms
	other algorithms for learning from positive and unlabeled examples
	in the task of cool blog classification.},
  acmid = {1533869},
  date-added = {2010-07-14 19:22:27 +0200},
  date-modified = {2010-07-14 19:22:27 +0200},
  doi = {10.1007/978-3-642-01307-2_9},
  isbn = {978-3-642-01306-5},
  keywords = {Cool blog, PU-learning, bagging, weighting examples},
  location = {Bangkok, Thailand},
  m3 = {10.1007/978-3-642-01307-2{\_}9},
  numpages = {12},
  owner = {fantine},
  timestamp = {2010.07.14},
  ty = {CHAPTER},
  url = {http://dx.doi.org/10.1007/978-3-642-01307-2_9}
}

@article{Srivastava1979Estimation,
  author = {Srivastava, V. K. and Dwivedi, T. D.},
  title = {Estimation of seemingly unrelated regression equations: A brief
	survey},
  journal = {Journal of Econometrics},
  year = {1979},
  volume = {10},
  pages = {15--32},
  number = {1},
  month = {April},
  url = {http://ideas.repec.org/a/eee/econom/v10y1979i1p15-32.html}
}

@incollection{Stadler1999"Spectral,
  author = {Stadler, P. F.},
  title = {Spectral landscape theory},
  booktitle = {{E}volutionary {D}ynamics --- {E}xploring the {I}nterplay of {S}election,
	{N}eutrality, {A}ccident and {F}unction},
  publisher = {Oxford University Press},
  year = {1999},
  editor = {J. P. Crutchfield and P. Schuster},
  address = {New York},
  url = {http://citeseer.nj.nec.com/stadler99spectral.html}
}

@article{Stadler1996Landscapes,
  author = {Stadler, P. F.},
  title = {Landscapes and {T}heir {C}orrelation {F}unctions},
  journal = {J. {M}ath. {C}hem.},
  year = {1996},
  volume = {20},
  pages = {1--45},
  pdf = {../local/stad96.pdf},
  file = {stad96.pdf:local/stad96.pdf:PDF},
  subject = {net},
  url = {http://citeseer.nj.nec.com/43461.html}
}

@article{Stahura2004Virtual,
  author = {Florence L Stahura and J{\"u}rgen Bajorath},
  title = {Virtual screening methods that complement {HTS}.},
  journal = {Comb {C}hem {H}igh {T}hroughput {S}creen},
  year = {2004},
  volume = {7},
  pages = {259--269},
  number = {4},
  month = {Jun},
  abstract = {In this review, we discuss a number of computational methods that
	have been developed or adapted for molecule classification and virtual
	screening ({VS}) of compound databases. {I}n particular, we focus
	on approaches that are complementary to high-throughput screening
	({HTS}). {T}he discussion is limited to {VS} methods that operate
	at the small molecular level, which is often called ligand-based
	{VS} ({LBVS}), and does not take into account docking algorithms
	or other structure-based screening tools. {W}e describe areas that
	greatly benefit from combining virtual and biological screening and
	discuss computational methods that are most suitable to contribute
	to the integration of screening technologies. {R}elevant approaches
	range from established methods such as clustering or similarity searching
	to techniques that have only recently been introduced for {LBVS}
	applications such as statistical methods or support vector machines.
	{F}inally, we discuss a number of representative applications at
	the interface between {VS} and {HTS}.},
  keywords = {Algorithms, Animals, Antisense, Artificial Intelligence, Cell Line,
	Cluster Analysis, Comparative Study, Computational Biology, Computer
	Simulation, DNA Fingerprinting, Drug Evaluation, Fluorescence, Fuzzy
	Logic, Gene Silencing, Gene Targeting, Genetic, Hela Cells, Humans,
	Imaging, Intracellular Space, Microscopy, Models, Neoplasms, Neural
	Networks (Computer), Non-U.S. Gov't, Oligonucleotides, P.H.S., Preclinical,
	Prognosis, Proteomics, Quantitative Structure-Activity Relationship,
	RNA, RNA Interference, Research Support, Sensitivity and Specificity,
	Small Interfering, Thionucleotides, Three-Dimensional, Tumor, U.S.
	Gov't},
  pmid = {15200375}
}

@article{Staiger2012Critical,
  author = {Staiger, C. and Cadot, S. and Kooter, R. and Dittrich, M. and M{\"u}ller,
	T. and Klau, G. W. and Wessels, L. F. A.},
  title = {A Critical Evaluation of Network and Pathway-Based Classifiers for
	Outcome Prediction in Breast Cancer},
  journal = {PLoS ONE},
  year = {2012},
  volume = {7},
  pages = {e34796},
  number = {4},
  doi = {10.1371/journal.pone.0034796},
  publisher = {Public Library of Science}
}

@inproceedings{Stapley2002Predicting,
  author = {Stapley, B. J. and Kelley, L. A. and Sternberg, M. J.},
  title = {Predicting the sub-cellular location of proteins from text using
	support vector machines.},
  booktitle = {Proceedings of the {P}acific {S}ymposium on {B}iocomputing 2002},
  year = {2002},
  editor = {Russ B. Altman and A. Keith Dunker and Lawrence Hunter and Kevin
	Lauderdale and Teri E. Klein},
  pages = {374--385},
  publisher = {World Scientific},
  abstract = {We present an automatic method to classify the sub-cellular location
	of proteins based on the text of relevant medline abstracts. {F}or
	each protein, a vector of terms is generated from medline abstracts
	in which the protein/gene's name or synonym occurs. {A} {S}upport
	{V}ector {M}achine ({SVM}) is used to automatically partition the
	term space and to thus discriminate the textual features that define
	sub-cellular location. {T}he method is benchmarked on a set of proteins
	of known sub-cellular location from {S}. cerevisiae. {N}o prior knowledge
	of the problem domain nor any natural language processing is used
	at any stage. {T}he method out-performs support vector machines trained
	on amino acid composition and has comparable performance to rule-based
	text classifiers. {C}ombining text with protein amino-acid composition
	improves recall for some sub-cellular locations. {W}e discuss the
	generality of the method and its potential application to a variety
	of biological classification problems.},
  pdf = {../local/Stapley2002Predicting.pdf},
  file = {Stapley2002Predicting.pdf:local/Stapley2002Predicting.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://www.smi.stanford.edu/projects/helix/psb02/stapley.pdf}
}

@article{Stark2006BioGRID:,
  author = {Stark, C. and Breitkreutz, B-J. and Reguly, T. and Boucher, L. and
	Breitkreutz, A. and Tyers, M.},
  title = {{BioGRID}: a general repository for interaction datasets.},
  journal = {Nucleic Acids Res},
  year = {2006},
  volume = {34},
  pages = {D535--D539},
  number = {Database issue},
  month = {Jan},
  abstract = {Access to unified datasets of protein and genetic interactions is
	critical for interrogation of gene/protein function and analysis
	of global network properties. BioGRID is a freely accessible database
	of physical and genetic interactions available at http://www.thebiogrid.org.
	BioGRID release version 2.0 includes >116 000 interactions from Saccharomyces
	cerevisiae, Caenorhabditis elegans, Drosophila melanogaster and Homo
	sapiens. Over 30 000 interactions have recently been added from 5778
	sources through exhaustive curation of the Saccharomyces cerevisiae
	primary literature. An internally hyper-linked web interface allows
	for rapid search and retrieval of interaction data. Full or user-defined
	datasets are freely downloadable as tab-delimited text files and
	PSI-MI XML. Pre-computed graphical layouts of interactions are available
	in a variety of file formats. User-customized graphs with embedded
	protein, gene and interaction attributes can be constructed with
	a visualization system called Osprey that is dynamically linked to
	the BioGRID.},
  doi = {10.1093/nar/gkj109},
  institution = {Ontario Cancer Institute, Princess Margaret Hospital, 610 University
	Avenue, Toronto, Ontario, Canada M5G 2M9.},
  owner = {fantine},
  pii = {34/suppl_1/D535},
  pmid = {16381927},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/nar/gkj109}
}

@article{Starkuviene2007potential,
  author      = {Starkuviene, V. and Pepperkok, R.},
  title       = {The potential of high-content high-throughput microscopy in drug
    discovery.},
  journal     = {Br. J. Pharmacol.},
  year        = {2007},
  month       = {Sep},
  volume      = {152},
  number      = {1},
  pages       = {62--71},
  doi         = {10.1038/sj.bjp.0707346},
  url         = {http://dx.doi.org/10.1038/sj.bjp.0707346},
  pmid        = {17603554},
  pii         = {0707346},
  institution = {Cell Biology and Cell Biophysics Unit, European Molecular Biology
    Laboratory (EMBL), Meyerhofstrasse 1, 69117 Heidelberg, Germany.
    starkuvi@embl.de},
  owner       = {jp},
  timestamp   = {2008.12.09},
  abstract    = {Fluorescence microscopy is a powerful method to study protein function
    in its natural habitat, the living cell. With the availability of
    the green fluorescent protein and its spectral variants, almost any
    gene of interest can be fluorescently labelled in living cells opening
    the possibility to study protein localization, dynamics and interactions.
    The emergence of automated cellular systems allows rapid visualization
    of large groups of cells and phenotypic analysis in a quantitative
    manner. Here, we discuss recent advances in high-content high-throughput
    microscopy and its potential application to several steps of the
    drug discovery process.}
}

@article{Statnikov2004Methods,
  author = {Alexander Statnikov and Constantin F Aliferis and Ioannis Tsamardinos},
  title = {Methods for multi-category cancer diagnosis from gene expression
	data: a comprehensive evaluation to inform decision support system
	development.},
  journal = {Medinfo},
  year = {2004},
  volume = {11},
  pages = {813--817},
  number = {Pt 2},
  abstract = {Cancer diagnosis is a major clinical applications area of gene expression
	microarray technology. {W}e are seeking to develop a system for cancer
	diagnostic model creation based on microarray data. {I}n order to
	equip the system with the optimal combination of data modeling methods,
	we performed a comprehensive evaluation of several major classification
	algorithms, gene selection methods, and cross-validation designs
	using 11 datasets spanning 74 diagnostic categories (41 cancer types
	and 12 normal tissue types). {T}he {M}ulti-{C}ategory {S}upport {V}ector
	{M}achine techniques by {C}rammer and {S}inger, {W}eston and {W}atkins,
	and one-versus-rest were found to be the best methods and they outperform
	other learning algorithms such as {K}-{N}earest {N}eighbors and {N}eural
	{N}etworks often to a remarkable degree. {G}ene selection techniques
	are shown to significantly improve classification performance. {T}hese
	results guided the development of a software system that fully automates
	cancer diagnostic model construction with quality on par with or
	better than previously published results derived by expert human
	analysts.},
  keywords = {biosvm},
  pii = {D040004907}
}

@article{Statnikov2005comprehensive,
  author = {Statnikov, A. and Aliferis, C. F. and Tsamardinos, I. and Hardin,
	D. and Levy, S.},
  title = {A comprehensive evaluation of multicategory classification methods
	for microarray gene expression cancer diagnosis},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {631--643},
  number = {5},
  abstract = {Motivation: {C}ancer diagnosis is one of the most important emerging
	clinical applications of gene expression microarray technology. {W}e
	are seeking to develop a computer system for powerful and reliable
	cancer diagnostic model creation based on microarray data. {T}o keep
	a realistic perspective on clinical applications we focus on multicategory
	diagnosis. {I}n order to equip the system with the optimum combination
	of classifier, gene selection and cross-validation methods, we performed
	a systematic and comprehensive evaluation of several major algorithms
	for multicategory classification, several gene selection methods,
	multiple ensemble classifier methods, and two cross validation designs
	using 11 datasets spanning 74 diagnostic categories and 41 cancer
	types and 12 normal tissue types. {R}esults: {M}ulticategory {S}upport
	{V}ector {M}achines ({MC}-{SVM}s) are the most effective classifiers
	in performing accurate cancer diagnosis from gene expression data.
	{T}he {MC}-{SVM} techniques by {C}rammer and {S}inger, {W}eston and
	{W}atkins, and one-versus-rest were found to be the best methods
	in this domain. {MC}-{SVM}s outperform other popular machine learning
	algorithms such as {K}-{N}earest {N}eighbors, {B}ackpropagation and
	{P}robabilistic {N}eural {N}etworks, often to a remarkable degree.
	{G}ene selection techniques can significantly improve classification
	performance of both {MC}-{SVM}s and other non-{SVM} learning algorithms.
	{E}nsemble classifiers do not generally improve performance of the
	best non-ensemble models. {T}hese results guided the construction
	of a software system {GEMS} ({G}ene {E}xpression {M}odel {S}elector)
	that automates high-quality model construction and enforces sound
	optimization and performance estimation procedures. {T}his is the
	first such system to be informed by a rigorous comparative analysis
	of the available algorithms and datasets. {A}vailability: {T}he software
	system {GEMS} is available for download from http://www.gems-system.org
	for non-commercial use.},
  doi = {10.1093/bioinformatics/bti033},
  pdf = {../local/Statnikov2005comprehensive.pdf},
  file = {Statnikov2005comprehensive.pdf:local/Statnikov2005comprehensive.pdf:PDF},
  keywords = {biosvm, microarray},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/bti033v1}
}

@article{Stein2010Case,
  author      = {Lincoln D Stein},
  title       = {The case for cloud computing in genome informatics.},
  journal     = {Genome Biol},
  year        = {2010},
  volume      = {11},
  number      = {5},
  pages       = {207},
  doi         = {10.1186/gb-2010-11-5-207},
  url         = {http://dx.doi.org/10.1186/gb-2010-11-5-207},
  pmid        = {20441614},
  pii         = {gb-2010-11-5-207},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  timestamp   = {2010.07.28}
}

@article{Steiner2004Discriminating,
  author   = {Steiner, Guido and Suter, Laura and Boess, Franziska and
    Gasser, Rodolfo and de Vera, Maria Cristina and Albertini, Silvio and
    Ruepp, Stefan},
  title    = {Discriminating different classes of toxicants by transcript profiling.},
  journal  = {Environ. Health Perspect.},
  year     = {2004},
  volume   = {112},
  number   = {12},
  pages    = {1236--1248},
  month    = {Aug},
  abstract = {Male rats were treated with various model compounds or the appropriate
    vehicle controls. Most substances were either well-known hepatotoxicants
    or showed hepatotoxicity during preclinical testing. The aim of
    the present study was to determine if biological samples from rats
    treated with various compounds can be classified based on gene expression
    profiles. In addition to gene expression analysis using microarrays,
    a complete serum chemistry profile and liver and kidney histopathology
    were performed. We analyzed hepatic gene expression profiles using
    a supervised learning method (support vector machines; {SVM}s) to
    generate classification rules and combined this with recursive feature
    elimination to improve classification performance and to identify
    a compact subset of probe sets with potential use as biomarkers.
    Two different {SVM} algorithms were tested, and the models obtained
    were validated with a compound-based external cross-validation approach.
    Our predictive models were able to discriminate between hepatotoxic
    and nonhepatotoxic compounds. Furthermore, they predicted the correct
    class of hepatotoxicant in most cases. We provide an example showing
    that a predictive model built on transcript profiles from one rat
    strain can successfully classify profiles from another rat strain.
    In addition, we demonstrate that the predictive models identify
    nonresponders and are able to discriminate between gene changes related
    to pharmacology and toxicity. This work confirms the hypothesis
    that compound classification based on gene expression data is feasible.},
  pdf      = {../local/Steiner2004Discriminating.pdf},
  file     = {Steiner2004Discriminating.pdf:local/Steiner2004Discriminating.pdf:PDF},
  keywords = {biosvm},
  url      = {http://ehp.niehs.nih.gov/txg/docs/2004/7036/abstract.html},
}

@article{Steinwart2005Consistency,
  author  = {Steinwart, I.},
  title   = {Consistency of support vector machines and other regularized kernel
    classifiers},
  journal = {{IEEE} Trans. Inform. Theory},
  year    = {2005},
  volume  = {51},
  number  = {1},
  pages   = {128--142},
  doi     = {10.1109/TIT.2004.839514},
  url     = {http://dx.doi.org/10.1109/TIT.2004.839514},
  pdf     = {../local/Steinwart2005Consistency.pdf},
  file    = {Steinwart2005Consistency.pdf:local/Steinwart2005Consistency.pdf:PDF},
  owner   = {jeanphilippevert},
}

@article{Steinwart2003Sparseness,
  author   = {Steinwart, I.},
  title    = {Sparseness of {Support} {Vector} {Machines}},
  journal  = {J. Mach. Learn. Res.},
  year     = {2003},
  volume   = {4},
  pages    = {1071--1105},
  abstract = {Support vector machines ({SVM}s) construct decision functions that
    are linear combinations of kernel evaluations on the training set.
    The samples with non-vanishing coefficients are called support
    vectors. In this work we establish lower (asymptotical) bounds
    on the number of support vectors. On our way we prove several results
    which are of great importance for the understanding of {SVM}s. In
    particular, we describe to which ``limit'' {SVM} decision functions
    tend, discuss the corresponding notion of convergence and provide
    some results on the stability of {SVM}s using subdifferential calculus
    in the associated reproducing kernel Hilbert space.},
  pdf      = {../local/Steinwart2003Sparseness.pdf},
  file     = {Steinwart2003Sparseness.pdf:local/Steinwart2003Sparseness.pdf:PDF},
}

@article{Steinwart2002Support,
  author   = {Steinwart, I.},
  title    = {{Support} {Vector} {Machines} are {Universally} {Consistent}},
  journal  = {J. Complexity},
  year     = {2002},
  volume   = {18},
  pages    = {768--791},
  abstract = {We show that support vector machines of the 1-norm soft margin type
    are universally consistent provided that the regularization parameter
    is chosen in a distinct manner and the kernel belongs to a specific
    class---the so-called universal kernels---which has recently been considered
    by the author. In particular it is shown that the 1-norm soft margin
    classifier with Gaussian {RBF} kernel on a compact subset $X$ of
    $\mathbb{R}^d$ and regularization parameter $c_n = n^{\beta-1}$ is
    universally consistent, if $n$ is the training set size and
    $0 < \beta < 1/d$.},
  doi      = {10.1006/jcom.2002.0642},
  url      = {http://dx.doi.org/10.1006/jcom.2002.0642},
}

@article{Steinwart2001On,
  author   = {Steinwart, I.},
  title    = {On the influence of the kernel on the consistency of support vector
    machines},
  journal  = {J. Mach. Learn. Res.},
  year     = {2001},
  volume   = {2},
  pages    = {67--93},
  abstract = {In this article we study the generalization abilities of several classifiers
    of support vector machine ({SVM}) type using a certain class of kernels
    that we call universal. It is shown that the soft margin algorithms
    with universal kernels are consistent for a large class of classification
    problems including some kind of noisy tasks provided that the regularization
    parameter is chosen well. In particular we derive a simple sufficient
    condition for this parameter in the case of Gaussian {RBF} kernels.
    On the one hand our considerations are based on an investigation
    of an approximation property---the so-called universality---of the
    used kernels that ensures that all continuous functions can be approximated
    by certain kernel expressions. This approximation property also
    gives a new insight into the role of kernels in these and other algorithms.
    On the other hand the results are achieved by a precise study of
    the underlying optimization problems of the classifiers. Furthermore,
    we show consistency for the maximal margin classifier as well as
    for the soft margin {SVM}'s in the presence of large margins. In
    this case it turns out that also constant regularization parameters
    ensure consistency for the soft margin {SVM}'s. Finally we prove
    that even for simple, noise free classification problems {SVM}'s
    with polynomial kernels can behave arbitrarily badly.},
  pdf      = {../local/Steinwart2001On.pdf},
  file     = {Steinwart2001On.pdf:local/Steinwart2001On.pdf:PDF},
  owner    = {jeanphilippevert},
  url      = {http://jmlr.csail.mit.edu/papers/v2/steinwart01a.html},
}

@incollection{Steinwart2005Density,
  author    = {Steinwart, I. and Hush, D. and Scovel, C.},
  editor    = {Saul, Lawrence K. and Weiss, Yair and Bottou, {L\'{e}on}},
  title     = {Density {Level} {Detection} is {Classification}},
  booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 17},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2005},
  pdf       = {../local/Steinwart2005Density.pdf},
  file      = {Steinwart2005Density.pdf:local/Steinwart2005Density.pdf:PDF},
}

@article{Steinwart2005classification,
  author    = {Steinwart, I. and Hush, D. and Scovel, C.},
  title     = {A classification framework for anomaly detection},
  journal   = {J. Mach. Learn. Res.},
  year      = {2005},
  volume    = {6},
  pages     = {211--232},
  url       = {http://jmlr.csail.mit.edu/papers/volume6/steinwart05a/steinwart05a.pdf},
  pdf       = {../local/Steinwart2005classification.pdf},
  file      = {Steinwart2005classification.pdf:Steinwart2005classification.pdf:PDF},
  owner     = {jp},
  timestamp = {2009.01.26},
}

@techreport{Steinwart2005explicit,
  author      = {Steinwart, I. and Hush, D. and Scovel, C.},
  title       = {An explicit description of the reproducing kernel {Hilbert} spaces
    of {Gaussian} {RBF} kernels},
  institution = {Los Alamos National Laboratory},
  number      = {LA-UR 04-8274},
  year        = {2004},
}

@techreport{Steinwart2004Fast,
  author      = {Steinwart, I. and Scovel, C.},
  title       = {Fast {Rates} for {Support} {Vector} {Machines} using {Gaussian} {Kernels}},
  institution = {Los Alamos National Laboratory},
  number      = {LA-UR 04-8796},
  year        = {2004},
}

@article{Stelling2002Metabolic,
  author      = {Stelling, J. and Klamt, S. and Bettenbrock, K. and Schuster, S. and
    Gilles, E. D.},
  title       = {Metabolic network structure determines key aspects of functionality
    and regulation.},
  journal     = {Nature},
  year        = {2002},
  volume      = {420},
  number      = {6912},
  pages       = {190--193},
  month       = {Nov},
  abstract    = {The relationship between structure, function and regulation in complex
    cellular networks is a still largely open question. Systems biology
    aims to explain this relationship by combining experimental and theoretical
    approaches. Current theories have various strengths and shortcomings
    in providing an integrated, predictive description of cellular networks.
    Specifically, dynamic mathematical modelling of large-scale networks
    meets difficulties because the necessary mechanistic detail and kinetic
    parameters are rarely available. In contrast, structure-oriented
    analyses only require network topology, which is well known in many
    cases. Previous approaches of this type focus on network robustness
    or metabolic phenotype, but do not give predictions on cellular regulation.
    Here, we devise a theoretical method for simultaneously predicting
    key aspects of network functionality, robustness and gene regulation
    from network structure alone. This is achieved by determining and
    analysing the non-decomposable pathways able to operate coherently
    at steady state (elementary flux modes). We use the example of Escherichia
    coli central metabolism to illustrate the method.},
  doi         = {10.1038/nature01166},
  url         = {http://dx.doi.org/10.1038/nature01166},
  pdf         = {../local/Stelling2002Metabolic.pdf},
  file        = {Stelling2002Metabolic.pdf:Stelling2002Metabolic.pdf:PDF},
  institution = {Max Planck Institute for Dynamics of Complex Technical Systems, D-39106
    Magdeburg, Germany. stelling@mpi-magdeburg.mpg.de},
  language    = {eng},
  medline-pst = {ppublish},
  pii         = {nature01166},
  pmid        = {12432396},
  owner       = {jp},
  timestamp   = {2013.01.25},
}

@article{Stelzl2005human,
  author      = {Ulrich Stelzl and Uwe Worm and Maciej Lalowski and Christian Haenig
    and Felix H Brembeck and Heike Goehler and Martin Stroedicke and
    Martina Zenkner and Anke Schoenherr and Susanne Koeppen and Jan Timm
    and Sascha Mintzlaff and Claudia Abraham and Nicole Bock and Silvia
    Kietzmann and Astrid Goedde and Engin Toks{\"o}z and Anja Droege and
    Sylvia Krobitsch and Bernhard Korn and Walter Birchmeier and Hans
    Lehrach and Erich E Wanker},
  title       = {A human protein-protein interaction network: a resource for annotating
    the proteome.},
  journal     = {Cell},
  year        = {2005},
  volume      = {122},
  number      = {6},
  pages       = {957--968},
  month       = {Sep},
  abstract    = {Protein-protein interaction maps provide a valuable framework for
    a better understanding of the functional organization of the proteome.
    To detect interacting pairs of human proteins systematically, a protein
    matrix of 4456 baits and 5632 preys was screened by automated yeast
    two-hybrid (Y2H) interaction mating. We identified 3186 mostly novel
    interactions among 1705 proteins, resulting in a large, highly connected
    network. Independent pull-down and co-immunoprecipitation assays
    validated the overall quality of the Y2H interactions. Using topological
    and GO criteria, a scoring system was developed to define 911 high-confidence
    interactions among 401 proteins. Furthermore, the network was searched
    for interactions linking uncharacterized gene products and human
    disease proteins to regulatory cellular pathways. Two novel Axin-1
    interactions were validated experimentally, characterizing ANP32A
    and CRMP1 as modulators of Wnt signaling. Systematic human protein
    interaction screens can lead to a more comprehensive understanding
    of protein function and cellular processes.},
  doi         = {10.1016/j.cell.2005.08.029},
  url         = {http://dx.doi.org/10.1016/j.cell.2005.08.029},
  institution = {Max Delbrueck Center for Molecular Medicine, 13092 Berlin-Buch, Germany.},
  keywords    = {Databases as Topic; Humans; Intracellular Signaling Peptides and Proteins;
    Models, Molecular; Nerve Tissue Proteins; Protein Binding; Proteins;
    Proteomics; Repressor Proteins; Two-Hybrid System Techniques},
  pii         = {S0092-8674(05)00866-4},
  pmid        = {16169070},
  owner       = {phupe},
  timestamp   = {2010.09.01},
}

@phdthesis{Stempfel2009Robustesse,
  author    = {Stempfel, G.},
  title     = {Robustesse des s{\'e}parateurs lin{\'e}aires au bruit de classification},
  school    = {Universit{\'e} de Provence},
  year      = {2009},
  owner     = {mordelet},
  timestamp = {2010.12.08},
}

@incollection{Stempfel2009Learning,
  author      = {Stempfel, G. and Ralaivola, L.},
  editor      = {Alippi, Cesare and Polycarpou, Marios and Panayiotou, Christos and
    Ellinas, Georgios},
  title       = {Learning {SVMs} from Sloppily Labeled Data},
  booktitle   = {Artificial Neural Networks -- {ICANN} 2009},
  series      = {Lecture Notes in Computer Science},
  volume      = {5768},
  publisher   = {Springer Berlin / Heidelberg},
  year        = {2009},
  pages       = {884--893},
  doi         = {10.1007/978-3-642-04274-4_91},
  url         = {http://dx.doi.org/10.1007/978-3-642-04274-4_91},
  affiliation = {Aix-Marseille Universit{\'e} Laboratoire d'Informatique Fondamentale de
    Marseille},
  owner       = {mordelet},
  timestamp   = {2010.12.08},
}

@article{Stephens2011Massive,
  author      = {Philip J Stephens and Chris D Greenman and Beiyuan Fu and Fengtang
    Yang and Graham R Bignell and Laura J Mudie and Erin D Pleasance
    and King Wai Lau and David Beare and Lucy A Stebbings and Stuart
    McLaren and Meng-Lay Lin and David J McBride and Ignacio Varela and
    Serena Nik-Zainal and Catherine Leroy and Mingming Jia and Andrew
    Menzies and Adam P Butler and Jon W Teague and Michael A Quail and
    John Burton and Harold Swerdlow and Nigel P Carter and Laura A Morsberger
    and Christine Iacobuzio-Donahue and George A Follows and Anthony
    R Green and Adrienne M Flanagan and Michael R Stratton and P. Andrew
    Futreal and Peter J Campbell},
  title       = {Massive genomic rearrangement acquired in a single catastrophic event
    during cancer development.},
  journal     = {Cell},
  year        = {2011},
  volume      = {144},
  number      = {1},
  pages       = {27--40},
  month       = {Jan},
  abstract    = {Cancer is driven by somatically acquired point mutations and chromosomal
    rearrangements, conventionally thought to accumulate gradually over
    time. Using next-generation sequencing, we characterize a phenomenon,
    which we term chromothripsis, whereby tens to hundreds of genomic
    rearrangements occur in a one-off cellular crisis. Rearrangements
    involving one or a few chromosomes crisscross back and forth across
    involved regions, generating frequent oscillations between two copy
    number states. These genomic hallmarks are highly improbable if rearrangements
    accumulate over time and instead imply that nearly all occur during
    a single cellular catastrophe. The stamp of chromothripsis can be
    seen in at least 2\%-3\% of all cancers, across many subtypes, and
    is present in $\sim$25\% of bone cancers. We find that one, or indeed
    more than one, cancer-causing lesion can emerge out of the genomic
    crisis. This phenomenon has important implications for the origins
    of genomic remodeling and temporal emergence of cancer.},
  doi         = {10.1016/j.cell.2010.11.055},
  url         = {http://dx.doi.org/10.1016/j.cell.2010.11.055},
  institution = {Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK.},
  keywords    = {Bone Neoplasms, genetics; Cell Line, Tumor; Chromosome Aberrations;
    Chromosome Painting; Female; Gene Rearrangement; Humans; Leukemia,
    Lymphocytic, Chronic, B-Cell, genetics; Middle Aged; Neoplasms, genetics/pathology},
  language    = {eng},
  medline-pst = {ppublish},
  pii         = {S0092-8674(10)01377-2},
  pmid        = {21215367},
  owner       = {phupe},
  timestamp   = {2011.06.01},
}

@article{Stoddart2010Nucleobase,
  author      = {David Stoddart and Andrew J Heron and Jochen Klingelhoefer and Ellina
    Mikhailova and Giovanni Maglia and Hagan Bayley},
  title       = {Nucleobase recognition in {ssDNA} at the central constriction of the
    alpha-hemolysin pore.},
  journal     = {Nano Lett},
  year        = {2010},
  volume      = {10},
  number      = {9},
  pages       = {3633--3637},
  month       = {Sep},
  abstract    = {Nanopores are under investigation for single-molecule DNA sequencing.
    The alpha-hemolysin (alphaHL) protein nanopore contains three recognition
    points capable of nucleobase discrimination in individual immobilized
    ssDNA molecules. We have modified the recognition point R(1) by extensive
    mutagenesis of residue 113. Amino acids that provide an energy barrier
    to ion flow (e.g., bulky or hydrophobic residues) strengthen base
    identification, while amino acids that lower the barrier weaken it.
    Amino acids with related side chains produce similar patterns of
    nucleobase recognition providing a rationale for the redesign of
    recognition points.},
  doi         = {10.1021/nl101955a},
  url         = {http://dx.doi.org/10.1021/nl101955a},
  institution = {Department of Chemistry, University of Oxford, Oxford OX1 3TA, United
    Kingdom.},
  keywords    = {Amino Acid Substitution; Base Sequence; DNA, Single-Stranded, chemistry;
    Hemolysin Proteins, chemistry; Models, Molecular; Mutagenesis},
  language    = {eng},
  medline-pst = {ppublish},
  pmid        = {20704324},
  owner       = {phupe},
  timestamp   = {2011.06.01},
}

@article{Stone1977Consistent,
  author  = {Stone, C. J.},
  title   = {Consistent nonparametric regression},
  journal = {Ann. Stat.},
  year    = {1977},
  volume  = {5},
  number  = {4},
  pages   = {595--645},
  pdf     = {../local/Stone1977Consistent.pdf},
  file    = {Stone1977Consistent.pdf:local/Stone1977Consistent.pdf:PDF},
  owner   = {jeanphilippevert},
  url     = {http://links.jstor.org/sici?sici=0090-5364%28197707%295%3A4%3C595%3ACNR%3E2.0.CO%3B2-O},
}

@article{Strahl2000language,
  author      = {Strahl, B. D. and Allis, C. D.},
  title       = {The language of covalent histone modifications},
  journal     = {Nature},
  year        = {2000},
  volume      = {403},
  number      = {6765},
  pages       = {41--45},
  month       = {Jan},
  abstract    = {Histone proteins and the nucleosomes they form with DNA are the fundamental
    building blocks of eukaryotic chromatin. A diverse array of post-translational
    modifications that often occur on tail domains of these proteins
    has been well documented. Although the function of these highly conserved
    modifications has remained elusive, converging biochemical and genetic
    evidence suggests functions in several chromatin-based processes.
    We propose that distinct histone modifications, on one or more tails,
    act sequentially or in combination to form a 'histone code' that
    is, read by other proteins to bring about distinct downstream events.},
  doi         = {10.1038/47412},
  url         = {http://dx.doi.org/10.1038/47412},
  institution = {Department of Biochemistry and Molecular Genetics, University of
    Virginia Health Science Center, Charlottesville 22908, USA.},
  keywords    = {Acetylation; Amino Acid Sequence; Animals; Chromatin, physiology;
    Histones, chemistry/metabolism/physiology; Humans; Lysine, physiology;
    Microtubules, physiology; Models, Biological; Molecular Sequence
    Data; Phosphorylation; Protein Processing, Post-Translational; Serine,
    metabolism},
  language    = {eng},
  medline-pst = {ppublish},
  pmid        = {10638745},
  owner       = {jp},
  timestamp   = {2010.11.23},
}

@article{Strahl2001Methylation,
  author      = {B. D. Strahl and S. D. Briggs and C. J. Brame and J. A. Caldwell
    and S. S. Koh and H. Ma and R. G. Cook and J. Shabanowitz and D.
    F. Hunt and M. R. Stallcup and C. D. Allis},
  title       = {Methylation of histone {H4} at arginine 3 occurs in vivo and is mediated
    by the nuclear receptor coactivator {PRMT1}.},
  journal     = {Curr Biol},
  year        = {2001},
  volume      = {11},
  number      = {12},
  pages       = {996--1000},
  month       = {Jun},
  abstract    = {Posttranslational modifications of histone amino termini play an important
    role in modulating chromatin structure and function. Lysine methylation
    of histones has been well documented, and recently this modification
    has been linked to cellular processes involving gene transcription
    and heterochromatin assembly. However, the existence of arginine
    methylation on histones has remained unclear. Recent discoveries
    of protein arginine methyltransferases, CARM1 and PRMT1, as transcriptional
    coactivators for nuclear receptors suggest that histones may be physiological
    targets of these enzymes as part of a poorly defined transcriptional
    activation pathway. Here we show by using mass spectrometry that
    histone H4, isolated from asynchronously growing human 293T cells,
    is methylated at arginine 3 (Arg-3) in vivo. In support, a novel
    antibody directed against histone H4 methylated at Arg-3 independently
    demonstrates the in vivo occurrence of this modification and reveals
    that H4 Arg-3 methylation is highly conserved throughout eukaryotes.
    Finally, we show that PRMT1 is the major, if not exclusive, H4 Arg-3
    methyltransferase in human 293T cells. These findings suggest a role
    for arginine methylation of histones in the transcription process.},
  doi         = {10.1016/S0960-9822(01)00294-9},
  url         = {http://dx.doi.org/10.1016/S0960-9822(01)00294-9},
  institution = {Department of Biochemistry and Molecular Genetics, University of
    Virginia Health Science Center, Charlottesville, VA 22908, USA.},
  keywords    = {Amino Acid Motifs; Animals; Arginine, metabolism; Cell Line; Genes,
    Reporter; Histones, metabolism; Humans; Immunoblotting; Methylation;
    Protein-Arginine N-Methyltransferases, metabolism; Recombinant Fusion
    Proteins, genetics/metabolism},
  language    = {eng},
  medline-pst = {ppublish},
  pii         = {S0960-9822(01)00294-9},
  pmid        = {11448779},
  owner       = {jp},
  timestamp   = {2010.11.23},
}

@article{Strahl2002Set2,
  author      = {Brian D Strahl and Patrick A Grant and Scott D Briggs and Zu-Wen
    Sun and James R Bone and Jennifer A Caldwell and Sahana Mollah and
    Richard G Cook and Jeffrey Shabanowitz and Donald F Hunt and C. David
    Allis},
  title       = {Set2 is a nucleosomal histone {H3}-selective methyltransferase that
    mediates transcriptional repression.},
  journal     = {Mol Cell Biol},
  year        = {2002},
  volume      = {22},
  number      = {5},
  pages       = {1298--1306},
  month       = {Mar},
  abstract    = {Recent studies of histone methylation have yielded fundamental new
    insights pertaining to the role of this modification in gene activation
    as well as in gene silencing. While a number of methylation sites
    are known to occur on histones, only limited information exists regarding
    the relevant enzymes that mediate these methylation events. We thus
    sought to identify native histone methyltransferase (HMT) activities
    from Saccharomyces cerevisiae. Here, we describe the biochemical
    purification and characterization of Set2, a novel HMT that is site-specific
    for lysine 36 (Lys36) of the H3 tail. Using an antiserum directed
    against Lys36 methylation in H3, we show that Set2, via its SET domain,
    is responsible for methylation at this site in vivo. Tethering of
    Set2 to a heterologous promoter reveals that Set2 represses transcription,
    and part of this repression is mediated through the HMT activity
    of the SET domain. These results suggest that Set2 and methylation
    at H3 Lys36 play a role in the repression of gene transcription.},
  doi         = {10.1128/MCB.22.5.1298-1306.2002},
  url         = {http://dx.doi.org/10.1128/MCB.22.5.1298-1306.2002},
  institution = {Department of Biochemistry and Molecular Genetics, University of Virginia
    Health System, University of Virginia, Charlottesville, Virginia
    22908, USA.},
  keywords    = {Amino Acid Sequence; Gene Expression Regulation, Fungal; Histones,
    metabolism; Methyltransferases, metabolism; Molecular Sequence Data;
    Nucleosomes, enzymology; Saccharomyces cerevisiae Proteins, metabolism;
    Saccharomyces cerevisiae, enzymology/genetics; Substrate Specificity;
    Transcription, Genetic; Transcriptional Activation},
  language    = {eng},
  medline-pst = {ppublish},
  pmid        = {11839797},
  owner       = {jp},
  timestamp   = {2010.11.23},
}

@article{Strahl1999Methylation,
  author      = {B. D. Strahl and R. Ohba and R. G. Cook and C. D. Allis},
  title       = {Methylation of histone {H3} at lysine 4 is highly conserved and correlates
    with transcriptionally active nuclei in {Tetrahymena}.},
  journal     = {Proc Natl Acad Sci U S A},
  year        = {1999},
  volume      = {96},
  number      = {26},
  pages       = {14967--14972},
  month       = {Dec},
  abstract    = {Studies into posttranslational modifications of histones, notably
    acetylation, have yielded important insights into the dynamic nature
    of chromatin structure and its fundamental role in gene expression.
    The roles of other covalent histone modifications remain poorly understood.
    To gain further insight into histone methylation, we investigated
    its occurrence and pattern of site utilization in Tetrahymena, yeast,
    and human HeLa cells. In Tetrahymena, transcriptionally active macronuclei,
    but not transcriptionally inert micronuclei, contain a robust histone
    methyltransferase activity that is highly selective for H3. Microsequence
    analyses of H3 from Tetrahymena, yeast, and HeLa cells indicate that
    lysine 4 is a highly conserved site of methylation, which to date,
    is the major site detected in Tetrahymena and yeast. These data document
    a nonrandom pattern of H3 methylation that does not overlap with
    known acetylation sites in this histone. In as much as H3 methylation
    at lysine 4 appears to be specific to macronuclei in Tetrahymena,
    we suggest that this modification pattern plays a facilitatory role
    in the transcription process in a manner that remains to be determined.
    Consistent with this possibility, H3 methylation in yeast occurs
    preferentially in a subpopulation of H3 that is preferentially acetylated.},
  doi         = {10.1073/pnas.96.26.14967},
  url         = {http://dx.doi.org/10.1073/pnas.96.26.14967},
  institution = {Department of Biochemistry, University of Virginia Health Science
    Center, Charlottesville, VA 22908, USA.},
  keywords    = {Acetyltransferases, metabolism; Amino Acid Sequence; Animals; Cell
    Nucleus, metabolism; Hela Cells; Histone Acetyltransferases; Histone-Lysine
    N-Methyltransferase; Histones, metabolism; Humans; Lysine, analogs
    \& derivatives/metabolism; Methylation; Methyltransferases, metabolism;
    Molecular Sequence Data; Protein Methyltransferases; Protein Processing,
    Post-Translational; Saccharomyces cerevisiae Proteins; Species Specificity;
    Tetrahymena thermophila; Transcription, Genetic; Yeasts},
  language    = {eng},
  medline-pst = {ppublish},
  pmid        = {10611321},
  owner       = {jp},
  timestamp   = {2010.11.23},
}

@article{Stransky2006Regional,
  author      = {Stransky, N. and Vallot, C. and Reyal, F. and Bernard-Pierrot, I.
    and Diez de Medina, S. G. and Segraves, R. and de Rycke, Y. and Elvin,
    P. and Cassidy, A. and Spraggon, C. and Graham, A. and Southgate,
    J. and Asselain, B. and Allory, Y. and Abbou, C. C. and Albertson,
    D. G. and Thiery, J.-P. and Chopin, D. K. and Pinkel, D. and Radvanyi,
    F.},
  title       = {Regional copy number-independent deregulation of transcription in
    cancer},
  journal     = {Nat. Genet.},
  year        = {2006},
  volume      = {38},
  number      = {12},
  pages       = {1386--1396},
  month       = {Dec},
  abstract    = {Genetic and epigenetic alterations have been identified that lead
    to transcriptional deregulation in cancers. Genetic mechanisms may
    affect single genes or regions containing several neighboring genes,
    as has been shown for DNA copy number changes. It was recently reported
    that epigenetic suppression of gene expression can also extend to
    a whole region; this is known as long-range epigenetic silencing.
    Various techniques are available for identifying regional genetic
    alterations, but no large-scale analysis has yet been carried out
    to obtain an overview of regional epigenetic alterations. We carried
    out an exhaustive search for regions susceptible to such mechanisms
    using a combination of transcriptome correlation map analysis and
    array CGH data for a series of bladder carcinomas. We validated one
    candidate region experimentally, demonstrating histone methylation
    leading to the loss of expression of neighboring genes without DNA
    methylation.},
  doi         = {10.1038/ng1923},
  url         = {http://dx.doi.org/10.1038/ng1923},
  pdf         = {../local/Stransky2006Regional.pdf},
  file        = {Stransky2006Regional.pdf:Stransky2006Regional.pdf:PDF},
  institution = {UMR 144 Centre National de la Recherche Scientifique (CNRS)/Institut
    Curie, 75248 Paris Cedex 05, France.},
  keywords    = {csbcbook},
  pii         = {ng1923},
  pmid        = {17099711},
  owner       = {jp},
  timestamp   = {2009.10.08},
}

@article{Stratton2008Emerging,
  author   = {Stratton, M. R. and Rahman, N.},
  title    = {The emerging landscape of breast cancer susceptibility},
  journal  = {Nat. Genet.},
  year     = {2008},
  volume   = {40},
  number   = {1},
  pages    = {17--22},
  doi      = {10.1038/ng.2007.53},
  keywords = {csbcbook},
}

@article{Strauss2004Objective,
  author   = {Strauss, Daniel J and Delb, Wolfgang and Plinkert, Peter K},
  title    = {Objective detection of the central auditory processing disorder:
    a new machine learning approach.},
  journal  = {{IEEE} Trans Biomed Eng},
  year     = {2004},
  volume   = {51},
  number   = {7},
  pages    = {1147--1155},
  month    = {Jul},
  abstract = {The objective detection of binaural interaction is of diagnostic interest
    for the evaluation of the central auditory processing disorder ({CAPD}).
    The beta-wave of the binaural interaction component in auditory
    brainstem responses has been suggested as an objective measure of
    binaural interaction and has been shown to be of diagnostic value
    in the {CAPD} diagnosis. However, a reliable and automated detection
    of the beta-wave capable of clinical use still remains a challenge.
    We propose a new machine learning approach to the detection of
    the {CAPD} that is based on adapted tight frame decompositions which
    are tailored for support vector machines with radial kernels. Using
    shift-invariant scale and morphological features of the binaurally
    evoked brainstem potentials, our approach provides at least comparable
    results to the beta-wave detection in view of the discrimination
    of subjects being at risk for {CAPD} and subjects being not at risk
    for {CAPD}. Furthermore, as no information from the monaurally
    evoked potentials is necessary, the measurement cost is reduced by
    two-thirds compared to the computation of the binaural interaction
    component. We conclude that a machine learning approach in the
    form of a hybrid tight frame-support vector classification is effective
    in the objective detection of the {CAPD}.},
}

@article{Strobl2007Bias,
  author    = {Strobl, C. and Boulesteix, A.-L. and Zeileis, A. and Hothorn, T.},
  title     = {Bias in random forest variable importance measures: Illustrations,
    sources and a solution},
  journal   = {BMC Bioinformatics},
  year      = {2007},
  volume    = {8},
  number    = {1},
  pages     = {25},
  publisher = {BioMed Central Ltd},
  doi       = {10.1186/1471-2105-8-25},
}

@article{Strogatz2001Exploring,
  author  = {Strogatz, S. H.},
  title   = {Exploring complex networks},
  journal = {Nature},
  year    = {2001},
  volume  = {410},
  number  = {6825},
  pages   = {268--276},
  doi     = {10.1038/35065725},
  pdf     = {../local/stro01.pdf},
  file    = {stro01.pdf:local/stro01.pdf:PDF},
  subject = {compnet},
  url     = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v410/n6825/full/410268a0_fs.html&content_filetype=pdf},
}

@article{Stucki2005JTB,
  author   = {Stucki, J. W. and Simon, H.-U.},
  title    = {Mathematical modeling of the regulation of caspase-3 activation and
    degradation},
  journal  = {Journal of Theoretical Biology},
  year     = {2005},
  volume   = {234},
  number   = {1},
  pages    = {123--131},
  abstract = {Caspases are thought to be important players in the execution process
    of apoptosis. Inhibitors of apoptosis (IAPs) are able to block caspases
    and therefore apoptosis. The fact that a subgroup of the IAP family
    inhibits active caspases implies that not each caspase activation
    necessarily leads to apoptosis. In such a scenario, however, processed
    and enzymically active caspases should somehow be removed. Indeed,
    IAP-caspase complexes covalently bind ubiquitin, resulting in degradation
    by the 26S proteasome. Following release from mitochondria, IAP antagonists
    (e.g. second mitochondrial activator of caspases (Smac)) inactivate
    IAPs. Moreover, although pro-apoptotic factors such as irradiation
    or anti-cancer drugs may release Smac from mitochondria in tumor
    cells, high cytoplasmic survivin and ML-IAP levels might be able
    to neutralize it and, consequently, IAPs would further be able to
    bind activated caspases. Here, we propose a simple mathematical model,
    describing the molecular interactions between Smac deactivators,
    Smac, IAPs, and caspase-3, including the requirements for both induction
    and prevention of apoptosis, respectively. In addition, we predict
    a novel mechanism of caspase-3 degradation that might be particularly
    relevant in long-living cells.},
  doi      = {10.1016/j.jtbi.2004.11.011},
  issn     = {0022-5193},
  keywords = {csbcbook},
  url      = {http://www.sciencedirect.com/science/article/B6WMD-4F9N72G-1/2/1dbb63d611f86dc936c8d6cb218685f0},
}

@article{Sturn2002Genesis:,
  author = {Alexander Sturn and John Quackenbush and Zlatko Trajanoski},
  title = {Genesis: cluster analysis of microarray data.},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {207--208},
  number = {1},
  month = {Jan},
  abstract = {A versatile, platform independent and easy to use {J}ava suite for
	large-scale gene expression analysis was developed. {G}enesis integrates
	various tools for microarray data analysis such as filters, normalization
	and visualization tools, distance measures as well as common clustering
	algorithms including hierarchical clustering, self-organizing maps,
	k-means, principal component analysis, and support vector machines.
	{T}he results of the clustering are transparent across all implemented
	methods and enable the analysis of the outcome of different algorithms
	and parameters. {A}dditionally, mapping of gene expression data onto
	chromosomal sequences was implemented to enhance promoter analysis
	and investigation of transcriptional control mechanisms.},
  keywords = {Algorithms, Artificial Intelligence, Cluster Analysis, Comparative
	Study, Computational Biology, Databases, Gene Expression Profiling,
	Genetic, Models, Molecular Structure, Neural Networks (Computer),
	Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, Principal
	Component Analysis, Programming Languages, Promoter Regions (Genetics),
	Protein, Proteins, Research Support, Software, Statistical, Transcription},
  pmid = {11836235}
}

@article{Su2002Large-scale,
  author = {Su, A. I. and Cooke, M. P. and Ching, K. A. and Hakak, Y. and Walker,
	J. R. and Wiltshire, T. and Orth, A. P. and Vega, R. Q. and Sapinoso,
	L. M. and Moqrich, A. and Patapoutian, A. and Hampton, G. M. and Schultz,
	P. G. and Hogenesch, J. B.},
  title = {Large-scale analysis of the human and mouse transcriptomes.},
  journal = {Proc. Natl. Acad. Sci. U. S. A.},
  year = {2002},
  volume = {99},
  pages = {4465--4470},
  number = {7},
  month = {Apr},
  abstract = {High-throughput gene expression profiling has become an important
	tool for investigating transcriptional activity in a variety of biological
	samples. To date, the vast majority of these experiments have focused
	on specific biological processes and perturbations. Here, we have
	generated and analyzed gene expression from a set of samples spanning
	a broad range of biological conditions. Specifically, we profiled
	gene expression from 91 human and mouse samples across a diverse
	array of tissues, organs, and cell lines. Because these samples predominantly
	come from the normal physiological state in the human and mouse,
	this dataset represents a preliminary, but substantial, description
	of the normal mammalian transcriptome. We have used this dataset
	to illustrate methods of mining these data, and to reveal insights
	into molecular and physiological gene function, mechanisms of transcriptional
	regulation, disease etiology, and comparative genomics. Finally,
	to allow the scientific community to use this resource, we have built
	a free and publicly accessible website (http://expression.gnf.org)
	that integrates data visualization and curation of current gene annotations.},
  doi = {10.1073/pnas.012025199},
  institution = {Department of Chemistry, The Scripps Research Institute, La Jolla,
	CA 92037, USA.},
  keywords = {Animals; Collagen; Female; Gene Expression Profiling; Humans; Male;
	Mice; Organ Specificity; Polymerase Chain Reaction; Receptors, Cell
	Surface; Transcription, Genetic},
  owner = {mordelet},
  pii = {012025199},
  pmid = {11904358},
  timestamp = {2010.11.02},
  url = {http://dx.doi.org/10.1073/pnas.012025199}
}

@article{Su2001Molecular,
  author = {Su, A. I. and Welsh, J. B. and Sapinoso, L. M. and Kern, S. G. and
	Dimitrov, P. and Lapp, H. and Schultz, P. G. and Powell, S. M. and
	Moskaluk, C. A. and Frierson, Jr., H. F. and Hampton, G. M.},
  title = {Molecular {C}lassification of {H}uman {C}arcinomas by {U}se of {G}ene
	{E}xpression {S}ignatures},
  journal = {Cancer {R}es.},
  year = {2001},
  volume = {61},
  pages = {7388--7393},
  number = {20},
  abstract = {Classification of human tumors according to their primary anatomical
	site of origin is fundamental for the optimal treatment of patients
	with cancer. {H}ere we describe the use of large-scale {RNA} profiling
	and supervised machine learning algorithms to construct a first-generation
	molecular classification scheme for carcinomas of the prostate, breast,
	lung, ovary, colorectum, kidney, liver, pancreas, bladder/ureter,
	and gastroesophagus, which collectively account for $\sim$70\% of all
	cancer-related deaths in the {U}nited {S}tates. {T}he classification
	scheme was based on identifying gene subsets whose expression typifies
	each cancer class, and we quantified the extent to which these genes
	are characteristic of a specific tumor type by accurately and confidently
	predicting the anatomical site of tumor origin for 90\% of 175 carcinomas,
	including 9 of 12 metastatic lesions. {T}he predictor gene subsets
	include those whose expression is typical of specific types of normal
	epithelial differentiation, as well as other genes whose expression
	is elevated in cancer. {T}his study demonstrates the feasibility
	of predicting the tissue origin of a carcinoma in the context of
	multiple cancer classes.},
  pdf = {../local/Su2001Molecular.pdf.html},
  file = {Su2001Molecular.pdf.html:local/Su2001Molecular.pdf.html:PDF},
  keywords = {biosvm, breastcancer},
  owner = {jeanphilippevert},
  url = {http://cancerres.aacrjournals.org/cgi/content/abstract/61/20/7388}
}

@article{Su2003RankGene,
  author = {Su, Yang and Murali, T. M. and Pavlovic, Vladimir and Schaffer, Michael
	and Kasif, Simon},
  title = {{RankGene}: identification of diagnostic genes based on expression
	data},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1578--1579},
  number = {12},
  abstract = {Summary: {R}ank{G}ene is a program for analyzing gene expression data
	and computing diagnostic genes based on their predictive power in
	distinguishing between different types of samples. {T}he program
	integrates into one system a variety of popular ranking criteria,
	ranging from the traditional t-statistic to one-dimensional support
	vector machines. {T}his flexibility makes {R}ank{G}ene a useful tool
	in gene expression analysis and feature selection. {A}vailability:
	http://genomics10.bu.edu/yangsu/rankgene {C}ontact: murali@bu.edu},
  pdf = {../local/Su2003RankGene.pdf},
  file = {Su2003RankGene.pdf:local/Su2003RankGene.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/12/1578}
}

@article{Subramanian2005Gene,
  author = {Subramanian, A. and Tamayo, P. and Mootha, V. K. and Mukherjee, S.
	and Ebert, B. L. and Gillette, M. A. and Paulovich, A. and Pomeroy,
	S. L. and Golub, T. R. and Lander, E. S. and Mesirov, J. P.},
  title = {Gene set enrichment analysis: a knowledge-based approach for interpreting
	genome-wide expression profiles},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2005},
  month = {Oct},
  volume = {102},
  number = {43},
  pages = {15545--15550},
  doi = {10.1073/pnas.0506580102},
  pmid = {16199517},
  pii = {0506580102},
  url = {http://dx.doi.org/10.1073/pnas.0506580102},
  institution = {Broad Institute of Massachusetts Institute of Technology and Harvard,
	320 Charles Street, Cambridge, MA 02141, USA.},
  abstract = {Although genomewide RNA expression analysis has become a routine tool
	in biomedical research, extracting biological insight from such information
	remains a major challenge. Here, we describe a powerful analytical
	method called Gene Set Enrichment Analysis (GSEA) for interpreting
	gene expression data. The method derives its power by focusing on
	gene sets, that is, groups of genes that share common biological
	function, chromosomal location, or regulation. We demonstrate how
	GSEA yields insights into several cancer-related data sets, including
	leukemia and lung cancer. Notably, where single-gene analysis finds
	little similarity between two independent studies of patient survival
	in lung cancer, GSEA reveals many biological pathways in common.
	The GSEA method is embodied in a freely available software package,
	together with an initial database of 1,325 biologically defined gene
	sets.},
  pdf = {../local/Subramanian2005Gene.pdf},
  file = {Subramanian2005Gene.pdf:Subramanian2005Gene.pdf:PDF},
  owner = {jp},
  timestamp = {2008.12.05}
}

@article{Sultan2002Binary,
  author = {M. Sultan and D. A. Wigle and C. A. Cumbaa and M. Maziarz and J.
	Glasgow and M. S. Tsao and I. Jurisica},
  title = {Binary tree-structured vector quantization approach to clustering
	and visualizing microarray data.},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {S111--S119},
  number = {Suppl 1},
  abstract = {M{OTIVATION}: {W}ith the increasing number of gene expression databases,
	the need for more powerful analysis and visualization tools is growing.
	{M}any techniques have successfully been applied to unravel latent
	similarities among genes and/or experiments. {M}ost of the current
	systems for microarray data analysis use statistical methods, hierarchical
	clustering, self-organizing maps, support vector machines, or k-means
	clustering to organize genes or experiments into 'meaningful' groups.
	{W}ithout prior explicit bias almost all of these clustering methods
	applied to gene expression data not only produce different results,
	but may also produce clusters with little or no biological relevance.
	{O}f these methods, agglomerative hierarchical clustering has been
	the most widely applied, although many limitations have been identified.
	{RESULTS}: {S}tarting with a systematic comparison of the underlying
	theories behind clustering approaches, we have devised a technique
	that combines tree-structured vector quantization and partitive k-means
	clustering ({BTSVQ}). {T}his hybrid technique has revealed clinically
	relevant clusters in three large publicly available data sets. {I}n
	contrast to existing systems, our approach is less sensitive to data
	preprocessing and data normalization. {I}n addition, the clustering
	results produced by the technique have strong similarities to those
	of self-organizing maps ({SOM}s). {W}e discuss the advantages and
	the mathematical reasoning behind our approach.}
}

@article{Sun2003Identifying,
  author = {Sun, Y. F. and Fan, X. D. and Li, Y. D.},
  title = {Identifying splicing sites in eukaryotic {RNA}: support vector machine
	approach.},
  journal = {Comput. {B}iol. {M}ed.},
  year = {2003},
  volume = {33},
  pages = {17--29},
  number = {1},
  abstract = {We introduce a new method for splicing sites prediction based on the
	theory of support vector machines ({SVM}). {T}he {SVM} represents
	a new approach to supervised pattern classification and has been
	successfully applied to a wide range of pattern recognition problems.
	{I}n the process of splicing sites prediction, the statistical information
	of {RNA} secondary structure in the vicinity of splice sites, e.g.
	donor and acceptor sites, is introduced in order to compare recognition
	ratio of true positive and true negative. {F}rom the results of comparison,
	addition of structural information has brought no significant benefit
	for the recognition of splice sites and had even lowered the rate
	of recognition. {O}ur results suggest that, through three cross validation,
	the {SVM} method can achieve a good performance for splice sites
	identification.},
  doi = {10.1016/S0010-4825(02)00057-4},
  pdf = {../local/Sun2003Identifying.pdf},
  file = {Sun2003Identifying.pdf:local/Sun2003Identifying.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/S0010-4825(02)00057-4}
}

@article{Sun2004protein,
  author = {Zhenghong Sun and Xiaoli Fu and Lu Zhang and Xiaoli Yang and Feizhou
	Liu and Gengxi Hu},
  title = {A protein chip system for parallel analysis of multi-tumor markers
	and its application in cancer detection.},
  journal = {Anticancer {R}es},
  year = {2004},
  volume = {24},
  pages = {1159--1165},
  number = {2C},
  abstract = {B{ACKGROUND}: {T}umor markers are routinely measured in clinical oncology.
	{H}owever, their value in cancer detection has been controversial
	largely because no single tumor marker is sensitive and specific
	enough to meet strict diagnostic criteria. {O}ne strategy to overcome
	the shortcomings of single tumor markers is to measure a combination
	of tumor markers to increase sensitivity and look for distinct patterns
	to increase specificity. {T}his study aimed to develop a system for
	parallel detection of tumor markers as a tool for tumor detection
	in both cancer patients and asymptomatic populations at high risk.
	{MATERIALS} {AND} {METHODS}: {A} protein chip was fabricated with
	twelve monoclonal antibodies against the following tumor markers
	respectively: {CA}125, {CA}15-3, {CA}19-9, {CA}242, {CEA}, {AFP},
	{PSA}, free-{PSA}, {HGH}, beta-{HCG}, {NSE} and ferritin. {T}umor
	markers were captured after the protein chip was incubated with serum
	samples. {A} secondary antibody conjugated with {HRP} was used to
	detect the captured tumor markers using chemiluminescence technique.
	{Q}uantification of the tumor markers was obtained after calibration
	with standard curves. {RESULTS}: {T}he chip system showed an overall
	sensitivity of 68.18\% after testing 1147 cancer patients, with high
	sensitivities for liver, pancreas and ovarian tumors and low sensitivities
	for gastrointestinal tumors, and a specificity of 97.1\% after testing
	793 healthy individuals. {A}pplication of the chip system in physical
	checkups of 15,867 individuals resulted in 16 cases that were subsequently
	confirmed as having cancers. {A}nalysis of the detection results
	with a {S}upport {V}ector {M}achine algorithm considerably increased
	the specificity of the system as reflected in healthy individuals
	and hepatitis/cirrhosis patients, but only modestly decreased the
	sensitivity for cancer patients. {CONCLUSION}: {T}his protein chip
	system is a potential tool for assisting cancer diagnosis and for
	screening cancer in high-risk populations.},
  keywords = {Antibodies, Artificial Intelligence, Biological, Calibration, Female,
	Horseradish Peroxidase, Humans, Male, Monoclonal, Neoplasms, Protein
	Array Analysis, Sensitivity and Specificity, Tumor Markers},
  pmid = {15154641}
}

@article{Surabhi2002RNA,
  author = {Surabhi, R. M. and Gaynor, R. B.},
  title = {{RNA} interference directed against viral and cellular targets inhibits
	human immunodeficiency virus type 1 replication.},
  journal = {J. Virol.},
  year = {2002},
  volume = {76},
  pages = {12963--12973},
  number = {24},
  month = {Dec},
  abstract = {Human immunodeficiency virus type 1 (HIV-1) gene expression is regulated
	by both cellular transcription factors and Tat. The ability of Tat
	to stimulate transcriptional elongation is dependent on its binding
	to TAR RNA in conjunction with cyclin T1 and CDK9. A variety of other
	cellular factors that bind to the HIV-1 long terminal repeat, including
	NF-kappaB, SP1, LBP, and LEF, are also important in the control of
	HIV-1 gene expression. Although these factors have been demonstrated
	to regulate HIV-1 gene expression by both genetic and biochemical
	analysis, in most cases a direct in vivo demonstration of their role
	on HIV-1 replication has not been established. Recently, the efficacy
	of RNA interference in mammalian cells has been shown utilizing small
	interfering RNAs (siRNAs) to result in the specific degradation of
	host mRNAs and decreases the levels of their corresponding proteins.
	In this study, we addressed whether siRNAs directed against either
	HIV-1 tat or reverse transcriptase or the NF-kappaB p65 subunit could
	specifically decrease the levels of these proteins and thus alter
	HIV-1 replication. Our results demonstrate the specificity of siRNAs
	for decreasing the expression of these viral and cellular proteins
	and inhibiting HIV-1 replication. These studies suggest that RNA
	interference is useful in exploring the biological role of cellular
	and viral regulatory factors involved in the control of HIV-1 gene
	expression.},
  keywords = {sirna},
  owner = {vert},
  pmid = {12438622},
  timestamp = {2006.03.28}
}

@article{Surgand2006chemogenomic,
  author = {Jean-S{\'e}bastien Surgand and Jordi Rodrigo and Esther Kellenberger
	and Didier Rognan},
  title = {A chemogenomic analysis of the transmembrane binding cavity of human
	{G-protein-coupled} receptors.},
  journal = {Proteins},
  year = {2006},
  volume = {62},
  pages = {509--538},
  number = {2},
  month = {Feb},
  abstract = {The amino acid sequences of 369 human nonolfactory G-protein-coupled
	receptors (GPCRs) have been aligned at the seven transmembrane domain
	(TM) and used to extract the nature of 30 critical residues supposed--from
	the X-ray structure of bovine rhodopsin bound to retinal--to line
	the TM binding cavity of ground-state receptors. Interestingly, the
	clustering of human GPCRs from these 30 residues mirrors the recently
	described phylogenetic tree of full-sequence human GPCRs (Fredriksson
	et al., Mol Pharmacol 2003;63:1256-1272) with few exceptions. A TM
	cavity could be found for all investigated GPCRs with physicochemical
	properties matching that of their cognate ligands. The current approach
	allows a very fast comparison of most human GPCRs from the focused
	perspective of the predicted TM cavity and permits to easily detect
	key residues that drive ligand selectivity or promiscuity.},
  doi = {10.1002/prot.20768},
  keywords = {Amino Acid Sequence; Binding Sites; Genomics; Humans; Ligands; Models,
	Molecular; Phylogeny; Receptors, G-Protein-Coupled},
  owner = {laurent},
  pmid = {16294340},
  timestamp = {2008.03.27},
  url = {http://dx.doi.org/10.1002/prot.20768}
}

@article{Sussenguth1963Graph,
  author = {E. H. Sussenguth},
  title = {A Graph-Theoretic Algorithm for Matching Chemical Structures},
  journal = {J. Chem. Doc.},
  year = {1965},
  volume = {5},
  pages = {36--43},
  number = {1}
}

@article{Suter2008Two-hybrid,
  author = {Bernhard Suter and Saranya Kittanakom and Igor Stagljar},
  title = {Two-hybrid technologies in proteomics research.},
  journal = {Curr Opin Biotechnol},
  year = {2008},
  month = {Aug},
  volume = {19},
  number = {4},
  pages = {316--323},
  doi = {10.1016/j.copbio.2008.06.005},
  pmid = {18619540},
  pii = {S0958-1669(08)00075-X},
  url = {http://dx.doi.org/10.1016/j.copbio.2008.06.005},
  institution = {Department of Biochemistry and Department of Molecular Genetics,
	Terrence Donnelly Centre for Cellular and Biomolecular Research (DCCBR),
	University of Toronto, 160 College Street, Toronto, ON M5S 3E1, Canada.},
  keywords = {Animals; Drug Design; Mammals; Proteomics; Two-Hybrid System Techniques},
  abstract = {Given that protein-protein interactions (PPIs) regulate nearly every
	living process; the exploration of global and pathway-specific protein
	interaction networks is expected to have major implications in the
	understanding of diseases and for drug discovery. Consequently, the
	development and application of methodologies that address physical
	associations among proteins is of major importance in today's proteomics
	research. The most widely and successfully used methodology to assess
	PPIs is the yeast two-hybrid system (YTH). Here we present an overview
	on the current applications of YTH and variant technologies in yeast
	and mammalian systems. Two-hybrid-based methods will not only continue
	to have a dominant role in the assessment of protein interactomes
	but will also become important in the development of novel compounds
	that target protein interaction interfaces for therapeutic intervention.},
  owner = {phupe},
  timestamp = {2010.08.31}
}

@article{Sutherland2009Transcription,
  author = {Heidi Sutherland and Wendy A Bickmore},
  title = {Transcription factories: gene expression in unions?},
  journal = {Nat Rev Genet},
  year = {2009},
  month = {Jul},
  volume = {10},
  number = {7},
  pages = {457--466},
  doi = {10.1038/nrg2592},
  pmid = {19506577},
  pii = {nrg2592},
  url = {http://dx.doi.org/10.1038/nrg2592},
  institution = {MRC Human Genetics Unit, Institute of Genetics and Molecular Medicine,
	Crewe Road, Edinburgh EH4 2XU, UK.},
  keywords = {Animals; Cell Nucleus; DNA-Directed RNA Polymerases; Genome; Genomic
	Instability; Humans; Models, Biological; Transcription, Genetic},
  abstract = {Transcription is a fundamental step in gene expression, yet it remains
	poorly understood at a cellular level. Visualization of transcription
	sites and active genes has led to the suggestion that transcription
	occurs at discrete sites in the nucleus, termed transcription factories,
	where multiple active RNA polymerases are concentrated and anchored
	to a nuclear substructure. However, this concept is not universally
	accepted. This Review discusses the experimental evidence in support
	of the transcription factory model and the evidence that argues against
	such a spatially structured view of transcription. The transcription
	factory model has implications for the regulation of transcription
	initiation and elongation, for the organization of genes in the genome,
	for the co-regulation of genes and for genome instability.},
  owner = {phupe},
  timestamp = {2010.08.27}
}

@article{Sutherland2003Spline-fitting,
  author = {Sutherland, J. J. and O'Brien, L. A. and Weaver, D. F.},
  title = {Spline-fitting with a genetic algorithm: a method for developing
	classification structure-activity relationships.},
  journal = {J. Chem. Inf. Comput. Sci.},
  year = {2003},
  volume = {43},
  number = {6},
  pages = {1906--1915},
  doi = {10.1021/ci034143r},
  pmid = {14632439},
  url = {http://dx.doi.org/10.1021/ci034143r},
  institution = {Departments of Chemistry and Pathology, Queen's University, Kingston,
	Ontario, Canada K7L 3N6.},
  keywords = {chemoinformatics},
  abstract = {Classification methods allow for the development of structure-activity
	relationship models when the target property is categorical rather
	than continuous. We describe a classification method which fits descriptor
	splines to activities, with descriptors selected using a genetic
	algorithm. This method, which we identify as SFGA, is compared to
	the well-established techniques of recursive partitioning (RP) and
	soft independent modeling by class analogy (SIMCA) using five series
	of compounds: cyclooxygenase-2 (COX-2) inhibitors, benzodiazepine
	receptor (BZR) ligands, estrogen receptor (ER) ligands, dihydrofolate
	reductase (DHFR) inhibitors, and monoamine oxidase (MAO) inhibitors.
	Only 1-D and 2-D descriptors were used. Approximately 40\% of compounds
	in each series were assigned to a test set, "cherry-picked" from
	the complete set such that they lie outside the training set as much
	as possible. SFGA produced models that were more predictive for all
	but the DHFR set, for which SIMCA was most predictive. RP gave the
	least predictive models for all but the MAO set. A similar trend
	was observed when using training and test sets to which compounds
	were randomly assigned and when gradually eliminating compounds from
	the (designed) training set. The stability of models was examined
	for the random and reduced sets, where stability means that classification
	statistics and the selected descriptors are similar for models derived
	from different sets. Here, SIMCA produced the most stable models,
	followed by SFGA and RP. We show that a consensus approach that combines
	all three methods outperforms the single best model for all data
	sets.},
  pdf = {../local/Sutherland2003Spline-fitting.pdf},
  file = {Sutherland2003Spline-fitting.pdf:Sutherland2003Spline-fitting.pdf:PDF},
  owner = {jp},
  timestamp = {2009.03.12}
}

@article{Suthram2005Plasmodium,
  author = {Suthram, S. and Sittler, T. and Ideker, T.},
  title = {The {Plasmodium} protein network diverges from those of other eukaryotes},
  journal = {Nature},
  year = {2005},
  volume = {438},
  pages = {108--112},
  number = {7064},
  month = {Nov},
  abstract = {Plasmodium falciparum is the pathogen responsible for over 90\% of
	human deaths from malaria. Therefore, it has been the focus of a
	considerable research initiative, involving the complete DNA sequencing
	of the genome, large-scale expression analyses, and protein characterization
	of its life-cycle stages. The Plasmodium genome sequence is relatively
	distant from those of most other eukaryotes, with more than 60\%
	of the 5,334 encoded proteins lacking any notable sequence similarity
	to other organisms. To systematically elucidate functional relationships
	among these proteins, a large two-hybrid study has recently mapped
	a network of 2,846 interactions involving 1,312 proteins within Plasmodium.
	This network adds to a growing collection of available interaction
	maps for a number of different organisms, and raises questions about
	whether the divergence of Plasmodium at the sequence level is reflected
	in the configuration of its protein network. Here we examine the
	degree of conservation between the Plasmodium protein network and
	those of model organisms. Although we find 29 highly connected protein
	complexes specific to the network of the pathogen, we find very little
	conservation with complexes observed in other organisms (three in
	yeast, none in the others). Overall, the patterns of protein interaction
	in Plasmodium, like its genome sequence, set it apart from other
	species.},
  doi = {10.1038/nature04135},
  pdf = {../local/Suthram2005Plasmodium.pdf},
  file = {Suthram2005Plasmodium:local/Suthram2005Plasmodium.pdf:PDF},
  institution = {Bioinformatics Program, University of California, San Diego, California
	92093, USA. ssuthram@ucsd.edu},
  owner = {jp},
  pii = {nature04135},
  pmid = {16267557},
  timestamp = {2008.10.02},
  url = {http://dx.doi.org/10.1038/nature04135}
}

@article{Suykens2001Optimal,
  author = {J. A. Suykens and J. Vandewalle and B. De Moor},
  title = {Optimal control by least squares support vector machines.},
  journal = {Neural {N}etw},
  year = {2001},
  volume = {14},
  pages = {23--35},
  number = {1},
  month = {Jan},
  abstract = {Support vector machines have been very successful in pattern recognition
	and function estimation problems. {I}n this paper we introduce the
	use of least squares support vector machines ({LS}-{SVM}'s) for the
	optimal control of nonlinear systems. {L}inear and neural full static
	state feedback controllers are considered. {T}he problem is formulated
	in such a way that it incorporates the {N}-stage optimal control
	problem as well as a least squares support vector machine approach
	for mapping the state space into the action space. {T}he solution
	is characterized by a set of nonlinear equations. {A}n alternative
	formulation as a constrained nonlinear optimization problem in less
	unknowns is given, together with a method for imposing local stability
	in the {LS}-{SVM} control scheme. {T}he results are discussed for
	support vector machines with radial basis function kernel. {A}dvantages
	of {LS}-{SVM} control are that no number of hidden units has to be
	determined for the controller and that no centers have to be specified
	for the {G}aussian kernels when applying {M}ercer's condition. {T}he
	curse of dimensionality is avoided in comparison with defining a
	regular grid for the centers in classical radial basis function networks.
	{T}his is at the expense of taking the trajectory of state variables
	as additional unknowns in the optimization problem, while classical
	neural network approaches typically lead to parametric optimization
	problems. {I}n the {SVM} methodology the number of unknowns equals
	the number of training data, while in the primal space the number
	of unknowns can be infinite dimensional. {T}he method is illustrated
	both on stabilization and tracking problems including examples on
	swinging up an inverted pendulum with local stabilization at the
	endpoint and a tracking problem for a ball and beam system.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins,
	Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites,
	Biological, Bone Marrow Cells, Cell Compartmentation, Chemistry,
	Child, Chromosome Aberrations, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, DNA, Data Interpretation,
	Databases, Decision Trees, Diagnosis, Discriminant Analysis, Electric
	Conductivity, Electrophysiology, Escherichia coli Proteins, Factual,
	Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling,
	Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins,
	Humans, Ion Channels, Kinetics, Leukemia, Lipid Bilayers, Logistic
	Models, Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular,
	Myeloid, Neoplasm, Neoplastic, Neural Networks (Computer), Nevus,
	Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution,
	Nucleic Acid Conformation, Organ Specificity, Organelles, P.H.S.,
	Pattern Recognition, Physical, Pigmented, Predictive Value of Tests,
	Promoter Regions (Genetics), Protein Folding, Protein Structure,
	Proteins, Proteome, RNA, Reproducibility of Results, Research Support,
	Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity,
	Sequence Alignment, Sex Characteristics, Skin Diseases, Skin Neoplasms,
	Skin Pigmentation, Software, Statistical, Stomach Diseases, T-Lymphocytes,
	Thermodynamics, Transcription, Transcription Factors, Tumor Markers,
	U.S. Gov't},
  pii = {S0893608000000770},
  pmid = {11213211}
}

@article{Swamidass2005Kernels,
  author = {Swamidass, S. J. and Chen, J. and Bruand, J. and Phung, P. and Ralaivola,
	L. and Baldi, P.},
  title = {Kernels for small molecules and the prediction of mutagenicity, toxicity
	and anti-cancer activity.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {i359--i368},
  number = {Suppl. 1},
  month = {Jun},
  abstract = {M{OTIVATION}: {S}mall molecules play a fundamental role in organic
	chemistry and biology. {T}hey can be used to probe biological systems
	and to discover new drugs and other useful compounds. {A}s increasing
	numbers of large datasets of small molecules become available, it
	is necessary to develop computational methods that can deal with
	molecules of variable size and structure and predict their physical,
	chemical and biological properties. {RESULTS}: {H}ere we develop
	several new classes of kernels for small molecules using their 1{D},
	2{D} and 3{D} representations. {I}n 1{D}, we consider string kernels
	based on {SMILES} strings. {I}n 2{D}, we introduce several similarity
	kernels based on conventional or generalized fingerprints. {G}eneralized
	fingerprints are derived by counting in different ways subpaths contained
	in the graph of bonds, using depth-first searches. {I}n 3{D}, we
	consider similarity measures between histograms of pairwise distances
	between atom classes. {T}hese kernels can be computed efficiently
	and are applied to problems of classification and prediction of mutagenicity,
	toxicity and anti-cancer activity on three publicly available datasets.
	{T}he results derived using cross-validation methods are state-of-the-art.
	{T}radeoffs between various kernels are briefly discussed. {AVAILABILITY}:
	{D}atasets available from http://www.igb.uci.edu/servers/servers.html
	{CONTACT}: pfbaldi@ics.uci.edu.},
  doi = {10.1093/bioinformatics/bti1055},
  pdf = {../local/Swamidass2005Kernels.pdf},
  file = {Swamidass2005Kernels.pdf:Swamidass2005Kernels.pdf:PDF},
  keywords = {biosvm},
  pii = {21/suppl_1/i359},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1055}
}

@article{Sylvester1878Chemistry,
  author = {Sylvester, J. J.},
  title = {Chemistry and Algebra},
  journal = {Nature},
  year = {1878},
  volume = {17},
  pages = {284},
  number = {432},
  doi = {10.1038/017284a0}
}

@incollection{Szafranski2008Hierarchical,
  author    = {Szafranski, Marie and Grandvalet, Yves and Morizet-Mahoudeaux, Pierre},
  title     = {Hierarchical Penalization},
  booktitle = {Advances in Neural Information Processing Systems 20},
  editor    = {Platt, J.C. and Koller, D. and Singer, Y. and Roweis, S.},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2008},
  pages     = {1457--1464}
}

@inproceedings{Szafranski2008Composite,
  author = {Szafranski, M. and Grandvalet, Y. and Rakotomamonjy, A.},
  title = {Composite Kernel Learning},
  booktitle = {ICML '08: Proceedings of the 25th international conference on Machine
	learning},
  year = {2008},
  address = {Helsinki, Finland},
  month = {Jul},
  international = {y},
  teams = {DI},
  url = {http://hal.archives-ouvertes.fr/hal-00316016/en/}
}

@article{Soerlie2006Gene,
  author      = {S{\o}rlie, T. and Perou, C. M. and Fan, C. and Geisler, S. and
    Aas, T. and Nobel, A. and Anker, G. and Akslen, L. A. and Botstein, D. and
    B{\o}rresen-Dale, A.-L. and L{\o}nning, P. E.},
  title       = {Gene expression profiles do not consistently predict the clinical
    treatment response in locally advanced breast cancer},
  journal     = {Mol. Cancer Ther.},
  year        = {2006},
  month       = {Nov},
  volume      = {5},
  number      = {11},
  pages       = {2914--2918},
  doi         = {10.1158/1535-7163.MCT-06-0126},
  url         = {http://dx.doi.org/10.1158/1535-7163.MCT-06-0126},
  pii         = {5/11/2914},
  pmid        = {17121939},
  abstract    = {Neoadjuvant treatment offers an opportunity to correlate molecular
    variables to treatment response and to explore mechanisms of drug
    resistance in vivo. Here, we present a statistical analysis of large-scale
    gene expression patterns and their relationship to response following
    neoadjuvant chemotherapy in locally advanced breast cancers. We analyzed
    cDNA expression data from 81 tumors from two patient series, one
    treated with doxorubicin alone (51) and the other treated with 5-fluorouracil
    and mitomycin (30), and both were previously studied for correlations
    between TP53 status and response to therapy. We observed a low frequency
    of progressive disease within the luminal A subtype from both series
    (2 of 36 versus 13 of 45 patients; P = 0.0089) and a high frequency
    of progressive disease among patients with luminal B type tumors
    treated with doxorubicin (5 of 8 patients; P = 0.0078); however,
    aside from these two observations, no other consistent associations
    between response to chemotherapy and tumor subtype were observed.
    These specific associations could possibly be explained by covariance
    with TP53 mutation status, which also correlated with tumor subtype.
    Using supervised analysis, we could not uncover a gene profile that
    could reliably (>70\% accuracy and specificity) predict response
    to either treatment regimen.},
  institution = {Department of Medicine, Section of Oncology, Haukeland University
    Hospital, N-5021 Bergen, Norway.},
  keywords    = {csbcbook, csbcbook-ch3},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Soerlie2006Gene.pdf},
  file        = {Soerlie2006Gene.pdf:Soerlie2006Gene.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.04.07}
}

@article{Soerlie2001Gene,
  author = {S{\o}rlie, T. and Perou, C. M. and Tibshirani, R. and Aas, T. and
	Geisler, S. and Johnsen, H. and Hastie, T. and Eisen, M. B. and van
	de Rijn, M. and Jeffrey, S. S. and Thorsen, T. and Quist, H. and
	Matese, J. C. and Brown, P. O. and Botstein, D. and L{\o}nning,
	P. E. and B{\o}rresen-Dale, A. L.},
  title = {Gene expression patterns of breast carcinomas distinguish tumor subclasses
	with clinical implications},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2001},
  volume = {98},
  pages = {10869--10874},
  number = {19},
  month = {Sep},
  abstract = {The purpose of this study was to classify breast carcinomas based
	on variations in gene expression patterns derived from cDNA microarrays
	and to correlate tumor characteristics to clinical outcome. A total
	of 85 cDNA microarray experiments representing 78 cancers, three
	fibroadenomas, and four normal breast tissues were analyzed by hierarchical
	clustering. As reported previously, the cancers could be classified
	into a basal epithelial-like group, an ERBB2-overexpressing group
	and a normal breast-like group based on variations in gene expression.
	A novel finding was that the previously characterized luminal epithelial/estrogen
	receptor-positive group could be divided into at least two subgroups,
	each with a distinctive expression profile. These subtypes proved
	to be reasonably robust by clustering using two different gene sets:
	first, a set of 456 cDNA clones previously selected to reflect intrinsic
	properties of the tumors and, second, a gene set that highly correlated
	with patient outcome. Survival analyses on a subcohort of patients
	with locally advanced breast cancer uniformly treated in a prospective
	study showed significantly different outcomes for the patients belonging
	to the various groups, including a poor prognosis for the basal-like
	subtype and a significant difference in outcome for the two estrogen
	receptor-positive groups.},
  doi = {10.1073/pnas.191367098},
  pdf = {../local/Soerlie2001Gene.pdf},
  file = {Soerlie2001Gene.pdf:Soerlie2001Gene.pdf:PDF},
  institution = {Department of Genetics, The Norwegian Radium Hospital, Montebello,
	N-0310 Oslo, Norway.},
  keywords = {breastcancer, csbcbook, csbcbook-ch2},
  owner = {jp},
  pii = {98/19/10869},
  pmid = {11553815},
  timestamp = {2008.11.15},
  url = {http://dx.doi.org/10.1073/pnas.191367098}
}

@article{Sorlie2003Repeated,
  author = {S{\o}rlie, T. and Tibshirani, R. and Parker, J. and Hastie, T. and
	Marron, J. S. and Nobel, A. and Deng, S. and Johnsen, H. and Pesich,
	R. and Geisler, S. and Demeter, J. and Perou, C. M. and L{\o}nning,
	P. E. and Brown, P. O. and B{\o}rresen-Dale, A. L. and Botstein, D.},
  title = {Repeated observation of breast tumor subtypes in independent gene
	expression data sets},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2003},
  volume = {100},
  pages = {8418--8423},
  number = {14},
  month = {Jul},
  abstract = {Characteristic patterns of gene expression measured by DNA microarrays
	have been used to classify tumors into clinically relevant subgroups.
	In this study, we have refined the previously defined subtypes of
	breast tumors that could be distinguished by their distinct patterns
	of gene expression. A total of 115 malignant breast tumors were analyzed
	by hierarchical clustering based on patterns of expression of 534
	"intrinsic" genes and shown to subdivide into one basal-like, one
	ERBB2-overexpressing, two luminal-like, and one normal breast tissue-like
	subgroup. The genes used for classification were selected based on
	their similar expression levels between pairs of consecutive samples
	taken from the same tumor separated by 15 weeks of neoadjuvant treatment.
	Similar cluster analyses of two published, independent data sets
	representing different patient cohorts from different laboratories,
	uncovered some of the same breast cancer subtypes. In the one data
	set that included information on time to development of distant metastasis,
	subtypes were associated with significant differences in this clinical
	feature. By including a group of tumors from BRCA1 carriers in the
	analysis, we found that this genotype predisposes to the basal tumor
	subtype. Our results strongly support the idea that many of these
	breast tumor subtypes represent biologically distinct disease entities.},
  doi = {10.1073/pnas.0932692100},
  pdf = {../local/Sorlie2003Repeated.pdf},
  file = {Sorlie2003Repeated.pdf:Sorlie2003Repeated.pdf:PDF},
  keywords = {csbcbook, csbcbook-ch3},
  url = {http://dx.doi.org/10.1073/pnas.0932692100}
}

@article{Toeroenen1999Analysis,
  author = {T{\"o}r{\"o}nen, P. and Kolehmainen, M. and Wong, G. and Castr{\'e}n, E.},
  title = {Analysis of gene expression data using self-organizing maps.},
  journal = {FEBS Lett.},
  year = {1999},
  volume = {451},
  pages = {142--146},
  number = {2},
  month = {May},
  abstract = {DNA microarray technologies together with rapidly increasing genomic
	sequence information is leading to an explosion in available gene
	expression data. Currently there is a great need for efficient methods
	to analyze and visualize these massive data sets. A self-organizing
	map (SOM) is an unsupervised neural network learning algorithm which
	has been successfully used for the analysis and organization of large
	data files. We have here applied the SOM algorithm to analyze published
	data of yeast gene expression and show that SOM is an excellent tool
	for the analysis and visualization of gene expression profiles.},
  doi = {10.1016/S0014-5793(99)00524-4},
  pdf = {../local/Toeroenen1999Analysis.pdf},
  file = {Toeroenen1999Analysis.pdf:Toeroenen1999Analysis.pdf:PDF},
  institution = {A.I. Virtanen Institute, University of Kuopio, Finland.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S0014-5793(99)00524-4},
  pmid = {10371154},
  timestamp = {2011.10.03}
}

@article{Taby2010Cancer,
  author      = {Taby, Rodolphe and Issa, Jean-Pierre J},
  title       = {Cancer epigenetics.},
  journal     = {CA Cancer J Clin},
  year        = {2010},
  volume      = {60},
  number      = {6},
  pages       = {376--392},
  doi         = {10.3322/caac.20085},
  url         = {http://dx.doi.org/10.3322/caac.20085},
  pii         = {caac.20085},
  pmid        = {20959400},
  abstract    = {Epigenetics refers to stable alterations in gene expression with no
    underlying modifications in the genetic sequence and is best exemplified
    by differentiation, in which multiple cell types diverge physiologically
    despite a common genetic code. Interest in this area of science has
    grown over the past decades, especially since it was found to play
    a major role in physiologic phenomena such as embryogenesis, imprinting,
    and X chromosome inactivation, and in disease states such as cancer.
    The latter had been previously thought of as a disease with an exclusive
    genetic etiology. However, recent data have demonstrated that the
    complexity of human carcinogenesis cannot be accounted for by genetic
    alterations alone, but also involves epigenetic changes in processes
    such as DNA methylation, histone modifications, and microRNA expression.
    In turn, these molecular alterations lead to permanent changes in
    the expression of genes that regulate the neoplastic phenotype, such
    as cellular growth and invasiveness. Targeting epigenetic modifiers
    has been referred to as epigenetic therapy. The success of this approach
    in hematopoietic malignancies validates the importance of epigenetic
    alterations in cancer, not only at the therapeutic level but also
    with regard to prevention, diagnosis, risk stratification, and prognosis.},
  institution = {Department of Leukemia, The University of Texas M. D. Anderson Cancer
    Center, Houston, TX 77030, USA.},
  keywords    = {Animals; Cell Cycle, genetics; Cell Transformation, Neoplastic, genetics;
    DNA Methylation; Epigenesis, Genetic; Histones, genetics; Humans;
    MicroRNAs, genetics; Neoplasm Invasiveness, genetics; Neoplasms,
    classification/diagnosis/genetics/metabolism/prevention /&/ control/therapy;
    Prognosis; Risk Assessment; Tumor Markers, Biological, genetics},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  timestamp   = {2011.06.04}
}

@article{Takahashi2005Rigorous,
  author = {Norikazu Takahashi and Tetsuo Nishi},
  title = {Rigorous proof of termination of {SMO} algorithm for support vector
	machines.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2005},
  volume = {16},
  pages = {774--776},
  number = {3},
  month = {May},
  abstract = {Sequential minimal optimization ({SMO}) algorithm is one of the simplest
	decomposition methods for learning of support vector machines ({SVM}s).
	{K}eerthi and {G}ilbert have recently studied the convergence property
	of {SMO} algorithm and given a proof that {SMO} algorithm always
	stops within a finite number of iterations. {I}n this letter, we
	point out the incompleteness of their proof and give a more rigorous
	proof.}
}

@article{Takahashi2003Proteomic,
  author = {Nobuhiro Takahashi and Mitsuaki Yanagida and Sally Fujiyama and Toshiya
	Hayano and Toshiaki Isobe},
  title = {Proteomic snapshot analyses of preribosomal ribonucleoprotein complexes
	formed at various stages of ribosome biogenesis in yeast and mammalian
	cells.},
  journal = {Mass {S}pectrom {R}ev},
  year = {2003},
  volume = {22},
  pages = {287--317},
  number = {5},
  abstract = {Proteomic technologies powered by advancements in mass spectrometry
	and bioinformatics and coupled with accumulated genome sequence data
	allow a comprehensive study of cell function through large-scale
	and systematic protein identifications of protein constituents of
	the cell and tissues, as well as of multi-protein complexes that
	carry out many cellular function in a higher-order network in the
	cell. {O}ne of the most extensively analyzed cellular functions by
	proteomics is the production of ribosome, the protein-synthesis machinery,
	in the nucle(ol)us--the main site of ribosome biogenesis. {T}he use
	of tagged proteins as affinity bait, coupled with mass spectrometric
	identification, enabled us to isolate synthetic intermediates of
	ribosomes that might represent snapshots of nascent ribosomes at
	particular stages of ribosome biogenesis and to identify their constituents--some
	of which showed dynamic changes for association with the intermediates
	at various stages of ribosome biogenesis. {I}n this review, in conjunction
	with the results from yeast cells, our proteomic approach to analyze
	ribosome biogenesis in mammalian cells is described.},
  doi = {10.1002/mas.10057},
  pdf = {../local/Takahashi2003Proteomic.pdf},
  file = {Takahashi2003Proteomic.pdf:local/Takahashi2003Proteomic.pdf:PDF},
  keywords = {Affinity Labels, Animals, Comparative Study, Electrospray Ionization,
	Genetic, Macromolecular Substances, Mass, Mitosis, Non-P.H.S., Non-U.S.
	Gov't, P.H.S., Protein Interaction Mapping, Proteome, Proteomics,
	Research Support, Ribonucleoproteins, Ribosomes, Saccharomyces cerevisiae,
	Saccharomyces cerevisiae Proteins, Signal Transduction, Spectrometry,
	Transcription, U.S. Gov't, 12949916},
  owner = {vert},
  url = {http://dx.doi.org/10.1002/mas.10057}
}

@article{Takaoka2003Development,
  author = {Y. Takaoka and Y. Endo and S. Yamanobe and H. Kakinuma and T. Okubo
	and Y. Shimazaki and T. Ota and S. Sumiya and K. Yoshikawa},
  title = {Development of a method for evaluating drug-likeness and ease of
	synthesis using a data set in which compounds are assigned scores
	based on chemists' intuition.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2003},
  volume = {43},
  pages = {1269--1275},
  number = {4},
  abstract = {The concept of drug-likeness, an important characteristic for any
	compound in a screening library, is nevertheless difficult to pin
	down. {B}ased on our belief that this concept is implicit within
	the collective experience of working chemists, we devised a data
	set to capture an intuitive human understanding of both this characteristic
	and ease of synthesis, a second key characteristic. {F}ive chemists
	assigned a pair of scores to each of 3980 diverse compounds, with
	the component scores of each pair corresponding to drug-likeness
	and ease of synthesis, respectively. {U}sing this data set, we devised
	binary classifiers with an artificial neural network and a support
	vector machine. {T}hese models were found to efficiently eliminate
	compounds that are not drug-like and/or hard-to-synthesize derivatives,
	demonstrating the suitability of these models for use as compound
	acquisition filters.},
  doi = {10.1021/ci034043l},
  pdf = {../local/Takaoka2003Development.pdf},
  file = {Takaoka2003Development.pdf:local/Takaoka2003Development.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci034043l}
}

@article{Takemoto2012Analysis,
  author = {Takemoto, K. and Tamura, T. and Cong, Y. and Ching, W.-K. and Vert,
	J.-P. and Akutsu, T.},
  title = {Analysis of the impact degree distribution in metabolic networks using
	branching process approximation},
  journal = {Physica A},
  year = {2012},
  volume = {391},
  pages = {379--387},
  doi = {10.1016/j.physa.2011.08.011},
  pdf = {../local/Takemoto2012Analysis.pdf},
  file = {Takemoto2012Analysis.pdf:Takemoto2012Analysis.pdf:PDF},
  owner = {jp},
  timestamp = {2011.10.12},
  url = {http://dx.doi.org/10.1016/j.physa.2011.08.011}
}

@article{Takeuchi2005Bio-medical,
  author = {Koichi Takeuchi and Nigel Collier},
  title = {Bio-medical entity extraction using support vector machines.},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2005},
  volume = {33},
  pages = {125--137},
  number = {2},
  month = {Feb},
  abstract = {O{BJECTIVE}: {S}upport vector machines ({SVM}s) have achieved state-of-the-art
	performance in several classification tasks. {I}n this article we
	apply them to the identification and semantic annotation of scientific
	and technical terminology in the domain of molecular biology. {T}his
	illustrates the extensibility of the traditional named entity task
	to special domains with large-scale terminologies such as those in
	medicine and related disciplines. {METHODS} {AND} {MATERIALS}: {T}he
	foundation for the model is a sample of text annotated by a domain
	expert according to an ontology of concepts, properties and relations.
	{T}he model then learns to annotate unseen terms in new texts and
	contexts. {T}he results can be used for a variety of intelligent
	language processing applications. {W}e illustrate {SVM}s capabilities
	using a sample of 100 journal abstracts texts taken from the {human,
	blood cell, transcription factor} domain of {MEDLINE}. {RESULTS}:
	{A}pproximately 3400 terms are annotated and the model performs at
	about 74\% {F}-score on cross-validation tests. {A} detailed analysis
	based on empirical evidence shows the contribution of various feature
	sets to performance. {CONCLUSION}: {O}ur experiments indicate a relationship
	between feature window size and the amount of training data and that
	a combination of surface words, orthographic features and head noun
	features achieve the best performance among the feature sets tested.},
  doi = {10.1016/j.artmed.2004.07.019},
  pdf = {../local/Takeuchi2005Bio-medical.pdf},
  file = {Takeuchi2005Bio-medical.pdf:local/Takeuchi2005Bio-medical.pdf:PDF},
  keywords = {biosvm},
  pii = {S0933-3657(04)00130-7},
  url = {http://dx.doi.org/10.1016/j.artmed.2004.07.019}
}

@article{Talagrand1996Majorizing,
  author  = {Talagrand, M.},
  title   = {Majorizing measures: {T}he generic chaining},
  journal = {Ann. {P}robab.},
  year    = {1996},
  volume  = {24},
  pages   = {1049--1103},
  url     = {http://www.math.ohio-state.edu/~talagran/preprints/majmeas.dvi},
  subject = {stat},
  pdf     = {../local/tala96b.pdf},
  file    = {tala96b.pdf:local/tala96b.pdf:PDF}
}

@article{Talagrand1996New,
  author = {Talagrand, M.},
  title = {New concentration inequalities for product spaces},
  journal = {Inventiones {M}ath.},
  year = {1996},
  volume = {126},
  pages = {505--563},
  pdf = {../local/tala96.pdf},
  file = {tala96.pdf:local/tala96.pdf:PDF},
  subject = {stat},
  url = {http://www.math.ohio-state.edu/~talagran/preprints/newcon.dvi}
}

@article{Talagrand1996Newa,
  author  = {Talagrand, M.},
  title   = {A {N}ew {L}ook at {I}ndependence},
  journal = {Ann. {P}robab.},
  year    = {1996},
  volume  = {24},
  pages   = {1--34},
  url     = {http://www.math.ohio-state.edu/~talagran/preprints/newlook.dvi},
  subject = {stat},
  pdf     = {../local/tala96c.pdf},
  file    = {tala96c.pdf:local/tala96c.pdf:PDF}
}

@article{Talagrand1995Concentration,
  author  = {Talagrand, M.},
  title   = {Concentration of measure and isoperimetric inequalities in product
    spaces},
  journal = {Publ. {M}ath. {I}.{H}.{E}.{S}.},
  year    = {1995},
  volume  = {81},
  pages   = {73--203},
  url     = {http://www.math.ohio-state.edu/~talagran/preprints/ihes.dvi},
  subject = {stat},
  pdf     = {../local/tala95.pdf},
  file    = {tala95.pdf:local/tala95.pdf:PDF}
}

@article{Talih2005Structural,
  author    = {Talih, M. and Hengartner, N.},
  title     = {Structural learning with time-varying components: tracking the
    cross-section of financial time series},
  journal   = {J. R. Stat. Soc. Ser. B},
  year      = {2005},
  volume    = {67},
  number    = {3},
  pages     = {321--341},
  pdf       = {../local/Talih2005Structural.pdf},
  file      = {Talih2005Structural.pdf:Talih2005Structural.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.04.13}
}

@article{Talukder2001closed-form,
  author = {A. Talukder and D. Casasent},
  title = {A closed-form neural network for discriminatory feature extraction
	from high-dimensional data.},
  journal = {Neural {N}etw},
  year = {2001},
  volume = {14},
  pages = {1201--1218},
  number = {9},
  month = {Nov},
  abstract = {We consider a new neural network for data discrimination in pattern
	recognition applications. {W}e refer to this as a maximum discriminating
	feature ({MDF}) neural network. {I}ts weights are obtained in closed-form,
	thereby overcoming problems associated with other nonlinear neural
	networks. {I}t uses neuron activation functions that are dynamically
	chosen based on the application. {I}t is theoretically shown to provide
	nonlinear transforms of the input data that are more general than
	those provided by other nonlinear multilayer perceptron neural network
	and support-vector machine techniques for cases involving high-dimensional
	(image) inputs where training data are limited and the classes are
	not linearly separable. {W}e experimentally verify this on synthetic
	examples.}
}

@article{Tamayo1999Interpreting,
  author = {Tamayo, P. and Slonim, D. and Mesirov, J. and Zhu, Q. and Kitareewan,
	S. and Dmitrovsky, E. and Lander, E. S. and Golub, T. R.},
  title = {Interpreting patterns of gene expression with self-organizing maps:
	methods and application to hematopoietic differentiation.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {1999},
  volume = {96},
  pages = {2907--2912},
  number = {6},
  month = {Mar},
  abstract = {Array technologies have made it straightforward to monitor simultaneously
	the expression pattern of thousands of genes. The challenge now is
	to interpret such massive data sets. The first step is to extract
	the fundamental patterns of gene expression inherent in the data.
	This paper describes the application of self-organizing maps, a type
	of mathematical cluster analysis that is particularly well suited
	for recognizing and classifying features in complex, multidimensional
	data. The method has been implemented in a publicly available computer
	package, GENECLUSTER, that performs the analytical calculations and
	provides easy data visualization. To illustrate the value of such
	analysis, the approach is applied to hematopoietic differentiation
	in four well studied models (HL-60, U937, Jurkat, and NB4 cells).
	Expression patterns of some 6,000 human genes were assayed, and an
	online database was created. GENECLUSTER was used to organize the
	genes into biologically relevant clusters that suggest novel hypotheses
	about hematopoietic differentiation---for example, highlighting certain
	genes and pathways involved in "differentiation therapy" used in
	the treatment of acute promyelocytic leukemia.},
  pdf = {../local/Tamayo1999Interpreting.pdf},
  file = {Tamayo1999Interpreting.pdf:Tamayo1999Interpreting.pdf:PDF},
  institution = {Whitehead Institute for Biomedical Research, 9 Cambridge Center,
	Cambridge, MA 02142, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {10077610},
  timestamp = {2011.10.04}
}

@article{Tang2005siRNA,
  author = {Tang, G.},
  title = {{siRNA} and {miRNA}: an insight into {RISC}s.},
  journal = {Trends {B}iochem. {S}ci.},
  year = {2005},
  volume = {30},
  pages = {106--114},
  number = {2},
  month = {Feb},
  abstract = {Two classes of short {RNA} molecule, small interfering {RNA} (si{RNA})
	and micro{RNA} (mi{RNA}), have been identified as sequence-specific
	posttranscriptional regulators of gene expression. si{RNA} and mi{RNA}
	are incorporated into related {RNA}-induced silencing complexes ({RISC}s),
	termed si{RISC} and mi{RISC}, respectively. {T}he current model argues
	that si{RISC} and mi{RISC} are functionally interchangeable and target
	specific m{RNA}s for cleavage or translational repression, depending
	on the extent of sequence complementarity between the small {RNA}
	and its target. {E}merging evidence indicates, however, that si{RISC}
	and mi{RISC} are distinct complexes that regulate m{RNA} stability
	and translation. {T}he assembly of {RISC}s can be traced from the
	biogenesis of the small {RNA} molecules and the recruitment of these
	{RNA}s by the {RISC} loading complex ({RLC}) to the transition of
	the {RLC} into the active {RISC}. {T}arget recognition by the {RISC}
	can then take place through different interacting modes.},
  doi = {10.1016/j.tibs.2004.12.007},
  keywords = {sirna},
  pii = {S0968-0004(04)00321-4},
  url = {http://dx.doi.org/10.1016/j.tibs.2004.12.007}
}

@article{Tang2005Discovering,
  author = {Thomas Tang and Jinbo Xu and Ming Li},
  title = {Discovering sequence-structure motifs from protein segments and two
	applications.},
  journal = {Pac {S}ymp {B}iocomput},
  year = {2005},
  pages = {370--381},
  abstract = {We present a novel method for clustering short protein segments having
	strong sequence-structure correlations, and demonstrate that these
	clusters contain useful structural information via two applications.
	{W}hen applied to local tertiary structure prediction, we achieve
	approximately 60\% accuracy with a novel dynamic programming algorithm.
	{W}hen applied to secondary structure prediction based on {S}upport
	{V}ector {M}achines, we obtain a approximately 2\% gain in {Q}3 performance
	by incorporating cluster-derived data into training and classification.
	{T}hese encouraging results illustrate the great potential of using
	conserved local motifs to tackle protein structure predictions and
	possibly other important problems in biology.},
  keywords = {biosvm}
}

@article{Tang2005Granular,
  author = {Yuchun Tang and Bo Jin and Yan-Qing Zhang},
  title = {Granular support vector machines with association rules mining for
	protein homology prediction.},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2005},
  month = {Jul},
  abstract = {O{BJECTIVE}: {P}rotein homology prediction between protein sequences
	is one of critical problems in computational biology. {S}uch a complex
	classification problem is common in medical or biological information
	processing applications. {H}ow to build a model with superior generalization
	capability from training samples is an essential issue for mining
	knowledge to accurately predict/classify unseen new samples and to
	effectively support human experts to make correct decisions. {METHODOLOGY}:
	{A} new learning model called granular support vector machines ({GSVM})
	is proposed based on our previous work. {GSVM} systematically and
	formally combines the principles from statistical learning theory
	and granular computing theory and thus provides an interesting new
	mechanism to address complex classification problems. {I}t works
	by building a sequence of information granules and then building
	support vector machines ({SVM}) in some of these information granules
	on demand. {A} good granulation method to find suitable granules
	is crucial for modeling a {GSVM} with good performance. {I}n this
	paper, we also propose an association rules-based granulation method.
	{F}or the granules induced by association rules with high enough
	confidence and significant support, we leave them as they are because
	of their high "purity" and significant effect on simplifying the
	classification task. {F}or every other granule, a {SVM} is modeled
	to discriminate the corresponding data. {I}n this way, a complex
	classification problem is divided into multiple smaller problems
	so that the learning task is simplified. {RESULTS} {AND} {CONCLUSIONS}:
	{T}he proposed algorithm, here named {GSVM}-{AR}, is compared with
	{SVM} by {KDDCUP}04 protein homology prediction data. {T}he experimental
	results show that finding the splitting hyperplane is not a trivial
	task (we should be careful to select the association rules to avoid
	overfitting) and {GSVM}-{AR} does show significant improvement compared
	to building one single {SVM} in the whole feature space. {A}nother
	advantage is that the utility of {GSVM}-{AR} is very good because
	it is easy to be implemented. {M}ore importantly and more interestingly,
	{GSVM} provides a new mechanism to address complex classification
	problems.},
  doi = {10.1016/j.artmed.2005.02.003},
  pdf = {../local/Tang2005Granular.pdf},
  file = {Tang2005Granular.pdf:local/Tang2005Granular.pdf:PDF},
  pii = {S0933-3657(05)00054-0},
  pmid = {16024240},
  url = {http://dx.doi.org/10.1016/j.artmed.2005.02.003}
}

@article{Tartakovsky2006novel,
  author    = {Tartakovsky, A. G. and Rozovskii, B. L. and Blazek, R. B. and
    Kim, Hongjoong},
  title     = {A novel approach to detection of intrusions in computer networks
    via adaptive sequential and batch-sequential change-point detection
    methods},
  journal   = {IEEE T. Signal. Proces.},
  year      = {2006},
  volume    = {54},
  number    = {9},
  pages     = {3372--3382},
  doi       = {10.1109/TSP.2006.879308},
  url       = {http://dx.doi.org/10.1109/TSP.2006.879308},
  pdf       = {../local/Tartakovsky2006novel.pdf},
  file      = {Tartakovsky2006novel.pdf:Tartakovsky2006novel.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.04.13}
}

@article{Tarumi2003Remote,
  author = {Toshiyasu Tarumi and Gary W Small and Roger J Combs and Robert T
	Kroutil},
  title = {Remote detection of heated ethanol plumes by airborne passive {F}ourier
	transform infrared spectrometry.},
  journal = {Appl {S}pectrosc},
  year = {2003},
  volume = {57},
  pages = {1432--1441},
  number = {11},
  month = {Nov},
  abstract = {Methodology is developed for the automated detection of heated plumes
	of ethanol vapor with airborne passive {F}ourier transform infrared
	spectrometry. {P}ositioned in a fixed-wing aircraft in a downward-looking
	mode, the spectrometer is used to detect ground sources of ethanol
	vapor from an altitude of 2000-3000 ft. {C}hallenges to the use of
	this approach for the routine detection of chemical plumes include
	(1) the presence of a constantly changing background radiance as
	the aircraft flies, (2) the cost and complexity of collecting the
	data needed to train the classification algorithms used in implementing
	the plume detection, and (3) the need for rapid interferogram scans
	to minimize the ground area viewed per scan. {T}o address these challenges,
	this work couples a novel ground-based data collection and training
	protocol with the use of signal processing and pattern recognition
	methods based on short sections of the interferogram data collected
	by the spectrometer. {I}n the data collection, heated plumes of ethanol
	vapor are released from a portable emission stack and viewed by the
	spectrometer from ground level against a synthetic background designed
	to simulate a terrestrial radiance source. {C}lassifiers trained
	with these data are subsequently tested with airborne data collected
	over a period of 2.5 years. {T}wo classifier architectures are compared
	in this work: support vector machines ({SVM}) and piecewise linear
	discriminant analysis ({PLDA}). {W}hen applied to the airborne test
	data, the {SVM} classifiers perform best, failing to detect ethanol
	in only 8\% of the cases in which it is present. {F}alse detections
	occur at a rate of less than 0.5\%. {T}he classifier performs well
	in spite of differences between the backgrounds associated with the
	ground-based and airborne data collections and the instrumental drift
	arising from the long time span of the data collection. {F}urther
	improvements in classification performance are judged to require
	increased sophistication in the ground-based data collection in order
	to provide a better match to the infrared backgrounds observed from
	the air.},
  keywords = {Air Pollutants, Aircraft, Algorithms, Artificial Intelligence, Automated,
	Comparative Study, Computer Simulation, Computer-Assisted, Computing
	Methodologies, Environmental Monitoring, Ethanol, Fourier Transform
	Infrared, Humans, Image Interpretation, Non-P.H.S., Non-U.S. Gov't,
	Online Systems, Pattern Recognition, Photography, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Signal
	Processing, Spectroscopy, Subtraction Technique, U.S. Gov't, Video
	Recording, Walking, 14658159}
}

@inproceedings{Taskar2004Max-Margin,
  author = {Taskar, B. and Guestrin, C. and Koller, D.},
  title = {Max-{M}argin {M}arkov {N}etworks},
  booktitle = {Advances in {N}eural {I}nformation {P}rocessing {S}ystems 16},
  year = {2004},
  editor = {Thrun, Sebastian and Saul, Lawrence and Sch{\"o}lkopf, Bernhard},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  pdf = {../local/Taskar2004Max-Margin.pdf},
  file = {Taskar2004Max-Margin.pdf:local/Taskar2004Max-Margin.pdf:PDF},
  keywords = {conditional-random-field},
  owner = {vert}
}

@article{Taslim2009Comparative,
  author = {Taslim, C. and Wu, J. and Yan, P. and Singer, G. and Parvin, J. and
	Huan, T. and Lin, S. and Huang, K.},
  title = {Comparative study on {ChIP}-seq data: normalization and binding pattern
	characterization.},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  pages = {2334--2340},
  number = {18},
  month = {Sep},
  abstract = {MOTIVATION: Antibody-based Chromatin Immunoprecipitation assay followed
	by high-throughput sequencing technology (ChIP-seq) is a relatively
	new method to study the binding patterns of specific protein molecules
	over the entire genome. ChIP-seq technology allows scientist to get
	more comprehensive results in shorter time. Here, we present a non-linear
	normalization algorithm and a mixture modeling method for comparing
	ChIP-seq data from multiple samples and characterizing genes based
	on their RNA polymerase II (Pol II) binding patterns. RESULTS: We
	apply a two-step non-linear normalization method based on locally
	weighted regression (LOESS) approach to compare ChIP-seq data across
	multiple samples and model the difference using an Exponential-Normal(K)
	mixture model. Fitted model is used to identify genes associated
	with differential binding sites based on local false discovery rate
	(fdr). These genes are then standardized and hierarchically clustered
	to characterize their Pol II binding patterns. As a case study, we
	apply the analysis procedure comparing normal breast cancer (MCF7)
	to tamoxifen-resistant (OHT) cell line. We find enriched regions
	that are associated with cancer (P < 0.0001). Our findings also imply
	that there may be a dysregulation of cell cycle and gene expression
	control pathways in the tamoxifen-resistant cells. These results
	show that the non-linear normalization method can be used to analyze
	ChIP-seq data across multiple samples. AVAILABILITY: Data are available
	at http://www.bmi.osu.edu/~khuang/Data/ChIP/RNAPII/.},
  doi = {10.1093/bioinformatics/btp384},
  institution = {Medical Genetics, Ohio State University, Columbus, OH 43210, USA.
	taslim.2@osu.edu},
  owner = {jp},
  pii = {btp384},
  pmid = {19561022},
  timestamp = {2009.10.12},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp384}
}

@article{Tavazoie1999Systematic,
  author = {Tavazoie, S. and Hughes, J. D. and Campbell, M. J. and Cho, R. J.
	and Church, G. M.},
  title = {Systematic determination of genetic network architecture},
  journal = {Nat. Genet.},
  year = {1999},
  volume = {22},
  pages = {281--285},
  doi = {10.1038/10343},
  pdf = {../local/Tavazoie1999Systematic.pdf},
  file = {Tavazoie1999Systematic.pdf:local/Tavazoie1999Systematic.pdf:PDF},
  subject = {microarray},
  url = {http://dx.doi.org/10.1038/10343}
}

@article{Tax2001Uniform,
  author = {Tax, D. M. J. and Duin, R. P. W.},
  title = {Uniform {O}bject {G}eneration for {O}ptimizing {O}ne-class {C}lassifiers},
  journal = {J. {M}ach. {L}earn. {R}es.},
  year = {2001},
  volume = {2},
  pages = {155--173},
  pdf = {../local/Tax2001Uniform.pdf},
  file = {Tax2001Uniform.pdf:local/Tax2001Uniform.pdf:PDF}
}

@article{Taylor2008Guidelines,
  author = {Chris F Taylor and Pierre-Alain Binz and Ruedi Aebersold and Michel
	Affolter and Robert Barkovich and Eric W Deutsch and David M Horn
	and Andreas H{\"u}hmer and Martin Kussmann and Kathryn Lilley and Marcus
	Macht and Matthias Mann and Dieter M{\"u}ller and Thomas A Neubert and
	Janice Nickson and Scott D Patterson and Roberto Raso and Kathryn
	Resing and Sean L Seymour and Akira Tsugita and Ioannis Xenarios
	and Rong Zeng and Randall K Julian},
  title = {Guidelines for reporting the use of mass spectrometry in proteomics.},
  journal = {Nat Biotechnol},
  year = {2008},
  volume = {26},
  pages = {860--861},
  number = {8},
  month = {Aug},
  doi = {10.1038/nbt0808-860},
  keywords = {Databases, Protein; Guidelines as Topic; Mass Spectrometry; Proteomics},
  owner = {phupe},
  pii = {nbt0808-860},
  pmid = {18688232},
  timestamp = {2010.08.13},
  url = {http://dx.doi.org/10.1038/nbt0808-860}
}

@article{Taylor2007minimum,
  author = {Chris F Taylor and Norman W Paton and Kathryn S Lilley and Pierre-Alain
	Binz and Randall K Julian and Andrew R Jones and Weimin Zhu and Rolf
	Apweiler and Ruedi Aebersold and Eric W Deutsch and Michael J Dunn
	and Albert J R Heck and Alexander Leitner and Marcus Macht and Matthias
	Mann and Lennart Martens and Thomas A Neubert and Scott D Patterson
	and Peipei Ping and Sean L Seymour and Puneet Souda and Akira Tsugita
	and Joel Vandekerckhove and Thomas M Vondriska and Julian P Whitelegge
	and Marc R Wilkins and Ioannis Xenarios and John R Yates and Henning
	Hermjakob},
  title = {The minimum information about a proteomics experiment ({MIAPE}).},
  journal = {Nat Biotechnol},
  year = {2007},
  volume = {25},
  pages = {887--893},
  number = {8},
  month = {Aug},
  abstract = {Both the generation and the analysis of proteomics data are now widespread,
	and high-throughput approaches are commonplace. Protocols continue
	to increase in complexity as methods and technologies evolve and
	diversify. To encourage the standardized collection, integration,
	storage and dissemination of proteomics data, the Human Proteome
	Organization's Proteomics Standards Initiative develops guidance
	modules for reporting the use of techniques such as gel electrophoresis
	and mass spectrometry. This paper describes the processes and principles
	underpinning the development of these modules; discusses the ramifications
	for various interest groups such as experimentalists, funders, publishers
	and the private sector; addresses the issue of overlap with other
	reporting guidelines; and highlights the criticality of appropriate
	tools and resources in enabling 'MIAPE-compliant' reporting.},
  doi = {10.1038/nbt1329},
  institution = {The HUPO Proteomics Standards Initiative, Wellcome Trust Genome Campus,
	Hinxton, Cambridgeshire CB10 1SD, UK. chris.taylor@ebi.ac.uk},
  keywords = {Databases, Protein; Gene Expression Profiling; Genome, Human; Guidelines
	as Topic; Humans; Information Storage and Retrieval; Internationality;
	Proteomics; Research},
  owner = {phupe},
  pii = {nbt1329},
  pmid = {17687369},
  timestamp = {2010.08.13},
  url = {http://dx.doi.org/10.1038/nbt1329}
}

@article{Taylor2002Protein,
  author = {Taylor, W. R.},
  title = {Protein structure comparison using bipartite graph matching and its
	application to protein structure classification.},
  journal = {Mol. Cell. Proteomics},
  year = {2002},
  volume = {1},
  pages = {334--339},
  number = {4},
  month = {Apr},
  abstract = {A measure of protein structure similarity is calculated from the matching
	of pairs of secondary structure elements between two proteins. The
	interaction of each pair was estimated from their axial line segments
	and combined with other geometric features to produce an optimal
	discrimination between intrafamily and interfamily relationships.
	The matching used a fast bipartite graph-matching algorithm that
	avoids the computational complexity of searching for the full subgraph
	isomorphism between the two sets of interactions. The main algorithm
	used was the "stable marriage" algorithm, which works on the ranked
	"preferences" of one interaction for another. The method takes 1/10
	of a second for a typical comparison making it suitable as a fast
	pre-filter for slower, more exhaustive approaches. An application
	to protein structure classification is described.},
  institution = {Division of Mathematical Biology, National Institute for Medical
	Research, The Ridgeway, Mill Hill, London NW7 1AA, United Kingdom.
	wtaylor@nimr.mrc.ac.uk},
  citeulike-article-id = {892571},
  citeulike-linkout-0 = {http://view.ncbi.nlm.nih.gov/pubmed/12096115},
  citeulike-linkout-1 = {http://www.hubmed.org/display.cgi?uids=12096115},
  issn = {1535-9476},
  keywords = {structure\_classification, structure\_comparison},
  posted-at = {2006-10-11 11:21:15},
  priority = {2},
  url = {http://view.ncbi.nlm.nih.gov/pubmed/12096115}
}

@article{Teer2010Exome,
  author = {Jamie K Teer and James C Mullikin},
  title = {Exome sequencing: the sweet spot before whole genomes.},
  journal = {Hum Mol Genet},
  year = {2010},
  month = {Aug},
  doi = {10.1093/hmg/ddq333},
  url = {http://dx.doi.org/10.1093/hmg/ddq333},
  pii = {ddq333},
  pmid = {20705737},
  institution = {Genetic Disease Research Branch and.},
  owner = {phupe},
  timestamp = {2010.08.30},
  abstract = {The development of massively parallel sequencing technologies, coupled
	with new massively parallel DNA enrichment technologies (genomic
	capture), has allowed the sequencing of targeted regions of the human
	genome in rapidly increasing numbers of samples. Genomic capture
	can target specific areas in the genome, including genes of interest
	and linkage regions, but this limits the study to what is already
	known. Exome capture allows an unbiased investigation of the complete
	protein-coding regions in the genome. Researchers can use exome capture
	to focus on a critical part of the human genome, allowing larger
	numbers of samples than are currently practical with whole-genome
	sequencing. In this review, we briefly describe some of the methodologies
	currently used for genomic and exome capture and highlight recent
	applications of this technology.}
}

@article{Tegner2003Reverse,
  author = {Tegner, J. and Yeung, M. K. S. and Hasty, J. and Collins, J. J.},
  title = {{R}everse engineering gene networks: integrating genetic perturbations
	with dynamical modeling},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2003},
  volume = {100},
  number = {10},
  pages = {5944--5949},
  month = {May},
  doi = {10.1073/pnas.0933416100},
  url = {http://dx.doi.org/10.1073/pnas.0933416100},
  pii = {0933416100},
  pmid = {12730377},
  timestamp = {2008.02.04},
  abstract = {While the fundamental building blocks of biology are being tabulated
	by the various genome projects, microarray technology is setting
	the stage for the task of deducing the connectivity of large-scale
	gene networks. We show how the perturbation of carefully chosen genes
	in a microarray experiment can be used in conjunction with a reverse
	engineering algorithm to reveal the architecture of an underlying
	gene regulatory network. Our iterative scheme identifies the network
	topology by analyzing the steady-state changes in gene expression
	resulting from the systematic perturbation of a particular node in
	the network. We highlight the validity of our reverse engineering
	approach through the successful deduction of the topology of a linear
	in numero gene network and a recently reported model for the segmentation
	polarity network in Drosophila melanogaster. Our method may prove
	useful in identifying and validating specific drug targets and in
	deconvolving the effects of chemical compounds.}
}

@article{Teixeira2001Recent,
  author = {R. D. Teixeira and A. P. Braga and R. H. Takahashi and R. R. Saldanha},
  title = {Recent advances in the {MOBJ} algorithm for training artificial neural
	networks.},
  journal = {Int {J} {N}eural {S}yst},
  year = {2001},
  volume = {11},
  pages = {265--270},
  number = {3},
  month = {Jun},
  abstract = {This paper presents a new scheme for training {MLP}s which employs
	a relaxation method for multi-objective optimization. {T}he algorithm
	works by obtaining a reduced set of solutions, from which the one
	with the best generalization is selected. {T}his approach allows
	balancing between the training error and norm of network weight vectors,
	which are the two objective functions of the multi-objective optimization
	problem. {T}he method is applied to classification and regression
	problems and compared with {W}eight {D}ecay ({WD}), {S}upport {V}ector
	{M}achines ({SVM}s) and standard {B}ackpropagation ({BP}). {I}t is
	shown that the systematic procedure for training proposed results
	on good generalization neural models, and outperforms traditional
	methods.},
  pii = {S0129065701000709}
}

@article{Tenenbaum2000global,
  author = {Tenenbaum, J. B. and de Silva, V. and Langford, J. C.},
  title = {A global geometric framework for nonlinear dimensionality reduction},
  journal = {Science},
  year = {2000},
  volume = {290},
  pages = {2319--2323},
  number = {5500},
  month = {Dec},
  abstract = {Scientists working with large volumes of high-dimensional data, such
	as global climate patterns, stellar spectra, or human gene distributions,
	regularly confront the problem of dimensionality reduction: finding
	meaningful low-dimensional structures hidden in their high-dimensional
	observations. {T}he human brain confronts the same problem in everyday
	perception, extracting from its high-dimensional sensory inputs---30,000
	auditory nerve fibers or $10^6$ optic nerve fibers---a manageably small
	number of perceptually relevant features. {H}ere we describe an approach
	to solving dimensionality reduction problems that uses easily measured
	local metric information to learn the underlying global geometry
	of a data set. {U}nlike classical techniques such as principal component
	analysis ({PCA}) and multidimensional scaling ({MDS}), our approach
	is capable of discovering the nonlinear degrees of freedom that underlie
	complex natural observations, such as human handwriting or images
	of a face under different viewing conditions. {I}n contrast to previous
	algorithms for nonlinear dimensionality reduction, ours efficiently
	computes a globally optimal solution, and, for an important class
	of data manifolds, is guaranteed to converge asymptotically to the
	true structure.},
  doi = {10.1126/science.290.5500.2319},
  pdf = {../local/Tenenbaum2000global.pdf},
  file = {Tenenbaum2000global.pdf:local/Tenenbaum2000global.pdf:PDF},
  keywords = {dimred},
  pii = {290/5500/2319},
  url = {http://dx.doi.org/10.1126/science.290.5500.2319}
}

@article{Teramoto2005Prediction,
  author = {Reiji Teramoto and Mikio Aoki and Toru Kimura and Masaharu Kanaoka},
  title = {Prediction of si{RNA} functionality using generalized string kernel
	and support vector machine.},
  journal = {F{EBS} {L}ett.},
  year = {2005},
  volume = {579},
  pages = {2878--2882},
  number = {13},
  month = {May},
  abstract = {Small interfering {RNA}s (si{RNA}s) are becoming widely used for sequence-specific
	gene silencing in mammalian cells, but designing an effective si{RNA}
	is still a challenging task. {I}n this study, we developed an algorithm
	for predicting si{RNA} functionality by using generalized string
	kernel ({GSK}) combined with support vector machine ({SVM}). {W}ith
	{GSK}, si{RNA} sequences were represented as vectors in a multi-dimensional
	feature space according to the numbers of subsequences in each si{RNA},
	and subsequently classified with {SVM} into effective or ineffective
	si{RNA}s. {W}e applied this algorithm to published si{RNA}s, and
	could classify effective and ineffective si{RNA}s with 90.6\%, 86.2\%
	accuracy, respectively.},
  doi = {10.1016/j.febslet.2005.04.045},
  pdf = {../local/Teramoto2005Prediction.pdf},
  file = {Teramoto2005Prediction.pdf:local/Teramoto2005Prediction.pdf:PDF},
  keywords = {sirna biosvm},
  pii = {S0014-5793(05)00520-X},
  url = {http://dx.doi.org/10.1016/j.febslet.2005.04.045}
}

@article{Terentiev2009Dynamic,
  author = {A. A. Terentiev and N. T. Moldogazieva and K. V. Shaitan},
  title = {Dynamic proteomics in modeling of the living cell. {P}rotein-protein
	interactions.},
  journal = {Biochemistry (Mosc)},
  year = {2009},
  volume = {74},
  pages = {1586--1607},
  number = {13},
  month = {Dec},
  abstract = {This review is devoted to describing, summarizing, and analyzing of
	dynamic proteomics data obtained over the last few years and concerning
	the role of protein-protein interactions in modeling of the living
	cell. Principles of modern high-throughput experimental methods for
	investigation of protein-protein interactions are described. Systems
	biology approaches based on integrative view on cellular processes
	are used to analyze organization of protein interaction networks.
	It is proposed that finding of some proteins in different protein
	complexes can be explained by their multi-modular and polyfunctional
	properties; the different protein modules can be located in the nodes
	of protein interaction networks. Mathematical and computational approaches
	to modeling of the living cell with emphasis on molecular dynamics
	simulation are provided. The role of the network analysis in fundamental
	medicine is also briefly reviewed.},
  institution = {Russian State Medical University, ul. Ostrovityanova 1, Moscow, Russia.
	aaterent@mtu-net.ru},
  keywords = {Animals; Humans; Mass Spectrometry; Models, Theoretical; Molecular
	Dynamics Simulation; Multiprotein Complexes; Protein Conformation;
	Protein Interaction Mapping; Proteins; Proteomics; Systems Biology;
	Two-Hybrid System Techniques},
  owner = {phupe},
  pii = {BCM74131586},
  pmid = {20210711},
  timestamp = {2010.08.31}
}

@article{Thies2004Optimal,
  author = {Thorsten Thies and Frank Weber},
  title = {Optimal reduced-set vectors for support vector machines with a quadratic
	kernel.},
  journal = {Neural {C}omput},
  year = {2004},
  volume = {16},
  pages = {1769--1777},
  number = {9},
  month = {Sep},
  abstract = {To reduce computational cost, the discriminant function of a support
	vector machine ({SVM}) should be represented using as few vectors
	as possible. {T}his problem has been tackled in different ways. {I}n
	this article, we develop an explicit solution in the case of a general
	quadratic kernel $k(x, x') = (C + D x^T x')^2$. {F}or a given number
	of vectors, this solution provides the best possible approximation
	and can even recover the discriminant function if the number of used
	vectors is large enough. {T}he key idea is to express the inhomogeneous
	kernel as a homogeneous kernel on a space having one dimension more
	than the original one and to follow the approach of {B}urges (1996).}
}

@article{Thimm2004Comparison,
  author = {M. Thimm and A. Goede and S. Hougardy and R. Preissner},
  title = {{C}omparison of 2{D} similarity and 3{D} superposition. {A}pplication
	to searching a conformational drug database.},
  journal = {J Chem Inf Comput Sci},
  year = {2004},
  volume = {44},
  pages = {1816--1822},
  number = {5},
  abstract = {In a database of about 2000 approved drugs, represented by $10^5$ structural
	conformers, we have performed 2D comparisons (Tanimoto coefficients)
	and 3D superpositions. For one class of drugs the correlation between
	structural resemblance and similar action was analyzed in detail.
	In general Tanimoto coefficients and 3D scores give similar results,
	but we find that 2D similarity measures neglect important structural/functional
	features. Examples for both over- and underestimation of similarity
	by 2D metrics are discussed. The required additional effort for 3D
	superpositions is assessed by implementation of a fast algorithm
	with a processing time below 0.01 s and a more sophisticated approach
	(0.5 s per superposition). According to the improvement of similarity
	detection compared to 2D screening and the pleasant rapidity on a
	desktop PC, full-atom 3D superposition will be an upcoming method
	of choice for library prioritization or similarity screening approaches.},
  doi = {10.1021/ci049920h},
  keywords = {Arabidopsis, Carbohydrates, Circadian Rhythm, Comparative Study, Database
	Management Systems, Gene Expression Regulation, Genes, Genome, Messenger,
	Molecular Conformation, Mutation, Nitrogen, Non-U.S. Gov't, Oligonucleotide
	Array Sequence Analysis, Pharmaceutical Preparations, Plant, RNA,
	Research Support},
  owner = {mahe},
  pmid = {15446841},
  timestamp = {2006.08.22},
  url = {http://dx.doi.org/10.1021/ci049920h}
}

@article{Thissen2004Multivariate,
  author = {Uwe Thissen and B{\"u}lent Ust{\"u}n and Willem J Melssen and Lutgarde
	M C Buydens},
  title = {Multivariate calibration with least-squares support vector machines.},
  journal = {Anal {C}hem},
  year = {2004},
  volume = {76},
  pages = {3099--3105},
  number = {11},
  month = {Jun},
  abstract = {This paper proposes the use of least-squares support vector machines
	({LS}-{SVM}s) as a relatively new nonlinear multivariate calibration
	method, capable of dealing with ill-posed problems. {LS}-{SVM}s are
	an extension of "traditional" {SVM}s that have been introduced recently
	in the field of chemistry and chemometrics. {T}he advantages of {SVM}-based
	methods over many other methods are that these lead to global models
	that are often unique, and nonlinear regression can be performed
	easily as an extension to linear regression. {A}n additional advantage
	of {LS}-{SVM} (compared to {SVM}) is that model calculation and optimization
	can be performed relatively fast. {A}s a test case to study the use
	of {LS}-{SVM}, the well-known and important chemical problem is considered
	in which spectra are affected by nonlinear interferences. {A}s one
	specific example, a commonly used case is studied in which near-infrared
	spectra are affected by temperature-induced spectral variation. {U}sing
	this test case, model optimization, pruning, and model interpretation
	of the {LS}-{SVM} have been demonstrated. {F}urthermore, excellent
	performance of the {LS}-{SVM}, compared to other approaches, has
	been presented on the specific example. {T}herefore, it can be concluded
	that {LS}-{SVM}s can be seen as very promising techniques to solve
	ill-posed problems. {F}urthermore, these have been shown to lead
	to robust models in cases of spectral variations due to nonlinear
	interferences.},
  doi = {10.1021/ac035522m},
  pdf = {../local/Thissen2004Multivariate.pdf},
  file = {Thissen2004Multivariate.pdf:local/Thissen2004Multivariate.pdf:PDF},
  url = {http://dx.doi.org/10.1021/ac035522m}
}

@article{ThomasDLF2001,
  author = {Thomas, R. and Kaufman, M.},
  title = {Multistationarity, the basis of cell differentiation and memory.
	{II}. {L}ogical analysis of regulatory networks in terms of feedback
	circuits},
  journal = {Chaos},
  year = {2001},
  volume = {11},
  pages = {180--195},
  number = {1},
  abstract = {Circuits and their involvement in complex dynamics are described in
	differential terms in {P}art {I} of this work. {H}ere, we first explain
	why it may be appropriate to use a logical description, either by
	itself or in symbiosis with the differential description. {T}he major
	problem of a logical description is to find an adequate way to involve
	time. {T}he procedure we adopted differs radically from the classical
	one by its fully asynchronous character. {I}n {S}ec. {II} we describe
	our "naive" logical approach, and use it to illustrate the major
	laws of circuitry (namely, the involvement of positive circuits in
	multistationarity and of negative circuits in periodicity) and in
	a biological example. {A}lready in the naive description, the major
	steps of the logical description are to: (i) describe a model as
	a set of logical equations, (ii) derive the state table from the
	equations, (iii) derive the graph of the sequences of states from
	the state table, and (iv) determine which of the possible pathways
	will be actually followed in terms of time delays. {I}n the following
	sections we consider multivalued variables where required, the introduction
	of logical parameters and of logical values ascribed to the thresholds,
	and the concept of characteristic state of a circuit. {T}his generalized
	logical description provides an image whose qualitative fit with
	the differential description is quite remarkable. {A} major interest
	of the generalized logical description is that it implies a limited
	and often quite small number of possible combinations of values of
	the logical parameters. {T}he space of the logical parameters is
	thus cut into a limited number of boxes, each of which is characterized
	by a defined qualitative behavior of the system. {O}ur analysis tells
	which constraints on the logical parameters must be fulfilled in
	order for any circuit (or combination of circuits) to be functional.
	{F}unctionality of a circuit will result in multistationarity (in
	the case of a positive circuit) or in a cycle (in the case of a negative
	circuit). {T}he last sections deal with "more about time delays"
	and "reverse logic," an approach that aims to proceed rationally
	from facts to models. (c) 2001 {A}merican {I}nstitute of {P}hysics.}
}

@article{Thomassen2007Comparison,
  author = {Thomassen, M. and Tan, Q. and Eiriksdottir, F. and Bak, M. and Cold,
	S. and Kruse, T. A.},
  title = {Comparison of gene sets for expression profiling: prediction of metastasis
	from low-malignant breast cancer},
  journal = {Clinical Cancer Research},
  year = {2007},
  volume = {13},
  pages = {5355--5360},
  number = {18},
  publisher = {AACR}
}

@article{Thompson2011properties,
  author = {John F Thompson and Patrice M Milos},
  title = {The properties and applications of single-molecule {DNA} sequencing.},
  journal = {Genome Biol},
  year = {2011},
  volume = {12},
  pages = {217},
  number = {2},
  month = {Feb},
  abstract = {Single-molecule sequencing enables DNA or RNA to be sequenced
	directly from biological samples, making it well-suited for diagnostic
	and clinical applications. Here we review the properties and applications
	of this rapidly evolving and promising technology.},
  doi = {10.1186/gb-2011-12-2-217},
  institution = {Helicos BioSciences Corporation, Building 200LL, One Kendall Square,
	Cambridge, MA 02139, USA. jthompson@helicosbio.com.},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {phupe},
  pii = {gb-2011-12-2-217},
  pmid = {21349208},
  timestamp = {2011.06.01},
  url = {http://dx.doi.org/10.1186/gb-2011-12-2-217}
}

@article{Thukral2005Prediction,
  author = {Sushil K Thukral and Paul J Nordone and Rong Hu and Leah Sullivan
	and Eric Galambos and Vincent D Fitzpatrick and Laura Healy and Michael
	B Bass and Mary E Cosenza and Cynthia A Afshari},
  title = {Prediction of nephrotoxicant action and identification of candidate
	toxicity-related biomarkers.},
  journal = {Toxicol {P}athol},
  year = {2005},
  volume = {33},
  pages = {343--355},
  number = {3},
  abstract = {A vast majority of pharmacological compounds and their metabolites
	are excreted via the urine, and within the complex structure of the
	kidney, the proximal tubules are a main target site of nephrotoxic
	compounds. {W}e used the model nephrotoxicants mercuric chloride,
	2-bromoethylamine hydrobromide, hexachlorobutadiene, mitomycin, amphotericin,
	and puromycin to elucidate time- and dose-dependent global gene expression
	changes associated with proximal tubular toxicity. {M}ale {S}prague-{D}awley
	rats were dosed via intraperitoneal injection once daily for mercuric
	chloride and amphotericin (up to 7 doses), while a single dose was
	given for all other compounds. {A}nimals were exposed to 2 different
	doses of these compounds and kidney tissues were collected on day
	1, 3, and 7 postdosing. {G}ene expression profiles were generated
	from kidney {RNA} using 17{K} rat c{DNA} dual dye microarray and
	analyzed in conjunction with histopathology. {A}nalysis of gene expression
	profiles showed that the profiles clustered based on similarities
	in the severity and type of pathology of individual animals. {F}urther,
	the expression changes were indicative of tubular toxicity showing
	hallmarks of tubular degeneration/regeneration and necrosis. {U}se
	of gene expression data in predicting the type of nephrotoxicity
	was then tested with a support vector machine ({SVM})-based approach.
	{A} {SVM} prediction module was trained using 120 profiles of total
	profiles divided into four classes based on the severity of pathology
	and clustering. {A}lthough mitomycin {C} and amphotericin {B} treatments
	did not cause toxicity, their expression profiles were included in
	the {SVM} prediction module to increase the sample size. {U}sing
	this classifier, the {SVM} predicted the type of pathology of 28
	test profiles with 100\% selectivity and 82\% sensitivity. {T}hese
	data indicate that valid predictions could be made based on gene
	expression changes from a small set of expression profiles. {A} set
	of potential biomarkers showing a time- and dose-response with respect
	to the progression of proximal tubular toxicity were identified.
	{T}hese include several transporters ({S}lc21a2, {S}lc15, {S}lc34a2),
	{K}im 1, {IGF}bp-1, osteopontin, alpha-fibrinogen, and {G}stalpha.},
  doi = {10.1080/01926230590927230},
  keywords = {Algorithms, Animals, Antibiotics, Antineoplastic, Artificial Intelligence,
	Butadienes, Chloroplasts, Comparative Study, Computer Simulation,
	Computer-Assisted, Diagnosis, Disinfectants, Dose-Response Relationship,
	Drug, Drug Toxicity, Electrodes, Electroencephalography, Ethylamines,
	Expert Systems, Feedback, Fungicides, Gene Expression Profiling,
	Genes, Genetic Markers, Humans, Implanted, Industrial, Information
	Storage and Retrieval, Kidney, Kidney Tubules, MEDLINE, Male, Mercuric
	Chloride, Microarray Analysis, Molecular Biology, Motor Cortex, Movement,
	Natural Language Processing, Neural Networks (Computer), Non-P.H.S.,
	Non-U.S. Gov't, Plant Proteins, Predictive Value of Tests, Proteins,
	Proteome, Proximal, Puromycin Aminonucleoside, Rats, Reproducibility
	of Results, Research Support, Sprague-Dawley, Subcellular Fractions,
	Terminology, Therapy, Time Factors, Toxicogenetics, U.S. Gov't, User-Computer
	Interface},
  pii = {X3U2206L2747H31G},
  pmid = {15805072},
  url = {http://dx.doi.org/10.1080/01926230590927230}
}

@article{Tian2005Discovering,
  author = {Tian, L. and Greenberg, S. A. and Kong, S. W. and Altschuler, J. and
	Kohane, I. S. and Park, P. J.},
  title = {Discovering statistically significant pathways in expression profiling
	studies.},
  journal = {Proc Natl Acad Sci U S A},
  year = {2005},
  volume = {102},
  pages = {13544--13549},
  number = {38},
  month = {Sep},
  abstract = {Accurate and rapid identification of perturbed pathways through the
	analysis of genome-wide expression profiles facilitates the generation
	of biological hypotheses. We propose a statistical framework for
	determining whether a specified group of genes for a pathway has
	a coordinated association with a phenotype of interest. Several issues
	on proper hypothesis-testing procedures are clarified. In particular,
	it is shown that the differences in the correlation structure of
	each set of genes can lead to a biased comparison among gene sets
	unless a normalization procedure is applied. We propose statistical
	tests for two important but different aspects of association for
	each group of genes. This approach has more statistical power than
	currently available methods and can result in the discovery of statistically
	significant pathways that are not detected by other methods. This
	method is applied to data sets involving diabetes, inflammatory myopathies,
	and Alzheimer's disease, using gene sets we compiled from various
	public databases. In the case of inflammatory myopathies, we have
	correctly identified the known cytotoxic T lymphocyte-mediated autoimmunity
	in inclusion body myositis. Furthermore, we predicted the presence
	of dendritic cells in inclusion body myositis and of an IFN-alpha/beta
	response in dermatomyositis, neither of which was previously described.
	These predictions have been subsequently corroborated by immunohistochemistry.},
  doi = {10.1073/pnas.0506577102},
  pdf = {../local/Tian2005Discovering.pdf},
  file = {Tian2005Discovering.pdf:Tian2005Discovering.pdf:PDF},
  institution = {Department of Preventive Medicine, Feinberg School of Medicine, Northwestern
	University, 680 North Lake Shore Drive, Chicago, IL 60611, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {0506577102},
  pmid = {16174746},
  timestamp = {2011.08.07},
  url = {http://dx.doi.org/10.1073/pnas.0506577102}
}

@article{Tian2004novel,
  author = {Liang Tian and Afzel Noore},
  title = {A novel approach for short-term load forecasting using support vector
	machines.},
  journal = {Int {J} {N}eural {S}yst},
  year = {2004},
  volume = {14},
  pages = {329--335},
  number = {5},
  month = {Oct},
  abstract = {A support vector machine ({SVM}) modeling approach for short-term
	load forecasting is proposed. {T}he {SVM} learning scheme is applied
	to the power load data, forcing the network to learn the inherent
	internal temporal property of power load sequence. {W}e also study
	the performance when other related input variables such as temperature
	and humidity are considered. {T}he performance of our proposed {SVM}
	modeling approach has been tested and compared with feed-forward
	neural network and cosine radial basis function neural network approaches.
	{N}umerical results show that the {SVM} approach yields better generalization
	capability and lower prediction error compared to those neural network
	approaches.},
  pii = {S0129065704002078}
}

@article{Tibshirani1997lasso,
  author = {Tibshirani, R.},
  title = {The lasso method for variable selection in the {Cox} model.},
  journal = {Stat. Med.},
  year = {1997},
  volume = {16},
  pages = {385--395},
  number = {4},
  month = {Feb},
  abstract = {I propose a new method for variable selection and shrinkage in Cox's
	proportional hazards model. My proposal minimizes the log partial
	likelihood subject to the sum of the absolute values of the parameters
	being bounded by a constant. Because of the nature of this constraint,
	it shrinks coefficients and produces some coefficients that are exactly
	zero. As a result it reduces the estimation variance while providing
	an interpretable final model. The method is a variation of the 'lasso'
	proposal of Tibshirani, designed for the linear regression context.
	Simulations indicate that the lasso can be more accurate than stepwise
	selection in this setting.},
  doi = {10.1002/(SICI)1097-0258(19970228)16:4<385::AID-SIM380>3.0.CO;2-3},
  pdf = {../local/Tibshirani1997lasso.pdf},
  file = {Tibshirani1997lasso.pdf:Tibshirani1997lasso.pdf:PDF},
  institution = {Department of Preventive Medicine and Biostatistics, University of
	Toronto, Ontario, Canada.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {3.0.CO;2-3},
  pmid = {9044528},
  timestamp = {2010.01.10}
}

@article{Tibshirani1996Regression,
  author = {Tibshirani, R.},
  title = {Regression shrinkage and selection via the lasso},
  journal = {J. R. Stat. Soc. Ser. B},
  year = {1996},
  volume = {58},
  pages = {267--288},
  number = {1}
}

@article{Tibshirani2005Sparsity,
  author = {Tibshirani, R. and Saunders, M. and Rosset, S. and Zhu, J. and Knight,
	K.},
  title = {Sparsity and smoothness via the fused lasso},
  journal = {J. R. Stat. Soc. Ser. B Stat. Methodol.},
  year = {2005},
  volume = {67},
  pages = {91--108},
  number = {1},
  doi = {10.1111/j.1467-9868.2005.00490.x},
  url = {http://ideas.repec.org/a/bla/jorssb/v67y2005i1p91-108.html}
}

@article{Tibshirani2001Estimating,
  author = {Tibshirani, R. and Walther, G. and Hastie, T.},
  title = {Estimating the number of clusters in a data set via the gap statistic},
  journal = {J. R. Stat. Soc. Ser. B},
  year = {2001},
  volume = {63},
  pages = {411--423},
  number = {2},
  owner = {jp},
  timestamp = {2011.12.30}
}

@article{Tibshirani2008Spatial,
  author = {Tibshirani, R. and Wang, P.},
  title = {Spatial smoothing and hot spot detection for {CGH} data using the fused
	lasso.},
  journal = {Biostatistics},
  year = {2008},
  volume = {9},
  pages = {18--29},
  number = {1},
  month = {Jan},
  abstract = {We apply the "fused lasso" regression method of (TSRZ2004) to the
	problem of "hot-spot detection", in particular, detection of regions
	of gain or loss in comparative genomic hybridization (CGH) data.
	The fused lasso criterion leads to a convex optimization problem,
	and we provide a fast algorithm for its solution. Estimates of false-discovery
	rate are also provided. Our studies show that the new method generally
	outperforms competing methods for calling gains and losses in CGH
	data.},
  institution = {Department of Health, Stanford University Stanford, CA 94305, USA.
	tibs@stat.stanford.edu},
  citeulike-article-id = {2744846},
  citeulike-linkout-0 = {http://dx.doi.org/10.1093/biostatistics/kxm013},
  citeulike-linkout-1 = {http://biostatistics.oxfordjournals.org/cgi/content/abstract/9/1/18},
  citeulike-linkout-2 = {http://view.ncbi.nlm.nih.gov/pubmed/17513312},
  citeulike-linkout-3 = {http://www.hubmed.org/display.cgi?uids=17513312},
  doi = {10.1093/biostatistics/kxm013},
  issn = {1465-4644},
  keywords = {copy\_number},
  posted-at = {2008-05-02 10:45:47},
  priority = {4},
  url = {http://dx.doi.org/10.1093/biostatistics/kxm013}
}

@article{Tiffin2005Integration,
  author = {Tiffin, N. and Kelso, J. F. and Powell, A. R. and Pan, H. and Bajic,
	V. B. and Hide, W. A.},
  title = {Integration of text- and data-mining using ontologies successfully
	selects disease gene candidates},
  journal = {Nucleic Acids Res.},
  year = {2005},
  volume = {33},
  number = {5},
  pages = {1544--1552},
  doi = {10.1093/nar/gki296},
  url = {http://dx.doi.org/10.1093/nar/gki296},
  pmid = {15767279},
  pii = {33/5/1544},
  institution = {South African National Bioinformatics Institute, University of the
	Western Cape Belville 7535, South Africa. nicki@sanbi.ac.za},
  pdf = {../local/Tiffin2005Integration.pdf},
  file = {Tiffin2005Integration.pdf:Tiffin2005Integration.pdf:PDF},
  owner = {jp},
  timestamp = {2009.03.18},
  abstract = {Genome-wide techniques such as microarray analysis, Serial Analysis
	of Gene Expression (SAGE), Massively Parallel Signature Sequencing
	(MPSS), linkage analysis and association studies are used extensively
	in the search for genes that cause diseases, and often identify many
	hundreds of candidate disease genes. Selection of the most probable
	of these candidate disease genes for further empirical analysis is
	a significant challenge. Additionally, identifying the genes that
	cause complex diseases is problematic due to low penetrance of multiple
	contributing genes. Here, we describe a novel bioinformatic approach
	that selects candidate disease genes according to their expression
	profiles. We use the eVOC anatomical ontology to integrate text-mining
	of biomedical literature and data-mining of available human gene
	expression data. To demonstrate that our method is successful and
	widely applicable, we apply it to a database of 417 candidate genes
	containing 17 known disease genes. We successfully select the known
	disease gene for 15 out of 17 diseases and reduce the candidate gene
	set to 63.3\% (+/-18.8\%) of its original size. This approach facilitates
	direct association between genomic data describing gene expression
	and information from biomedical texts describing disease phenotype,
	and successfully prioritizes candidate genes according to their expression
	in disease-affected tissues.}
}

@article{Tikhonov1963Solution,
  author = {Tikhonov, A. N.},
  title = {Solution of incorrectly formulated problems and the regularization method},
  journal = {{S}oviet {M}athematics {D}oklady},
  year = {1963},
  volume = {4},
  pages = {1035--1038}
}

@article{Tikhonov1943stability,
  author = {Tikhonov, A. N.},
  title = {On the stability of inverse problems},
  journal = {{D}oklady {A}kademii nauk {SSSR}},
  year = {1943},
  volume = {39},
  pages = {195--198},
  number = {5}
}

@book{Tikhonov1977Solutions,
  title = {Solutions of ill-posed problems},
  publisher = {V. H. Winston \& Sons},
  year = {1977},
  author = {Tikhonov, A. N. and Arsenin, V. Y.},
  address = {Washington, D.C.},
  subject = {ml}
}

@inproceedings{Tillmann06Efficient,
  author = {Tillmann, C.},
  title = {Efficient Dynamic Programming Search Algorithms For Phrase-Based
	{SMT}},
  booktitle = {Workshop On Computationally Hard Problems And Joint Inference In
	Speech And Language Processing},
  year = {2006},
  url = {http://www.aclweb.org/anthology-new/W/W06/W06-3602.pdf}
}

@article{Tillman03Word,
  author = {Tillmann, C. and Ney, H.},
  title = {Word reordering and a dynamic programming beam search algorithm for
	statistical machine translation},
  journal = {Comput. Linguist.},
  year = {2003},
  volume = {29},
  pages = {97--133},
  number = {1},
  address = {Cambridge, MA, USA},
  doi = {10.1162/089120103321337458},
  issn = {0891-2017},
  publisher = {MIT Press}
}

@article{Tjalkens1992universal,
  author = {Tjalkens, T. J. and Willems, F. M. J.},
  title = {A universal variable-to-fixed length source code based on {Lawrence}'s
	algorithm},
  journal = {{IEEE} Trans. Inform. Theory},
  year = {1992},
  volume = {38},
  pages = {247--253},
  number = {2},
  month = {Mar},
  abstract = {It is shown that the modified {L}awrence algorithm is universal over
	the class of binary memoryless sources and that the rate converges
	asymptotically optimally fast to the source entropy. {I}t is proven
	that no codes exist that have a better asymptotic performance. {T}he
	asymptotic bounds show that universal variable-to-fixed-length codes
	can have a significantly lower redundancy than universal fixed-to-variable-length
	codes with the same number of codewords},
  pdf = {../local/Tjalkens1992universal.pdf},
  file = {Tjalkens1992universal.pdf:local/Tjalkens1992universal.pdf:PDF},
  keywords = {information-theory source-coding},
  owner = {vert}
}

@inproceedings{Tjalkens1997Implementing,
  author = {Tj.J. Tjalkens and F. M. J. Willems},
  title = {Implementing the {C}ontext-{T}ree {W}eighting {M}ethod: {A}rithmetic
	{C}oding},
  booktitle = {Int. {C}onf. on {C}ombinatorics, {I}nformation {T}heory and {S}tatistics},
  year = {1997},
  pages = {83},
  address = {Portland, Maine, U.S.A},
  month = {Jul},
  day = {18-20},
  pdf = {../local/tjal97.pdf},
  file = {tjal97.pdf:local/tjal97.pdf:PDF},
  subject = {it},
  url = {http://ei1.ei.ele.tue.nl/~frans/maine.ps}
}

@article{Tjong2012Physical,
  author = {Tjong, H. and Gong, K. and Chen, L. and Alber, F.},
  title = {Physical tethering and volume exclusion determine higher-order genome
	organization in budding yeast.},
  journal = {Genome Res.},
  year = {2012},
  volume = {22},
  pages = {1295--1305},
  number = {7},
  month = {Jul},
  abstract = {In this paper we show that tethering of heterochromatic regions to
	nuclear landmarks and random encounters of chromosomes in the confined
	nuclear volume are sufficient to explain the higher-order organization
	of the budding yeast genome. We have quantitatively characterized
	the contact patterns and nuclear territories that emerge when chromosomes
	are allowed to behave as constrained but otherwise randomly configured
	flexible polymer chains in the nucleus. Remarkably, this constrained
	random encounter model explains in a statistical manner the experimental
	hallmarks of the S. cerevisiae genome organization, including (1)
	the folding patterns of individual chromosomes; (2) the highly enriched
	interactions between specific chromatin regions and chromosomes;
	(3) the emergence, shape, and position of gene territories; (4) the
	mean distances between pairs of telomeres; and (5) even the co-location
	of functionally related gene loci, including early replication start
	sites and tRNA genes. Therefore, most aspects of the yeast genome
	organization can be explained without calling on biochemically mediated
	chromatin interactions. Such interactions may modulate the pre-existing
	propensity for co-localization but seem not to be the cause for the
	observed higher-order organization. The fact that geometrical constraints
	alone yield a highly organized genome structure, on which different
	functional elements are specifically distributed, has strong implications
	for the folding principles of the genome and the evolution of its
	function.},
  doi = {10.1101/gr.129437.111},
  pdf = {../local/Tjong2012Physical.pdf},
  file = {Tjong2012Physical.pdf:Tjong2012Physical.pdf:PDF},
  institution = {Molecular and Computational Biology, Department of Biological Sciences,
	University of Southern California, Los Angeles, California 90089,
	USA;},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {jp},
  pii = {gr.129437.111},
  pmid = {22619363},
  timestamp = {2012.06.24},
  url = {http://dx.doi.org/10.1101/gr.129437.111}
}

@article{Tobita2005discriminant,
  author = {Tobita, M. and Nishikawa, T. and Nagashima, R.},
  title = {A discriminant model constructed by the support vector machine method
	for {HERG} potassium channel inhibitors.},
  journal = {Bioorg. {M}ed. {C}hem. {L}ett.},
  year = {2005},
  volume = {15},
  pages = {2886--2890},
  number = {11},
  month = {Jun},
  abstract = {{HERG} attracts attention as a risk factor for arrhythmia, which might
	trigger torsade de pointes. {A} highly accurate classifier of chemical
	compounds for inhibition of the {HERG} potassium channel is constructed
	using support vector machine. {F}or two test sets, our discriminant
	models achieved 90\% and 95\% accuracy, respectively. {T}he classifier
	is even applied for the prediction of cardio vascular adverse effects
	to achieve about 70\% accuracy. {W}hile modest inhibitors are partly
	characterized by properties linked to global structure of a molecule
	including hydrophobicity and diameter, strong inhibitors are exclusively
	characterized by properties linked to substructures of a molecule.},
  doi = {10.1016/j.bmcl.2005.03.080},
  pdf = {../local/Tobita2005discriminant.pdf},
  file = {Tobita2005discriminant.pdf:local/Tobita2005discriminant.pdf:PDF},
  keywords = {biosvm chemoinformatics herg},
  pii = {S0960-894X(05)00403-8},
  url = {http://dx.doi.org/10.1016/j.bmcl.2005.03.080}
}

@book{Todeschini2002Handbook,
  author = {Todeschini, R. and Consonni, V.},
  title = {Handbook of Molecular Descriptors},
  publisher = {Wiley-VCH},
  address = {New York},
  year = {2002},
  keywords = {chemoinformatics},
  timestamp = {2007.09.03}
}

@article{Tomari2005Perspective,
  author = {Tomari, Y. and Zamore, P. D.},
  title = {Perspective: machines for {RNA}i.},
  journal = {Genes {D}ev.},
  year = {2005},
  volume = {19},
  pages = {517--529},
  number = {5},
  month = {Mar},
  abstract = {{RNA} silencing pathways convert the sequence information in long
	{RNA}, typically double-stranded {RNA}, into approximately 21-nt
	{RNA} signaling molecules such as small interfering {RNA}s (si{RNA}s)
	and micro{RNA}s (mi{RNA}s). si{RNA}s and mi{RNA}s provide specificity
	to protein effector complexes that repress m{RNA} transcription or
	translation, or catalyze m{RNA} destruction. {H}ere, we review our
	current understanding of how small {RNA}s are produced, how they
	are loaded into protein complexes, and how they repress gene expression.},
  doi = {10.1101/gad.1284105},
  keywords = {sirna},
  pii = {19/5/517},
  url = {http://dx.doi.org/10.1101/gad.1284105}
}

@article{Tomfohr2005Pathway,
  author = {Tomfohr, J. and Lu, J. and Kepler, T. B.},
  title = {Pathway level analysis of gene expression using singular value decomposition.},
  journal = {BMC Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {225},
  abstract = {A promising direction in the analysis of gene expression focuses on
	the changes in expression of specific predefined sets of genes that
	are known in advance to be related (e.g., genes coding for proteins
	involved in cellular pathways or complexes). Such an analysis can
	reveal features that are not easily visible from the variations in
	the individual genes and can lead to a picture of expression that
	is more biologically transparent and accessible to interpretation.
	In this article, we present a new method of this kind that operates
	by quantifying the level of 'activity' of each pathway in different
	samples. The activity levels, which are derived from singular value
	decompositions, form the basis for statistical comparisons and other
	applications. We demonstrate our approach using expression data from
	a study of type 2 diabetes and another of the influence of cigarette
	smoke on gene expression in airway epithelia. A number of interesting
	pathways are identified in comparisons between smokers and non-smokers
	including ones related to nicotine metabolism, mucus production,
	and glutathione metabolism. A comparison with results from the related
	approach, 'gene-set enrichment analysis', is also provided. Our method
	offers a flexible basis for identifying differentially expressed
	pathways from gene expression data. The results of a pathway-based
	analysis can be complementary to those obtained from one more focused
	on individual genes. A web program PLAGE (Pathway Level Analysis
	of Gene Expression) for performing the kinds of analyses described
	here is accessible at http://dulci.biostat.edu/pathways.},
  doi = {10.1186/1471-2105-6-225},
  pdf = {../local/Tomfohr2005Pathway.pdf},
  file = {Tomfohr2005Pathway.pdf:Tomfohr2005Pathway.pdf:PDF},
  institution = {Department of Biostatistics and Bioinformatics, Center for Bioinformatics
	and Computational Biology, Institute for Genome Sciences and Policy,
	Duke University, Durham, North Carolina 27708, USA. tomfohr@duke.edu},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-6-225},
  pmid = {16156896},
  timestamp = {2011.08.07},
  url = {http://dx.doi.org/10.1186/1471-2105-6-225}
}

@techreport{Tomioka2008Sparse,
  author = {Tomioka, R. and Sugiyama, M.},
  title = {Sparse learning with duality gap guarantee},
  institution = {Department of Computer Science; Graduate School of Information Science
	and Engineering, Tokyo Institute of Technology, 152-8552, Tokyo,
	Japan},
  year = {2008},
  note = {NIPS 2008 Workshop on Optimization for Machine Learning (OPT2008)}
}

@article{Tomioka2011Super,
  author = {Tomioka, R. and Suzuki, T. and Sugiyama, M.},
  title = {Super-Linear Convergence of Dual Augmented-{Lagrangian} Algorithm for
	Sparsity Regularized Estimation},
  journal = {J. Mach. Learn. Res.},
  year = {2011},
  volume = {12},
  pages = {1537--1586},
  pdf = {../local/Tomioka2011Super.pdf},
  file = {Tomioka2011Super.pdf:Tomioka2011Super.pdf:PDF}
}

@article{Tomita1999Bioinformatics,
  author = {Tomita, M. and Hashimoto, K. and Takahashi, K. and Shimizu, T. S.
	and Matsuzaki, Y. and Miyoshi, F. and Saito, K. and Tanida, S. and
	Yugi, K. and Venter, J. C. and Hutchison, C. A.},
  title = {{E-CELL}: software environment for whole-cell simulation},
  journal = {Bioinformatics},
  year = {1999},
  volume = {15},
  pages = {72--84},
  number = {1},
  doi = {10.1093/bioinformatics/15.1.72},
  eprint = {http://bioinformatics.oxfordjournals.org/cgi/reprint/15/1/72.pdf},
  keywords = {csbcbook},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/15/1/72}
}

@article{Tomizaki2010Protein,
  author = {{Kin-ya} Tomizaki and Kenji Usui and Hisakazu Mihara},
  title = {Protein-protein interactions and selection: array-based techniques
	for screening disease-associated biomarkers in predictive/early diagnosis.},
  journal = {FEBS J},
  year = {2010},
  month = {May},
  volume = {277},
  number = {9},
  pages = {1996--2005},
  doi = {10.1111/j.1742-4658.2010.07626.x},
  url = {http://dx.doi.org/10.1111/j.1742-4658.2010.07626.x},
  pmid = {20412053},
  pii = {EJB7626},
  institution = {Innovative Materials and Processing Research Center and Department
	of Materials Chemistry, Ryukoku University, Otsu, Japan.},
  keywords = {Animals; Biological Markers, analysis/metabolism; Early Diagnosis;
	Humans; Mass Screening, methods; Protein Array Analysis, methods;
	Proteins, analysis/metabolism; Risk Factors},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  timestamp = {2010.07.28},
  abstract = {There has been considerable interest in recent years in the development
	of miniaturized and parallelized array technology for protein-protein
	interaction analysis and protein profiling, namely 'protein-detecting
	microarrays'. Protein-detecting microarrays utilize a wide variety
	of capture agents (antibodies, fusion proteins, DNA/RNA aptamers,
	synthetic peptides, carbohydrates, and small molecules) immobilized
	at high spatial density on a solid surface. Each capture agent binds
	selectively to its target protein in a complex mixture, such as serum
	or cell lysate samples. Captured proteins are subsequently detected
	and quantified in a high-throughput fashion, with minimal sample
	consumption. Protein-detecting microarrays were first described by
	MacBeath and Schreiber in 2000, and the number of publications involving
	this technology is rapidly increasing. Furthermore, the first multiplex
	immunoassay systems have been cleared by the US Food and Drug Administration,
	signaling recognition of the usefulness of miniaturized and parallelized
	array technology for protein detection in predictive/early diagnosis.
	Although genetic tests still predominate, with further development
	protein-based diagnosis will become common in clinical use within
	a few years.}
}

@article{Tomlins2007Integrative,
  author = {Scott A Tomlins and Rohit Mehra and Daniel R Rhodes and Xuhong Cao
	and Lei Wang and Saravana M Dhanasekaran and Shanker Kalyana-Sundaram
	and John T Wei and Mark A Rubin and Kenneth J Pienta and Rajal B
	Shah and Arul M Chinnaiyan},
  title = {Integrative molecular concept modeling of prostate cancer progression.},
  journal = {Nat. Genet.},
  year = {2007},
  month = {Jan},
  volume = {39},
  number = {1},
  pages = {41--51},
  doi = {10.1038/ng1935},
  url = {http://dx.doi.org/10.1038/ng1935},
  pmid = {17173048},
  pii = {ng1935},
  institution = {Department of Pathology, University of Michigan Medical School, Ann
	Arbor, Michigan 48109, USA.},
  pdf = {../local/Tomlins2007Integrative.pdf},
  file = {Tomlins2007Integrative.pdf:Tomlins2007Integrative.pdf:PDF},
  owner = {laurent},
  timestamp = {2008.10.26},
  abstract = {Despite efforts to profile prostate cancer, the genetic alterations
	and biological processes that correlate with the observed histological
	progression are unclear. Using laser-capture microdissection to isolate
	101 cell populations, we have profiled prostate cancer progression
	from benign epithelium to metastatic disease. By analyzing expression
	signatures in the context of over 14,000 'molecular concepts', or
	sets of biologically connected genes, we generated an integrative
	model of progression. Molecular concepts that demarcate critical
	transitions in progression include protein biosynthesis, E26 transformation-specific
	(ETS) family transcriptional targets, androgen signaling and cell
	proliferation. Of note, relative to low-grade prostate cancer (Gleason
	pattern 3), high-grade cancer (Gleason pattern 4) shows an attenuated
	androgen signaling signature, similar to metastatic prostate cancer,
	which may reflect dedifferentiation and explain the clinical association
	of grade with prognosis. Taken together, these data show that analyzing
	gene expression signatures in the context of a compendium of molecular
	concepts is useful in understanding cancer biology.}
}

@article{Tommaso2003Steady-state,
  author = {Marina de Tommaso and Sebastiano Stramaglia and Jan Mathijs Schoffelen
	and Marco Guido and Giuseppe Libro and Luciana Losito and Vittorio
	Sciruicchio and Michele Sardaro and Mario Pellicoro and Franco Michele
	Puca},
  title = {Steady-state visual evoked potentials in the low frequency range
	in migraine: a study of habituation and variability phenomena.},
  journal = {Int {J} {P}sychophysiol},
  year = {2003},
  volume = {49},
  pages = {165--174},
  number = {2},
  month = {Aug},
  abstract = {Previous studies have revealed that migraine patients display an increased
	photic driving to flash stimuli in the medium frequency range. {T}he
	aim of this study was to perform a topographic analysis of steady-state
	visual evoked potentials ({SVEP}s) in the low frequency range (3-9
	{H}z), evaluating the temporal behaviour of the {F}1 amplitude by
	investigating habituation and variability phenomena. {T}he main component
	of {SVEP}s, the {F}1, demonstrated an increased amplitude in several
	channels at 3 {H}z. {B}ehaviour of {F}1 amplitude was rather variable
	over time, and the wavelet-transform standard deviation was increased
	in migraine patients at a low stimulus rate. {T}he discriminative
	value of the {F}1 mean amplitude and variability index, tested by
	both an artificial neural network classifier and a support vector
	machine, were high according to both methods. {T}he increased photic
	driving in migraine should be subtended by a more generic abnormality
	of visual reactivity instead of a selective impairment of a visual
	subsystem. {T}emporal behaviour of {SVEP}s is not influenced by a
	clear tendency to habituation, but the {F}1 amplitude seemed to change
	in a complex way, which is better described by variability phenomena.
	{A}n increased variability in response to flicker stimuli in migraine
	patients could be interpreted as an overactive regulation mechanism,
	prone to instability and consequently to headache attacks, whether
	spontaneous or triggered.},
  pii = {S016787600300117X}
}

@article{Tompa2005Assessing,
  author = {Martin Tompa and Nan Li and Timothy L Bailey and George M Church
	and Bart De Moor and Eleazar Eskin and Alexander V Favorov and Martin
	C Frith and Yutao Fu and W James Kent and Vsevolod J Makeev and Andrei
	A Mironov and William Stafford Noble and Giulio Pavesi and Graziano
	Pesole and Mireille R{\'e}gnier and Nicolas Simonis and Saurabh Sinha
	and Gert Thijs and Jacques Van Helden and Mathias Vandenbogaert and
	Zhiping Weng and Christopher Workman and Chun Ye and Zhou Zhu},
  title = {Assessing computational tools for the discovery of transcription
	factor binding sites},
  journal = {Nat. Biotechnol.},
  year = {2005},
  volume = {23},
  pages = {137--144},
  number = {1},
  doi = {10.1038/nbt1053},
  keywords = {csbcbook}
}

@article{Tong2007In,
  author = {Joo Chuan Tong and Tin Wee Tan and Shoba Ranganathan},
  title = {In silico grouping of peptide/{HLA} class {I} complexes using structural
	interaction characteristics.},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {177--183},
  number = {2},
  month = {Jan},
  abstract = {MOTIVATION: Classification of human leukocyte antigen (HLA) proteins
	into supertypes underpins the development of epitope-based vaccines
	with wide population coverage. Current methods for HLA supertype
	definition, based on common structural features of HLA proteins and/or
	their functional binding specificities, leave structural interaction
	characteristics among different HLA supertypes with antigenic peptides
	unexplored. METHODS: We describe the use of structural interaction
	descriptors for the analysis of 68 peptide/HLA class I crystallographic
	structures. Interaction parameters computed include the number of
	intermolecular hydrogen bonds between each HLA protein and its corresponding
	bound peptide, solvent accessibility, gap volume and gap index. RESULTS:
	The structural interactions patterns of peptide/HLA class I complexes
	investigated herein vary among individual alleles and may be grouped
	in a supertype dependent manner. Using the proposed methodology,
	eight HLA class I supertypes were defined based on existing experimental
	crystallographic structures which largely overlaps (77\% consensus)
	with the definitions by binding motifs. This mode of classification,
	which considers conformational information of both peptide and HLA
	proteins, provides an alternative to the characterization of supertypes
	using either peptide or HLA protein information alone.},
  doi = {10.1093/bioinformatics/btl563},
  owner = {laurent},
  pii = {btl563},
  pmid = {17090577},
  timestamp = {2007.01.28},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl563}
}

@article{Tong2006Prediction,
  author = {Tong, J. C. and Zhang, G. L. and Tan, T. W. and August, J. T. and
	Brusic, V. and Ranganathan, S.},
  title = {Prediction of {HLA}-{DQ}3.2beta ligands: evidence of multiple registers
	in class {II} binding peptides.},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {1232--1238},
  number = {10},
  month = {May},
  doi = {10.1093/bioinformatics/btl071},
  keywords = {immunoinformatics},
  pii = {btl071},
  pmid = {16510499},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl071}
}

@article{Topiol2009X-ray,
  author = {Sid Topiol and Michael Sabio},
  title = {X-ray structure breakthroughs in the {GPCR} transmembrane region.},
  journal = {Biochem Pharmacol},
  year = {2009},
  month = {Jul},
  volume = {78},
  number = {1},
  pages = {11--20},
  doi = {10.1016/j.bcp.2009.02.012},
  url = {http://dx.doi.org/10.1016/j.bcp.2009.02.012},
  pmid = {19447219},
  pii = {S0006-2952(09)00113-0},
  institution = {Department of Computational Chemistry, Lundbeck Research USA, Inc.,
	215 College Road, Paramus, NJ 07652, USA.},
  keywords = {Animals; Cell Membrane; Humans; Models, Molecular; Molecular Conformation;
	Pindolol; Propanolamines; Protein Conformation; Receptor, Adenosine
	A2A; Receptors, Adrenergic, beta-2; Receptors, G-Protein-Coupled;
	Retinaldehyde; Rhodopsin; X-Ray Diffraction},
  owner = {ljacob},
  timestamp = {2009.11.09},
  abstract = {G-protein-coupled receptor (GPCR) proteins [Lundstrom KH, Chiu ML,
	editors. G protein-coupled receptors in drug discovery. CRC Press;
	2006] are the single largest drug target, representing 25-50\% of
	marketed drugs [Overington JP, Al-Lazikani B, Hopkins AL. How many
	drug targets are there? Nat Rev Drug Discov 2006;5(12):993-6; Parrill
	AL. Crystal structures of a second G protein-coupled receptor: triumphs
	and implications. ChemMedChem 2008;3:1021-3]. While there are six
	subclasses of GPCR proteins, the hallmark of all GPCR proteins is
	the transmembrane-spanning region. The general architecture of this
	transmembrane (TM) region has been known for some time to contain
	seven alpha-helices. From a drug discovery and design perspective,
	structural information of the GPCRs has been sought as a tool for
	structure-based drug design. The advances in the past decade of technologies
	for structure-based design have proven to be useful in a number of
	areas. Invoking these approaches for GPCR targets has remained challenging.
	Until recently, the most closely related structures available for
	GPCR modeling have been those of bovine rhodopsin. While a representative
	of class A GPCRs, bovine rhodopsin is not a ligand-activated GPCR
	and is fairly distant in sequence homology to other class A GPCRs.
	Thus, there is a variable degree of uncertainty in the use of the
	rhodopsin X-ray structure as a template for homology modeling of
	other GPCR targets. Recent publications of X-ray structures of class
	A GPCRs now offer the opportunity to better understand the molecular
	mechanism of action at the atomic level, to deploy X-ray structures
	directly for their use in structure-based design, and to provide
	more promising templates for many other ligand-mediated GPCRs. We
	summarize herein some of the recent findings in this area and provide
	an initial perspective of the emerging opportunities, possible limitations,
	and remaining questions. Other aspects of the recent X-ray structures
	are described by Weis and Kobilka [Weis WI, Kobilka BK. Structural
	insights into G-protein-coupled receptor activation. Curr Opin Struct
	Biol 2008;18:734-40] and Mustafi and Palczewski [Mustafi D, Palczewski
	K. Topology of class A G protein-coupled receptors: insights gained
	from crystal structures of rhodopsins, adrenergic and adenosine receptors.
	Mol Pharmacol 2009;75:1-12].}
}

@article{Tothill2005expression-based,
  author = {Richard W Tothill and Adam Kowalczyk and Danny Rischin and Alex Bousioutas
	and Izhak Haviv and Ryan K van Laar and Paul M Waring and John Zalcberg
	and Robyn Ward and Andrew V Biankin and Robert L Sutherland and Susan
	M Henshall and Kwun Fong and Jonathan R Pollack and David D L Bowtell
	and Andrew J Holloway},
  title = {An expression-based site of origin diagnostic method designed for
	clinical application to cancer of unknown origin.},
  journal = {Cancer Res.},
  year = {2005},
  volume = {65},
  pages = {4031--4040},
  number = {10},
  month = may,
  abstract = {Gene expression profiling offers a promising new technique for the
	diagnosis and prognosis of cancer. {W}e have applied this technology
	to build a clinically robust site of origin classifier with the ultimate
	aim of applying it to determine the origin of cancer of unknown primary
	({CUP}). {A} single c{DNA} microarray platform was used to profile
	229 primary and metastatic tumors representing 14 tumor types and
	multiple histologic subtypes. {T}his data set was subsequently used
	for training and validation of a support vector machine ({SVM}) classifier,
	demonstrating 89\% accuracy using a 13-class model. {F}urther, we
	show the translation of a five-class classifier to a quantitative
	{PCR}-based platform. {S}electing 79 optimal gene markers, we generated
	a quantitative-{PCR} low-density array, allowing the assay of both
	fresh-frozen and formalin-fixed paraffin-embedded ({FFPE}) tissue.
	{D}ata generated using both quantitative {PCR} and microarray were
	subsequently used to train and validate a cross-platform {SVM} model
	with high prediction accuracy. {F}inally, we applied our {SVM} classifiers
	to 13 cases of {CUP}. {W}e show that the microarray {SVM} classifier
	was capable of making high confidence predictions in 11 of 13 cases.
	{T}hese predictions were supported by comprehensive review of the
	patients' clinical histories.},
  doi = {10.1158/0008-5472.CAN-04-3617},
  pdf = {../local/Tothill2005expression-based.pdf},
  file = {Tothill2005expression-based.pdf:Tothill2005expression-based.pdf:PDF},
  keywords = {biosvm microarray},
  pii = {65/10/4031},
  url = {http://dx.doi.org/10.1158/0008-5472.CAN-04-3617}
}

@article{Tournier2009JTB,
  author = {Tournier, L. and Chaves, M.},
  title = {Uncovering operational interactions in genetic networks using asynchronous
	{Boolean} dynamics},
  journal = {Journal of Theoretical Biology},
  year = {2009},
  volume = {260},
  pages = {196--209},
  number = {2},
  abstract = {Biological networks of large dimensions, with their diagram of interactions,
	are often well represented by a Boolean model with a family of logical
	rules. The state space of a Boolean model is finite, and its asynchronous
	dynamics are fully described by a transition graph in the state space.
	In this context, a model reduction method will be developed for identifying
	the active or operational interactions responsible for a given dynamic
	behaviour. The first step in this procedure is the decomposition
	of the asynchronous transition graph into its strongly connected
	components, to obtain a reduced and hierarchically organized graph
	of transitions. The second step consists of the identification of
	a partial graph of interactions and a sub-family of logical rules
	that remain operational in a given region of the state space. This
	model reduction method and its usefulness are illustrated by an application
	to a model of programmed cell death. The method identifies two mechanisms
	used by the cell to respond to death-receptor stimulation and decide
	between the survival and apoptotic pathways.},
  doi = {10.1016/j.jtbi.2009.06.006},
  issn = {0022-5193},
  keywords = {csbcbook},
  url = {http://www.sciencedirect.com/science/article/B6WMD-4WH8CGD-2/2/e9f844daaad4eef66eacf065c1416383}
}

@article{Tranchevent2010guide,
  author = {Tranchevent, L.-C. and Capdevila, F. B. and Nitsch, D. and De Moor,
	B. and De Causmaecker, P. and Moreau, Y.},
  title = {A guide to web tools to prioritize candidate genes},
  journal = {Brief. Bioinform.},
  year = {2011},
  volume = {12},
  pages = {22--32},
  number = {1},
  abstract = {Finding the most promising genes among large lists of candidate genes
	has been defined as the gene prioritization problem. It is a recurrent
	problem in genetics in which genetic conditions are reported to be
	associated with chromosomal regions. In the last decade, several
	different computational approaches have been developed to tackle
	this challenging task. In this study, we review 19 computational
	solutions for human gene prioritization that are freely accessible
	as web tools and illustrate their differences. We summarize the various
	biological problems to which they have been successfully applied.
	Ultimately, we describe several research directions that could increase
	the quality and applicability of the tools. In addition we developed
	a website (http://www.esat.kuleuven.be/gpp) containing detailed information
	about these and other tools, which is regularly updated. This review
	and the associated website constitute together a guide to help users
	select a gene prioritization strategy that suits best their needs.},
  doi = {10.1093/bib/bbq007},
  eprint = {http://bib.oxfordjournals.org/content/early/2010/03/21/bib.bbq007.full.pdf+html},
  owner = {mordelet},
  timestamp = {2010.12.08},
  url = {http://bib.oxfordjournals.org/content/early/2010/03/21/bib.bbq007.abstract}
}

@article{Trapnell2013Differential,
  author = {Trapnell, C. and Hendrickson, D. G. and Sauvageau, M. and Goff, L.
	and Rinn, J. L. and Pachter, L.},
  title = {Differential analysis of gene regulation at transcript resolution
	with {RNA-seq}.},
  journal = {Nat Biotechnol},
  year = {2013},
  volume = {31},
  pages = {46--53},
  number = {1},
  month = jan,
  abstract = {Differential analysis of gene and transcript expression using high-throughput
	RNA sequencing (RNA-seq) is complicated by several sources of measurement
	variability and poses numerous statistical challenges. We present
	Cuffdiff 2, an algorithm that estimates expression at transcript-level
	resolution and controls for variability evident across replicate
	libraries. Cuffdiff 2 robustly identifies differentially expressed
	transcripts and genes and reveals differential splicing and promoter-preference
	changes. We demonstrate the accuracy of our approach through differential
	analysis of lung fibroblasts in response to loss of the developmental
	transcription factor HOXA1, which we show is required for lung fibroblast
	and HeLa cell cycle progression. Loss of HOXA1 results in significant
	expression level changes in thousands of individual transcripts,
	along with isoform switching events in key regulators of the cell
	cycle. Cuffdiff 2 performs robust differential analysis in RNA-seq
	experiments at transcript resolution, revealing a layer of regulation
	not readily observable with other high-throughput technologies.},
  doi = {10.1038/nbt.2450},
  pdf = {../local/Trapnell2013Differential.pdf},
  file = {Trapnell2013Differential.pdf:Trapnell2013Differential.pdf:PDF},
  institution = {Department of Stem Cell and Regenerative Biology, Harvard University,
	Cambridge, Massachusetts, USA.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nbt.2450},
  pmid = {23222703},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1038/nbt.2450}
}

@article{Trapnell2009TopHat,
  author = {Trapnell, C. and Pachter, L. and Salzberg, S. L.},
  title = {{TopHat}: discovering splice junctions with {RNA-Seq}.},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  pages = {1105--1111},
  number = {9},
  month = may,
  abstract = {A new protocol for sequencing the messenger RNA in a cell, known as
	RNA-Seq, generates millions of short sequence fragments in a single
	run. These fragments, or 'reads', can be used to measure levels of
	gene expression and to identify novel splice variants of genes. However,
	current software for aligning RNA-Seq data to a genome relies on
	known splice junctions and cannot identify novel ones. TopHat is
	an efficient read-mapping algorithm designed to align reads from
	an RNA-Seq experiment to a reference genome without relying on known
	splice sites. We mapped the RNA-Seq reads from a recent mammalian
	RNA-Seq experiment and recovered more than 72\% of the splice junctions
	reported by the annotation-based software from that study, along
	with nearly 20,000 previously unreported junctions. The TopHat pipeline
	is much faster than previous systems, mapping nearly 2.2 million
	reads per CPU hour, which is sufficient to process an entire RNA-Seq
	experiment in less than a day on a standard desktop computer. We
	describe several challenges unique to ab initio splice site discovery
	from RNA-Seq reads that will require further algorithm development. TopHat
	is free, open-source software available from http://tophat.cbcb.umd.edu. Supplementary
	data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btp120},
  pdf = {../local/Trapnell2009TopHat.pdf},
  file = {Trapnell2009TopHat.pdf:Trapnell2009TopHat.pdf:PDF},
  institution = {Center for Bioinformatics and Computational Biology, University of
	Maryland, College Park, MD 20742, USA. cole@cs.umd.edu},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btp120},
  pmid = {19289445},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp120}
}

@article{Trapnell2012Differential,
  author = {Trapnell, C. and Roberts, A. and Goff, L. and Pertea, G. and Kim,
	D. and Kelley, D. R. and Pimentel, H. and Salzberg, S. L. and Rinn,
	J. L. and Pachter, L.},
  title = {Differential gene and transcript expression analysis of {RNA-seq}
	experiments with {TopHat} and {Cufflinks}.},
  journal = {Nat Protoc},
  year = {2012},
  volume = {7},
  pages = {562--578},
  number = {3},
  month = mar,
  abstract = {Recent advances in high-throughput cDNA sequencing (RNA-seq) can reveal
	new genes and splice variants and quantify expression genome-wide
	in a single assay. The volume and complexity of data from RNA-seq
	experiments necessitate scalable, fast and mathematically principled
	analysis software. TopHat and Cufflinks are free, open-source software
	tools for gene discovery and comprehensive expression analysis of
	high-throughput mRNA sequencing (RNA-seq) data. Together, they allow
	biologists to identify new genes and new splice variants of known
	ones, as well as compare gene and transcript expression under two
	or more conditions. This protocol describes in detail how to use
	TopHat and Cufflinks to perform such analyses. It also covers several
	accessory tools and utilities that aid in managing data, including
	CummeRbund, a tool for visualizing RNA-seq analysis results. Although
	the procedure assumes basic informatics skills, these tools assume
	little to no background with RNA-seq analysis and are meant for novices
	and experts alike. The protocol begins with raw sequencing reads
	and produces a transcriptome assembly, lists of differentially expressed
	and regulated genes and transcripts, and publication-quality visualizations
	of analysis results. The protocol's execution time depends on the
	volume of transcriptome sequencing data and available computing resources
	but takes less than 1 d of computer time for typical experiments
	and $\sim$1 h of hands-on time.},
  doi = {10.1038/nprot.2012.016},
  pdf = {../local/Trapnell2012Differential.pdf},
  file = {Trapnell2012Differential.pdf:Trapnell2012Differential.pdf:PDF},
  institution = {Broad Institute of MIT and Harvard, Cambridge, Massachusetts, USA.
	cole@broadinstitute.org},
  keywords = {ngs, rnaseq},
  owner = {laurent},
  pii = {nprot.2012.016},
  pmid = {22383036},
  timestamp = {2012.04.11},
  url = {http://dx.doi.org/10.1038/nprot.2012.016}
}

@article{Trapnell2010Transcript,
  author = {Trapnell, C. and Williams, B. A. and Pertea, G. and Mortazavi, A.
	and Kwan, G. and {van Baren}, M. J. and Salzberg, S. L. and Wold,
	B. J. and Pachter, L.},
  title = {Transcript assembly and quantification by {RNA-Seq} reveals unannotated
	transcripts and isoform switching during cell differentiation.},
  journal = {Nat Biotechnol},
  year = {2010},
  volume = {28},
  pages = {511--515},
  number = {5},
  month = may,
  abstract = {High-throughput mRNA sequencing (RNA-Seq) promises simultaneous transcript
	discovery and abundance estimation. However, this would require algorithms
	that are not restricted by prior gene annotations and that account
	for alternative transcription and splicing. Here we introduce such
	algorithms in an open-source software program called Cufflinks. To
	test Cufflinks, we sequenced and analyzed >430 million paired 75-bp
	RNA-Seq reads from a mouse myoblast cell line over a differentiation
	time series. We detected 13,692 known transcripts and 3,724 previously
	unannotated ones, 62\% of which are supported by independent expression
	data or by homologous genes in other species. Over the time series,
	330 genes showed complete switches in the dominant transcription
	start site (TSS) or splice isoform, and we observed more subtle shifts
	in 1,304 other genes. These results suggest that Cufflinks can illuminate
	the substantial regulatory flexibility and complexity in even this
	well-studied model of muscle development and that it can improve
	transcriptome-based genome annotation.},
  doi = {10.1038/nbt.1621},
  pdf = {../local/Trapnell2010Transcript.pdf},
  file = {Trapnell2010Transcript.pdf:Trapnell2010Transcript.pdf:PDF},
  institution = {Department of Computer Science, University of Maryland, College Park,
	Maryland, USA.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nbt.1621},
  pmid = {20436464},
  timestamp = {2012.03.06},
  url = {http://dx.doi.org/10.1038/nbt.1621}
}

@article{Trinh2009Elementary,
  author = {Trinh, C. T. and Wlaschin, A. and Srienc, F.},
  title = {Elementary mode analysis: a useful metabolic pathway analysis tool
	for characterizing cellular metabolism.},
  journal = {Appl Microbiol Biotechnol},
  year = {2009},
  volume = {81},
  pages = {813--826},
  number = {5},
  month = jan,
  abstract = {Elementary mode analysis is a useful metabolic pathway analysis tool
	to identify the structure of a metabolic network that links the cellular
	phenotype to the corresponding genotype. The analysis can decompose
	the intricate metabolic network comprised of highly interconnected
	reactions into uniquely organized pathways. These pathways consisting
	of a minimal set of enzymes that can support steady state operation
	of cellular metabolism represent independent cellular physiological
	states. Such pathway definition provides a rigorous basis to systematically
	characterize cellular phenotypes, metabolic network regulation, robustness,
	and fragility that facilitate understanding of cell physiology and
	implementation of metabolic engineering strategies. This mini-review
	aims to overview the development and application of elementary mode
	analysis as a metabolic pathway analysis tool in studying cell physiology
	and as a basis of metabolic engineering.},
  doi = {10.1007/s00253-008-1770-1},
  pdf = {../local/Trinh2009Elementary.pdf},
  file = {Trinh2009Elementary.pdf:Trinh2009Elementary.pdf:PDF},
  institution = {Department of Chemical Engineering and Materials Science, University
	of Minnesota, 151 Amundson Hall, 421 Washington Ave SE, Minneapolis,
	MN 55455, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {19015845},
  timestamp = {2011.11.30},
  url = {http://dx.doi.org/10.1007/s00253-008-1770-1}
}

@article{Tropp2004Greed,
  author = {Tropp, Joel A.},
  title = {Greed is good: Algorithmic results for sparse approximation},
  journal = {IEEE Trans. Inform. Theory},
  year = {2004},
  volume = {50},
  pages = {2231--2242},
  number = {10}
}

@article{Tropp2006Algorithms,
  author = {Tropp, Joel A. and Gilbert, Anna C. and Strauss, Martin J.},
  title = {Algorithms for simultaneous sparse approximation: part {I}: Greedy
	pursuit},
  journal = {Signal Process.},
  year = {2006},
  volume = {86},
  pages = {572--588},
  number = {3},
  address = {Amsterdam, The Netherlands},
  doi = {10.1016/j.sigpro.2005.05.030},
  issn = {0165-1684},
  publisher = {Elsevier North-Holland, Inc.}
}

@article{Troyanskaya2001Missing,
  author = {Troyanskaya, O. and Cantor, M. and Sherlock, G. and Brown, P. and
	Hastie, T. and Tibshirani, R. and Botstein, D. and Altman, R. B.},
  title = {Missing value estimation methods for {DNA} microarrays},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {520--525},
  number = {6},
  pdf = {../local/Troyanskaya2001Missing.pdf},
  file = {Troyanskaya2001Missing.pdf:local/Troyanskaya2001Missing.pdf:PDF},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/17/6/520}
}

@article{Troyanskaya2003Bayesian,
  author = {Troyanskaya, O. G. and Dolinski, K. and Owen, A. B. and Altman, R.
	B. and Botstein, D.},
  title = {A {Bayesian} framework for combining heterogeneous data sources for
	gene function prediction (in \textit{Saccharomyces cerevisiae})},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2003},
  volume = {100},
  pages = {8348--8353},
  number = {14},
  abstract = {Genomic sequencing is no longer a novelty, but gene function annotation
	remains a key challenge in modern biology. A variety of functional
	genomics experimental techniques are available, from classic methods
	such as affinity precipitation to advanced high-throughput techniques
	such as gene expression microarrays. In the future, more disparate
	methods will be developed, further increasing the need for integrated
	computational analysis of data generated by these studies. We address
	this problem with magic (Multisource Association of Genes by Integration
	of Clusters), a general framework that uses formal Bayesian reasoning
	to integrate heterogeneous types of high-throughput biological data
	(such as large-scale two-hybrid screens and multiple microarray analyses)
	for accurate gene function prediction. The system formally incorporates
	expert knowledge about relative accuracies of data sources to combine
	them within a normative framework. magic provides a belief level
	with its output that allows the user to vary the stringency of predictions.
	We applied magic to Saccharomyces cerevisiae genetic and physical
	interactions, microarray, and transcription factor binding sites
	data and assessed the biological relevance of gene groupings using
	Gene Ontology annotations produced by the Saccharomyces Genome Database.
	We found that by creating functional groupings based on heterogeneous
	data types, magic improved accuracy of the groupings compared with
	microarray analysis alone. We describe several of the biological
	gene groupings identified.},
  doi = {10.1073/pnas.0832373100},
  owner = {jp},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1073/pnas.0832373100}
}

@article{Truss2005HuSiDa,
  author = {Truss, M. and Swat, M. and Kielbasa, S. M. and Sch{\"a}fer, R. and
	Herzel, H. and Hagemeier, C.},
  title = {{HuSiDa}--the human {siRNA} database: an open-access database for
	published functional {siRNA} sequences and technical details of efficient
	transfer into recipient cells.},
  journal = {Nucleic Acids Res.},
  year = {2005},
  volume = {33},
  pages = {D108--D111},
  number = {Database issue},
  month = jan,
  abstract = {Small interfering {RNA}s (si{RNA}s) have become a standard tool in
	functional genomics. {O}nce incorporated into the {RNA}-induced silencing
	complex ({RISC}), si{RNA}s mediate the specific recognition of corresponding
	target m{RNA}s and their cleavage. {H}owever, only a small fraction
	of randomly chosen si{RNA} sequences is able to induce efficient
	gene silencing. {I}n common laboratory practice, successful {RNA}
	interference experiments typically require both, the labour and cost-intensive
	identification of an active si{RNA} sequence and the optimization
	of target cell line-specific procedures for optimal si{RNA} delivery.
	{T}o optimize the design and performance of si{RNA} experiments,
	we have established the human si{RNA} database ({H}u{S}i{D}a). {T}he
	database provides sequences of published functional si{RNA} molecules
	targeting human genes and important technical details of the corresponding
	gene silencing experiments, including the mode of si{RNA} generation,
	recipient cell lines, transfection reagents and procedures and direct
	links to published references ({P}ub{M}ed). {T}he database can be
	accessed at http://www.human-si{RNA}-database.net. {W}e used the
	si{RNA} sequence information stored in the database for scrutinizing
	published sequence selection parameters for efficient gene silencing.},
  doi = {10.1093/nar/gki131},
  keywords = {sirna},
  pii = {33/suppl_1/D108},
  url = {http://dx.doi.org/10.1093/nar/gki131}
}

@article{Tsai2004Gene,
  author = {Tsai, C. A. and Chen, C. H. and Lee, T. C. and Ho, I. C. and Yang, U. C.
	and Chen, J. J.},
  title = {Gene selection for sample classifications in microarray experiments.},
  journal = {{DNA} Cell Biol.},
  year = {2004},
  volume = {23},
  pages = {607--614},
  number = {10},
  abstract = {D{NA} microarray technology provides useful tools for profiling global
	gene expression patterns in different cell/tissue samples. {O}ne
	major challenge is the large number of genes relative to the number
	of samples. {T}he use of all genes can suppress or reduce the performance
	of a classification rule due to the noise of nondiscriminatory genes.
	{S}election of an optimal subset from the original gene set becomes
	an important prestep in sample classification. {I}n this study, we
	propose a family-wise error ({FWE}) rate approach to selection of
	discriminatory genes for two-sample or multiple-sample classification.
	{T}he {FWE} approach controls the probability of the number of one
	or more false positives at a prespecified level. {A} public colon
	cancer data set is used to evaluate the performance of the proposed
	approach for the two classification methods: k nearest neighbors
	(k-{NN}) and support vector machine ({SVM}). {T}he selected gene
	sets from the proposed procedure appears to perform better than or
	comparable to several results reported in the literature using the
	univariate analysis without performing multivariate search. {I}n
	addition, we apply the {FWE} approach to a toxicogenomic data set
	with nine treatments (a control and eight metals, {A}s, {C}d, {N}i,
	{C}r, {S}b, {P}b, {C}u, and {A}s{V}) for a total of 55 samples for
	a multisample classification. {T}wo gene sets are considered: the
	gene set omega{F} formed by the {ANOVA} {F}-test, and a gene set
	omega{T} formed by the union of one-versus-all t-tests. {T}he predicted
	accuracies are evaluated using the internal and external crossvalidation.
	{U}sing the {SVM} classification, the overall accuracies to predict
	55 samples into one of the nine treatments are above 80\% for internal
	crossvalidation. {O}mega{F} has slightly higher accuracy rates than
	omega{T}. {T}he overall predicted accuracies are above 70\% for the
	external crossvalidation; the two gene sets omega{T} and omega{F}
	performed equally well.},
  doi = {10.1089/1044549042476947},
  pdf = {../local/Tsai2004Gene.pdf},
  file = {Tsai2004Gene.pdf:local/Tsai2004Gene.pdf:PDF},
  keywords = {biosvm microarray},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1089/1044549042476947}
}

@article{Tsai1979Error,
  author = {Tsai, W. H. and Fu, K. S.},
  title = {Error-Correcting Isomorphisms of Attributed Relational Graphs for
	Pattern Analysis},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  year = {1979},
  volume = {9},
  pages = {757--768},
  number = {12},
  month = dec,
  bibsource = {http://www.visionbib.com/bibliography/match557.html#TT46385},
  owner = {misha}
}

@inproceedings{Tsang2003Distance,
  author    = {Tsang, I. W. and Kwok, J. T.},
  title     = {Distance metric learning with kernels},
  booktitle = {Proceedings of the International Conference on Artificial Neural Networks},
  pages     = {126--129},
  year      = {2003},
  timestamp = {2007.06.06}
}

@article{Tseng2009Coordinate,
  author = {Tseng, P. and Yun, S.},
  title = {A Coordinate Gradient Descent Method for Nonsmooth Separable Minimization},
  journal = {Math. Program.},
  year = {2009},
  volume = {117},
  pages = {387--423},
  number = {1-2},
  doi = {10.1007/s10107-007-0170-0},
  pdf = {../local/Tseng2009Coordinate.pdf},
  file = {Tseng2009Coordinate.pdf:Tseng2009Coordinate.pdf:PDF},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2010.06.25},
  url = {http://dx.doi.org/10.1007/s10107-007-0170-0}
}

@article{Tsirigos2005sensitive,
  author = {Tsirigos, A. and Rigoutsos, I.},
  title = {A sensitive, support-vector-machine method for the detection of horizontal
	gene transfers in viral, archaeal and bacterial genomes.},
  journal = {Nucleic Acids Res.},
  year = {2005},
  volume = {33},
  pages = {3699--3707},
  number = {12},
  abstract = {In earlier work, we introduced and discussed a generalized computational
	framework for identifying horizontal transfers. {T}his framework
	relied on a gene's nucleotide composition, obviated the need for
	knowledge of codon boundaries and database searches, and was shown
	to perform very well across a wide range of archaeal and bacterial
	genomes when compared with previously published approaches, such
	as {C}odon {A}daptation {I}ndex and {C} + {G} content. {N}onetheless,
	two considerations remained outstanding: we wanted to further increase
	the sensitivity of detecting horizontal transfers and also to be
	able to apply the method to increasingly smaller genomes. {I}n the
	discussion that follows, we present such a method, {W}n-{SVM}, and
	show that it exhibits a very significant improvement in sensitivity
	compared with earlier approaches. {W}n-{SVM} uses a one-class support-vector
	machine and can learn using rather small training sets. {T}his property
	makes {W}n-{SVM} particularly suitable for studying small-size genomes,
	similar to those of viruses, as well as the typically larger archaeal
	and bacterial genomes. {W}e show experimentally that the new method
	results in a superior performance across a wide range of organisms
	and that it improves even upon our own earlier method by an average
	of 10\% across all examined genomes. {A}s a small-genome case study,
	we analyze the genome of the human cytomegalovirus and demonstrate
	that {W}n-{SVM} correctly identifies regions that are known to be
	conserved and prototypical of all beta-herpesvirinae, regions that
	are known to have been acquired horizontally from the human host
	and, finally, regions that had not up to now been suspected to be
	horizontally transferred. {A}typical region predictions for many
	eukaryotic viruses, including the alpha-, beta- and gamma-herpesvirinae,
	and 123 archaeal and bacterial genomes, have been made available
	online at http://cbcsrv.watson.ibm.com/{HGT}_{SVM}/.},
  doi = {10.1093/nar/gki660},
  pdf = {../local/Tsirigos2005sensitive.pdf},
  file = {Tsirigos2005sensitive.pdf:local/Tsirigos2005sensitive.pdf:PDF},
  keywords = {biosvm},
  pii = {33/12/3699},
  url = {http://dx.doi.org/10.1093/nar/gki660}
}

@inproceedings{Tsochantaridis2004Support,
  author = {Tsochantaridis, I. and Hofmann, T. and Joachims, T. and Altun, Y.},
  title = {Support vector machine learning for interdependent and structured
	output spaces},
  booktitle = {Twenty-first international conference on {M}achine learning},
  year = {2004},
  publisher = {ACM Press},
  abstract = {Learning general functional dependencies is one of the main goals
	in machine learning. {R}ecent progress in kernel-based methods has
	focused on designing flexible and powerful input representations.
	{T}his paper addresses the complementary issue of problems involving
	complex outputs such as multiple dependent output variables and structured
	output spaces. {W}e propose to generalize multiclass {S}upport {V}ector
	{M}achine learning in a formulation that involves features extracted
	jointly from inputs and outputs. {T}he resulting optimization problem
	is solved efficiently by a cutting plane algorithm that exploits
	the sparseness and structural decomposition of the problem. {W}e
	demonstrate the versatility and effectiveness of our method on problems ranging
	from supervised grammar learning and named-entity recognition, to taxonomic
	text classification and sequence alignment.},
  doi = {10.1145/1015330.1015341},
  pdf = {../local/Tsochantaridis2004Support.pdf},
  file = {Tsochantaridis2004Support.pdf:local/Tsochantaridis2004Support.pdf:PDF},
  isbn = {1-58113-828-5},
  keywords = {structured-output},
  location = {Banff, Alberta, Canada}
}

@article{Tsochantaridis2005Large,
  author = {Tsochantaridis, I. and Joachims, T. and Hofmann, T. and Altun, Y.},
  title = {Large margin methods for structured and interdependent output variables},
  journal = {J. Mach. Learn. Res.},
  year = {2005},
  volume = {6},
  pages = {1453--1484},
  abstract = {Learning general functional dependencies between arbitrary input and
	output spaces is one of the key challenges in computational intelligence.
	While recent progress in machine learning has mainly focused on designing
	flexible and powerful input representations, this paper addresses
	the complementary issue of designing classification algorithms that
	can deal with more complex outputs, such as trees, sequences, or
	sets. More generally, we consider problems involving multiple dependent
	output variables, structured output spaces, and classification problems
	with class attributes. In order to accomplish this, we propose to
	appropriately generalize the well-known notion of a separation margin
	and derive a corresponding maximum-margin formulation. While this
	leads to a quadratic program with a potentially prohibitive, i.e.
	exponential, number of constraints, we present a cutting plane algorithm
	that solves the optimization problem in polynomial time for a large
	class of problems. The proposed method has important applications
	in areas such as computational biology, natural language processing,
	information retrieval/extraction, and optical character recognition.
	Experiments from various domains involving different types of output
	spaces emphasize the breadth and generality of our approach.},
  pdf = {../local/Tsochantaridis2005Large.pdf},
  file = {Tsochantaridis2005Large.pdf:local/Tsochantaridis2005Large.pdf:PDF},
  timestamp = {2006.09.29},
  url = {http://jmlr.csail.mit.edu/papers/v6/tsochantaridis05a.html}
}

@inproceedings{Tsuda2007Entire,
  author    = {Tsuda, K.},
  title     = {Entire regularization path for graph data},
  booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2007},
  pages     = {919--926},
  doi       = {10.1145/1273496.1273612},
  url       = {http://doi.acm.org/10.1145/1273496.1273612},
  pdf       = {../local/Tsuda2007Entire.pdf},
  file      = {Tsuda2007Entire.pdf:Tsuda2007Entire.pdf:PDF},
  owner     = {jp},
  timestamp = {2009.09.29}
}

@article{Tsuda2003em,
  author = {Tsuda, K. and Akaho, S. and Asai, K.},
  title = {The em {A}lgorithm for {K}ernel {M}atrix {C}ompletion with {A}uxiliary
	{D}ata},
  journal = {J. {M}ach. {L}earn. {R}es.},
  year = {2003},
  volume = {4},
  pages = {67--81},
  abstract = {In biological data, it is often the case that observed data are available
	only for a subset of samples. {W}hen a kernel matrix is derived from
	such data, we have to leave the entries for unavailable samples as
	missing. {I}n this paper, the missing entries are completed by exploiting
	an auxiliary kernel matrix derived from another information source.
	{T}he parametric model of kernel matrices is created as a set of
	spectral variants of the auxiliary kernel matrix, and the missing
	entries are estimated by fitting this model to the existing entries.
	{F}or model fitting, we adopt the em algorithm (distinguished from
	the {EM} algorithm of {D}empster et al., 1977) based on the information
	geometry of positive definite matrices. {W}e will report promising
	results on bacteria clustering experiments using two marker sequences:
	16{S} and gyr{B}.},
  pdf = {../local/Tsuda2003em.pdf},
  file = {Tsuda2003em.pdf:local/Tsuda2003em.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.jmlr.org/papers/v4/tsuda03a.html}
}

@article{Tsuda2002new,
  author   = {K. Tsuda and M. Kawanabe and G. R{\"a}tsch and S. Sonnenburg and K.-R. M{\"u}ller},
  title    = {A new discriminative kernel from probabilistic models},
  journal  = {Neural {C}omputation},
  year     = {2002},
  volume   = {14},
  number   = {10},
  pages    = {2397--2414},
  doi      = {10.1162/08997660260293274},
  url      = {http://dx.doi.org/10.1162/08997660260293274},
  keywords = {biosvm},
  pdf      = {../local/Tsuda2002new.pdf},
  file     = {Tsuda2002new.pdf:local/Tsuda2002new.pdf:PDF}
}

@article{Tsuda2002Marginalized,
  author   = {K. Tsuda and T. Kin and K. Asai},
  title    = {Marginalized {K}ernels for {B}iological {S}equences},
  journal  = {Bioinformatics},
  year     = {2002},
  volume   = {18},
  pages    = {S268--S275},
  keywords = {biosvm},
  pdf      = {../local/Tsuda2002Marginalized.pdf},
  file     = {Tsuda2002Marginalized.pdf:local/Tsuda2002Marginalized.pdf:PDF},
  comment  = {Introduces the idea of marginalized kernel. Show that the Fisher kernel
	is a particular case of it, and modify it. Application to bacterial
	gyrB classification.},
  abstract = {Motivation: {K}ernel methods such as support vector machines require
	a kernel function between objects to be defined a priori. {S}everal
	works have been done to derive kernels from probability distributions,
	e.g., the {F}isher kernel. {H}owever, a general methodology to design
	a kernel is not fully developed. {R}esults: {W}e propose a reasonable
	way of designing a kernel when objects are generated from latent
	variable models (e.g., {HMM}). {F}irst of all, a joint kernel is
	designed for complete data which include both visible and hidden
	variables. {T}hen a marginalized kernel for visible data is obtained
	by taking the expectation with respect to hidden variables. {W}e
	will show that the {F}isher kernel is a special case of marginalized
	kernels, which gives another viewpoint to the {F}isher kernel theory.
	{A}lthough our approach can be applied to any object, we particularly
	derive several marginalized kernels useful for biological sequences
	(e.g., {DNA} and proteins). {T}he effectiveness of marginalized kernels
	is illustrated in the task of classifying bacterial gyrase subunit
	{B} (gyr{B}) amino acid sequences.}
}

@article{Tsuda2004Learning,
  author = {Tsuda, K. and Noble, W. S.},
  title = {Learning kernels from biological networks by maximizing entropy},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {i326--i333},
  abstract = {Motivation: {T}he diffusion kernel is a general method for computing
	pairwise distances among all nodes in a graph, based on the sum of
	weighted paths between each pair of nodes. {T}his technique has been
	used successfully, in conjunction with kernel-based learning methods,
	to draw inferences from several types of biological networks. {R}esults:
	{W}e show that computing the diffusion kernel is equivalent to maximizing
	the von {N}eumann entropy, subject to a global constraint on the
	sum of the {E}uclidean distances between nodes. {T}his global constraint
	allows for high variance in the pairwise distances. {A}ccordingly,
	we propose an alternative, locally constrained diffusion kernel,
	and we demonstrate that the resulting kernel allows for more accurate
	support vector machine prediction of protein functional classifications
	from metabolic and protein-protein interaction networks. {A}vailability:
	{S}upplementary results and data are available at noble.gs.washington.edu/proj/maxent},
  comment = {Shows that the diffusion kernel maximizes the von Neumann entropy
	under a global constraint on the sum of Euclidean distances between
	nodes. Proposes a locally constrained diffusion kernel that gives
	more accurate SVM prediction of protein functional classes from metabolic
	and protein-protein interaction networks.},
  doi = {10.1093/bioinformatics/bth906},
  pdf = {../local/Tsuda2004Learning.pdf},
  file = {Tsuda2004Learning.pdf:local/Tsuda2004Learning.pdf:PDF},
  keywords = {learning-kernel graph-kernel biosvm},
  owner = {vert},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth906}
}

@article{Tsujinishi2003Fuzzy,
  author = {Daisuke Tsujinishi and Shigeo Abe},
  title = {Fuzzy least squares support vector machines for multiclass problems.},
  journal = {Neural {N}etw},
  year = {2003},
  volume = {16},
  pages = {785--792},
  number = {5--6},
  abstract = {In least squares support vector machines ({LS}-{SVM}s), the optimal
	separating hyperplane is obtained by solving a set of linear equations
	instead of solving a quadratic programming problem. {B}ut since {SVM}s
	and {LS}-{SVM}s are formulated for two-class problems, unclassifiable
	regions exist when they are extended to multiclass problems. {I}n
	this paper, we discuss fuzzy {LS}-{SVM}s that resolve unclassifiable
	regions for multiclass problems. {W}e define a membership function
	in the direction perpendicular to the optimal separating hyperplane
	that separates a pair of classes. {U}sing the minimum or average
	operation for these membership functions, we define a membership
	function for each class. {U}sing some benchmark data sets, we show
	that recognition performance of fuzzy {LS}-{SVM}s with the minimum
	operator is comparable to that of fuzzy {SVM}s, but fuzzy {LS}-{SVM}s
	with the average operator showed inferior performance.},
  pii = {S0893608003001102}
}

@book{Tsybakov2004Introduction,
  author    = {Tsybakov, A. B.},
  title     = {Introduction {\`a} l'estimation non-param{\'e}trique},
  publisher = {Springer},
  year      = {2004},
  owner     = {vert}
}

@article{Tsybakov1997On,
  author = {Tsybakov, A. B.},
  title = {On {N}onparametric {E}stimation of {D}ensity {L}evel {S}ets},
  journal = {Ann. {S}tat.},
  year = {1997},
  volume = {25},
  pages = {948--969},
  number = {3},
  month = {Jun},
  pdf = {../local/Tsybakov1997On.pdf},
  file = {Tsybakov1997On.pdf:local/Tsybakov1997On.pdf:PDF},
  url = {http://links.jstor.org/sici?sici=0090-5364%28199706%2925%3A3%3C948%3AONEODL%3E2.0.CO%3B2-D}
}

@article{Tu2004Image,
  author                = {Zhuowen Tu and Xiangrong Chen and Alan L. Yuille},
  title                 = {Image Parsing: Unifying Segmentation, Detection, and Recognition},
  journal               = {Int. J. Comput. Vis.},
  year                  = {2004},
  citeseerurl           = {http://citeseer.ist.psu.edu/640923.html},
  citeseercitationcount = {0},
  owner                 = {michael},
  timestamp             = {2009.11.10}
}

@article{Tubio2011Cancer:,
  author      = {Jose M C Tubio and Xavier Estivill},
  title       = {Cancer: When catastrophe strikes a cell.},
  journal     = {Nature},
  year        = {2011},
  month       = {Feb},
  volume      = {470},
  number      = {7335},
  pages       = {476--477},
  doi         = {10.1038/470476a},
  url         = {http://dx.doi.org/10.1038/470476a},
  pii         = {470476a},
  pmid        = {21350479},
  language    = {eng},
  medline-pst = {ppublish},
  keywords    = {Apoptosis; Bone Neoplasms, genetics/pathology; Cell Survival; Cell
	Transformation, Neoplastic, genetics; Chromosomes, Human, genetics/metabolism;
	DNA Breaks; DNA Copy Number Variations, genetics; DNA Repair; Disease
	Progression; Genes, Neoplasm, genetics; Humans; Leukemia, genetics;
	Mutagenesis, genetics; Mutation, genetics; Neoplasms, genetics/pathology;
	Recombination, Genetic, genetics},
  owner       = {phupe},
  timestamp   = {2011.06.01}
}

@article{Tucker2002Gene,
  author = {Tucker, D. L. and Tucker, N. and Conway, T.},
  title = {Gene expression profiling of the {pH} response in {Escherichia coli}.},
  journal = {J Bacteriol.},
  year = {2002},
  volume = {184},
  pages = {6551--6558},
  number = {23},
  month = {Dec},
  abstract = {Escherichia coli MG1655 acid-inducible genes were identified by whole-genome
	expression profiling. Cultures were grown to the mid-logarithmic
	phase on acidified glucose minimal medium, conditions that induce
	glutamate-dependent acid resistance (AR), while the other AR systems
	are either repressed or not induced. A total of 28 genes were induced
	in at least two of three experiments in which the gene expression
	profiles of cells grown in acid (pH 5.5 or 4.5) were compared to
	those of cells grown at pH 7.4. As expected, the genes encoding glutamate
	decarboxylase, gadA and gadB, were significantly induced. Interestingly,
	two acid-inducible genes code for small basic proteins with pIs of
	>10.5, and six code for small acidic proteins with pIs ranging from
	5.7 to 4.0; the roles of these small basic and acidic proteins in
	acid resistance are unknown. The acid-induced genes represented only
	five functional grouping categories, including eight genes involved
	in metabolism, nine associated with cell envelope structures or modifications,
	two encoding chaperones, six regulatory genes, and six unknown genes.
	It is unlikely that all of these genes are involved in the glutamate-dependent
	AR. However, nine acid-inducible genes are clustered in the gadA
	region, including hdeA, which encodes a putative periplasmic chaperone,
	and four putative regulatory genes. One of these putative regulators,
	yhiE, was shown to significantly increase acid resistance when overexpressed
	in cells that had not been preinduced by growth at pH 5.5, and mutation
	of yhiE decreased acid resistance; yhiE could therefore encode an
	activator of AR genes. Thus, the acid-inducible genes clustered in
	the gadA region appear to be involved in glutamate-dependent acid
	resistance, although their specific roles remain to be elucidated.},
  institution = {Advanced Center for Genome Technology, The University of Oklahoma,
	Norman, Oklahoma 73069-0245, USA.},
  keywords = {Culture Media; Escherichia coli; Escherichia coli Proteins; Gene Expression
	Profiling; Gene Expression Regulation, Bacterial; Heat-Shock Response;
	Hydrogen-Ion Concentration; Morpholines; Oligonucleotide Array Sequence
	Analysis},
  owner = {fantine},
  pmid = {12426343},
  timestamp = {2008.02.11}
}

@article{Tugcu2003Identification,
  author = {Nihal Tugcu and Asif Ladiwala and Curt M Breneman and Steven M Cramer},
  title = {Identification of chemically selective displacers using parallel
	batch screening experiments and quantitative structure efficacy relationship
	models.},
  journal = {Anal {C}hem},
  year = {2003},
  volume = {75},
  pages = {5806--5816},
  number = {21},
  month = {Nov},
  abstract = {Parallel batch screening experiments were carried out to examine how
	displacer chemistry and salt counterions affect the selectivity of
	batch protein displacements in anion exchange chromatographic systems.
	{T}he results indicate that both salt type and displacer chemistry
	can have a significant impact on the amount of protein displaced.
	{I}mportantly, the results indicate that, by changing the displacer,
	salt counterion, or both, one can induce significant selectivity
	changes in the relative displacement of two model proteins. {T}his
	indicates that highly selective separations can be developed in ion
	exchange systems by the appropriate selection of displacer chemistry
	and salt counterion. {T}he experimental batch screening data were
	also used in conjunction with various molecular descriptors to generate
	quantitative structure efficacy relationship ({QSER}) models based
	on a support vector machine feature selection and regression tool.
	{T}he models resulted in good correlations and successful predictions
	for an external test set of displacers. {A} star plot approach was
	shown to be a powerful tool to aid in the interpretation of the {QSER}
	models. {T}hese results indicate that this modeling approach can
	be employed for the a priori prediction of displacer efficacy as
	well as for providing insight into displacer design and the selection
	of proper mobile-phase conditions for highly selective separations.},
  doi = {10.1021/ac0341564},
  pdf = {../local/Tugcu2003Identification.pdf},
  file = {Tugcu2003Identification.pdf:local/Tugcu2003Identification.pdf:PDF},
  url = {http://dx.doi.org/10.1021/ac0341564}
}

@article{Tugcu2003Prediction,
  author = {Nihal Tugcu and Minghu Song and Curt M Breneman and N. Sukumar and
	Kristin P Bennett and Steven M Cramer},
  title = {Prediction of the effect of mobile-phase salt type on protein retention
	and selectivity in anion exchange systems.},
  journal = {Anal {C}hem},
  year = {2003},
  volume = {75},
  pages = {3563--3572},
  number = {14},
  month = {Jul},
  abstract = {This study examines the effect of different salt types on protein
	retention and selectivity in anion exchange systems. {P}articularly,
	linear retention data for various proteins were obtained on two structurally
	different anion exchange stationary-phase materials in the presence
	of three salts with different counterions. {T}he data indicated that
	the effects are, for the most part, nonspecific, although various
	specific effects could also be observed. {Q}uantitative structure
	retention relationship ({QSRR}) models based on support vector machine
	feature selection and regression models were developed using the
	experimental chromatographic data in conjunction with various molecular
	descriptors computed from protein crystal structure geometries. {S}tar
	plots for each descriptor used in the final model were generated
	to aid in interpretation. {T}he resulting {QSRR} models were predictive,
	with cross-validated r2 values of 0.9445, 0.9676, and 0.8897 for
	{S}ource 15{Q} and 0.9561, 0.9876, and 0.9760 for {Q} {S}epharose
	resins in the presence of three different salts. {T}he predictive
	power of these models was validated using a set of test proteins
	that were not used in the generation of these models. {I}nterpretation
	of the models revealed that particular trends for proteins and salts
	could be captured using {QSRR} techniques.}
}

@article{Tung2007POPI:,
  author = {Chun-Wei Tung and Shinn-Ying Ho},
  title = {{POPI}: predicting immunogenicity of {MHC} class {I} binding peptides by
	mining informative physicochemical properties.},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {942--949},
  number = {8},
  month = {Apr},
  abstract = {MOTIVATION: Both modeling of antigen-processing pathway including
	major histocompatibility complex (MHC) binding and immunogenicity
	prediction of those MHC-binding peptides are essential to develop
	a computer-aided system of peptide-based vaccine design that is one
	goal of immunoinformatics. Numerous studies have dealt with modeling
	the immunogenic pathway but not the intractable problem of immunogenicity
	prediction due to complex effects of many intrinsic and extrinsic
	factors. Moderate affinity of the MHC-peptide complex is essential
	to induce immune responses, but the relationship between the affinity
	and peptide immunogenicity is too weak to use for predicting immunogenicity.
	This study focuses on mining informative physicochemical properties
	from known experimental immunogenicity data to understand immune
	responses and predict immunogenicity of MHC-binding peptides accurately.
	RESULTS: This study proposes a computational method to mine a feature
	set of informative physicochemical properties from MHC class I binding
	peptides to design a support vector machine (SVM) based system (named
	POPI) for the prediction of peptide immunogenicity. High performance
	of POPI arises mainly from an inheritable bi-objective genetic algorithm,
	which aims to automatically determine the best number m out of 531
	physicochemical properties, identify these m properties and tune
	SVM parameters simultaneously. The dataset consisting of 428 human
	MHC class I binding peptides belonging to four classes of immunogenicity
	was established from MHCPEP, a database of MHC-binding peptides (Brusic
	et al., 1998). POPI, utilizing the m = 23 selected properties, performs
	well with the accuracy of 64.72\% using leave-one-out cross-validation,
	compared with two sequence alignment-based prediction methods ALIGN
	(54.91\%) and PSI-BLAST (53.23\%). POPI is the first computational
	system for prediction of peptide immunogenicity based on physicochemical
	properties. AVAILABILITY: A web server for prediction of peptide
	immunogenicity (POPI) and the used dataset of MHC class I binding
	peptides (PEPMHCI) are available at http://iclab.life.nctu.edu.tw/POPI},
  doi = {10.1093/bioinformatics/btm061},
  keywords = {Algorithms; Artificial Intelligence; Binding Sites; Epitope Mapping;
	Histocompatibility Antigens Class I; Oligopeptides; Pattern Recognition,
	Automated; Protein Binding; Software; Structure-Activity Relationship},
  owner = {laurent},
  pii = {btm061},
  pmid = {17384427},
  timestamp = {2007.07.12},
  url = {http://dx.doi.org/10.1093/bioinformatics/btm061}
}

@article{Tung2005GenSo-FDSS,
  author = {W. L. Tung and C. Quek},
  title = {Gen{S}o-{FDSS}: a neural-fuzzy decision support system for pediatric
	{ALL} cancer subtype identification using gene expression data.},
  journal = {Artif. {I}ntell. {M}ed.},
  year = {2005},
  volume = {33},
  pages = {61--88},
  number = {1},
  month = {Jan},
  abstract = {O{BJECTIVE}: {A}cute lymphoblastic leukemia ({ALL}) is the most common
	malignancy of childhood, representing nearly one third of all pediatric
	cancers. {C}urrently, the treatment of pediatric {ALL} is centered
	on tailoring the intensity of the therapy applied to a patient's
	risk of relapse, which is linked to the type of leukemia the patient
	has. {H}ence, accurate and correct diagnosis of the various leukemia
	subtypes becomes an important first step in the treatment process.
	{R}ecently, gene expression profiling using {DNA} microarrays has
	been shown to be a viable and accurate diagnostic tool to identify
	the known prognostically important {ALL} subtypes. {T}hus, there
	is currently a huge interest in developing autonomous classification
	systems for cancer diagnosis using gene expression data. {T}his is
	to achieve an unbiased analysis of the data and also partly to handle
	the large amount of genetic information extracted from the {DNA}
	microarrays. {METHODOLOGY}: {G}enerally, existing medical decision
	support systems ({DSS}) for cancer classification and diagnosis are
	based on traditional statistical methods such as {B}ayesian decision
	theory and machine learning models such as neural networks ({NN})
	and support vector machine ({SVM}). {T}hough high accuracies have
	been reported for these systems, they fall short on certain critical
	areas. {T}hese included (a) being able to present the extracted knowledge
	and explain the computed solutions to the users; (b) having a logical
	deduction process that is similar and intuitive to the human reasoning
	process; and (c) flexible enough to incorporate new knowledge without
	running the risk of eroding old but valid information. {O}n the other
	hand, a neural fuzzy system, which is synthesized to emulate the
	human ability to learn and reason in the presence of imprecise and
	incomplete information, has the ability to overcome the above-mentioned
	shortcomings. {H}owever, existing neural fuzzy systems have their
	own limitations when used in the design and implementation of {DSS}.
	{H}ence, this paper proposed the use of a novel neural fuzzy system:
	the generic self-organising fuzzy neural network ({G}en{S}o{FNN})
	with truth-value restriction ({TVR}) fuzzy inference, as a fuzzy
	{DSS} (denoted as {G}en{S}o-{FDSS}) for the classification of {ALL}
	subtypes using gene expression data. {RESULTS} {AND} {CONCLUSION}:
	{T}he performance of the {G}en{S}o-{FDSS} system is encouraging when
	benchmarked against those of {NN}, {SVM} and the {K}-nearest neighbor
	({K}-{NN}) classifier. {O}n average, a classification rate of above
	90\% has been achieved using the {G}en{S}o-{FDSS} system.},
  doi = {10.1016/j.artmed.2004.03.009},
  pdf = {../local/Tung2005GenSo-FDSS.pdf},
  file = {Tung2005GenSo-FDSS.pdf:local/Tung2005GenSo-FDSS.pdf:PDF},
  pii = {S0933-3657(04)00094-6},
  url = {http://dx.doi.org/10.1016/j.artmed.2004.03.009}
}

@article{Turlach2005Simultaneous,
  author    = {Turlach, B. A. and Venables, W. N. and Wright, S. J.},
  title     = {Simultaneous variable selection},
  journal   = {Technometrics},
  publisher = {ASA},
  year      = {2005},
  volume    = {47},
  number    = {3},
  pages     = {349--363}
}

@article{Turlin2001Regulation,
  author    = {E. Turlin and M. Perrotte-piquemal and A. Danchin and F. Biville},
  title     = {{R}egulation of the early steps of 3-phenylpropionate catabolism in {E}scherichia coli.},
  journal   = {J. Mol. Microbiol. Biotechnol.},
  year      = {2001},
  month     = {Jan},
  volume    = {3},
  number    = {1},
  pages     = {127--133},
  pmid      = {11200225},
  timestamp = {2008.02.12},
  abstract  = {Microbial catabolism of phenylpropanoid compounds plays a key role
	in the degradation of aromatic molecules originating from the degradation
	of proteins and plant constituents. In this study, the regulation
	of the early steps in the utilisation of 3-phenylpropionate, a phenylpropanoid
	compound, was investigated. Expression of the hcaA gene product,
	which is involved in 3-phenylpropionate catabolism in Escherichia
	coli, was positively regulated by HcaR, a regulatory protein similar
	to members of the LysR regulators family. Remarkably, the expression
	of hcaA in the presence of 3-phenylpropionate was sharply and transiently
	induced at the end of the exponential growth phase. This occurred
	in a rpoS-independent manner. This transient induction was also mediated
	by HcaR. The expression of this positive regulator is negatively
	autoregulated, as for other members of the LysR family. The expression
	of hcaR is strongly repressed in the presence of glucose. Glucose-dependent
	repression of hcaR expression could only be partially overcome by
	adding exogenous cAMP.}
}

@article{Turner2002Cellular,
  author = {Bryan M. Turner},
  title = {Cellular Memory and the Histone Code},
  journal = {Cell},
  year = {2002},
  volume = {111},
  pages = {285--291},
  keywords = {csbcbook}
}

@article{Turner2003POCUS,
  author = {Turner, F. S. and Clutterbuck, D. R. and Semple, C. A. M.},
  title = {{POCUS}: mining genomic sequence annotation to predict disease genes},
  journal = {Genome Biol.},
  year = {2003},
  volume = {4},
  pages = {R75},
  number = {11},
  abstract = {Here we present POCUS (prioritization of candidate genes using statistics),
	a novel computational approach to prioritize candidate disease genes
	that is based on over-representation of functional annotation between
	loci for the same disease. We show that POCUS can provide high (up
	to 81-fold) enrichment of real disease genes in the candidate-gene
	shortlists it produces compared with the original large sets of positional
	candidates. In contrast to existing methods, POCUS can also suggest
	counterintuitive candidates.},
  doi = {10.1186/gb-2003-4-11-r75},
  pdf = {../local/Turner2003POCUS.pdf},
  file = {Turner2003POCUS.pdf:Turner2003POCUS.pdf:PDF},
  institution = {MRC Human Genetics Unit, Crewe Road, Western General Hospital, Edinburgh
	EH4 2XU, UK.},
  owner = {jp},
  pii = {gb-2003-4-11-r75},
  pmid = {14611661},
  timestamp = {2009.03.18},
  url = {http://dx.doi.org/10.1186/gb-2003-4-11-r75}
}

@article{Tuschl1999Targeted,
  author = {Tuschl, T. and Zamore, P. D. and Lehmann, R. and Bartel, D. P. and
	Sharp, P. A.},
  title = {Targeted m{RNA} degradation by double-stranded {RNA} in vitro.},
  journal = {Genes {D}ev.},
  year = {1999},
  volume = {13},
  pages = {3191--3197},
  number = {24},
  month = {Dec},
  abstract = {Double-stranded {RNA} (ds{RNA}) directs gene-specific, post-transcriptional
	silencing in many organisms, including vertebrates, and has provided
	a new tool for studying gene function. {T}he biochemical mechanisms
	underlying this ds{RNA} interference ({RNA}i) are unknown. {H}ere
	we report the development of a cell-free system from syncytial blastoderm
	{D}rosophila embryos that recapitulates many of the features of {RNA}i.
	{T}he interference observed in this reaction is sequence specific,
	is promoted by ds{RNA} but not single-stranded {RNA}, functions by
	specific m{RNA} degradation, and requires a minimum length of ds{RNA}.
	{F}urthermore, preincubation of ds{RNA} potentiates its activity.
	{T}hese results demonstrate that {RNA}i can be mediated by sequence-specific
	processes in soluble reactions.}
}

@article{Tusher2001Significance,
  author      = {Tusher, V. G. and Tibshirani, R. and Chu, G.},
  title       = {Significance analysis of microarrays applied to the ionizing radiation response},
  journal     = {Proc. Natl. Acad. Sci. USA},
  year        = {2001},
  month       = {Apr},
  volume      = {98},
  number      = {9},
  pages       = {5116--5121},
  doi         = {10.1073/pnas.091062498},
  url         = {http://dx.doi.org/10.1073/pnas.091062498},
  pii         = {091062498},
  pmid        = {11309499},
  institution = {Departments of Medicine and Biochemistry, Stanford University, 269
	Campus Drive, Center for Clinical Sciences Research 1115, Stanford,
	CA 94305-5151, USA.},
  keywords    = {csbcbook, csbcbook-ch4},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Tusher2001Significance.pdf},
  file        = {Tusher2001Significance.pdf:Tusher2001Significance.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.04.07},
  abstract    = {Microarrays can measure the expression of thousands of genes to identify
	changes in expression between different biological states. Methods
	are needed to determine the significance of these changes while accounting
	for the enormous number of genes. We describe a method, Significance
	Analysis of Microarrays (SAM), that assigns a score to each gene
	on the basis of change in gene expression relative to the standard
	deviation of repeated measurements. For genes with scores greater
	than an adjustable threshold, SAM uses permutations of the repeated
	measurements to estimate the percentage of genes identified by chance,
	the false discovery rate (FDR). When the transcriptional response
	of human cells to ionizing radiation was measured by microarrays,
	SAM identified 34 genes that changed at least 1.5-fold with an estimated
	FDR of 12\%, compared with FDRs of 60 and 84\% by using conventional
	methods of analysis. Of the 34 genes, 19 were involved in cell cycle
	regulation and 3 in apoptosis. Surprisingly, four nucleotide excision
	repair genes were induced, suggesting that this repair pathway for
	UV-damaged DNA might play a previously unrecognized role in repairing
	DNA damaged by ionizing radiation.}
}

@article{Tut2001Cyclin,
  author = {Tut, V. M. and Braithwaite, K. L. and Angus, B. and Neal, D. E. and
	Lunec, J. and Mellon, J. K.},
  title = {Cyclin {D1} expression in transitional cell carcinoma of the bladder:
	correlation with p53, waf1, {pRb} and {Ki67}.},
  journal = {Br J Cancer},
  year = {2001},
  volume = {84},
  pages = {270--275},
  owner = {lcalzone},
  timestamp = {2010.04.27}
}

@article{Tuteja2009Extracting,
  author = {Geetu Tuteja and Peter White and Jonathan Schug and Klaus H Kaestner},
  title = {Extracting transcription factor targets from {ChIP-Seq} data.},
  journal = {Nucleic Acids Res},
  year = {2009},
  volume = {37},
  pages = {e113},
  number = {17},
  month = {Sep},
  abstract = {ChIP-Seq technology, which combines chromatin immunoprecipitation
	(ChIP) with massively parallel sequencing, is rapidly replacing ChIP-on-chip
	for the genome-wide identification of transcription factor binding
	events. Identifying bound regions from the large number of sequence
	tags produced by ChIP-Seq is a challenging task. Here, we present
	GLITR (GLobal Identifier of Target Regions), which accurately identifies
	enriched regions in target data by calculating a fold-change based
	on random samples of control (input chromatin) data. GLITR uses a
	classification method to identify regions in ChIP data that have
	a peak height and fold-change which do not resemble regions in an
	input sample. We compare GLITR to several recent methods and show
	that GLITR has improved sensitivity for identifying bound regions
	closely matching the consensus sequence of a given transcription
	factor, and can detect bona fide transcription factor targets missed
	by other programs. We also use GLITR to address the issue of sequencing
	depth, and show that sequencing biological replicates identifies
	far more binding regions than re-sequencing the same sample.},
  doi = {10.1093/nar/gkp536},
  pdf = {../local/Tuteja2009Extracting.pdf},
  file = {Tuteja2009Extracting.pdf:Tuteja2009Extracting.pdf:PDF},
  institution = {Department of Genetics and Institute of Diabetes, Obesity and Metabolism,
	University of Pennsylvania School of Medicine, Philadelphia, PA 19104,
	USA.},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gkp536},
  pmid = {19553195},
  timestamp = {2009.10.30},
  url = {http://dx.doi.org/10.1093/nar/gkp536}
}

@article{Tzeng2004Predicting,
  author = {Huey-Ming Tzeng and Jer-Guang Hsieh and Yih-Lon Lin},
  title = {Predicting nurses' intention to quit with a support vector machine:
	a new approach to set up an early warning mechanism in human resource
	management.},
  journal = {Comput {I}nform {N}urs},
  year = {2004},
  volume = {22},
  pages = {232--242},
  number = {4},
  abstract = {This project developed a {S}upport {V}ector {M}achine for predicting
	nurses' intention to quit, using working motivation, job satisfaction,
	and stress levels as predictors. {T}his study was conducted in three
	hospitals located in southern {T}aiwan. {T}he target population was
	all nurses (389 valid cases). {F}or cross-validation, we randomly
	split cases into four groups of approximately equal sizes, and performed
	four training runs. {A}fter the training, the average percentage
	of misclassification on the training data was 0.86, while that on
	the testing data was 10.8, resulting in predictions with 89.2\% accuracy.
	{T}his {S}upport {V}ector {M}achine can predict nurses' intention
	to quit, without asking these nurses whether they have an intention
	to quit.},
  keywords = {Adolescent, Adult, Algorithms, Amino Acid Sequence, Amino Acids, Anatomic,
	Attitude of Health Personnel, Bacterial Proteins, Bias (Epidemiology),
	Brain, Brain Mapping, Burnout, Comparative Study, Computer Simulation,
	Computer-Assisted, Data Interpretation, Diffusion Magnetic Resonance
	Imaging, Facial Asymmetry, Facial Expression, Facial Paralysis, Female,
	Gene Expression Profiling, Gram-Negative Bacteria, Gram-Positive
	Bacteria, Hospital, Humans, Image Interpretation, Intention, Job
	Satisfaction, Logistic Models, Magnetoencephalography, Male, Middle
	Aged, Models, Motion, Neural Networks (Computer), Neural Pathways,
	Non-U.S. Gov't, Nonlinear Dynamics, Nursing Administration Research,
	Nursing Staff, Personnel Management, Personnel Turnover, Photography,
	Predictive Value of Tests, Professional, Protein, Proteins, Proteome,
	Psychological, Questionnaires, Regression Analysis, Reproducibility
	of Results, Research Support, Retina, Risk Factors, Sequence Alignment,
	Sequence Analysis, Severity of Illness Index, Software, Statistical,
	Subcellular Fractions, Taiwan, Theoretical, Workplace, 15494654},
  pii = {00024665-200407000-00012}
}

@inproceedings{Udupa04Algorithmic,
  author = {Udupa, R. and Faruquie, T. A. and Maji, H. K.},
  title = {An Algorithmic Framework for Solving the Decoding Problem in Statistical
	Machine Translation},
  booktitle = {Proceedings of Coling 2004},
  year = {2004},
  pages = {631--637},
  address = {Geneva, Switzerland},
  month = aug,
  publisher = {COLING},
  eventdate = {2004-08-23/2004-08-27}
}

@article{Ueda2005Probabilistic,
  author = {Ueda, N. and Aoki-Kinoshita, K. F. and Yamaguchi, A. and Akutsu,
	T. and Mamitsuka, H.},
  title = {A Probabilistic Model for Mining Labeled Ordered Trees: Capturing
	Patterns in Carbohydrate Sugar Chains},
  journal = {{IEEE} Transactions on Knowledge and Data Engineering},
  year = {2005},
  volume = {17},
  pages = {1051--1064},
  number = {8},
  abstract = {Glycans, or carbohydrate sugar chains, which play a number of important
	roles in the development and functioning of multicellular organisms,
	can be regarded as labeled ordered trees. {A} recent increase in
	the documentation of glycan structures, especially in the form of
	database curation, has made mining glycans important for the understanding
	of living cells. {W}e propose a probabilistic model for mining labeled
	ordered trees, and we further present an efficient learning algorithm
	for this model, based on an {EM} algorithm. {T}he time and space
	complexities of this algorithm are rather favorable, falling within
	the practical limits set by a variety of existing probabilistic models,
	including stochastic context-free grammars. {E}xperimental results
	have shown that, in a supervised problem setting, the proposed method
	outperformed five other competing methods by a statistically significant
	factor in all cases. {W}e further applied the proposed method to
	aligning multiple glycan trees, and we detected biologically significant
	common subtrees in these alignments where the trees are automatically
	classified into subtypes already known in glycobiology. {E}xtended
	abstracts of parts of the work presented in this paper have appeared
	in [35], [4], and [3].},
  doi = {10.1109/TKDE.2005.117},
  pdf = {../local/Ueda2005Probabilistic.pdf},
  file = {Ueda2005Probabilistic.pdf:local/Ueda2005Probabilistic.pdf:PDF},
  url = {http://dx.doi.org/10.1109/TKDE.2005.117}
}

@article{Uetz2000comprehensive,
  author = {Uetz, P. and Giot, L. and Cagney, G. and Mansfield, T. A. and Judson,
	R. S. and Knight, J. R. and Lockshon, D. and Narayan, V. and Srinivasan,
	M. and Pochart, P. and Qureshi-Emili, A. and Li, Y. and Godwin, B.
	and Conover, D. and Kalbfleisch, T. and Vijayadamodar, G. and Yang,
	M. and Johnston, M. and Fields, S. and Rothberg, J. M.},
  title = {A comprehensive analysis of protein-protein interactions in {Saccharomyces}
	cerevisiae},
  journal = {Nature},
  year = {2000},
  volume = {403},
  pages = {623--627},
  number = {6770},
  pdf = {../local/uetz00.pdf},
  file = {uetz00.pdf:local/uetz00.pdf:PDF},
  subject = {bionet},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v403/n6770/full/403623a0_fs.html&content_filetype=pdf}
}

@article{Ui-Tei2004Guidelines,
  author = {Ui-Tei, K. and Naito, Y. and Takahashi, F. and Haraguchi, T. and
	Ohki-Hamazaki, H. and Juni, A. and Ueda, R. and Saigo, K.},
  title = {Guidelines for the selection of highly effective {siRNA} sequences
	for mammalian and chick {RNA} interference},
  journal = {Nucleic Acids Res.},
  year = {2004},
  volume = {32},
  pages = {936--948},
  number = {3},
  month = feb,
  abstract = {In the present study, the relationship between short interfering {RNA}
	(si{RNA}) sequence and {RNA} interference ({RNA}i) effect was extensively
	analyzed using 62 targets of four exogenous and two endogenous genes
	and three mammalian and {D}rosophila cells. {W}e present the rules
	that may govern si{RNA} sequence preference and in accordance with
	which highly effective si{RNA}s essential for systematic mammalian
	functional genomics can be readily designed. {T}hese rules indicate
	that si{RNA}s which simultaneously satisfy all four of the following
	sequence conditions are capable of inducing highly effective gene
	silencing in mammalian cells: (i) {A}/{U} at the 5' end of the antisense
	strand; (ii) {G}/{C} at the 5' end of the sense strand; (iii) at
	least five {A}/{U} residues in the 5' terminal one-third of the antisense
	strand; and (iv) the absence of any {GC} stretch of more than 9 nt
	in length. si{RNA}s opposite in features with respect to the first
	three conditions give rise to little or no gene silencing in mammalian
	cells. {E}ssentially the same rules for si{RNA} sequence preference
	were found applicable to {DNA}-based {RNA}i in mammalian cells and
	in ovo {RNA}i using chick embryos. {I}n contrast to mammalian and
	chick cells, little si{RNA} sequence preference could be detected
	in {D}rosophila in vivo {RNA}i.},
  doi = {10.1093/nar/gkh247},
  keywords = {sirna},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/32/3/936}
}

@article{Ulitsky2010DEGAS,
  author = {Ulitsky, I. and Krishnamurthy, A. and Karp, R. M. and Shamir, R.},
  title = {{DEGAS}: de novo discovery of dysregulated pathways in human diseases},
  journal = {PLoS One},
  year = {2010},
  volume = {5},
  pages = {e13367},
  number = {10},
  abstract = {Molecular studies of the human disease transcriptome typically involve
	a search for genes whose expression is significantly dysregulated
	in sick individuals compared to healthy controls. Recent studies
	have found that only a small number of the genes in human disease-related
	pathways show consistent dysregulation in sick individuals. However,
	those studies found that some pathway genes are affected in most
	sick individuals, but genes can differ among individuals. While a
	pathway is usually defined as a set of genes known to share a specific
	function, pathway boundaries are frequently difficult to assign,
	and methods that rely on such definition cannot discover novel pathways.
	Protein interaction networks can potentially be used to overcome
	these problems. We present DEGAS (DysrEgulated Gene set Analysis via
	Subnetworks), a method for identifying connected gene subnetworks
	significantly enriched for genes that are dysregulated in specimens
	of a disease. We applied DEGAS to seven human diseases and obtained
	statistically significant results that appear to home in on compact
	pathways enriched with hallmarks of the diseases. In Parkinson's
	disease, we provide novel evidence for involvement of mRNA splicing,
	cell proliferation, and the 14-3-3 complex in the disease progression.
	DEGAS is available as part of the MATISSE software package (http://acgt.cs.tau.ac.il/matisse).
	The subnetworks identified by DEGAS can provide a signature of the disease
	potentially useful for diagnosis, pinpoint possible pathways affected
	by the disease, and suggest targets for drug intervention.},
  doi = {10.1371/journal.pone.0013367},
  pdf = {../local/Ulitsky2010DEGAS.pdf},
  file = {Ulitsky2010DEGAS.pdf:Ulitsky2010DEGAS.pdf:PDF},
  institution = {Blavatnik School of Computer Science, Tel Aviv University, Tel Aviv,
	Israel. ulitsky@wi.mit.edu},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pmid = {20976054},
  timestamp = {2011.10.09},
  url = {http://dx.doi.org/10.1371/journal.pone.0013367}
}

@article{Ullman1976Algorithm,
  author = {Ullmann, J. R.},
  title = {An Algorithm for Subgraph Isomorphism},
  journal = {J. ACM},
  year = {1976},
  volume = {23},
  pages = {31--42},
  number = {1},
  address = {New York, NY, USA},
  doi = {10.1145/321921.321925},
  issn = {0004-5411},
  publisher = {ACM}
}

@article{Umeyama1988eigendecomposition,
  author = {Umeyama, S.},
  title = {An eigendecomposition approach to weighted graph matching problems},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {1988},
  volume = {10},
  pages = {695--703},
  number = {5},
  month = sep,
  abstract = {An approximate solution to the weighted-graph-matching problem is
	discussed for both undirected and directed graphs. The weighted-graph-matching
	problem is that of finding the optimum matching between two weighted
	graphs, which are graphs with weights at each arc. The proposed method
	uses an analytic instead of a combinatorial or iterative approach
	to the optimum matching problem. Using the eigendecompositions of
	the adjacency matrices (in the case of the undirected-graph-matching
	problem) or Hermitian matrices derived from the adjacency matrices
	(in the case of the directed-graph-matching problem), a matching
	close to the optimum can be found efficiently when the graphs are
	sufficiently close to each other. Simulation results are given to
	evaluate the performance of the proposed method.},
  doi = {10.1109/34.6778},
  owner = {jp},
  timestamp = {2008.10.05},
  url = {http://dx.doi.org/10.1109/34.6778}
}

@article{Vaidya2007Breast,
  author = {Vaidya, Jayant S.},
  title = {Breast cancer: an artistic view},
  journal = {The Lancet Oncology},
  year = {2007},
  volume = {8},
  pages = {583--585},
  keywords = {csbcbook}
}

@article{Valentini2007Mosclust:,
  author = {Valentini, Giorgio},
  title = {Mosclust: a software library for discovering significant structures
	in bio-molecular data},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {387--389},
  number = {3},
  month = feb,
  abstract = {The R package mosclust (model order selection for clustering problems)
	implements algorithms based on the concept of stability for discovering
	significant structures in bio-molecular data. The software library
	provides stability indices obtained through different data perturbations
	methods (resampling, random projections, noise injection), as well
	as statistical tests to assess the significance of multi-level structures
	singled out from the data. Availability: http://homes.dsi.unimi.it/~valenti/SW/mosclust/download/mosclust_1.0.tar.gz.
	Supplementary information: http://homes.dsi.unimi.it/~valenti/SW/mosclust.},
  doi = {10.1093/bioinformatics/btl600},
  institution = {DSI, Dipartimento di Scienze dell'Informazione, Universit{\`a} degli
	Studi di Milano, Via Comelico 39, Italy. valentini@dsi.unimi.it},
  keywords = {Algorithms; Artificial Intelligence; Cluster Analysis; Gene Expression
	Profiling, methods; Oligonucleotide Array Sequence Analysis, methods;
	Pattern Recognition, Automated, methods; Programming Languages; Proteome,
	metabolism; Signal Transduction, physiology; Software},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {btl600},
  pmid = {17127677},
  timestamp = {2011.05.14},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl600}
}

@article{Valentini2002Gene,
  author = {Valentini, G.},
  title = {Gene expression data analysis of human lymphoma using support vector
	machines and output coding ensembles},
  journal = {Artif. Intell. Med.},
  year = {2002},
  volume = {26},
  pages = {281--304},
  number = {3},
  month = nov,
  abstract = {The large amount of data generated by {DNA} microarrays was originally
	analysed using unsupervised methods, such as clustering or self-organizing
	maps. {R}ecently supervised methods such as decision trees, dot-product
	support vector machines ({SVM}) and multi-layer perceptrons ({MLP})
	have been applied in order to classify normal and tumoural tissues.
	{W}e propose methods based on non-linear {SVM} with polynomial and
	{G}aussian kernels, and output coding ({OC}) ensembles of learning
	machines to separate normal from malignant tissues, to classify different
	types of lymphoma and to analyse the role of sets of coordinately
	expressed genes in carcinogenic processes of lymphoid tissues. {U}sing
	gene expression data from "{L}ymphochip", a specialised {DNA} microarray
	developed at {S}tanford {U}niversity {S}chool of {M}edicine, we show
	that {SVM} can correctly separate normal from tumoural tissues, and
	{OC} ensembles can be successfully used to classify different types
	of lymphoma. {M}oreover, we identify a group of coordinately expressed
	genes related to the separation of two distinct subgroups inside
	diffuse large {B}-cell lymphoma ({DLBCL}), validating a previous
	{A}lizadeh's hypothesis about the existence of two distinct diseases
	inside {DLBCL}.},
  doi = {10.1016/S0933-3657(02)00077-5},
  pdf = {../local/Valentini2002Gene.pdf},
  file = {Valentini2002Gene.pdf:local/Valentini2002Gene.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Vallabhaneni2004Motor,
  author = {Vallabhaneni, Anirudh and He, Bin},
  title = {Motor imagery task classification for brain computer interface applications
	using spatiotemporal principle component analysis},
  journal = {Neurol Res},
  year = {2004},
  volume = {26},
  pages = {282--287},
  number = {3},
  month = apr,
  abstract = {Classification of single-trial imagined left- and right-hand movements
	recorded through scalp {EEG} are explored in this study. {C}lassical
	event-related desynchronization/synchronization ({ERD}/{ERS}) calculation
	approach was utilized to extract {ERD} features from the raw scalp
	{EEG} signal. {P}rinciple {C}omponent {A}nalysis ({PCA}) was used
	for feature extraction and applied on spatial, as well as temporal
	dimensions in two consecutive steps. {A} {S}upport {V}ector {M}achine
	({SVM}) classifier using a linear decision function was used to classify
	each trial as either left or right. {T}he present approach has yielded
	good classification results and promises to have potential for further
	refinement for increased accuracy as well as application in online
	brain computer interface ({BCI}).},
  doi = {10.1179/016164104225013950},
  keywords = {Amino Acids, Antibodies, Artificial Intelligence, Biological, Brain,
	Brain Mapping, Calibration, Comparative Study, Computational Biology,
	Cysteine, Cystine, Electrodes, Electroencephalography, Evoked Potentials,
	Female, Horseradish Peroxidase, Humans, Imagery (Psychotherapy),
	Imagination, Laterality, Male, Monoclonal, Movement, Neoplasms, Non-P.H.S.,
	Non-U.S. Gov't, P.H.S., Perception, Principal Component Analysis,
	Protein, Protein Array Analysis, Proteins, Research Support, Sensitivity
	and Specificity, Sequence Analysis, Tumor Markers, U.S. Gov't, User-Computer
	Interface, 15142321},
  url = {http://dx.doi.org/10.1179/016164104225013950}
}

@article{Vanunu2010Associating,
  author = {Vanunu, O. and Magger, O. and Ruppin, E. and Shlomi, T. and Sharan,
	R.},
  title = {Associating genes and protein complexes with disease via network
	propagation},
  journal = {PLoS Comput. Biol.},
  year = {2010},
  volume = {6},
  pages = {e1000641},
  number = {1},
  month = jan,
  abstract = {A fundamental challenge in human health is the identification of disease-causing
	genes. Recently, several studies have tackled this challenge via
	a network-based approach, motivated by the observation that genes
	causing the same or similar diseases tend to lie close to one another
	in a network of protein-protein or functional interactions. However,
	most of these approaches use only local network information in the
	inference process and are restricted to inferring single gene associations.
	Here, we provide a global, network-based method for prioritizing
	disease genes and inferring protein complex associations, which we
	call PRINCE. The method is based on formulating constraints on the
	prioritization function that relate to its smoothness over the network
	and usage of prior information. We exploit this function to predict
	not only genes but also protein complex associations with a disease
	of interest. We test our method on gene-disease association data,
	evaluating both the prioritization achieved and the protein complexes
	inferred. We show that our method outperforms extant approaches in
	both tasks. Using data on 1,369 diseases from the OMIM knowledgebase,
	our method is able (in a cross validation setting) to rank the true
	causal gene first for 34\% of the diseases, and infer 139 disease-related
	complexes that are highly coherent in terms of the function, expression
	and conservation of their member proteins. Importantly, we apply
	our method to study three multi-factorial diseases for which some
	causal genes have been found already: prostate cancer, alzheimer
	and type 2 diabetes mellitus. PRINCE's predictions for these diseases
	highly match the known literature, suggesting several novel causal
	genes and protein complexes for further investigation.},
  doi = {10.1371/journal.pcbi.1000641},
  institution = {School of Computer Science, Tel-Aviv University, Tel-Aviv, Israel.},
  keywords = {Algorithms; Alzheimer Disease; Databases, Genetic; Diabetes Mellitus;
	Disease; Genes; Humans; Male; Multiprotein Complexes; Prostatic Neoplasms;
	Protein Interaction Mapping; Proteins; Reproducibility of Results},
  owner = {mordelet},
  pmid = {20090828},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1371/journal.pcbi.1000641}
}

@article{Vapnik2000Bounds,
  author = {Vapnik, V. and Chapelle, O.},
  title = {Bounds on error expectation for support vector machines},
  journal = {Neural Comput},
  year = {2000},
  volume = {12},
  pages = {2013--2036},
  number = {9},
  month = sep,
  abstract = {We introduce the concept of span of support vectors ({SV}) and show
	that the generalization ability of support vector machines ({SVM})
	depends on this new geometrical concept. {W}e prove that the value
	of the span is always smaller (and can be much smaller) than the
	diameter of the smallest sphere containing the support vectors, used
	in previous bounds ({V}apnik, 1998). {W}e also demonstrate experimentally
	that the prediction of the test error given by the span is very accurate
	and has direct application in model selection (choice of the optimal
	parameters of the {SVM}).},
  keywords = {Automated, Learning, Models, Neural Networks (Computer), Neurological,
	Pattern Recognition, 10976137}
}

@book{Vapnik1974Theory,
  author = {Vapnik, V. N. and Chervonenkis, A. Ya.},
  title = {Teoriya Raspoznavaniya Obrazov: Statisticheskie Problemy Obucheniya.
	({R}ussian) [{T}heory of {P}attern {R}ecognition: {S}tatistical {P}roblems
	of {L}earning]},
  publisher = {Nauka},
  address = {Moscow},
  year = {1974}
}

@book{Vapnik1998Statistical,
  author = {Vapnik, V. N.},
  title = {Statistical Learning Theory},
  publisher = {Wiley},
  address = {New York},
  year = {1998},
  subject = {kernel}
}

@book{Vapnik1995nature,
  author = {Vapnik, Vladimir N.},
  title = {The Nature of Statistical Learning Theory},
  publisher = {Springer-Verlag New York, Inc.},
  address = {New York, NY, USA},
  year = {1995},
  citeulike-article-id = {254315},
  citeulike-linkout-0 = {http://portal.acm.org/citation.cfm?id=211359},
  isbn = {0387945598},
  keywords = {svms},
  posted-at = {2005-07-13 13:49:01},
  priority = {2},
  url = {http://portal.acm.org/citation.cfm?id=211359}
}

@book{Varki1999Essentials,
  author = {Varki, A. and Cummings, R. and Esko, J. and Freeze, H. and Hart,
	G. and Marth, J.},
  title = {Essentials of Glycobiology},
  publisher = {Cold Spring Harbor Laboratory Press},
  year = {1999}
}

@article{Vassetzky2009Chromosome,
  author = {Vassetzky, Yegor and Gavrilov, Alexey and Eivazova, Elvira and Priozhkova,
	Iryna and Lipinski, Marc and Razin, Sergey},
  title = {Chromosome conformation capture (from {3C} to {5C}) and its {ChIP}-based
	modification},
  journal = {Methods Mol Biol},
  year = {2009},
  volume = {567},
  pages = {171--188},
  abstract = {Chromosome conformation capture (3C) methodology was developed to
	study spatial organization of long genomic regions in living cells.
	Briefly, chromatin is fixed with formaldehyde in vivo to cross-link
	interacting sites, digested with a restriction enzyme and ligated
	at a low DNA concentration so that ligation between cross-linked
	fragments is favored over ligation between random fragments. Ligation
	products are then analyzed and quantified by PCR. So far, semi-quantitative
	PCR methods were widely used to estimate the ligation frequencies.
	However, it is often important to estimate the ligation frequencies
	more precisely which is only possible by using the real-time PCR.
	At the same time, it is equally necessary to monitor the specificity
	of PCR amplification. That is why the real-time PCR with TaqMan probes
	is becoming more and more popular in 3C studies. In this chapter,
	we describe the general protocol for 3C analysis with the subsequent
	estimation of ligation frequencies by using the real-time PCR technology
	with TaqMan probes. We discuss in details all steps of the experimental
	procedure paying special attention to weak points and possible ways
	to solve the problems. A special attention is also paid to the problems
	in interpretation of the results and necessary control experiments.
	Besides, in theory, we consider other approaches to analysis of the
	ligation products used in frames of the so-called 4C and 5C methods.
	The recently developed chromatin immunoprecipitation (ChIP)-loop
	assay representing a combination of 3C and ChIP is also discussed.},
  doi = {10.1007/978-1-60327-414-2_12},
  institution = {CNRS UMR-8126, Universit{\'e} Paris-Sud 11, Institut de Canc{\'e}rologie
	Gustave Roussy, 39, rue Camille-Desmoulins, 94805, Villejuif.},
  keywords = {Chromatin Immunoprecipitation; Chromosome Mapping; Chromosomes; Cross-Linking
	Reagents; Humans; Models, Biological; Nucleic Acid Conformation;
	Polymerase Chain Reaction; Quality Control},
  owner = {phupe},
  pmid = {19588093},
  timestamp = {2010.08.11},
  url = {http://dx.doi.org/10.1007/978-1-60327-414-2_12}
}

@article{Vasudevan2007Switching,
  author = {Vasudevan, Shobha and Tong, Yingchun and Steitz, Joan A.},
  title = {Switching from repression to activation: {microRNAs} can up-regulate
	translation},
  journal = {Science},
  year = {2007},
  volume = {318},
  pages = {1931--1934},
  number = {5858},
  month = dec,
  abstract = {AU-rich elements (AREs) and microRNA target sites are conserved sequences
	in messenger RNA (mRNA) 3' untranslated regions (3'UTRs) that control
	gene expression posttranscriptionally. Upon cell cycle arrest, the
	ARE in tumor necrosis factor-alpha (TNFalpha) mRNA is transformed
	into a translation activation signal, recruiting Argonaute (AGO)
	and fragile X mental retardation-related protein 1 (FXR1), factors
	associated with micro-ribonucleoproteins (microRNPs). We show that
	human microRNA miR369-3 directs association of these proteins with
	the AREs to activate translation. Furthermore, we document that two
	well-studied microRNAs---Let-7 and the synthetic microRNA miRcxcr4---likewise
	induce translation up-regulation of target mRNAs on cell cycle arrest,
	yet they repress translation in proliferating cells. Thus, activation
	is a common function of microRNPs on cell cycle arrest. We propose
	that translation regulation by microRNPs oscillates between repression
	and activation during the cell cycle.},
  doi = {10.1126/science.1149460},
  institution = {Department of Molecular Biophysics and Biochemistry, Howard Hughes
	Medical Institute, Yale University School of Medicine, Boyer Center
	for Molecular Medicine, 295 Congress Avenue, New Haven, CT 06536,
	USA.},
  keywords = {sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {1149460},
  pmid = {18048652},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1126/science.1149460}
}

@unpublished{Vazquez2001Modeling,
  author = {Vazquez, A. and Flammini, A. and Maritan, A. and Vespignani, A.},
  title = {Modeling of protein interaction networks},
  note = {E-print cond-mat/0108043},
  month = aug,
  year = {2001},
  eprint = {cond-mat/0108043},
  archiveprefix = {arXiv},
  pdf = {../local/vazq01.pdf},
  file = {vazq01.pdf:local/vazq01.pdf:PDF},
  subject = {bionetprot},
  url = {http://arxiv.org/abs/cond-mat/0108043}
}

@article{Veber2002Molecular,
  author = {Veber, D. F. and Johnson, S. R. and Cheng, H.-Y. and Smith, B. R.
	and Ward, K. W. and Kopple, K. D.},
  title = {Molecular properties that influence the oral bioavailability of drug
	candidates},
  journal = {J. Med. Chem.},
  year = {2002},
  volume = {45},
  pages = {2615--2623},
  number = {12},
  month = jun,
  abstract = {Oral bioavailability measurements in rats for over 1100 drug candidates
	studied at SmithKline Beecham Pharmaceuticals (now GlaxoSmithKline)
	have allowed us to analyze the relative importance of molecular properties
	considered to influence that drug property. Reduced molecular flexibility,
	as measured by the number of rotatable bonds, and low polar surface
	area or total hydrogen bond count (sum of donors and acceptors) are
	found to be important predictors of good oral bioavailability, independent
	of molecular weight. That on average both the number of rotatable
	bonds and polar surface area or hydrogen bond count tend to increase
	with molecular weight may in part explain the success of the molecular
	weight parameter in predicting oral bioavailability. The commonly
	applied molecular weight cutoff at 500 does not itself significantly
	separate compounds with poor oral bioavailability from those with
	acceptable values in this extensive data set. Our observations suggest
	that compounds which meet only the two criteria of (1) 10 or fewer
	rotatable bonds and (2) polar surface area equal to or less than
	140 A(2) (or 12 or fewer H-bond donors and acceptors) will have a
	high probability of good oral bioavailability in the rat. Data sets
	for the artificial membrane permeation rate and for clearance in
	the rat were also examined. Reduced polar surface area correlates
	better with increased permeation rate than does lipophilicity (C
	log P), and increased rotatable bond count has a negative effect
	on the permeation rate. A threshold permeation rate is a prerequisite
	of oral bioavailability. The rotatable bond count does not correlate
	with the data examined here for the in vivo clearance rate in the
	rat.},
  doi = {10.1021/jm020017n},
  keywords = {chemogenomics},
  owner = {laurent},
  pii = {jm020017n},
  pmid = {12036371},
  timestamp = {2008.07.16}
}

@article{Veer2002Gene,
  author = {van 't Veer, L. J. and Dai, H. and van de Vijver, M. J. and He, Y.
	D. and Hart, A. A. M. and Mao, M. and Peterse, H. L. and van der
	Kooy, K. and Marton, M. J. and Witteveen, A. T. and Schreiber, G.
	J. and Kerkhoven, R. M. and Roberts, C. and Linsley, P. S. and Bernards,
	R. and Friend, S. H.},
  title = {Gene expression profiling predicts clinical outcome of breast cancers},
  journal = {Nature},
  year = {2002},
  volume = {415},
  pages = {530--536},
  number = {6871},
  month = jan,
  abstract = {Breast cancer patients with the same stage of disease can have markedly
	different treatment responses and overall outcome. The strongest
	predictors for metastases (for example, lymph node status and histological
	grade) fail to classify accurately breast tumours according to their
	clinical behaviour. Chemotherapy or hormonal therapy reduces the
	risk of distant metastases by approximately one-third; however, 70-80\%
	of patients receiving this treatment would have survived without
	it. None of the signatures of breast cancer gene expression reported
	to date allow for patient-tailored therapy strategies. Here we used
	DNA microarray analysis on primary breast tumours of 117 young patients,
	and applied supervised classification to identify a gene expression
	signature strongly predictive of a short interval to distant metastases
	('poor prognosis' signature) in patients without tumour cells in
	local lymph nodes at diagnosis (lymph node negative). In addition,
	we established a signature that identifies tumours of BRCA1 carriers.
	The poor prognosis signature consists of genes regulating cell cycle,
	invasion, metastasis and angiogenesis. This gene expression profile
	will outperform all currently used clinical parameters in predicting
	disease outcome. Our findings provide a strategy to select patients
	who would benefit from adjuvant therapy.},
  doi = {10.1038/415530a},
  pdf = {../local/Veer2002Gene.pdf},
  file = {Veer2002Gene.pdf:Veer2002Gene.pdf:PDF},
  institution = {Division of Diagnostic Oncology, The Netherlands Cancer Institute,
	121 Plesmanlaan, 1066 CX Amsterdam, The Netherlands.},
  keywords = {breastcancer, csbcbook, csbcbook-ch3},
  owner = {jp},
  pii = {415530a},
  pmid = {11823860},
  timestamp = {2008.11.16},
  url = {http://dx.doi.org/10.1038/415530a}
}

@article{Venables2004Aberrant,
  author = {Venables, Julian P.},
  title = {Aberrant and Alternative Splicing in Cancer},
  journal = {Cancer Res.},
  year = {2004},
  volume = {64},
  pages = {7647--7654},
  keywords = {csbcbook}
}

@book{Venables2002Modern,
  author = {Venables, W. N. and Ripley, B. D.},
  title = {Modern Applied Statistics with {S}},
  publisher = {Springer},
  address = {New York},
  year = {2002},
  edition = {Fourth},
  isbn = {0-387-95457-0},
  owner = {jp},
  timestamp = {2012.07.31},
  url = {http://www.stats.ox.ac.uk/pub/MASS4}
}

@article{Venet2011Most,
  author = {Venet, D. and Dumont, J. E. and Detours, V.},
  title = {Most random gene expression signatures are significantly associated
	with breast cancer outcome},
  journal = {PLoS Computational Biology},
  year = {2011},
  volume = {7},
  pages = {e1002240},
  number = {10},
  doi = {10.1371/journal.pcbi.1002240},
  publisher = {Public Library of Science}
}

@article{Venter2001Sequence,
  author = {Venter, J. C. and others},
  title = {The Sequence of the Human Genome},
  journal = {Science},
  year = {2001},
  volume = {291},
  pages = {1304--1351},
  number = {5507},
  abstract = {A 2.91-billion base pair (bp) consensus sequence of the euchromatic
	portion of the human genome was generated by the whole-genome shotgun
	sequencing method. {T}he 14.8-billion bp {DNA} sequence was generated
	over 9 months from 27,271,853 high-quality sequence reads (5.11-fold
	coverage of the genome) from both ends of plasmid clones made from
	the {DNA} of five individuals. {T}wo assembly strategies--a whole-genome
	assembly and a regional chromosome assembly--were used, each combining
	sequence data from {C}elera and the publicly funded genome effort.
	{T}he public data were shredded into 550-bp segments to create a
	2.9-fold coverage of those genome regions that had been sequenced,
	without including biases inherent in the cloning and assembly procedure
	used by the publicly funded group. {T}his brought the effective coverage
	in the assemblies to eightfold, reducing the number and size of gaps
	in the final assembly over what would be obtained with 5.11-fold
	coverage. {T}he two assembly strategies yielded very similar results
	that largely agree with independent mapping data. {T}he assemblies
	effectively cover the euchromatic regions of the human chromosomes.
	{M}ore than 90\% of the genome is in scaffold assemblies of 100,000
	bp or more, and 25\% of the genome is in scaffolds of 10 million bp
	or larger. {A}nalysis of the genome sequence revealed 26,588 protein-encoding
	transcripts for which there was strong corroborating evidence and
	an additional $\sim$12,000 computationally derived genes with mouse matches
	or other weak supporting evidence. {A}lthough gene-dense clusters
	are obvious, almost half the genes are dispersed in low {G}+{C} sequence
	separated by large tracts of apparently noncoding sequence. {O}nly
	1.1\% of the genome is spanned by exons, whereas 24\% is in introns,
	with 75\% of the genome being intergenic {DNA}. {D}uplications of
	segmental blocks, ranging in size up to chromosomal lengths, are
	abundant throughout the genome and reveal a complex evolutionary
	history. {C}omparative genomic analysis indicates vertebrate expansions
	of genes associated with neuronal function, with tissue-specific
	developmental regulation, and with the hemostasis and immune systems.
	{DNA} sequence comparisons between the consensus sequence and publicly
	funded genome data provided locations of 2.1 million single-nucleotide
	polymorphisms ({SNP}s). {A} random pair of human haploid genomes
	differed at a rate of 1 bp per 1250 on average, but there was marked
	heterogeneity in the level of polymorphism across the genome. {L}ess
	than 1\% of all {SNP}s resulted in variation in proteins, but the
	task of determining which {SNP}s have functional consequences remains
	an open challenge.},
  pdf = {../local/Venter2001Sequence.pdf},
  file = {Venter2001Sequence.pdf:local/Venter2001Sequence.pdf:PDF},
  keywords = {genomics bio},
  owner = {vert},
  url = {http://www.sciencemag.org/cgi/content/abstract/291/5507/1304}
}

@article{Vercoutere2001Rapid,
  author = {Vercoutere, W. and Winters-Hilt, S. and Olsen, H. and Deamer, D. and
	Haussler, D. and Akeson, M.},
  title = {Rapid discrimination among individual {DNA} hairpin molecules at
	single-nucleotide resolution using an ion channel},
  journal = {Nat. Biotechnol.},
  year = {2001},
  volume = {19},
  pages = {248--252},
  number = {3},
  month = {Mar},
  abstract = {R{NA} and {DNA} strands produce ionic current signatures when driven
	through an alpha-hemolysin channel by an applied voltage. {H}ere
	we combine this nanopore detector with a support vector machine ({SVM})
	to analyze {DNA} hairpin molecules on the millisecond time scale.
	{M}easurable properties include duplex stem length, base pair mismatches,
	and loop length. {T}his nanopore instrument can discriminate between
	individual {DNA} hairpins that differ by one base pair or by one
	nucleotide.},
  doi = {10.1038/85696},
  pdf = {../local/Vercoutere2001Rapid.pdf},
  file = {Vercoutere2001Rapid.pdf:local/Vercoutere2001Rapid.pdf:PDF},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins,
	Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites,
	Biological, Bone Marrow Cells, Cell Compartmentation, Chemistry,
	Child, Chromosome Aberrations, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, DNA, Data Interpretation,
	Databases, Decision Trees, Diagnosis, Discriminant Analysis, Electric
	Conductivity, Electrophysiology, Escherichia coli Proteins, Factual,
	Female, Fungal, Gastric Emptying, Gene Expression Profiling, Gene
	Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins,
	Humans, Ion Channels, Kinetics, Leukemia, Lipid Bilayers, Logistic
	Models, Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular,
	Myeloid, Neoplasm, Neoplastic, Neural Networks (Computer), Nevus,
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, Organ Specificity,
	Organelles, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive
	Value of Tests, Promoter Regions (Genetics), Protein Folding, Protein
	Structure, Proteins, Proteome, RNA, Reproducibility of Results, Research
	Support, Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity,
	Sequence Alignment, Sex Characteristics, Skin Diseases, Skin Neoplasms,
	Skin Pigmentation, Software, Statistical, Stomach Diseases, T-Lymphocytes,
	Thermodynamics, Transcription, Transcription Factors, Tumor Markers,
	U.S. Gov't},
  pii = {85696},
  pmid = {11231558},
  url = {http://dx.doi.org/10.1038/85696}
}

@article{de1993Multiplicites,
  author = {{Colin de Verdi{\`e}re}, Y.},
  title = {Multiplicit{\'e}s des valeurs propres {L}aplaciens discrets et {L}aplaciens
	continus},
  journal = {Rendiconti di {M}atematica},
  year = {1993},
  volume = {13},
  pages = {433--460},
  subject = {net}
}

@article{Vermeulen2008Cancer,
  author = {Vermeulen, L. and Sprick, M. R. and Kemper, K. and Stassi, G. and
	Medema, J. P.},
  title = {Cancer stem cells -- old concepts, new insights},
  journal = {Cell Death and Differentiation},
  year = {2008},
  volume = {15},
  pages = {947--958},
  keywords = {csbcbook}
}

@inproceedings{Vert2009High-level,
  author = {Vert, J. P. and Matsui, T. and Satoh, S. and Uchiyama, Y.},
  title = {High-level feature extraction using {SVM} with walk-based graph kernel},
  booktitle = {Proceedings of the IEEE International Conference on Acoustics, Speech
	and Signal Processing (ICASSP 2009)},
  year = {2009},
  pdf = {../local/Vert2009High-level.pdf},
  file = {Vert2009High-level.pdf:Vert2009High-level.pdf:PDF},
  owner = {jp},
  timestamp = {2009.01.05}
}

@inproceedings{Vert2002Support,
  author = {Vert, J.-P.},
  title = {Support vector machine prediction of signal peptide cleavage site
	using a new class of kernels for strings},
  booktitle = {Proceedings of the {P}acific {S}ymposium on {B}iocomputing 2002},
  year = {2002},
  editor = {R. B. Altman and A. K. Dunker and L. Hunter and K. Lauderdale and
	T. E. Klein},
  pages = {649--660},
  publisher = {World Scientific},
  pdf = {../local/vert02.pdf},
  file = {vert02.pdf:local/vert02.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://www.smi.stanford.edu/projects/helix/psb02/vert.pdf}
}

@incollection{Vert2007Kernel,
  author = {Vert, J.-P.},
  title = {Kernel methods in genomics and computational biology},
  booktitle = {Kernel Methods in Bioengineering, Signal and Image Processing},
  year = {2007},
  editor = {G. Camps-Valls and J.-L. Rojo-Alvarez and M. Martinez-Ramon},
  publisher = {IDEA Group},
  owner = {vert},
  timestamp = {2006.11.08}
}

@inproceedings{Vert2001Text,
  author    = {Vert, J.-P.},
  title     = {Text categorization using adaptive context trees},
  booktitle = {Proceedings of the CICLing-2001 conference},
  editor    = {Gelbukh, A.},
  series    = {LNCS},
  volume    = {2004},
  pages     = {423--436},
  publisher = {Springer Verlag},
  year      = {2001},
  owner     = {vert},
  timestamp = {2006.11.08}
}

@techreport{Vert2008optimal,
  author = {Vert, J.-P.},
  title = {The optimal assignment kernel is not positive definite},
  institution = {arXiv},
  year = {2008},
  number = {0801.4061},
  archiveprefix = {arXiv},
  eprint = {0801.4061},
  timestamp = {2008.01.25},
  url = {http://hal.archives-ouvertes.fr/hal-00218278}
}

@unpublished{Vert2006Kernel,
  author = {Vert, J.-P.},
  title = {Kernel methods in bioinformatics: a survey},
  note = {To appear},
  year = {2006}
}

@techreport{Vert2005Kernel,
  author      = {Vert, J.-P.},
  title       = {Kernel methods in computational biology},
  institution = {CNRS-HAL},
  number      = {ccsd-00012124},
  year        = {2005},
  month       = {Oct},
  url         = {http://hal.ccsd.cnrs.fr/ccsd-00012124},
  keywords    = {biosvm},
  pdf         = {../local/Vert2005Kernel.pdf},
  file        = {Vert2005Kernel.pdf:local/Vert2005Kernel.pdf:PDF},
  abstract    = {Support vector machines and kernel methods are increasingly popular
    in genomics and computational biology, due to their good performance
    in real-world applications and strong modularity that makes them
    suitable to a wide range of problems, from the classification of
    tumors to the automatic annotation of proteins. {T}heir ability to
    work in high dimension, to process non-vectorial data, and the natural
    framework they provide to integrate heterogeneous data are particularly
    relevant to various problems arising in computational biology. {I}n
    this chapter we survey some of the most prominent applications published
    so far, highlighting the particular developments in kernel methods
    triggered by problems in biology, and mention a few promising research
    directions likely to expand in the future.}
}

@article{Vert2002tree,
  author = {Vert, J.-P.},
  title = {A tree kernel to analyze phylogenetic profiles},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  number = {Suppl 1},
  pages = {S276--S284},
  pdf = {../local/vert02b.pdf},
  file = {vert02b.pdf:local/vert02b.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://cbio.ensmp.fr/~jvert/publi/ismb02/index.html}
}

@article{Vert2001Adaptive,
  author = {Vert, J.-P.},
  title = {Adaptive context trees and text clustering},
  journal = {{IEEE} Trans. Inform. Theory},
  year = {2001},
  volume = {47},
  pages = {1884--1901},
  number = {5},
  month = {Jul},
  abstract = {In the finite-alphabet context we propose four alternatives to fixed-order
	{M}arkov models to estimate a conditional distribution. {T}hey consist
	in working with a large class of variable-length {M}arkov models
	represented by context trees, and building an estimator of the conditional
	distribution with a risk of the same order as the risk of the best
	estimator for every model simultaneously, in a conditional {K}ullback-{L}eibler
	sense. {S}uch estimators can be used to model complex objects like
	texts written in natural language and define a notion of similarity
	between them. {T}his idea is illustrated by experimental results
	of unsupervised text clustering.},
  doi = {10.1109/18.930925},
  pdf = {../local/Vert2001Adaptive.pdf},
  file = {Vert2001Adaptive.pdf:local/Vert2001Adaptive.pdf:PDF},
  owner = {vert},
  url = {http://dx.doi.org/10.1109/18.930925}
}

@phdthesis{Vert2001Statistical,
  author = {Vert, J.-P.},
  title = {Statistical {M}ethods for {N}atural {L}anguage {M}odelling},
  school = {Paris 6 University},
  year = {2001},
  pdf = {../local/these.pdf},
  file = {these.pdf:http\://cg.ensmp.fr/~vert/publi/phd/these.pdf:PDF},
  owner = {vert}
}

@techreport{Vert2000Double,
  author      = {Vert, J.-P.},
  title       = {Double mixture and universal inference},
  institution = {Ecole Normale Sup{\'e}rieure},
  number      = {DMA-00-15},
  year        = {2000},
  owner       = {vert},
  timestamp   = {2006.11.08}
}

@misc{Vert2006Low-rank,
  author = {Vert, Jean-Philippe and Bach, Francis and Evgeniou, Theodoros},
  title  = {Low-rank matrix factorization with attributes},
  year   = {2006},
  owner  = {jacob}
}

@inproceedings{Vert2010Fast,
  author = {Vert, J.-P. and Bleakley, K.},
  title = {Fast detection of multiple change-points shared by many signals using
	group {LARS}},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2010},
  editor = {J. Lafferty and C. K. I. Williams and J. Shawe-Taylor and R. S. Zemel
	and A. Culotta},
  volume = {22},
  pages = {2343--2352},
  owner = {jp},
  timestamp = {2010.10.12}
}

@article{Vert2006accurate,
  author = {Vert, J.-P. and Foveau, N. and Lajaunie, C. and Vandenbrouck, Y.},
  title = {An accurate and interpretable model for {siRNA} efficacy prediction},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {520},
  abstract = {Background
	
	The use of exogenous small interfering RNAs (siRNAs) for gene silencing
	has quickly become a widespread molecular tool providing a powerful
	means for gene functional study and new drug target identification.
	Although considerable progress has been made recently in understanding
	how the RNAi pathway mediates gene silencing, the design of potent
	siRNAs remains challenging.
	
	
	Results
	
	We propose a simple linear model combining basic features of siRNA
	sequences for siRNA efficacy prediction. Trained and tested on a
	large dataset of siRNA sequences made recently available, it performs
	as well as more complex state-of-the-art models in terms of potency
	prediction accuracy, with the advantage of being directly interpretable.
	The analysis of this linear model allows us to detect and quantify
	the effect of nucleotide preferences at particular positions, including
	previously known and new observations. We also detect and quantify
	a strong propensity of potent siRNAs to contain short asymmetric
	motifs in their sequence, and show that, surprisingly, these motifs
	alone contain at least as much relevant information for potency prediction
	as the nucleotide preferences for particular positions.
	
	
	Conclusion
	
	The model proposed for prediction of siRNA potency is as accurate
	as a state-of-the-art nonlinear model and is easily interpretable
	in terms of biological features. It is freely available on the web
	at http://cbio.ensmp.fr/dsir},
  doi = {10.1186/1471-2105-7-520},
  owner = {jp},
  timestamp = {2008.11.30},
  url = {http://dx.doi.org/10.1186/1471-2105-7-520}
}

@article{Vert2008Machine,
  author = {Vert, J.-P. and Jacob, L.},
  title = {Machine Learning for In Silico Virtual Screening and Chemical Genomics:
	New Strategies},
  journal = {Combinatorial Chemistry \& High Throughput Screening},
  year = {2008},
  volume = {11},
  pages = {677--685},
  number = {8},
  month = {Sep},
  doi = {10.2174/138620708785739899},
  issn = {1386-2073},
  keywords = {cheminformatics, virtual\_screening},
  publisher = {Bentham Science Publishers},
  url = {http://dx.doi.org/10.2174/138620708785739899}
}

@inproceedings{Vert2003Graph-driven,
  author = {Vert, J.-P. and Kanehisa, M.},
  title = {Graph-driven features extraction from microarray data using diffusion
	kernels and kernel {CCA}},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2003},
  editor = {S. Becker and S. Thrun and K. Obermayer},
  volume = {15},
  pages = {1449--1456},
  publisher = {MIT Press},
  pdf = {../local/Vert2003Graph-driven.pdf},
  file = {Vert2003Graph-driven.pdf:local/Vert2003Graph-driven.pdf:PDF},
  keywords = {biosvm}
}

@article{Vert2003Extracting,
  author = {Vert, J.-P. and Kanehisa, M.},
  title = {Extracting active pathways from gene expression data},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  number = {Suppl 2},
  pages = {ii238--ii244},
  abstract = {Motivation: {A} promising way to make sense out of gene expression
	profiles is to relate them to the activity of metabolic and signalling
	pathways. {E}ach pathway usually involves many genes, such as enzymes,
	which can themselves participate in many pathways. {T}he set of all
	known pathways can therefore be represented by a complex network
	of genes. {S}earching for regularities in the set of gene expression
	profiles with respect to the topology of this gene network is a way
	to automatically extract active pathways and their associated patterns
	of activity. {M}ethod: {W}e present a method to perform this task,
	which consists in encoding both the gene network and the set of profiles
	into two kernel functions, and performing a regularized form of canonical
	correlation analysis between the two kernels. {R}esults: {W}hen applied
	to publicly available expression data the method is able to extract
	biologically relevant expression patterns, as well as pathways with
	related activity.},
  pdf = {../local/Vert2003Extracting.pdf},
  file = {Vert2003Extracting.pdf:local/Vert2003Extracting.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/suppl_2/ii238}
}

@techreport{Vert2002Graph-driven,
  author = {Vert, J.-P. and Kanehisa, M.},
  title = {Graph-driven features extraction from microarray data},
  institution = {arXiv},
  year = {2002},
  number = {physics/0206055},
  archiveprefix = {arXiv},
  eprint = {physics/0206055},
  keywords = {biosvm}
}

@article{Vert2007new,
  author = {Vert, J.-P. and Qiu, J. and Noble, W. S.},
  title = {A new pairwise kernel for biological network inference with support
	vector machines},
  journal = {BMC Bioinformatics},
  year = {2007},
  volume = {8},
  number = {Suppl 10},
  pages = {S8},
  abstract = {BACKGROUND: Much recent work in bioinformatics has focused on the
	inference of various types of biological networks, representing gene
	regulation, metabolic processes, protein-protein interactions, etc.
	A common setting involves inferring network edges in a supervised
	fashion from a set of high-confidence edges, possibly characterized
	by multiple, heterogeneous data sets (protein sequence, gene expression,
	etc.). RESULTS: Here, we distinguish between two modes of inference
	in this setting: direct inference based upon similarities between
	nodes joined by an edge, and indirect inference based upon similarities
	between one pair of nodes and another pair of nodes. We propose a
	supervised approach for the direct case by translating it into a
	distance metric learning problem. A relaxation of the resulting convex
	optimization problem leads to the support vector machine (SVM) algorithm
	with a particular kernel for pairs, which we call the metric learning
	pairwise kernel. This new kernel for pairs can easily be used by
	most SVM implementations to solve problems of supervised classification
	and inference of pairwise relationships from heterogeneous data.
	We demonstrate, using several real biological networks and genomic
	datasets, that this approach often improves upon the state-of-the-art
	SVM for indirect inference with another pairwise kernel, and that
	the combination of both kernels always improves upon each individual
	kernel. CONCLUSION: The metric learning pairwise kernel is a new
	formulation to infer pairwise relationships with SVM, which provides
	state-of-the-art results for the inference of several biological
	networks from heterogeneous genomic data.},
  doi = {10.1186/1471-2105-8-S10-S8},
  pii = {1471-2105-8-S10-S8},
  pmid = {18269702},
  timestamp = {2008.05.26},
  url = {http://dx.doi.org/10.1186/1471-2105-8-S10-S8}
}

@incollection{Vert2004Local,
  author = {Vert, J.-P. and Saigo, H. and Akutsu, T.},
  title = {Local alignment kernels for biological sequences},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  pages = {131--154},
  address = {Cambridge, MA},
  pdf = {../local/saigo.pdf},
  file = {saigo.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/saigo.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@inproceedings{Vert2006Kernels,
  author = {Vert, J.-P. and Thurman, R. and Noble, W. S.},
  title = {Kernels for gene regulatory regions},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2006},
  editor = {Y. Weiss and B. Sch{\"o}lkopf and J. Platt},
  volume = {18},
  pages = {1401--1408},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  keywords = {biosvm}
}

@incollection{Vert2004primer,
  author = {Vert, J.-P. and Tsuda, K. and Sch{\"o}lkopf, B.},
  title = {A primer on kernel methods},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  pages = {35--70},
  keywords = {biosvm},
  owner = {vert}
}

@inproceedings{Vert2005Supervised,
  author = {Vert, J.-P. and Yamanishi, Y.},
  title = {Supervised graph inference},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2005},
  editor = {Saul, L. K. and Weiss, Y. and Bottou, L.},
  volume = {17},
  pages = {1433--1440},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  pdf = {../local/nips2004.pdf},
  file = {nips2004.pdf:http\://cg.ensmp.fr/~vert/publi/04nips_yamanishi/nips2004.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@article{Vert2006Consistency,
  author    = {Vert, R. and Vert, J.-P.},
  title     = {Consistency and convergence rates of one-class {SVM}s and related
    algorithms},
  journal   = {J. Mach. Learn. Res.},
  year      = {2006},
  volume    = {7},
  pages     = {817--854},
  url       = {http://jmlr.csail.mit.edu/papers/v7/vert06a.html},
  owner     = {jp},
  timestamp = {2008.11.30},
  abstract  = {We determine the asymptotic behaviour of the function computed by
    support vector machines (SVM) and related algorithms that minimize
    a regularized empirical convex loss function in the reproducing kernel
    Hilbert space of the Gaussian RBF kernel, in the situation where
    the number of examples tends to infinity, the bandwidth of the Gaussian
    kernel tends to 0, and the regularization parameter is held fixed.
    Non-asymptotic convergence bounds to this limit in the L2 sense are
    provided, together with upper bounds on the classification error
    that is shown to converge to the Bayes risk, therefore proving the
    Bayes-consistency of a variety of methods although the regularization
    term does not vanish. These results are particularly relevant to
    the one-class SVM, for which the regularization can not vanish by
    construction, and which is shown for the first time to be a consistent
    density level set estimator.}
}

@techreport{Vert2005Consistency,
  author      = {Vert, R. and Vert, J.-P.},
  title       = {Consistency and convergence rates of one-class {SVM} and related
    algorithms},
  institution = {LRI, Universit{\'e} Paris-Sud},
  number      = {1414},
  year        = {2005},
  pdf         = {../local/Vert2005Consistency.pdf},
  file        = {Vert2005Consistency.pdf:local/Vert2005Consistency.pdf:PDF},
  abstract    = {We determine the asymptotic limit of the function computed by support
    vector machines ({SVM}) and related algorithms that minimize a regularized
    empirical convex loss function in the reproducing kernel {H}ilbert
    space of the {G}aussian {RBF} kernel, in the situation where the
    number of examples tends to infinity, the bandwidth of the {G}aussian
    kernel tends to 0, and the regularization parameter is held fixed.
    {N}on-asymptotic convergence bounds to this limit in the {L}2 sense
    are provided, together with upper bounds on the classification error
    that is shown to converge to the {B}ayes risk, therefore proving
    the {B}ayes-consistency of a variety of methods although the regularization
    term does not vanish. {T}hese results are particularly relevant to
    the one-class {SVM}, for which the regularization can not vanish
    by construction, and which is shown for the first time to be a consistent
    density level set estimator.}
}

@article{Vickers2003Efficient,
  author = {Vickers, T. A. and Koo, S. and Bennett, C. F. and Crooke, S. T. and
	Dean, N. M. and Baker, B. F.},
  title = {Efficient reduction of target {RNA}s by small interfering {RNA} and
	{RNase} {H}-dependent antisense agents. {A} comparative analysis},
  journal = {J. {B}iol. {C}hem.},
  year = {2003},
  volume = {278},
  pages = {7108--7118},
  number = {9},
  month = {Feb},
  abstract = {R{NA} interference can be considered as an antisense mechanism of
	action that utilizes a double-stranded {RN}ase to promote hydrolysis
	of the target {RNA}. {W}e have performed a comparative study of optimized
	antisense oligonucleotides designed to work by an {RNA} interference
	mechanism to oligonucleotides designed to work by an {RN}ase {H}-dependent
	mechanism in human cells. {T}he potency, maximal effectiveness, duration
	of action, and sequence specificity of optimized {RN}ase {H}-dependent
	oligonucleotides and small interfering {RNA} (si{RNA}) oligonucleotide
	duplexes were evaluated and found to be comparable. {E}ffects of
	base mismatches on activity were determined to be position-dependent
	for both si{RNA} oligonucleotides and {RN}ase {H}-dependent oligonucleotides.
	{I}n addition, we determined that the activity of both si{RNA} oligonucleotides
	and {RN}ase {H}-dependent oligonucleotides is affected by the secondary
	structure of the target m{RNA}. {T}o determine whether positions
	on target {RNA} identified as being susceptible for {RN}ase {H}-mediated
	degradation would be coincident with si{RNA} target sites, we evaluated
	the effectiveness of si{RNA}s designed to bind the same position
	on the target m{RNA} as {RN}ase {H}-dependent oligonucleotides. {E}xamination
	of 80 si{RNA} oligonucleotide duplexes designed to bind to {RNA}
	from four distinct human genes revealed that, in general, activity
	correlated with the activity to {RN}ase {H}-dependent oligonucleotides
	designed to the same site, although some exceptions were noted. {T}he
	one major difference between the two strategies is that {RN}ase {H}-dependent
	oligonucleotides were determined to be active when directed against
	targets in the pre-m{RNA}, whereas si{RNA}s were not. {T}hese results
	demonstrate that si{RNA} oligonucleotide- and {RN}ase {H}-dependent
	antisense strategies are both valid strategies for evaluating function
	of genes in cell-based assays.},
  doi = {10.1074/jbc.M210326200},
  keywords = {Animals, Antisense, Base Sequence, COS Cells, Calf Thymus, Cultured,
	Dose-Response Relationship, Drug, Flow Cytometry, Humans, Intercellular
	Adhesion Molecule-1, Introns, Luciferases, Messenger, Molecular Sequence
	Data, Nucleic Acid Conformation, Oligonucleotides, PTEN Phosphohydrolase,
	Phosphoric Monoester Hydrolases, Protein Structure, RNA, Ribonuclease
	H, Small Interfering, Tertiary, Time Factors, Tumor Cells, Tumor
	Suppressor Proteins},
  pii = {M210326200},
  pmid = {12500975},
  url = {http://dx.doi.org/10.1074/jbc.M210326200}
}

@article{Vijver2002gene-expression,
  author      = {van de Vijver, M. J. and He, Y. D. and van't Veer, L. J.
    and Dai, H. and Hart, A. A. M. and Voskuil, D. W. and Schreiber, G. J.
    and Peterse, J. L. and Roberts, C. and Marton, M. J. and Parrish, M.
    and Atsma, D. and Witteveen, A. and Glas, A. and Delahaye, L.
    and van der Velde, T. and Bartelink, H. and Rodenhuis, S.
    and Rutgers, E. T. and Friend, S. H. and Bernards, R.},
  title       = {A gene-expression signature as a predictor of survival in breast
    cancer},
  journal     = {N. Engl. J. Med.},
  year        = {2002},
  volume      = {347},
  number      = {25},
  pages       = {1999--2009},
  month       = {Dec},
  doi         = {10.1056/NEJMoa021967},
  url         = {http://dx.doi.org/10.1056/NEJMoa021967},
  pii         = {347/25/1999},
  pmid        = {12490681},
  institution = {Division of Diagnostic Oncology, Netherlands Cancer Institute, Amsterdam,
    The Netherlands.},
  keywords    = {breastcancer, csbcbook, csbcbook-ch3},
  pdf         = {../local/Vijver2002gene-expression.pdf},
  file        = {Vijver2002gene-expression.pdf:local/Vijver2002gene-expression.pdf:PDF},
  owner       = {jp},
  timestamp   = {2008.11.16},
  abstract    = {BACKGROUND: A more accurate means of prognostication in breast cancer
    will improve the selection of patients for adjuvant systemic therapy.
    METHODS: Using microarray analysis to evaluate our previously established
    70-gene prognosis profile, we classified a series of 295 consecutive
    patients with primary breast carcinomas as having a gene-expression
    signature associated with either a poor prognosis or a good prognosis.
    All patients had stage I or II breast cancer and were younger than
    53 years old; 151 had lymph-node-negative disease, and 144 had lymph-node-positive
    disease. We evaluated the predictive power of the prognosis profile
    using univariable and multivariable statistical analyses. RESULTS:
    Among the 295 patients, 180 had a poor-prognosis signature and 115
    had a good-prognosis signature, and the mean (+/-SE) overall 10-year
    survival rates were 54.6+/-4.4 percent and 94.5+/-2.6 percent, respectively.
    At 10 years, the probability of remaining free of distant metastases
    was 50.6+/-4.5 percent in the group with a poor-prognosis signature
    and 85.2+/-4.3 percent in the group with a good-prognosis signature.
    The estimated hazard ratio for distant metastases in the group with
    a poor-prognosis signature, as compared with the group with the good-prognosis
    signature, was 5.1 (95 percent confidence interval, 2.9 to 9.0; P<0.001).
    This ratio remained significant when the groups were analyzed according
    to lymph-node status. Multivariable Cox regression analysis showed
    that the prognosis profile was a strong independent factor in predicting
    disease outcome. CONCLUSIONS: The gene-expression profile we studied
    is a more powerful predictor of the outcome of disease in young patients
    with breast cancer than standard systems based on clinical and histologic
    criteria.}
}

@article{Vinayagam2004Applying,
  author = {Vinayagam, A. and K{\"o}nig, R. and Moormann, J. and Schubert, F. and
	Eils, R. and Glatting, K.-H. and Suhai, S.},
  title = {Applying {Support} {Vector} {Machines} for {Gene} {Ontology} based
	gene function prediction},
  journal = {BMC Bioinformatics},
  year = {2004},
  volume = {5},
  pages = {116},
  number = {1},
  month = {Aug},
  abstract = {B{ACKGROUND}: {T}he current progress in sequencing projects calls
	for rapid, reliable and accurate function assignments of gene products.
	{A} variety of methods has been designed to annotate sequences on
	a large scale. {H}owever, these methods can either only be applied
	for specific subsets, or their results are not formalised, or they
	do not provide precise confidence estimates for their predictions.
	{RESULTS}: {W}e have developed a large-scale annotation system that
	tackles all of these shortcomings. {I}n our approach, annotation
	was provided through {G}ene {O}ntology terms by applying multiple
	{S}upport {V}ector {M}achines ({SVM}) for the classification of correct
	and false predictions. {T}he general performance of the system was
	benchmarked with a large dataset. {A}n organism-wise cross-validation
	was performed to define confidence estimates, resulting in an average
	precision of 80\% for 74\% of all test sequences. {T}he validation
	results show that the prediction performance was organism-independent
	and could reproduce the annotation of other automated systems as
	well as high-quality manual annotations. {W}e applied our trained
	classification system to {X}enopus laevis sequences, yielding functional
	annotation for more than half of the known expressed genome. {C}ompared
	to the currently available annotation, we provided more than twice
	the number of contigs with good quality annotation, and additionally
	we assigned a confidence value to each predicted {GO} term. {CONCLUSIONS}:
	{W}e present a complete automated annotation system that overcomes
	many of the usual problems by applying a controlled vocabulary of
	{G}ene {O}ntology and an established classification method on large
	and well-described sequence data sets. {I}n a case study, the function
	for {X}enopus laevis contig sequences was predicted and the results
	are publicly available at ftp://genome.dkfz-heidelberg.de/pub/agd/gene_association.agd_Xenopus.},
  doi = {10.1186/1471-2105-5-116},
  pdf = {../local/Vinayagam2004Applying.pdf},
  file = {Vinayagam2004Applying.pdf:local/Vinayagam2004Applying.pdf:PDF},
  keywords = {biosvm},
  pii = {1471-2105-5-116},
  url = {http://dx.doi.org/10.1186/1471-2105-5-116}
}

@article{Vincent-Salomon2008Integrated,
  author = {Vincent-Salomon, A. and Lucchesi, C. and Gruel, N. and Raynal, V.
	and Pierron, G. and Goudefroye, R. and Reyal, F. and Radvanyi, F.
	and Salmon, R. and Thiery, J.-P. and Sastre-Garau, X. and Sigal-Zafrani,
	B. and Fourquet, A. and Delattre, O.},
  title = {Integrated genomic and transcriptomic analysis of ductal carcinoma
	in situ of the breast},
  journal = {Clin. Cancer Res.},
  year = {2008},
  volume = {14},
  pages = {1956--1965},
  number = {7},
  month = {Apr},
  abstract = {PURPOSE: To gain insight into genomic and transcriptomic subtypes
	of ductal carcinomas in situ of the breast (DCIS). EXPERIMENTAL DESIGN:
	We did a combined phenotypic and genomic analysis of a series of
	57 DCIS integrated with gene expression profile analysis for 26 of
	the 57 cases. RESULTS: Thirty-two DCIS exhibited a luminal phenotype;
	21 were ERBB2 positive, and 4 were ERBB2/estrogen receptor (ER) negative
	with 1 harboring a bona fide basal-like phenotype. Based on a CGH
	analysis, genomic types were identified in this series of DCIS with
	the 1q gain/16q loss combination observed in 3 luminal DCIS, the
	mixed amplifier pattern including all ERBB2, 12 luminal and 2 ERBB2(-)/ER(-)
	DCIS, and the complex copy number alteration profile encompassing
	14 luminal and 1 ERBB2(-)/ER(-) DCIS. Eight cases (8 of 57; 14\%)
	presented a TP53 mutation, all being amplifiers. Unsupervised analysis
	of gene expression profiles of 26 of the 57 DCIS showed that luminal
	and ERBB2-amplified, ER-negative cases clustered separately. We further
	investigated the effect of high and low copy number changes on gene
	expression. Strikingly, amplicons but also low copy number changes
	especially on 1q, 8q, and 16q in DCIS regulated the expression of
	a subset of genes in a very similar way to that recently described
	in invasive ductal carcinomas. CONCLUSIONS: These combined approaches
	show that the molecular heterogeneity of breast ductal carcinomas
	exists already in in situ lesions and further indicate that DCIS
	and invasive ductal carcinomas share genomic alterations with a similar
	effect on gene expression profile.},
  doi = {10.1158/1078-0432.CCR-07-1465},
  pdf = {../local/Vincent-Salomon2008Integrated.pdf},
  file = {Vincent-Salomon2008Integrated.pdf:Vincent-Salomon2008Integrated.pdf:PDF},
  institution = {Institut Curie, Department of Tumor Biology, Paris, France.},
  keywords = {breastcancer, cgh},
  owner = {jp},
  pii = {14/7/1956},
  pmid = {18381933},
  timestamp = {2008.12.09},
  url = {http://dx.doi.org/10.1158/1078-0432.CCR-07-1465}
}

@inproceedings{Vinokourov2003Inferring,
  author    = {Vinokourov, A. and Shawe-Taylor, J. and Cristianini, N.},
  title     = {Inferring a semantic representation of text via cross-language
               correlation analysis},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  editor    = {Suzanna Becker and Sebastian Thrun and Klaus Obermayer},
  publisher = {MIT Press},
  year      = {2003},
  pdf       = {../local/Vinokourov2003Inferring.pdf},
  file      = {Vinokourov2003Inferring.pdf:local/Vinokourov2003Inferring.pdf:PDF}
}

@techreport{Vinokourov2002Finding,
  author      = {Vinokourov, A. and Shawe-Taylor, J. and Cristianini, N.},
  title       = {Finding {L}anguage-{I}ndependent {S}emantic {R}epresentation
                 of {T}ext using {K}ernel {C}anonical {C}orrelation {A}nalysis},
  institution = {Neurocolt},
  year        = {2002},
  note        = {NeuroCOLT Technical Report NC-TR-02-119},
  url         = {http://www.neurocolt.com/abs/2002/abs02119.html},
  subject     = {kernel},
  pdf         = {../local/vino02.ps.gz},
  file        = {vino02.ps.gz:local/vino02.ps.gz:PostScript}
}

@inproceedings{Vishwanathan2007Fast,
  author = {Vishwanathan, S. V. N. and Borgwardt, K. and Schraudolph, N.},
  title = {Fast {C}omputation of {G}raph {K}ernels},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2007},
  editor = {B. Sch{\"o}lkopf and J. Platt and T. Hoffman},
  volume = {19},
  pages = {1--2},
  address = {Cambridge, MA},
  publisher = {MIT Press}
}

@article{Vishwanathan2008Graph,
  author = {S. V. N. Vishwanathan and K. M. Borgwardt and R. I. Kondor and N.
	N. Schraudolph},
  title = {Graph Kernels},
  journal = {CoRR},
  year = {2008},
  volume = {abs/0807.0093},
  archiveprefix = {arXiv},
  eprint = {0807.0093},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://arxiv.org/abs/0807.0093},
  url = {http://arxiv.org/abs/0807.0093}
}

@article{Vishwanathan2009Graph,
  author    = {Vishwanathan, S. V. N. and Schraudolph, N. N. and Kondor, R.
               and Borgwardt, K. M.},
  title     = {Graph Kernels},
  journal   = {J. Mach. Learn. Res.},
  volume    = {10},
  pages     = {1--41},
  year      = {2009},
  ee        = {http://arxiv.org/abs/0807.0093},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  pdf       = {../local/Vishwanathan2009Graph.pdf},
  file      = {Vishwanathan2009Graph.pdf:Vishwanathan2009Graph.pdf:PDF}
}

@incollection{Vishwanathan2004Fast,
  author    = {Vishwanathan, S. V. N. and Smola, A. J.},
  title     = {Fast kernels for string and tree matching},
  booktitle = {Kernel methods in computational biology},
  editor    = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  publisher = {MIT Press},
  pages     = {113--130},
  year      = {2004},
  owner     = {vert},
  timestamp = {2007.08.01}
}

@article{Viterbi1973Error,
  author = {Viterbi, A.},
  title = {Error bounds for convolutional codes and an asymptotically optimum
	decoding algorithm},
  journal = {IEEE Trans. Inform. Theory},
  year = {1967},
  volume = {13},
  pages = {260--269},
  number = {2},
  abstract = {The probability of error in decoding an optimal convolutional code
	transmitted over a memoryless channel is bounded from above and below
	as a function of the constraint length of the code. For all but pathological
	channels the bounds are asymptotically (exponentially) tight for
	rates above $R_{0}$, the computational cutoff rate of sequential decoding.
	As a function of constraint length the performance of optimal convolutional
	codes is shown to be superior to that of block codes of the same
	length, the relative improvement increasing with rate. The upper
	bound is obtained for a specific probabilistic nonsequential decoding
	algorithm which is shown to be asymptotically optimum for rates above
	$R_{0}$ and whose performance bears certain similarities to that of
	sequential decoding algorithms.},
  owner = {jp},
  timestamp = {2008.10.04},
  url = {http://ieeexplore.ieee.org/search/wrapper.jsp?arnumber=1054010}
}

@article{Vlahovicek2005SBASE,
  author = {Kristian Vlahovicek and L{\'a}szl{\'o} Kaj{\'a}n and Vilmos Agoston
	and S{\'a}ndor Pongor},
  title = {The {SBASE} domain sequence resource, release 12: prediction of protein
	domain-architecture using support vector machines.},
  journal = {Nucleic {A}cids {R}es},
  year = {2005},
  volume = {33},
  pages = {D223--D225},
  number = {Database issue},
  month = {Jan},
  abstract = {S{BASE} (http://www.icgeb.trieste.it/sbase) is an online resource
	designed to facilitate the detection of domain homologies based on
	sequence database search. {T}he present release of the {SBASE} {A}
	library of protein domain sequences contains 972,397 protein sequence
	segments annotated by structure, function, ligand-binding or cellular
	topology, clustered into 8547 domain groups. {SBASE} {B} contains
	169,916 domain sequences clustered into 2526 less well-characterized
	groups. {D}omain prediction is based on an evaluation of database
	search results in comparison with a 'similarity network' of inter-sequence
	similarity scores, using support vector machines trained on similarity
	search results of known domains.},
  doi = {10.1093/nar/gki112},
  pdf = {../local/Vlahovicek2005SBASE.pdf},
  file = {Vlahovicek2005SBASE.pdf:local/Vlahovicek2005SBASE.pdf:PDF},
  keywords = {biosvm},
  pii = {33/suppl_1/D223},
  url = {http://dx.doi.org/10.1093/nar/gki112}
}

@article{Vliet2012Integration,
  author = {van Vliet, M. H. and Horlings, H. M. and van de Vijver, M. J. and
	Reinders, M. J. T. and Wessels, L. F. A.},
  title = {Integration of Clinical and Gene Expression Data Has a Synergetic
	Effect on Predicting Breast Cancer Outcome},
  journal = {PLoS ONE},
  year = {2012},
  volume = {7},
  pages = {e40358},
  number = {7},
  publisher = {Public Library of Science}
}

@article{Voduc2010Breast,
  author = {K. David Voduc and Maggie C U Cheang and Scott Tyldesley and Karen
	Gelmon and Torsten O Nielsen and Hagen Kennecke},
  title = {Breast cancer subtypes and the risk of local and regional relapse.},
  journal = {J Clin Oncol},
  year = {2010},
  volume = {28},
  pages = {1684--1691},
  number = {10},
  month = {Apr},
  abstract = {The risk of local and regional relapse associated with each breast
	cancer molecular subtype was determined in a large cohort of patients
	with breast cancer. Subtype assignment was accomplished using a validated
	six-marker immunohistochemical panel applied to tissue microarrays. Semiquantitative
	analysis of estrogen receptor (ER), progesterone receptor (PR), Ki-67,
	human epidermal growth factor receptor 2 (HER2), epidermal growth
	factor receptor (EGFR), and cytokeratin (CK) 5/6 was performed on
	tissue microarrays constructed from 2,985 patients with early invasive
	breast cancer. Patients were classified into the following categories:
	luminal A, luminal B, luminal-HER2, HER2 enriched, basal-like, or
	triple-negative phenotype-nonbasal. Multivariable Cox analysis was
	used to determine the risk of local or regional relapse associated
	the intrinsic subtypes, adjusting for standard clinicopathologic
	factors. The intrinsic molecular subtype was successfully determined
	in 2,985 tumors. The median follow-up time was 12 years, and there
	have been a total of 325 local recurrences and 227 regional lymph
	node recurrences. Luminal A tumors (ER or PR positive, HER2 negative,
	Ki-67 < 1\%) had the best prognosis and the lowest rate of local
	or regional relapse. For patients undergoing breast conservation,
	HER2-enriched and basal subtypes demonstrated an increased risk of
	regional recurrence, and this was statistically significant on multivariable
	analysis. After mastectomy, luminal B, luminal-HER2, HER2-enriched,
	and basal subtypes were all associated with an increased risk of
	local and regional relapse on multivariable analysis. Luminal A tumors
	are associated with a low risk of local or regional recurrence. Molecular
	subtyping of breast tumors using a six-marker immunohistochemical
	panel can identify patients at increased risk of local and regional
	recurrence.},
  doi = {10.1200/JCO.2009.24.9284},
  pdf = {../local/Voduc2010Breast.pdf},
  file = {Voduc2010Breast.pdf:Voduc2010Breast.pdf:PDF},
  institution = {Department of Radiation Oncology, British Columbia Cancer Agency,
	Vancouver, British Columbia, Canada V5Z 4E6. dvoduc@bccancer.bc.ca},
  keywords = {Adult; Breast Neoplasms, mortality/pathology; Female; Humans; Ki-67
	Antigen, metabolism; Lymphatic Metastasis; Middle Aged; Neoplasm
	Metastasis; Neoplasm Recurrence, Local; Neoplasms, Hormone-Dependent;
	Receptor, Epidermal Growth Factor, metabolism; Receptors, Estrogen,
	analysis; Receptors, Progesterone, analysis; Tissue Array Analysis;
	Tumor Markers, Biological, analysis},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {JCO.2009.24.9284},
  pmid = {20194857},
  timestamp = {2011.06.01},
  url = {http://dx.doi.org/10.1200/JCO.2009.24.9284}
}

@article{Vogelstein2004Cancer,
  author      = {Vogelstein, B. and Kinzler, K. W.},
  title       = {Cancer genes and the pathways they control.},
  journal     = {Nat. Med.},
  volume      = {10},
  number      = {8},
  pages       = {789--799},
  month       = {Aug},
  year        = {2004},
  doi         = {10.1038/nm1087},
  url         = {http://dx.doi.org/10.1038/nm1087},
  pii         = {nm1087},
  pmid        = {15286780},
  abstract    = {The revolution in cancer research can be summed up in a single sentence:
    cancer is, in essence, a genetic disease. In the last decade, many
    important genes responsible for the genesis of various cancers have
    been discovered, their mutations precisely identified, and the pathways
    through which they act characterized. The purposes of this review
    are to highlight examples of progress in these areas, indicate where
    knowledge is scarce and point out fertile grounds for future investigation.},
  institution = {Howard Hughes Medical Institute and The Sidney Kimmel Comprehensive
    Cancer Center, The Johns Hopkins University Medical Institutions,
    Baltimore, Maryland 21231, USA. vogelbe@welch.jhu.edu},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Vogelstein2004Cancer.pdf},
  file        = {Vogelstein2004Cancer.pdf:Vogelstein2004Cancer.pdf:PDF},
  owner       = {jp},
  timestamp   = {2011.10.05}
}

@article{Vostrikova1981Detection,
  author    = {Vostrikova, L. J.},
  title     = {Detection of disorder in multidimensional stochastic processes},
  journal   = {Soviet Mathematics Doklady},
  volume    = {24},
  pages     = {55--59},
  year      = {1981},
  owner     = {jp},
  timestamp = {2010.06.02}
}

@article{Waaijenborg2008Quantifying,
  author = {Waaijenborg, S. and {Verselewel de Witt Hamer}, P. C. and Zwinderman,
	A. H.},
  title = {Quantifying the association between gene expressions and {DNA}-markers
	by penalized canonical correlation analysis.},
  journal = {Stat Appl Genet Mol Biol},
  year = {2008},
  volume = {7},
  pages = {Article3},
  number = {1},
  abstract = {Multiple changes at the DNA level are at the basis of complex diseases.
	Identifying the genetic networks that are influenced by these changes
	might help in understanding the development of these diseases. Canonical
	correlation analysis is used to associate gene expressions with DNA-markers
	and thus reveals sets of co-expressed and co-regulated genes and
	their associating DNA-markers. However, when the number of variables
	gets high, e.g. in the case of microarray studies, interpretation
	of these results can be difficult. By adapting the elastic net to
	canonical correlation analysis the number of variables reduces, and
	interpretation becomes easier, moreover, due to the grouping effect
	of the elastic net co-regulated and co-expressed genes cluster. Additionally,
	our adaptation works well in situations where the number of variables
	exceeds by far the number of subjects.},
  doi = {10.2202/1544-6115.1329},
  institution = {Academic Medical Center / University of Amsterdam. s.waaijenborg@amc.uva.nl},
  keywords = {Cluster Analysis; DNA, genetics; Gene Expression; Genetic Markers},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {18241193},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.2202/1544-6115.1329}
}

@article{Wadman2008James,
  author = {Meredith Wadman},
  title = {James {Watson's} genome sequenced at high speed.},
  journal = {Nature},
  year = {2008},
  volume = {452},
  pages = {788},
  number = {7189},
  month = {Apr},
  doi = {10.1038/452788b},
  keywords = {Genetic Counseling, trends; Genome, Human; Genomics, economics/trends;
	History, 21st Century; Humans; Individuality; Male; Reference Standards;
	Sequence Analysis, DNA, economics/trends; Time Factors},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {18431822},
  timestamp = {2010.07.28},
  url = {http://dx.doi.org/10.1038/452788b}
}

@article{Wagener1995Autocorrelation,
  author = {M. Wagener and J. Sadowski and J. Gasteiger},
  title = {Autocorrelation of Molecular Surface Properties for Modeling Corticosteroid
	Binding Globulin and Cytosolic {Ah} Receptor Activity by Neural
	Networks},
  journal = {J. Am. Chem. Soc.},
  year = {1995},
  volume = {117},
  pages = {7769--7775},
  owner = {mahe},
  timestamp = {2006.09.13}
}

@article{Wagner2001Yeast,
  author  = {Wagner, A.},
  title   = {The {Y}east {P}rotein {I}nteraction {N}etwork {E}volves
             {R}apidly and {C}ontains {F}ew {R}edundant {D}uplicate {G}enes},
  journal = {Mol. {B}iol. {E}vol.},
  volume  = {18},
  pages   = {1283--1292},
  year    = {2001},
  url     = {http://www.santafe.edu/sfi/publications/Abstracts/01-04-022abs.html},
  subject = {bionet},
  pdf     = {../local/wagn01.pdf},
  file    = {wagn01.pdf:local/wagn01.pdf:PDF}
}

@article{Wagner2003Protocols,
  author = {Wagner, M. and Naik, D. and Pothen, A.},
  title = {Protocols for disease classification from mass spectrometry data.},
  journal = {Proteomics},
  year = {2003},
  volume = {3},
  pages = {1692--1698},
  number = {9},
  abstract = {We report our results in classifying protein matrix-assisted laser
	desorption/ionization-time of flight mass spectra obtained from serum
	samples into diseased and healthy groups. {W}e discuss in detail
	five of the steps in preprocessing the mass spectral data for biomarker
	discovery, as well as our criterion for choosing a small set of peaks
	for classifying the samples. {C}ross-validation studies with four
	selected proteins yielded misclassification rates in the 10-15\% range
	for all the classification methods. {T}hree of these proteins or
	protein fragments are down-regulated and one up-regulated in lung
	cancer, the disease under consideration in this data set. {W}hen
	cross-validation studies are performed, care must be taken to ensure
	that the test set does not influence the choice of the peaks used
	in the classification. {M}isclassification rates are lower when both
	the training and test sets are used to select the peaks used in classification
	versus when only the training set is used. {T}his expectation was
	validated for various statistical discrimination methods when thirteen
	peaks were used in cross-validation studies. {O}ne particular classification
	method, a linear support vector machine, exhibited especially robust
	performance when the number of peaks was varied from four to thirteen,
	and when the peaks were selected from the training set alone. {E}xperiments
	with the samples randomly assigned to the two classes confirmed that
	misclassification rates were significantly higher in such cases than
	those observed with the true data. {T}his indicates that our findings
	are indeed significant. {W}e found closely matching masses in a database
	for protein expression in lung cancer for three of the four proteins
	we used to classify lung cancer. {D}ata from additional samples,
	increased experience with the performance of various preprocessing
	techniques, and affirmation of the biological roles of the proteins
	that help in classification, will strengthen our conclusions in the
	future.},
  doi = {10.1002/pmic.200300519},
  pdf = {../local/Wagner2003Protocols.pdf},
  file = {Wagner2003Protocols.pdf:local/Wagner2003Protocols.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/pmic.200300519}
}

@article{Wagner2004Computational,
  author = {Wagner, M. and Naik, D. N. and Pothen, A. and Kasukurti, S. and
	Devineni, R. R. and Adam, B. L. and Semmes, O. J. and Wright, Jr.,
	G. L.},
  title = {Computational protein biomarker prediction: a case study for prostate
	cancer},
  journal = {B{MC} {B}ioinformatics},
  year = {2004},
  volume = {5},
  pages = {26},
  abstract = {Background {R}ecent technological advances in mass spectrometry pose
	challenges in computational mathematics and statistics to process
	the mass spectral data into predictive models with clinical and biological
	significance. {W}e discuss several classification-based approaches
	to finding protein biomarker candidates using protein profiles obtained
	via mass spectrometry, and we assess their statistical significance.
	{O}ur overall goal is to implicate peaks that have a high likelihood
	of being biologically linked to a given disease state, and thus to
	narrow the search for biomarker candidates. {R}esults {T}horough
	cross-validation studies and randomization tests are performed on
	a prostate cancer dataset with over 300 patients, obtained at the
	{E}astern {V}irginia {M}edical {S}chool using {SELDI}-{TOF} mass
	spectrometry. {W}e obtain average classification accuracies of 87\%
	on a four-group classification problem using a two-stage linear {SVM}-based
	procedure and just 13 peaks, with other methods performing comparably.
	{C}onclusions {M}odern feature selection and classification methods
	are powerful techniques for both the identification of biomarker
	candidates and the related problem of building predictive models
	from protein mass spectrometric profiles. {C}ross-validation and
	randomization are essential tools that must be performed carefully
	in order not to bias the results unfairly. {H}owever, only a biological
	validation and identification of the underlying proteins will ultimately
	confirm the actual value and power of any computational predictions.},
  doi = {10.1186/1471-2105-5-26},
  pdf = {../local/Wagner2004Computational.pdf},
  file = {Wagner2004Computational.pdf:local/Wagner2004Computational.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://www.biomedcentral.com/1471-2105/5/26}
}

@article{Wahba2002Soft,
  author = {Grace Wahba},
  title = {Soft and hard classification by reproducing kernel {H}ilbert space
	methods.},
  journal = {Proc {N}atl {A}cad {S}ci {U} {S} {A}},
  year = {2002},
  volume = {99},
  pages = {16524--16530},
  number = {26},
  month = {Dec},
  abstract = {Reproducing kernel {H}ilbert space ({RKHS}) methods provide a unified
	context for solving a wide variety of statistical modelling and function
	estimation problems. {W}e consider two such problems: {W}e are given
	a training set [yi, ti, i = 1, \ldots, n], where yi is the response
	for the ith subject, and ti is a vector of attributes for this subject.
	{T}he value of y(i) is a label that indicates which category it came
	from. {F}or the first problem, we wish to build a model from the
	training set that assigns to each t in an attribute domain of interest
	an estimate of the probability pj(t) that a (future) subject with
	attribute vector t is in category j. {T}he second problem is in some
	sense less ambitious; it is to build a model that assigns to each
	t a label, which classifies a future subject with that t into one
	of the categories or possibly "none of the above." {T}he approach
	to the first of these two problems discussed here is a special case
	of what is known as penalized likelihood estimation. {T}he approach
	to the second problem is known as the support vector machine. {W}e
	also note some alternate but closely related approaches to the second
	problem. {T}hese approaches are all obtained as solutions to optimization
	problems in {RKHS}. {M}any other problems, in particular the solution
	of ill-posed inverse problems, can be obtained as solutions to optimization
	problems in {RKHS} and are mentioned in passing. {W}e caution the
	reader that although a large literature exists in all of these topics,
	in this inaugural article we are selectively highlighting work of
	the author, former students, and other collaborators.},
  doi = {10.1073/pnas.242574899},
  pdf = {../local/Wahba2002Soft.pdf},
  file = {Wahba2002Soft.pdf:local/Wahba2002Soft.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Automated, Base Pair Mismatch, Base Pairing,
	Base Sequence, Biological, Biosensing Techniques, Classification,
	Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted,
	Cystadenoma, DNA, Drug, Drug Design, Eukaryotic Cells, Female, Gene
	Expression, Gene Expression Profiling, Gene Expression Regulation,
	Genes, Genetic, Genetic Markers, Hemolysins, Humans, Leukemia, Ligands,
	Likelihood Functions, Lymphocytic, Markov Chains, Mathematics, Messenger,
	Models, Molecular, Molecular Probe Techniques, Molecular Sequence
	Data, Nanotechnology, Neoplasm, Neoplastic, Neural Networks (Computer),
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S.,
	Pattern Recognition, Probability, Protein Binding, Proteins, Quality
	Control, RNA, RNA Splicing, Receptors, Reference Values, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Sequence
	Analysis, Signal Processing, Statistical, Stomach Neoplasms, Thermodynamics,
	Transcription, Tumor Markers, U.S. Gov't},
  pii = {242574899},
  pmid = {12477931},
  url = {http://dx.doi.org/10.1073/pnas.242574899}
}

@book{Wahba1990Spline,
  author    = {Wahba, G.},
  title     = {Spline {M}odels for {O}bservational {D}ata},
  series    = {CBMS-NSF Regional Conference Series in Applied Mathematics},
  volume    = {59},
  publisher = {{SIAM}},
  address   = {Philadelphia},
  year      = {1990},
  subject   = {ml}
}

@article{Wainwright2009Information-Theoretic,
  author = {Wainwright, M. J.},
  title = {Information-Theoretic Limits on Sparsity Recovery in the High-Dimensional
	and Noisy Setting},
  journal = {IEEE Trans. Inform. Theory},
  year = {2009},
  volume = {55},
  pages = {5728--5741},
  number = {12},
  doi = {10.1109/TIT.2009.2032816},
  owner = {jp},
  timestamp = {2011.09.19},
  url = {http://dx.doi.org/10.1109/TIT.2009.2032816}
}

@article{Wainwright2009Sharp,
  author = {Wainwright, M. J.},
  title = {Sharp Thresholds for High-Dimensional and Noisy Sparsity Recovery
	Using {$\ell_1$}-Constrained Quadratic Programming ({Lasso})},
  journal = {IEEE Trans. Inform. Theory},
  year = {2009},
  volume = {55},
  pages = {2183--2202},
  number = {5},
  doi = {10.1109/TIT.2009.2016018},
  pdf = {../local/Wainwright2009Sharp.pdf},
  file = {Wainwright2009Sharp.pdf:Wainwright2009Sharp.pdf:PDF},
  owner = {jp},
  timestamp = {2011.09.19},
  url = {http://dx.doi.org/10.1109/TIT.2009.2016018}
}

@techreport{Wainwright2006Sharp,
  author      = {Wainwright, M. J.},
  title       = {Sharp thresholds for high-dimensional and noisy recovery of sparsity},
  institution = {UC Berkeley, Department of Statistics},
  number      = {709},
  year        = {2006},
  url         = {http://www.stat.berkeley.edu/tech-reports/709.pdf},
  pdf         = {../local/Wainwright2006Sharp.pdf},
  file        = {Wainwright2006Sharp.pdf:Wainwright2006Sharp.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.01.25}
}

@article{Wallace2010Identification,
  author = {Emma V B Wallace and David Stoddart and Andrew J Heron and Ellina
	Mikhailova and Giovanni Maglia and Timothy J Donohoe and Hagan Bayley},
  title = {Identification of epigenetic {DNA} modifications with a protein nanopore.},
  journal = {Chem Commun (Camb)},
  year = {2010},
  volume = {46},
  pages = {8195--8197},
  number = {43},
  month = {Nov},
  abstract = {Two DNA bases, 5-methylcytosine (5mC) and 5-hydroxymethylcytosine
	(hmC), marks of epigenetic modification, are recognized in immobilized
	DNA strands and distinguished from G, A, T and C by nanopore current
	recording. Therefore, if further aspects of nanopore sequencing can
	be addressed, the approach will provide a means to locate epigenetic
	modifications in unamplified genomic DNA.},
  doi = {10.1039/c0cc02864a},
  institution = {Department of Chemistry, University of Oxford, Chemistry Research
	Laboratory, Mansfield Road, Oxford, UK OX1 3TA.},
  keywords = {5-Methylcytosine, chemistry; Cyclodextrins, chemistry; Cytosine, analogs
	/&/ derivatives/chemistry; DNA, chemistry; Epigenesis, Genetic; Hemolysin
	Proteins, chemistry; Nanopores},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pmid = {20927439},
  timestamp = {2011.06.01},
  url = {http://dx.doi.org/10.1039/c0cc02864a}
}

@article{Walter2003Detection,
  author = {T. Walter and J.-C. Klein and P. Massin and A. Erginay},
  title = {Detection of the median axis of vessels in retinal images},
  journal = {European Journal of Ophthalmology},
  year = {2003},
  volume = {13},
  number = {2}
}

@article{Walters2002Prediction,
  author = {W. Patrick Walters and Mark A. Murcko},
  title = {Prediction of `drug-likeness'},
  journal = {Adv. {D}rug {D}eliv. {R}ev.},
  year = {2002},
  volume = {54},
  pages = {255--271},
  subject = {qsar}
}

@article{Wang2001Methylation,
  author = {H. Wang and Z. Q. Huang and L. Xia and Q. Feng and H. Erdjument-Bromage
	and B. D. Strahl and S. D. Briggs and C. D. Allis and J. Wong and
	P. Tempst and Y. Zhang},
  title = {Methylation of histone {H4} at arginine 3 facilitating transcriptional
	activation by nuclear hormone receptor.},
  journal = {Science},
  year = {2001},
  volume = {293},
  pages = {853--857},
  number = {5531},
  month = {Aug},
  abstract = {Acetylation of core histone tails plays a fundamental role in transcription
	regulation. In addition to acetylation, other posttranslational modifications,
	such as phosphorylation and methylation, occur in core histone tails.
	Here, we report the purification, molecular identification, and functional
	characterization of a histone H4-specific methyltransferase PRMT1,
	a protein arginine methyltransferase. PRMT1 specifically methylates
	arginine 3 (Arg 3) of H4 in vitro and in vivo. Methylation of Arg
	3 by PRMT1 facilitates subsequent acetylation of H4 tails by p300.
	However, acetylation of H4 inhibits its methylation by PRMT1. Most
	important, a mutation in the S-adenosyl-l-methionine-binding site
	of PRMT1 substantially crippled its nuclear receptor coactivator
	activity. Our finding reveals Arg 3 of H4 as a novel methylation
	site by PRMT1 and indicates that Arg 3 methylation plays an important
	role in transcriptional regulation.},
  doi = {10.1126/science.1060781},
  institution = {Department of Biochemistry and Biophysics, Lineberger Comprehensive
	Cancer Center, University of North Carolina at Chapel Hill, Chapel
	Hill, NC 27599-7295, USA.},
  keywords = {Acetylation; Amino Acid Sequence; Animals; Arginine, metabolism; Binding
	Sites; Cell Nucleus, metabolism; Hela Cells; Histones, chemistry/metabolism;
	Humans; Hydroxamic Acids, pharmacology; Lysine, metabolism; Methylation;
	Methyltransferases, chemistry/genetics/isolation /&/ purification/metabolism;
	Molecular Sequence Data; Mutation; Oocytes; Receptors, Androgen,
	metabolism; Recombinant Proteins, metabolism; S-Adenosylmethionine,
	metabolism; Transcriptional Activation; Xenopus},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {1060781},
  pmid = {11387442},
  timestamp = {2010.11.23},
  url = {http://dx.doi.org/10.1126/science.1060781}
}

@article{Wang2003Application,
  author = {Haojun Wang and Chongxun Zheng and Ying Li and Huafeng Zhu and Xiangguo
	Yan},
  title = {Application of support vector machines to classification of blood
	cells},
  journal = {Sheng {W}u {Y}i {X}ue {G}ong {C}heng {X}ue {Z}a {Z}hi},
  year = {2003},
  volume = {20},
  pages = {484--487},
  number = {3},
  month = {Sep},
  abstract = {The support vector machine ({SVM}) is a new learning technique based
	on the statistical learning theory. {I}t was originally developed
	for two-class classification. {I}n this paper, the {SVM} approach
	is extended to multi-class classification problems, a hierarchical
	{SVM} is applied to classify blood cells in different maturation
	stages from bone marrow. {B}ased on stepwise decomposition, a hierarchical
	clustering method is presented to construct the architecture of the
	hierarchical (tree-like) {SVM}, then the optimal control parameters
	of {SVM} are determined by some criterion for each discriminant step.
	{T}o verify the performances of classifiers, the {SVM} method is
	compared with three classical classifiers using 3-fold cross validation.
	{T}he preliminary results indicate that the proposed method avoids
	the curse of dimensionality and has greater generalization. {T}hus,
	the method can improve the classification correctness for blood cells
	from bone marrow.}
}

@article{Wang2006Correspondence,
  author = {Wang, H. F. and Hancock, E. R.},
  title = {Correspondence matching using kernel principal components analysis
	and label consistency constraints},
  journal = {Pattern Recogn.},
  year = {2006},
  volume = {39},
  pages = {1012--1025},
  number = {6},
  address = {New York, NY, USA},
  doi = {10.1016/j.patcog.2005.05.013},
  issn = {0031-3203},
  publisher = {Elsevier Science Inc.},
  url = {http://dx.doi.org/10.1016/j.patcog.2005.05.013}
}

@article{Wang2007new,
  author = {Wang, J. Z. and Du, Z. and Payattakool, R. and Yu, P. S. and Chen,
	C. F.},
  title = {A new method to measure the semantic similarity of {GO} terms},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  pages = {1274--1281},
  number = {10},
  doi = {10.1093/bioinformatics/btm087},
  issn = {1367-4803},
  owner = {jp},
  publisher = {Oxford Univ Press},
  timestamp = {2011.01.12},
  url = {http://dx.doi.org/10.1093/bioinformatics/btm087}
}

@article{Wang2005Protein,
  author   = {Wang, J. and Sung, W.-K. and Krishnan, A. and Li, K.-B.},
  title    = {Protein subcellular localization prediction for {G}ram-negative bacteria
    using amino acid subalphabets and a combination of multiple support
    vector machines.},
  journal  = {B{MC} {B}ioinformatics},
  volume   = {6},
  number   = {1},
  pages    = {174},
  month    = {Jul},
  year     = {2005},
  doi      = {10.1186/1471-2105-6-174},
  url      = {http://dx.doi.org/10.1186/1471-2105-6-174},
  pii      = {1471-2105-6-174},
  abstract = {B{ACKGROUND}: {P}redicting the subcellular localization of proteins
    is important for determining the function of proteins. {P}revious
    works focused on predicting protein localization in {G}ram-negative
    bacteria obtained good results. {H}owever, these methods had relatively
    low accuracies for the localization of extracellular proteins. {T}his
    paper studies ways to improve the accuracy for predicting extracellular
    localization in {G}ram-negative bacteria. {RESULTS}: {W}e have developed
    a system for predicting the subcellular localization of proteins
    for {G}ram-negative bacteria based on amino acid subalphabets and
    a combination of multiple support vector machines. {T}he recall of
    the extracellular site and overall recall of our predictor reach
    86.0\% and 89.8\%, respectively, in 5-fold cross-validation. {T}o
    the best of our knowledge, these are the most accurate results for
    predicting subcellular localization in {G}ram-negative bacteria.
    {CONCLUSIONS}: {C}lustering 20 amino acids into a few groups by the
    proposed greedy algorithm provides a new way to extract features
    from protein sequences to cover more adjacent amino acids and hence
    reduce the dimensionality of the input vector of protein features.
    {I}t was observed that a good amino acid grouping leads to an increase
    in prediction performance. {F}urthermore, a proper choice of a subset
    of complementary support vector machines constructed by different
    features of proteins maximizes the prediction accuracy.},
  keywords = {biosvm},
  pdf      = {../local/Wang2005Protein.pdf},
  file     = {Wang2005Protein.pdf:local/Wang2005Protein.pdf:PDF}
}

@article{Wang2008diploid,
  author = {Wang, Jun and Wang, Wei and Li, Ruiqiang and Li, Yingrui and Tian,
	Geng and Goodman, Laurie and Fan, Wei and Zhang, Junqing and Li,
	Jun and Zhang, Juanbin and Guo, Yiran and Feng, Binxiao and Li, Heng
	and Lu, Yao and Fang, Xiaodong and Liang, Huiqing and Du, Zhenglin
	and Li, Dong and Zhao, Yiqing and Hu, Yujie and Yang, Zhenzhen and
	Zheng, Hancheng and Hellmann, Ines and Inouye, Michael and Pool,
	John and Yi, Xin and Zhao, Jing and Duan, Jinjie and Zhou, Yan and
	Qin, Junjie and Ma, Lijia and Li, Guoqing and Yang, Zhentao and Zhang,
	Guojie and Yang, Bin and Yu, Chang and Liang, Fang and Li, Wenjie
	and Li, Shaochuan and Li, Dawei and Ni, Peixiang and Ruan, Jue and
	Li, Qibin and Zhu, Hongmei and Liu, Dongyuan and Lu, Zhike and Li,
	Ning and Guo, Guangwu and Zhang, J. and Ye, J. and Fang, L. and Hao,
	Q. and Chen, Q. and Liang, Y. and Su, Y. and San, A. and Ping, C.
	and Yang, S. and Chen, F. and Li, L. and Zhou, K. and Zheng, H. and
	Ren, Y. and Yang, L. and Gao, Y. and Yang, G. and Li, Z. and Feng,
	X. and Kristiansen, K. and Wong, G. K.-S. and Nielsen, R. and Durbin,
	R. and Bolund, L. and Zhang, X. and Li, S. and Yang, H. and Wang,
	J.},
  title = {The diploid genome sequence of an {Asian} individual.},
  journal = {Nature},
  year = {2008},
  month = {Nov},
  volume = {456},
  number = {7218},
  pages = {60--65},
  doi = {10.1038/nature07484},
  url = {http://dx.doi.org/10.1038/nature07484},
  pmid = {18987735},
  pii = {nature07484},
  institution = {Beijing Genomics Institute at Shenzhen, Shenzhen 518000, China. wangj@genomics.org.cn},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  timestamp = {2011.10.28},
  abstract = {Here we present the first diploid genome sequence of an Asian individual.
	The genome was sequenced to 36-fold average coverage using massively
	parallel sequencing technology. We aligned the short reads onto the
	NCBI human reference genome to 99.97\% coverage, and guided by the
	reference genome, we used uniquely mapped reads to assemble a high-quality
	consensus sequence for 92\% of the Asian individual's genome. We
	identified approximately 3 million single-nucleotide polymorphisms
	(SNPs) inside this region, of which 13.6\% were not in the dbSNP
	database. Genotyping analysis showed that SNP identification had
	high accuracy and consistency, indicating the high sequence quality
	of this assembly. We also carried out heterozygote phasing and haplotype
	prediction against HapMap CHB and JPT haplotypes (Chinese and Japanese,
	respectively), sequence comparison with the two available individual
	genomes (J. D. Watson and J. C. Venter), and structural variation
	identification. These variations were considered for their potential
	biological impact. Our sequence data and analyses demonstrate the
	potential usefulness of next-generation sequencing technologies for
	personal genomics.}
}

@article{Wang2005computer,
  author = {J. F. Wang and C. Z. Cai and C. Y. Kong and Z. W. Cao and Y. Z. Chen},
  title = {A computer method for validating traditional {C}hinese medicine herbal
	prescriptions.},
  journal = {Am {J} {C}hin {M}ed},
  year = {2005},
  volume = {33},
  pages = {281--297},
  number = {2},
  abstract = {Traditional {C}hinese medicine ({TCM}) has been widely practiced and
	is considered as an alternative to conventional medicine. {TCM} herbal
	prescriptions contain a mixture of herbs that collectively exert
	therapeutic actions and modulating effects. {T}raditionally defined
	herbal properties, related to the pharmacodynamic, pharmacokinetic
	and toxicological, as well as physicochemical properties of their
	principal ingredients, have been used as the basis for formulating
	{TCM} multi-herb prescriptions. {T}hese properties are used in this
	work to develop a computer program for predicting whether a multi-herb
	recipe is a valid {TCM} prescription. {T}his program is based on
	a statistical learning method, support vector machine ({SVM}), and
	it is trained by using 575 well-known {TCM} prescriptions and 1961
	non-{TCM} recipes generated by random combination of {TCM} herbs.
	{T}esting results by using 72 well-known {TCM} prescriptions and
	5039 non-{TCM} recipes showed that 73.6\% of the {TCM} prescriptions
	and 99.9\% of non-{TCM} recipes are correctly classified by this
	system. {A} further test by using 48 {TCM} prescriptions published
	in recent years found that 68.7\% of these are correctly classified.
	{T}hese accuracies are comparable to those of {SVM} classification
	of other biological systems. {O}ur study indicates the potential
	of {SVM} for facilitating the analysis of {TCM} prescriptions.},
  keywords = {Artificial Intelligence, Conservation of Natural Resources, Decision
	Support Techniques, Ecosystem, Environment, Forestry, Regression
	Analysis, Spain, 15974487}
}

@article{Wang2004Simple,
  author = {Kai Wang and Ekachai Jenwitheesuk and Ram Samudrala and John E Mittler},
  title = {Simple linear model provides highly accurate genotypic predictions
	of {HIV}-1 drug resistance.},
  journal = {Antivir {T}her},
  year = {2004},
  volume = {9},
  pages = {343--352},
  number = {3},
  month = {Jun},
  abstract = {Drug resistance is a major obstacle to the successful treatment of
	{HIV}-1 infection. {G}enotypic assays are used widely to provide
	indirect evidence of drug resistance, but the performance of these
	assays has been mixed. {W}e used standard stepwise linear regression
	to construct drug resistance models for seven protease inhibitors
	and 10 reverse transcriptase inhibitors using data obtained from
	the {S}tanford {HIV} drug resistance database. {W}e evaluated these
	models by hold-one-out experiments and by tests on an independent
	dataset. {O}ur linear model outperformed other publicly available
	genotypic interpretation algorithms, including decision tree, support
	vector machine and four rules-based algorithms ({HIV}db, {VGI}, {ANRS}
	and {R}ega) under both tests. {I}nterestingly, our model did well
	despite the absence of any terms for interactions between different
	residues in protease or reverse transcriptase. {T}he resulting linear
	models are easy to understand and can potentially assist in choosing
	combination therapy regimens.},
  keywords = {Algorithms, Computational Biology, Databases, Drug Resistance, Forecasting,
	Genetic, Genotype, HIV Protease Inhibitors, HIV-1, Humans, Information
	Management, Information Storage and Retrieval, Kinetics, Linear Models,
	Microbial Sensitivity Tests, Models, Non-U.S. Gov't, P.H.S., Periodicals,
	Point Mutation, Pyrimidinones, Research Support, Reverse Transcriptase
	Inhibitors, Theoretical, U.S. Gov't, Viral},
  pmid = {15259897}
}

@article{Wang2004Predicting,
  author = {Long-Hui Wang and Juan Liu and Yan-Fu Li and Huai-Bei Zhou},
  title = {Predicting protein secondary structure by a support vector machine
	based on a new coding scheme.},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2004},
  volume = {15},
  pages = {181--190},
  number = {2},
  abstract = {Protein structure prediction is one of the most important problems
	in modern computational biology. {P}rotein secondary structure prediction
	is a key step in prediction of protein tertiary structure. {T}here
	have emerged many methods based on machine learning techniques, such
	as neural networks ({NN}) and support vector machine ({SVM}) etc.,
	to focus on the prediction of the secondary structures. {I}n this
	paper, a new method was proposed based on {SVM}. {D}ifferent from
	the existing methods, this method takes into account of the physical-chemical
	properties and structure properties of amino acids. {W}hen tested
	on the most popular dataset {CB}513, it achieved a {Q}(3) accuracy
	of 0.7844, which illustrates that it is one of the top range methods
	for protein of secondary structure prediction.},
  keywords = {biosvm},
  url = {http://www.jsbi.org/journal/GIW04/GIW04F019.html}
}

@article{Wang2005Using,
  author = {M. Wang and J. Yang and K-C. Chou},
  title = {Using string kernel to predict signal peptide cleavage site based
	on subsite coupling model.},
  journal = {Amino {A}cids},
  year = {2005},
  volume = {28},
  pages = {395--402},
  number = {4},
  month = {Jun},
  abstract = {Owing to the importance of signal peptides for studying the molecular
	mechanisms of genetic diseases, reprogramming cells for gene therapy,
	and finding new drugs for healing a specific defect, it is in great
	demand to develop a fast and accurate method to identify the signal
	peptides. {I}ntroduction of the so-called \{-3,-1, +1\} coupling model
	({C}hou, {K}. {C}.: {P}rotein {E}ngineering, 2001, 14-2, 75-79) has
	made it possible to take into account the coupling effect among some
	key subsites and hence can significantly enhance the prediction quality
	of peptide cleavage site. {B}ased on the subsite coupling model,
	a kind of string kernels for protein sequence is introduced. {I}ntegrating
	the biologically relevant prior knowledge, the constructed string
	kernels can thus be used by any kernel-based method. {A} {S}upport
	vector machines ({SVM}) is thus built to predict the cleavage site
	of signal peptides from the protein sequences. {T}he current approach
	is compared with the classical weight matrix method. {A}t small false
	positive ratios, our method outperforms the classical weight matrix
	method, indicating the current approach may at least serve as a powerful
	complemental tool to other existing methods for predicting the signal
	peptide cleavage site. {T}he software that generated the results reported
	in this paper is available upon requirement, and will appear at http://www.pami.sjtu.edu.cn/wm.},
  doi = {10.1007/s00726-005-0189-6},
  pdf = {../local/Wang2005Using.pdf},
  file = {Wang2005Using.pdf:local/Wang2005Using.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1007/s00726-005-0189-6}
}

@article{Wang2004Weighted-support,
  author = {Wang, M. and Yang, J. and Liu, G.-P. and Xu, Z.-J. and Chou, K.-C.},
  title = {Weighted-support vector machines for predicting membrane protein
	types based on pseudo-amino acid composition},
  journal = {Protein {E}ng. {D}es. {S}el.},
  year = {2004},
  volume = {17},
  pages = {509--516},
  number = {6},
  abstract = {Membrane proteins are generally classified into the following five
	types: (1) type {I} membrane proteins, (2) type {II} membrane proteins,
	(3) multipass transmembrane proteins, (4) lipid chain-anchored membrane
	proteins and (5) {GPI}-anchored membrane proteins. {P}rediction of
	membrane protein types has become one of the growing hot topics in
	bioinformatics. {C}urrently, we are facing two critical challenges
	in this area: first, how to take into account the extremely complicated
	sequence-order effects, and second, how to deal with the highly uneven
	sizes of the subsets in a training dataset. {I}n this paper, stimulated
	by the concept of using the pseudo-amino acid composition to incorporate
	the sequence-order effects, the spectral analysis technique is introduced
	to represent the statistical sample of a protein. {B}ased on such
	a framework, the weighted support vector machine ({SVM}) algorithm
	is applied. {T}he new approach has remarkable power in dealing with
	the bias caused by the situation when one subset in the training
	dataset contains many more samples than the other. {T}he new method
	is particularly useful when our focus is aimed at proteins belonging
	to small subsets. {T}he results obtained by the self-consistency
	test, jackknife test and independent dataset test are encouraging,
	indicating that the current approach may serve as a powerful complementary
	tool to other existing methods for predicting the types of membrane
	proteins.},
  doi = {10.1093/protein/gzh061},
  eprint = {http://peds.oupjournals.org/cgi/reprint/17/6/509.pdf},
  pdf = {../local/Wang2004Weighted-support.pdf},
  file = {Wang2004Weighted-support.pdf:local/Wang2004Weighted-support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1093/protein/gzh061}
}

@article{Wang2004Support,
  author = {M-L. Wang and W-J. Li and M-L. Wang and W-B. Xu},
  title = {Support vector machines for prediction of peptidyl prolyl cis/trans
	isomerization.},
  journal = {J {P}ept {R}es},
  year = {2004},
  volume = {63},
  pages = {23--28},
  number = {1},
  month = {Jan},
  abstract = {A new method for peptidyl prolyl cis/trans isomerization prediction
	based on the theory of support vector machines ({SVM}) was introduced.
	{T}he {SVM} represents a new approach to supervised pattern classification
	and has been successfully applied to a wide range of pattern recognition
	problems. {I}n this study, six training datasets consisting of different
	length local sequence respectively were used. {T}he polynomial kernel
	functions with different parameter d were chosen. {T}he test for
	the independent testing dataset and the jackknife test were both
	carried out. {W}hen the local sequence length was 20-residue and
	the parameter d = 8, the {SVM} method archived the best performance
	with the correct rate for the cis and trans forms reaching 70.4 and
	69.7\% for the independent testing dataset, 76.7 and 76.6\% for the
	jackknife test, respectively. {M}atthew's correlation coefficients
	for the jackknife test could reach about 0.5. {T}he results obtained
	through this study indicated that the {SVM} method would become a
	powerful tool for predicting peptidyl prolyl cis/trans isomerization.},
  keywords = {biosvm},
  pii = {100}
}

@article{Wang2005Prediction,
  author = {Ming-Lei Wang and Hui Yao and Wen-Bo Xu},
  title = {Prediction by support vector machines and analysis by {Z}-score of
	poly-{L}-proline type {II} conformation based on local sequence.},
  journal = {Comput. {B}iol. {C}hem.},
  year = {2005},
  volume = {29},
  pages = {95--100},
  number = {2},
  month = {Apr},
  abstract = {In recent years, the poly-{L}-proline type {II} ({PPII}) conformation
	has gained more and more importance. {T}his structure plays vital
	roles in many biological processes. {B}ut few studies have been made
	to predict {PPII} secondary structures computationally. {T}he support
	vector machine ({SVM}) represents a new approach to supervised pattern
	classification and has been successfully applied to a wide range
	of pattern recognition problems. {I}n this paper, we present a {SVM}
	prediction method of {PPII} conformation based on local sequence.
	{T}he overall accuracy for both the independent testing set and estimate
	of jackknife testing reached approximately 70\%. {M}atthew's correlation
	coefficient ({MCC}) could reach 0.4. {B}y comparing the results of
	training and testing datasets with different sequence identities,
	we suggest that the performance of this method correlates with the
	sequence identity of dataset. {T}he parameter of {SVM} kernel function
	was an important factor to the performance of this method. {T}he
	propensities of residues located at different positions were also
	analyzed. {B}y computing {Z}-scores, we found that {P} and {G} were
	the two most important residues to {PPII} structure conformation.},
  doi = {10.1016/j.compbiolchem.2005.02.002},
  pdf = {../local/Wang2005Prediction.pdf},
  file = {Wang2005Prediction.pdf:local/Wang2005Prediction.pdf:PDF},
  keywords = {biosvm},
  pii = {S1476-9271(05)00017-4},
  url = {http://dx.doi.org/10.1016/j.compbiolchem.2005.02.002}
}

@article{Wang1999Human,
  author = {Wang, R. F.},
  title = {Human tumor antigens: implications for cancer vaccine development.},
  journal = {J. Mol. Med.},
  year = {1999},
  month = {Sep},
  volume = {77},
  number = {9},
  pages = {640--655},
  pmid = {10569202},
  keywords = {immunoinformatics},
  timestamp = {2007.01.25},
  abstract = {The adoptive transfer of tumor-infiltrating lymphocytes along with
	interleukin 2 into autologous patients resulted in the objective
	regression of tumor in about 30\% of patients with melanoma, indicating
	that these T cells play a role in tumor rejection. To understand
	the molecular basis of the T cell-cancer cell interaction we and
	others started to search for tumor antigens expressed on cancer cells
	recognized by T cells. This led to the identification of several
	major histocompatibility complex (MHC) class I restricted tumor antigens.
	These tumor antigens have been classified into several categories:
	tissue-specific differentiation antigens, tumor-specific shared antigens,
	and tumor-specific unique antigens. Because CD4+ T cells play a central
	role in orchestrating the host immune response against cancer, infectious
	diseases, and autoimmune diseases, a novel genetic approach has recently
	been developed to identify these MHC class II restricted tumor antigens.
	The identification of both MHC class I and II restricted tumor antigens
	provides new opportunities for the development of therapeutic strategies
	against cancer. This review summarizes the current status of tumor
	antigens and their potential applications to cancer treatment.}
}

@article{Wang2004Objective,
  author = {S. Wang and H. Li and F. Qi and Y. Zhao},
  title = {Objective facial paralysis grading based on {P}face and eigenflow.},
  journal = {Med {B}iol {E}ng {C}omput},
  year = {2004},
  volume = {42},
  pages = {598--603},
  number = {5},
  month = {Sep},
  abstract = {To provide physicians with an objective and quantitative measurement
	of single-sided facial paralysis, the paper presents a computer-based
	approach that is different from the nine existing, subjective and
	hand-performed international scales, such as {H}ouse-{B}rackman.
	{F}or voluntary expressions of a patient, this approach used {P}face,
	which stems from {D}face, to measure the asymmetry between two sides
	of the face and used eigenflow to measure the expression variations
	between the patient and normal subjects. {T}he results from {P}face
	and eigenflow were then combined by the support vector machine produce
	to {P}degree. {A} study of 25 subjects revealed that {P}degree could
	differentiate paralysis states ({P}degree > or = 0) and normal states
	({P}degree < 0), with the ability to grade facial paralysis automatically.
	{M}oreover, the {P}face of specific facial areas can be used in the
	supervision of the rehabilitation process.},
  keywords = {Adolescent, Adult, Computer-Assisted, Facial Asymmetry, Facial Expression,
	Facial Paralysis, Female, Humans, Image Interpretation, Male, Middle
	Aged, Motion, Photography, Severity of Illness Index},
  pmid = {15503959}
}

@article{Wang2005Gene-expression,
  author = {Wang, Y. and Klijn, J. G. M. and Zhang, Y. and Sieuwerts, A. M. and
	Look, M. P. and Yang, F. and Talantov, D. and Timmermans, M. and Meijer-van
	Gelder, M. E. and Yu, J. and Jatkoe, T. and Berns, E. M. J. J. and Atkins,
	D. and Foekens, J. A.},
  title = {Gene-expression profiles to predict distant metastasis of lymph-node-negative
	primary breast cancers},
  journal = {Lancet},
  year = {2005},
  volume = {365},
  pages = {671--679},
  number = {9460},
  abstract = {BACKGROUND: Genome-wide measures of gene expression can identify patterns
	of gene activity that subclassify tumours and might provide a better
	means than is currently available for individual risk assessment
	in patients with lymph-node-negative breast cancer. METHODS: We analysed,
	with Affymetrix Human U133a GeneChips, the expression of 22000 transcripts
	from total RNA of frozen tumour samples from 286 lymph-node-negative
	patients who had not received adjuvant systemic treatment. FINDINGS:
	In a training set of 115 tumours, we identified a 76-gene signature
	consisting of 60 genes for patients positive for oestrogen receptors
	(ER) and 16 genes for ER-negative patients. This signature showed
	93\% sensitivity and 48\% specificity in a subsequent independent
	testing set of 171 lymph-node-negative patients. The gene profile
	was highly informative in identifying patients who developed distant
	metastases within 5 years (hazard ratio 5.67 [95\% CI 2.59-12.4]),
	even when corrected for traditional prognostic factors in multivariate
	analysis (5.55 [2.46-12.5]). The 76-gene profile also represented
	a strong prognostic factor for the development of metastasis in the
	subgroups of 84 premenopausal patients (9.60 [2.28-40.5]), 87 postmenopausal
	patients (4.04 [1.57-10.4]), and 79 patients with tumours of 10-20
	mm (14.1 [3.34-59.2]), a group of patients for whom prediction of
	prognosis is especially difficult. INTERPRETATION: The identified
	signature provides a powerful tool for identification of patients
	at high risk of distant recurrence. The ability to identify patients
	who have a favourable prognosis could, after independent confirmation,
	allow clinicians to avoid adjuvant systemic therapy or to choose
	less aggressive therapeutic options.},
  doi = {10.1016/S0140-6736(05)17947-1},
  pdf = {../local/Wang2005Gene-expression.pdf},
  file = {Wang2005Gene-expression.pdf:local/Wang2005Gene-expression.pdf:PDF},
  keywords = {microarray, breastcancer},
  owner = {jp},
  pii = {S0140673605179471},
  pmid = {15894094},
  timestamp = {2006.07.06},
  url = {http://dx.doi.org/10.1016/S0140-6736(05)17947-1}
}

@inproceedings{Wang2004Bipartie,
  author = {Y. Wang and F. Makedon and J. Ford},
  title = {A Bipartite Graph Matching Framework for Finding Correspondences
	between Structural Elements in Two Proteins},
  booktitle = {Proceedings of the 26th Annual International Conference of the IEEE
	Engineering in Medicine and Biology Society},
  year = {2004},
  month = {Sep},
  abstract = {A protein molecule consists one or more chains of amino acid sequences
	that fold into a complex three-dimensional structure. A protein's
	functions are often determined by its 3D structure, and so comparing
	the similarity of 3D structures between proteins is an important
	problem. To accomplish such comparison, one must align two proteins
	properly with rotation and translation in 3D space. Finding the correspondences
	between structural elements in the two proteins is the key step in
	many protein structure alignment algorithms. In this paper, we introduce
	a new graph theoretic framework based on bipartite graph matching
	for finding sufficiently good correspondences. It is capable of providing
	both sequence-dependent and sequence-independent correspondences.
	It is a general framework for pair-wise matching of atoms, amino
	acids residues or secondary structure elements.},
  eventdate = {2004-09-01/2004-09-05},
  url = {http://www.cs.dartmouth.edu/~wyh/papers/embs04_alignment.pdf}
}

@article{Wang2005Gene,
  author = {Yu Wang and Igor V Tetko and Mark A Hall and Eibe Frank and Axel
	Facius and Klaus F X Mayer and Hans W Mewes},
  title = {Gene selection from microarray data for cancer classification--a
	machine learning approach.},
  journal = {Comput. {B}iol. {C}hem.},
  year = {2005},
  volume = {29},
  pages = {37--46},
  number = {1},
  month = {Feb},
  abstract = {A {DNA} microarray can track the expression levels of thousands of
	genes simultaneously. {P}revious research has demonstrated that this
	technology can be useful in the classification of cancers. {C}ancer
	microarray data normally contains a small number of samples which
	have a large number of gene expression levels as features. {T}o select
	relevant genes involved in different types of cancer remains a challenge.
	{I}n order to extract useful gene information from cancer microarray
	data and reduce dimensionality, feature selection algorithms were
	systematically investigated in this study. {U}sing a correlation-based
	feature selector combined with machine learning algorithms such as
	decision trees, na{\"\i}ve {B}ayes and support vector machines, we show
	that classification performance at least as good as published results
	can be obtained on acute leukemia and diffuse large {B}-cell lymphoma
	microarray data sets. {W}e also demonstrate that a combined use of
	different classification and feature selection approaches makes it
	possible to select relevant genes with high confidence. {T}his is
	also the first paper which discusses both computational and biological
	evidence for the involvement of zyxin in leukaemogenesis.},
  doi = {10.1016/j.compbiolchem.2004.11.001},
  pdf = {../local/Wang2005Gene.pdf},
  file = {Wang2005Gene.pdf:local/Wang2005Gene.pdf:PDF},
  keywords = {biosvm, microarray},
  pii = {S1476-9271(04)00108-2},
  url = {http://dx.doi.org/10.1016/j.compbiolchem.2004.11.001}
}

@article{Wang2003Nonlinear,
  author = {Yongmei Michelle Wang and Robert T Schultz and R. Todd Constable
	and Lawrence H Staib},
  title = {Nonlinear estimation and modeling of f{MRI} data using spatio-temporal
	support vector regression.},
  journal = {Inf {P}rocess {M}ed {I}maging},
  year = {2003},
  volume = {18},
  pages = {647--659},
  month = {Jul},
  abstract = {This paper presents a new and general nonlinear framework for f{MRI}
	data analysis based on statistical learning methodology: support
	vector machines. {U}nlike most current methods which assume a linear
	model for simplicity, the estimation and analysis of f{MRI} signal
	within the proposed framework is nonlinear, which matches recent
	findings on the dynamics underlying neural activity and hemodynamic
	physiology. {T}he approach utilizes spatio-temporal support vector
	regression ({SVR}), within which the intrinsic spatio-temporal autocorrelations
	in f{MRI} data are reflected. {T}he novel formulation of the problem
	allows merging model-driven with data-driven methods, and therefore
	unifies these two currently separate modes of f{MRI} analysis. {I}n
	addition, multiresolution signal analysis is achieved and developed.
	{O}ther advantages of the approach are: avoidance of interpolation
	after motion estimation, embedded removal of low-frequency noise
	components, and easy incorporation of multi-run, multi-subject, and
	multi-task studies into the framework.}
}

@article{Wang2009RNA,
  author = {Wang, Z. and Gerstein, M. and Snyder, M.},
  title = {{RNA-Seq}: a revolutionary tool for transcriptomics.},
  journal = {Nat. Rev. Genet.},
  year = {2009},
  month = {Jan},
  volume = {10},
  number = {1},
  pages = {57--63},
  doi = {10.1038/nrg2484},
  url = {http://dx.doi.org/10.1038/nrg2484},
  pmid = {19015660},
  pii = {nrg2484},
  pdf = {../local/Wang2009RNA.pdf},
  file = {Wang2009RNA.pdf:Wang2009RNA.pdf:PDF},
  institution = {Department of Molecular, Cellular and Developmental Biology, Yale
	University, 219 Prospect Street, New Haven, Connecticut 06520, USA.},
  keywords = {ngs, rnaseq},
  owner = {ljacob},
  timestamp = {2009.09.14},
  abstract = {RNA-Seq is a recently developed approach to transcriptome profiling
	that uses deep-sequencing technologies. Studies using this method
	have already altered our view of the extent and complexity of eukaryotic
	transcriptomes. RNA-Seq also provides a far more precise measurement
	of levels of transcripts and their isoforms than other methods. This
	article describes the RNA-Seq approach, the challenges associated
	with its application, and the advances made so far in characterizing
	several eukaryote transcriptomes.}
}

@article{Ward2003Secondary,
  author = {Ward, J. J. and McGuffin, L. J. and Buxton, B. F. and Jones, D. T.},
  title = {Secondary structure prediction with support vector machines},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1650--1655},
  number = {13},
  abstract = {Motivation: {A} new method that uses support vector machines ({SVM}s)
	to predict protein secondary structure is described and evaluated.
	{T}he study is designed to develop a reliable prediction method using
	an alternative technique and to investigate the applicability of
	{SVM}s to this type of bioinformatics problem. {M}ethods: {B}inary
	{SVM}s are trained to discriminate between two structural classes.
	{T}he binary classifiers are combined in several ways to predict
	multi-class secondary structure. {R}esults: {T}he average three-state
	prediction accuracy per protein ({Q}3) is estimated by cross-validation
	to be 77.07 {+/-} 0.26\% with a segment overlap ({S}ov) score of 73.32
	{+/-} 0.39\%. {T}he {SVM} performs similarly to the 'state-of-the-art'
	{PSIPRED} prediction method on a non-homologous test set of 121 proteins
	despite being trained on substantially fewer examples. {A} simple
	consensus of the {SVM}, {PSIPRED} and {PROF}sec achieves significantly
	higher prediction accuracy than the individual methods. {A}vailability:
	{T}he {SVM} classifier is available from the authors. {W}ork is in
	progress to make the method available on-line and to integrate the
	{SVM} predictions into the {PSIPRED} server.},
  pdf = {../local/Ward2003Secondary.pdf},
  file = {Ward2003Secondary.pdf:local/Ward2003Secondary.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/13/1650}
}

@article{Waring2005Face,
  author = {Christopher A Waring and Xiuwen Liu},
  title = {Face detection using spectral histograms and {SVM}s.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2005},
  volume = {35},
  pages = {467--476},
  number = {3},
  month = {Jun},
  abstract = {We present a face detection method using spectral histograms and support
	vector machines ({SVM}s). {E}ach image window is represented by its
	spectral histogram, which is a feature vector consisting of histograms
	of filtered images. {U}sing statistical sampling, we show systematically
	the representation groups face images together; in comparison, commonly
	used representations often do not exhibit this necessary and desirable
	property. {B}y using an {SVM} trained on a set of 4500 face and 8000
	nonface images, we obtain a robust classifying function for face
	and non-face patterns. {W}ith an effective illumination-correction
	algorithm, our system reliably discriminates face and nonface patterns
	in images under different kinds of conditions. {O}ur method on two
	commonly used data sets give the best performance among recent face-detection
	ones. {W}e attribute the high performance to the desirable properties
	of the spectral histogram representation and good generalization
	of {SVM}s. {S}everal further improvements in computation time and
	in performance are discussed.}
}

@article{Waring2004Interlaboratory,
  author = {Jeffrey F Waring and Roger G Ulrich and Nick Flint and David Morfitt
	and Arno Kalkuhl and Frank Staedtler and Michael Lawton and Johanna
	M Beekman and Laura Suter},
  title = {Interlaboratory evaluation of rat hepatic gene expression changes
	induced by methapyrilene.},
  journal = {Environ {H}ealth {P}erspect},
  year = {2004},
  volume = {112},
  pages = {439--448},
  number = {4},
  month = {Mar},
  abstract = {Several studies using microarrays have shown that changes in gene
	expression provide information about the mechanism of toxicity induced
	by xenobiotic agents. {N}evertheless, the issue of whether gene expression
	profiles are reproducible across different laboratories remains to
	be determined. {T}o address this question, several members of the
	{H}epatotoxicity {W}orking {G}roup of the {I}nternational {L}ife
	{S}ciences {I}nstitute {H}ealth and {E}nvironmental {S}ciences {I}nstitute
	evaluated the liver gene expression profiles of rats treated with
	methapyrilene ({MP}). {A}nimals were treated at one facility, and
	{RNA} was distributed to five different sites for gene expression
	analysis. {A} preliminary evaluation of the number of modulated genes
	uncovered striking differences between the five different sites.
	{H}owever, additional data analysis demonstrated that these differences
	had an effect on the absolute gene expression results but not on
	the outcome of the study. {F}or all users, unsupervised algorithms
	showed that gene expression allows the distinction of the high dose
	of {MP} from controls and low dose. {I}n addition, the use of a supervised
	analysis method (support vector machines) made it possible to correctly
	classify samples. {I}n conclusion, the results show that, despite
	some variability, robust gene expression changes were consistent
	between sites. {I}n addition, key expression changes related to the
	mechanism of {MP}-induced hepatotoxicity were identified. {T}hese
	results provide critical information regarding the consistency of
	microarray results across different laboratories and shed light on
	the strengths and limitations of expression profiling in drug safety
	analysis.},
  keywords = {biosvm}
}

@article{Warmke1994family,
  author    = {Warmke, J. W. and Ganetzky, B.},
  title     = {{A} family of potassium channel genes related to eag in {D}rosophila
               and mammals.},
  journal   = {Proc. Natl. Acad. Sci. U. S. A.},
  year      = {1994},
  volume    = {91},
  number    = {8},
  pages     = {3438--3442},
  month     = {Apr},
  abstract  = {We have identified a conserved family of genes related to Drosophila
               eag, which encodes a distinct type of voltage-activated K+ channel.
               Three related genes were recovered in screens of cDNA libraries from
               Drosophila, mouse, and human tissues. One gene is the mouse counterpart
               of eag; the other two represent additional subfamilies. The human
               gene maps to chromosome 7. Family members share at least 47\% amino
               acid identity in their hydrophobic cores and all contain a segment
               homologous to a cyclic nucleotide-binding domain. Sequence comparisons
               indicate that members of this family are most closely related to
               vertebrate cyclic nucleotide-gated cation channels and plant inward-rectifying
               K+ channels. The existence of another family of K+ channel structural
               genes further extends the known diversity of K+ channels and has
               important implications for the structure, function, and evolution
               of the superfamily of voltage-sensitive ion channels.},
  pmid      = {8159766},
  url       = {http://www.pnas.org/cgi/reprint/91/8/3438},
  pdf       = {../local/Warmke1994family.pdf},
  file      = {Warmke1994family.pdf:local/Warmke1994family.pdf:PDF},
  timestamp = {2006.10.05}
}

@article{Warmuth2003Active,
  author = {Warmuth, M. K. and Liao, J. and R{\"a}tsch, G. and Mathieson, M.
	and Putta, S. and Lemmen, C.},
  title = {Active learning with support vector machines in the drug discovery
	process.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2003},
  volume = {43},
  pages = {667--673},
  number = {2},
  abstract = {We investigate the following data mining problem from computer-aided
	drug design: {F}rom a large collection of compounds, find those that
	bind to a target molecule in as few iterations of biochemical testing
	as possible. {I}n each iteration a comparatively small batch of compounds
	is screened for binding activity toward this target. {W}e employed
	the so-called "active learning paradigm" from {M}achine {L}earning
	for selecting the successive batches. {O}ur main selection strategy
	is based on the maximum margin hyperplane-generated by "{S}upport
	{V}ector {M}achines". {T}his hyperplane separates the current set
	of active from the inactive compounds and has the largest possible
	distance from any labeled compound. {W}e perform a thorough comparative
	study of various other selection strategies on data sets provided
	by {D}u{P}ont {P}harmaceuticals and show that the strategies based
	on the maximum margin hyperplane clearly outperform the simpler ones.},
  doi = {10.1021/ci025620t},
  pdf = {../local/Warmuth2003Active.pdf},
  file = {Warmuth2003Active.pdf:local/Warmuth2003Active.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1021/ci025620t}
}

@inproceedings{Warmuth2002Active,
  author = {Warmuth, M. K. and R{\"a}tsch, G. and Mathieson, M. and Liao, J.
	and Lemmen, C.},
  title = {Active learning in the drug discovery process},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2002},
  editor = {Dietterich, T. G. and Becker, S. and Ghahramani, Z.},
  volume = {14},
  pages = {1449--1456},
  publisher = {MIT Press},
  keywords = {biosvm},
  subject = {qsar}
}

@article{Warren2006Critical,
  author = {Warren, G. L. and Andrews, C. W. and Capelli, A. M. and Clarke, B. and
	LaLonde, J. and Lambert, M. H. and Lindvall, M. and Nevins, N. and
	Semus, S. F. and Senger, S. and Tedesco, G. and Wall, I. D. and Woolven,
	J. M. and Peishoff, C. E. and Head, M. S.},
  title = {A critical assessment of docking programs and scoring functions.},
  journal = {J. Med. Chem.},
  year = {2006},
  volume = {49},
  pages = {5912--5931},
  number = {20},
  month = {Oct},
  abstract = {Docking is a computational technique that samples conformations of
	small molecules in protein binding sites; scoring functions are used
	to assess which of these conformations best complements the protein
	binding site. An evaluation of 10 docking programs and 37 scoring
	functions was conducted against eight proteins of seven protein types
	for three tasks: binding mode prediction, virtual screening for lead
	identification, and rank-ordering by affinity for lead optimization.
	All of the docking programs were able to generate ligand conformations
	similar to crystallographically determined protein/ligand complex
	structures for at least one of the targets. However, scoring functions
	were less successful at distinguishing the crystallographic conformation
	from the set of docked poses. Docking programs identified active
	compounds from a pharmaceutically relevant pool of decoy compounds;
	however, no single program performed well for all of the targets.
	For prediction of compound affinity, none of the docking programs
	or scoring functions made a useful prediction of ligand binding affinity.},
  owner = {vero},
  pmid = {17004707},
  timestamp = {2009.02.04}
}

@article{Wassermann2009Ligand,
  author = {Anne Mai Wassermann and Hanna Geppert and J{\"u}rgen Bajorath},
  title = {Ligand prediction for orphan targets using support vector machines
	and various target-ligand kernels is dominated by nearest neighbor
	effects.},
  journal = {J Chem Inf Model},
  year = {2009},
  volume = {49},
  pages = {2155--2167},
  number = {10},
  month = {Oct},
  abstract = {Support vector machine (SVM) calculations combining protein and small
	molecule information have been applied to identify ligands for simulated
	orphan targets (i.e., targets for which no ligands were available).
	The combination of protein and ligand information was facilitated
	through the design of target-ligand kernel functions that account
	for pairwise ligand and target similarity. The design and biological
	information content of such kernel functions was expected to play
	a major role for target-directed ligand prediction. Therefore, a
	variety of target-ligand kernels were implemented to capture different
	types of target information including sequence, secondary structure,
	tertiary structure, biophysical properties, ontologies, or structural
	taxonomy. These kernels were tested in ligand predictions for simulated
	orphan targets in two target protein systems characterized by the
	presence of different intertarget relationships. Surprisingly, although
	there were target- and set-specific differences in prediction rates
	for alternative target-ligand kernels, the performance of these kernels
	was overall similar and also similar to SVM linear combinations.
	Test calculations designed to better understand possible reasons
	for these observations revealed that ligand information provided
	by nearest neighbors of orphan targets significantly influenced SVM
	performance, much more so than the inclusion of protein information.
	As long as ligands of closely related neighbors of orphan targets
	were available for SVM learning, orphan target ligands could be well
	predicted, regardless of the type and sophistication of the kernel
	function that was used. These findings suggest simplified strategies
	for SVM-based ligand prediction for orphan targets.},
  doi = {10.1021/ci9002624},
  pdf = {../local/Wassermann2009Ligand.pdf},
  file = {Wassermann2009Ligand.pdf:Wassermann2009Ligand.pdf:PDF},
  institution = {Department of Life Science Informatics, B-IT, LIMES Program Unit
	Chemical Biology and Medicinal Chemistry, Rheinische Friedrich-Wilhelms-Universit{\"a}t
	Bonn, Dahlmannstrasse 2, D-53113 Bonn, Germany.},
  keywords = {chemogenomics, chemoinformatics},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {19780576},
  timestamp = {2009.10.30},
  url = {http://dx.doi.org/10.1021/ci9002624}
}

@article{Watanabe2008Endogenous,
  author = {Watanabe, T. and Totoki, Y. and Toyoda, A. and Kaneda, M. and Kuramochi-Miyagawa,
	S. and Obata, Y. and Chiba, H. and Kohara, Y. and Kono, T. and Nakano,
	T. and Surani, M. A. and Sakaki, Y. and Sasaki, H.},
  title = {{E}ndogenous {siRNAs} from naturally formed {dsRNAs} regulate
	transcripts in mouse oocytes},
  journal = {Nature},
  year = {2008},
  volume = {453},
  pages = {539--543},
  keywords = {csbcbook}
}

@article{Waterman2003Transcriptional,
  author = {Waterman, S. R. and Small, P. L. C.},
  title = {Transcriptional expression of {Escherichia} coli glutamate-dependent
	acid resistance genes {gadA} and {gadBC} in an hns {rpoS} mutant.},
  journal = {J. Bacteriol.},
  year = {2003},
  volume = {185},
  pages = {4644--4647},
  number = {15},
  month = {Aug},
  abstract = {Resistance to being killed by acidic environments with pH values lower
	than 3 is an important feature of both pathogenic and nonpathogenic
	Escherichia coli. The most potent E. coli acid resistance system
	utilizes two isoforms of glutamate decarboxylase encoded by gadA
	and gadB and a putative glutamate:gamma-aminobutyric acid antiporter
	encoded by gadC. The gad system is controlled by two repressors (H-NS
	and CRP), one activator (GadX), one repressor-activator (GadW), and
	two sigma factors (sigma(S) and sigma(70)). In contrast to results
	of previous reports, we demonstrate that gad transcription can be
	detected in an hns rpoS mutant strain of E. coli K-12, indicating
	that gad promoters can be initiated by sigma(70) in the absence of
	H-NS.},
  institution = {Division of Human Immunology, Hanson Institute, Institute of Medical
	and Veterinary Science, Adelaide, South Australia, 5000, Australia.
	scott.waterman@imvs.sa.gov.au},
  keywords = {Bacterial Proteins; DNA-Binding Proteins; Drug Resistance, Bacterial;
	Escherichia coli; Escherichia coli Proteins; Gene Expression Regulation,
	Bacterial; Glutamate Decarboxylase; Glutamates; Hydrogen-Ion Concentration;
	Membrane Proteins; Mutation; Sigma Factor; Transcription, Genetic},
  owner = {fantine},
  pmid = {12867478},
  timestamp = {2008.02.11}
}

@incollection{Watkins2000Dynamic,
  author = {Watkins, C.},
  title = {Dynamic alignment kernels},
  booktitle = {Advances in {L}arge {M}argin {C}lassifiers},
  publisher = {MIT Press},
  year = {2000},
  editor = {Smola, A. J. and Bartlett, P. L. and Sch{\"o}lkopf, B. and Schuurmans, D.},
  pages = {39--50},
  address = {Cambridge, MA},
  pdf = {../local/Watkins2000Dynamic.pdf},
  file = {Watkins2000Dynamic.pdf:local/Watkins2000Dynamic.pdf:PDF},
  keywords = {biosvm},
  subject = {kernel},
  url = {http://www.cs.rhbnc.ac.uk/home/chrisw/dynk.ps.gz}
}

@article{Watson1953Structure,
  author = {Watson, J. D. and Crick, F. H. C.},
  title = {A {S}tructure for {D}eoxyribose {N}ucleic {A}cid},
  journal = {Nature},
  year = {1953},
  volume = {171},
  pages = {737--738},
  pdf = {../local/Watson1953Structure.pdf},
  file = {Watson1953Structure.pdf:local/Watson1953Structure.pdf:PDF},
  keywords = {bio},
  owner = {vert},
  url = {http://www.nature.com/genomics/human/watson-crick/index.html}
}

@article{Watts1998Collective,
  author = {Watts, D. J. and Strogatz, S. H.},
  title = {Collective dynamics of `small-world' networks},
  journal = {Nature},
  year = {1998},
  volume = {393},
  pages = {440--442},
  number = {6684},
  pdf = {../local/watt98.pdf},
  file = {watt98.pdf:local/watt98.pdf:PDF},
  subject = {compnet},
  url = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v393/n6684/full/393440a0_fs.html&content_filetype=PDF}
}

@article{Weathers2004Reduced,
  author = {Weathers, E. A. and Paulaitis, M. E. and Woolf, T. B. and Hoh, J.
	H.},
  title = {Reduced amino acid alphabet is sufficient to accurately recognize
	intrinsically disordered protein.},
  journal = {F{EBS} {L}ett.},
  year = {2004},
  volume = {576},
  pages = {348--352},
  number = {3},
  abstract = {Intrinsically disordered proteins are an important class of proteins
	with unique functions and properties. {H}ere, we have applied a support
	vector machine ({SVM}) trained on naturally occurring disordered
	and ordered proteins to examine the contribution of various parameters
	(vectors) to recognizing proteins that contain disordered regions.
	{W}e find that a {SVM} that incorporates only amino acid composition
	has a recognition accuracy of 87+/-2\%. {T}his result suggests that
	composition alone is sufficient to accurately recognize disorder.
	{I}nterestingly, {SVM}s using reduced sets of amino acids based on
	chemical similarity preserve high recognition accuracy. {A} set as
	small as four retains an accuracy of 84+/-2\%; this suggests that
	general physicochemical properties rather than specific amino acids
	are important factors contributing to protein disorder.},
  doi = {10.1016/j.febslet.2004.09.036},
  pdf = {../local/Weathers2004Reduced.pdf},
  file = {Weathers2004Reduced.pdf:local/Weathers2004Reduced.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1016/j.febslet.2004.09.036}
}

@article{Weber2002Cancer,
  author = {Weber, B. L.},
  title = {Cancer genomics},
  journal = {Cancer {C}ell},
  year = {2002},
  volume = {1},
  pages = {37--47},
  number = {1},
  abstract = {The draft human genome sequence and the dissemination of high throughput
	technology provides opportunities for systematic analysis of cancer
	cells. {G}enome-wide mutation screens, high resolution analysis of
	chromosomal aberrations and expression profiling all give comprehensive
	views of genetic alterations in cancer cells. {F}rom these analyses
	will come a complete list of the genetic changes that drive malignant
	transformation and of the therapeutic targets that may be exploited
	for clinical benefit.},
  doi = {10.1016/S1535-6108(02)00026-0},
  pdf = {../local/Weber2002Cancer.pdf},
  file = {Weber2002Cancer.pdf:local/Weber2002Cancer.pdf:PDF},
  url = {http://dx.doi.org/10.1016/S1535-6108(02)00026-0}
}

@article{Weber2002Building,
  author = {Griffin Weber and Staal Vinterbo and Lucila Ohno-Machado},
  title = {Building an asynchronous web-based tool for machine learning classification.},
  journal = {Proc {AMIA} {S}ymp},
  year = {2002},
  pages = {869--873},
  abstract = {Various unsupervised and supervised learning methods including support
	vector machines, classification trees, linear discriminant analysis
	and nearest neighbor classifiers have been used to classify high-throughput
	gene expression data. {S}impler and more widely accepted statistical
	tools have not yet been used for this purpose, hence proper comparisons
	between classification methods have not been conducted. {W}e developed
	free software that implements logistic regression with stepwise variable
	selection as a quick and simple method for initial exploration of
	important genetic markers in disease classification. {T}o implement
	the algorithm and allow our collaborators in remote locations to
	evaluate and compare its results against those of other methods,
	we developed a user-friendly asynchronous web-based application with
	a minimal amount of programming using free, downloadable software
	tools. {W}ith this program, we show that classification using logistic
	regression can perform as well as other more sophisticated algorithms,
	and it has the advantages of being easy to interpret and reproduce.
	{B}y making the tool freely and easily available, we hope to promote
	the comparison of classification methods. {I}n addition, we believe
	our web application can be used as a model for other bioinformatics
	laboratories that need to develop web-based analysis tools in a short
	amount of time and on a limited budget.},
  keywords = {Acute, Algorithms, Animals, Artificial Intelligence, Automated, Base
	Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing
	Techniques, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Drug,
	Drug Design, Eukaryotic Cells, Female, Gene Expression, Gene Expression
	Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers,
	Hemolysins, Humans, Internet, Leukemia, Ligands, Likelihood Functions,
	Logistic Models, Lymphocytic, Markov Chains, Mathematics, Messenger,
	Models, Molecular, Molecular Probe Techniques, Molecular Sequence
	Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural Networks
	(Computer), Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation,
	Observer Variation, Oligonucleotide Array Sequence Analysis, Ovarian
	Neoplasms, P.H.S., Pattern Recognition, Probability, Protein Binding,
	Proteins, Quality Control, RNA, RNA Splicing, Receptors, Reference
	Values, Reproducibility of Results, Research Support, Sensitivity
	and Specificity, Sequence Analysis, Signal Processing, Software,
	Statistical, Stomach Neoplasms, Thermodynamics, Transcription, Tumor
	Markers, U.S. Gov't},
  pii = {D020001919},
  pmid = {12463949}
}

@article{Wei2005study,
  author = {Liyang Wei and Yongyi Yang and Robert M Nishikawa and Yulei Jiang},
  title = {A study on several machine-learning methods for classification of
	malignant and benign clustered microcalcifications.},
  journal = {I{EEE} {T}rans {M}ed {I}maging},
  year = {2005},
  volume = {24},
  pages = {371--380},
  number = {3},
  month = {Mar},
  abstract = {In this paper, we investigate several state-of-the-art machine-learning
	methods for automated classification of clustered microcalcifications
	({MC}s). {T}he classifier is part of a computer-aided diagnosis ({CAD}x)
	scheme that is aimed to assisting radiologists in making more accurate
	diagnoses of breast cancer on mammograms. {T}he methods we considered
	were: support vector machine ({SVM}), kernel {F}isher discriminant
	({KFD}), relevance vector machine ({RVM}), and committee machines
	(ensemble averaging and {A}da{B}oost), of which most have been developed
	recently in statistical learning theory. {W}e formulated differentiation
	of malignant from benign {MC}s as a supervised learning problem,
	and applied these learning methods to develop the classification
	algorithm. {A}s input, these methods used image features automatically
	extracted from clustered {MC}s. {W}e tested these methods using a
	database of 697 clinical mammograms from 386 cases, which included
	a wide spectrum of difficult-to-classify cases. {W}e analyzed the
	distribution of the cases in this database using the multidimensional
	scaling technique, which reveals that in the feature space the malignant
	cases are not trivially separable from the benign ones. {W}e used
	receiver operating characteristic ({ROC}) analysis to evaluate and
	to compare classification performance by the different methods. {I}n
	addition, we also investigated how to combine information from multiple-view
	mammograms of the same case so that the best decision can be made
	by a classifier. {I}n our experiments, the kernel-based methods (i.e.,
	{SVM}, {KFD}, and {RVM}) yielded the best performance ({A}z = 0.85,
	{SVM}), significantly outperforming a well-established, clinically-proven
	{CAD}x approach that is based on neural network ({A}z = 0.80).}
}

@article{Weigelt2010Breast,
  author = {Britta Weigelt and Alan Mackay and Roger A'hern and Rachael Natrajan
	and David S P Tan and Mitch Dowsett and Alan Ashworth and Jorge S
	Reis-Filho},
  title = {Breast cancer molecular profiling with single sample predictors:
	a retrospective analysis.},
  journal = {Lancet Oncol},
  year = {2010},
  volume = {11},
  pages = {339--349},
  number = {4},
  month = {Apr},
  abstract = {Microarray expression profiling classifies breast cancer into five
	molecular subtypes: luminal A, luminal B, basal-like, HER2, and normal
	breast-like. Three microarray-based single sample predictors (SSPs)
	have been used to define molecular classification of individual samples.
	We aimed to establish agreement between these SSPs for identification
	of breast cancer molecular subtypes. Previously described microarray-based
	SSPs were applied to one in-house (n=53) and three publicly available
	(n=779) breast cancer datasets. Agreement was analysed between SSPs
	for the whole classification system and for the five molecular subtypes
	individually in each cohort. Fair-to-substantial agreement between
	every pair of SSPs in each cohort was recorded (kappa=0.238-0.740).
	Of the five molecular subtypes, only basal-like cancers consistently
	showed almost-perfect agreement (kappa>0.812). The proportion of
	cases classified as basal-like in each cohort was consistent irrespective
	of the SSP used; however, the proportion of each remaining molecular
	subtype varied substantially. Assignment of individual cases to luminal
	A, luminal B, HER2, and normal breast-like subtypes was dependent
	on the SSP used. The significance of associations with outcome of
	each molecular subtype, other than basal-like and luminal A, varied
	depending on SSP used. However, different SSPs produced broadly similar
	survival curves. Although every SSP identifies molecular subtypes
	with similar survival, they do not reliably assign the same patients
	to the same molecular subtypes. For molecular subtype classification
	to be incorporated into routine clinical practice and treatment decision
	making, stringent standardisation of methodologies and definitions
	for identification of breast cancer molecular subtypes is needed. Breakthrough
	Breast Cancer, Cancer Research UK.},
  doi = {10.1016/S1470-2045(10)70008-5},
  institution = {Cancer Research UK, London Research Institute, London, UK.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S1470-2045(10)70008-5},
  pmid = {20181526},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1016/S1470-2045(10)70008-5}
}

@article{Weigelt2010Molecular,
  author    = {Weigelt, B. and Reis-Filho, J. S.},
  title     = {Molecular profiling currently offers no more than tumour morphology
               and basic immunohistochemistry},
  journal   = {Breast Cancer Res.},
  year      = {2010},
  volume    = {12},
  number    = {S4},
  pages     = {S5},
  doi       = {10.1186/bcr2734},
  url       = {http://dx.doi.org/10.1186/bcr2734},
  pdf       = {../local/Weigelt2010Molecular.pdf},
  file      = {Weigelt2010Molecular.pdf:Weigelt2010Molecular.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.01.13}
}

@article{Weill2009Development,
  author = {Nathanael Weill and Didier Rognan},
  title = {Development and {V}alidation of a {N}ovel {P}rotein--{L}igand {F}ingerprint
	{T}o {M}ine {C}hemogenomic {S}pace: {A}pplication to {G} {P}rotein-{C}oupled
	{R}eceptors and {T}heir {L}igands},
  journal = {Journal of {C}hemical {I}nformation and {M}odeling},
  year = {2009},
  volume = {49},
  pages = {1049--1062},
  number = {4}
}

@article{Weinberg2010Point,
  author      = {Weinberg, R.},
  title       = {Point: Hypotheses first.},
  journal     = {Nature},
  year        = {2010},
  volume      = {464},
  number      = {7289},
  pages       = {678},
  month       = {Apr},
  doi         = {10.1038/464678a},
  url         = {http://dx.doi.org/10.1038/464678a},
  pii         = {464678a},
  pmid        = {20360718},
  institution = {Whitehead Institute for Biomedical Research, Department of Biology,
                 Massachusetts Institute of Technology, Cambridge, Massachusetts 02142,
                 USA. weinberg@wi.mit.edu},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  timestamp   = {2010.10.12}
}

@book{Weinberg2007Biology,
  title = {The biology of cancer},
  publisher = {Garland Science, Taylor \& Francis Group, LLC},
  year = {2007},
  author = {Weinberg, R. A.},
  pagetotal = {864},
  keywords = {csbcbook}
}

@inproceedings{Weinberger2005Nonlinear,
  author = {Weinberger, K. and Packer, B. and Saul, L.},
  title = {Nonlinear {D}imensionality {R}eduction by {S}emidefinite {P}rogramming
	and {K}ernel {M}atrix {F}actorization},
  booktitle = {Proceedings of the {T}enth {I}nternational {W}orkshop on {A}rtificial
	{I}ntelligence and {S}tatistics, {J}an 6-8, 2005, {S}avannah {H}otel,
	{B}arbados},
  year = {2005},
  editor = {Cowell, R. G. and Ghahramani, Z.},
  pages = {381--388},
  publisher = {Society for Artificial Intelligence and Statistics},
  abstract = {We describe an algorithm for nonlinear dimensionality reduction based
	on semidefinite programming and kernel matrix factorization. {T}he
	algorithm learns a kernel matrix for high dimensional data that lies
	on or near a low dimensional manifold. {I}n earlier work, the kernel
	matrix was learned by maximizing the variance in feature space while
	preserving the distances and angles between nearest neighbors. {I}n
	this paper, adapting recent ideas from semi-supervised learning on
	graphs, we show that the full kernel matrix can be very well approximated
	by a product of smaller matrices. {R}epresenting the kernel matrix
	in this way, we can reformulate the semidefinite program in terms
	of a much smaller submatrix of inner products between randomly chosen
	landmarks. {T}he new framework leads to order-of-magnitude reductions
	in computation time and makes it possible to study much larger problems
	in manifold learning.},
  keywords = {dimred}
}

@inproceedings{Weinberger2006Distance,
  author = {Weinberger, K. Q. and Blitzer, J. and Saul, L. K.},
  title = {Distance metric learning for large margin nearest neighbor classification},
  booktitle = {Adv. {N}eural {I}nform. {P}rocess. {S}yst.},
  year = {2006},
  editor = {Weiss, Y. and Sch{\"o}lkopf, B. and Platt, J.},
  volume = {18},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  timestamp = {2007.06.06}
}

@inproceedings{Weinberger2004Unsupervised,
  author = {Weinberger, K. Q. and Saul, L. K.},
  title = {Unsupervised {L}earning of {I}mage {M}anifolds by {S}emidefinite
	{P}rogramming},
  booktitle = {2004 {IEEE} {C}omputer {S}ociety {C}onference on {C}omputer {V}ision
	and {P}attern {R}ecognition ({CVPR}'04)},
  year = {2004},
  volume = {2},
  pages = {988--995},
  abstract = {Can we detect low dimensional structure in high dimensional data sets
	of images and video? {T}he problem of dimensionality reduction arises
	often in computer vision and pattern recognition. {I}n this paper,
	we propose a new solution to this problem based on semidefinite programming.
	{O}ur algorithm can be used to analyze high dimensional data that
	lies on or near a low dimensional manifold. {I}t overcomes certain
	limitations of previous work in manifold learning, such as {I}somap
	and locally linear embedding. {W}e illustrate the algorithm on easily
	visualized examples of curves and surfaces, as well as on actual
	images of faces, handwritten digits, and solid objects.},
  doi = {10.1109/CVPR.2004.256},
  pdf = {../local/sdeFinal_cvpr04.pdf},
  file = {sdeFinal_cvpr04.pdf:http\://www.seas.upenn.edu/~kilianw/publications/PDFs/sdeFinal_cvpr04.pdf:PDF},
  keywords = {dimred}
}

@inproceedings{Weinberger2004Learning,
  author = {Weinberger, K. Q. and Sha, F. and Saul, L. K.},
  title = {Learning a kernel matrix for nonlinear dimensionality reduction},
  booktitle = {{ICML} '04: {T}wenty-first international conference on {M}achine
	learning},
  year = {2004},
  address = {New York, NY, USA},
  publisher = {ACM Press},
  abstract = {We investigate how to learn a kernel matrix for high dimensional data
	that lies on or near a low dimensional manifold. {N}oting that the
	kernel matrix implicitly maps the data into a nonlinear feature space,
	we show how to discover a mapping that "unfolds" the underlying manifold
	from which the data was sampled. {T}he kernel matrix is constructed
	by maximizing the variance in feature space subject to local constraints
	that preserve the angles and distances between nearest neighbors.
	{T}he main optimization involves an instance of semidefinite programming---a
	fundamentally different computation than previous algorithms for
	manifold learning, such as {I}somap and locally linear embedding.
	{T}he optimized kernels perform better than polynomial and {G}aussian
	kernels for problems in manifold learning, but worse for problems
	in large margin classification. {W}e explain these results in terms
	of the geometric properties of different kernels and comment on various
	interpretations of other manifold learning algorithms as kernel methods.},
  doi = {10.1145/1015330.1015345},
  pdf = {../local/kernel_icml04.pdf},
  file = {kernel_icml04.pdf:http\://www.seas.upenn.edu/~kilianw/publications/PDFs/kernel_icml04.pdf:PDF},
  keywords = {dimred}
}

@article{Weinberger1994Optimal,
  author = {Weinberger, M. J. and Merhav, N. and Feder, M.},
  title = {Optimal sequential probability assignment for individual sequences},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1994},
  volume = {40},
  pages = {384--396},
  number = {2},
  month = {Mar},
  abstract = {The problem of sequential probability assignment for individual sequences
	is investigated. {T}he authors compare the probabilities assigned
	by any sequential scheme to the performance of the best ``batch'' scheme
	(model) in some class. {F}or the class of finite-state schemes and
	other related families, they derive a deterministic performance bound,
	analogous to the classical (probabilistic) minimum description length
	({MDL}) bound. {I}t holds for ``most'' sequences, similarly to the
	probabilistic setting, where the bound holds for ``most'' sources in
	a class. {I}t is shown that the bound can be attained both pointwise
	and sequentially for any model family in the reference class and
	without any prior knowledge of its order. {T}his is achieved by a
	universal scheme based on a mixing approach. {T}he bound and its
	sequential achievability establish a completely deterministic significance
	to the concept of predictive {MDL} },
  pdf = {../local/Weinberger1994Optimal.pdf},
  file = {Weinberger1994Optimal.pdf:local/Weinberger1994Optimal.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Weinberger1995universal,
  author = {Weinberger, M. J. and Rissanen, J. J. and Feder, M.},
  title = {A universal finite memory source},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1995},
  volume = {41},
  pages = {643--652},
  number = {3},
  month = {May},
  abstract = {An irreducible parameterization for a finite memory source is constructed
	in the form of a tree machine. {A} universal information source for
	the set of finite memory sources is constructed by a predictive modification
	of an earlier studied algorithm-{C}ontext. {I}t is shown that this
	universal source incorporates any minimal data-generating tree machine
	in an asymptotically optimal manner in the following sense: the negative
	logarithm of the probability it assigns to any long typical sequence,
	generated by any tree machine, approaches that assigned by the tree
	machine at the best possible rate },
  pdf = {../local/Weinberger1995universal.pdf},
  file = {Weinberger1995universal.pdf:local/Weinberger1995universal.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Weis2008Structural,
  author      = {William I Weis and Brian K Kobilka},
  title       = {Structural insights into {G}-protein-coupled receptor activation.},
  journal     = {Curr Opin Struct Biol},
  year        = {2008},
  month       = {Dec},
  volume      = {18},
  number      = {6},
  pages       = {734--740},
  doi         = {10.1016/j.sbi.2008.09.010},
  url         = {http://dx.doi.org/10.1016/j.sbi.2008.09.010},
  pmid        = {18957321},
  pii         = {S0959-440X(08)00147-4},
  abstract    = {G-protein-coupled receptors (GPCRs) are the largest family of eukaryotic
	plasma membrane receptors, and are responsible for the majority of
	cellular responses to external signals. GPCRs share a common architecture
	comprising seven transmembrane (TM) helices. Binding of an activating
	ligand enables the receptor to catalyze the exchange of GTP for GDP
	in a heterotrimeric G protein. GPCRs are in a conformational equilibrium
	between inactive and activating states. Crystallographic and spectroscopic
	studies of the visual pigment rhodopsin and two beta-adrenergic receptors
	have defined some of the conformational changes associated with activation.},
  institution = { Cellular Physiology, Stanford University School of Medicine, USA.
	bill.weis@stanford.edu},
  keywords    = {Animals; Crystallography; Humans; Membrane Proteins; Models, Molecular;
	Receptors, Adrenergic, beta; Receptors, G-Protein-Coupled; Rhodopsin},
  owner       = {ljacob},
  timestamp   = {2009.11.09}
}

@book{Weisberg1981Applied,
  title = {Applied linear regression},
  publisher = {Wiley},
  year = {1981},
  author = {Weisberg, S.},
  address = {New York},
  owner = {jp},
  timestamp = {2012.02.12}
}

@article{Wellings1973origin,
  author    = {Wellings, S. R. and Jensen, H. M.},
  title     = {On the origin and progression of ductal carcinoma in the human breast},
  journal   = {J. Natl. Cancer Inst.},
  year      = {1973},
  month     = {May},
  volume    = {50},
  number    = {5},
  pages     = {1111--1118},
  pmid      = {4123242},
  keywords  = {breastcancer},
  owner     = {jp},
  timestamp = {2009.02.04}
}

@article{Weskamp2007Multiple,
  author = {N. Weskamp and E. H{\"u}llermeier and D. Kuhn and G. Klebe},
  title = {Multiple Graph Alignment for the Structural Analysis of Protein Active
	Sites},
  journal = {IEEE/ACM Trans. Comput. Biol. Bioinformatics},
  year = {2007},
  volume = {4},
  pages = {310--320},
  number = {2},
  address = {Los Alamitos, CA, USA},
  doi = {10.1109/TCBB.2007.358301},
  issn = {1545-5963},
  publisher = {IEEE Computer Society Press}
}

@book{Westfall1993Resampling,
  author    = {Westfall, P. H. and Young, S. S.},
  title     = {Resampling-based multiple testing: Examples and methods for p-value
	adjustment.},
  publisher = {John Wiley and Sons},
  year      = {1993},
  owner     = {jp},
  timestamp = {2012.03.07}
}

@article{Weston2003Feature,
  author = {Weston, J. and P{\'e}rez-Cruz, F. and Bousquet, O. and Chapelle,
	O. and Elisseeff, A. and Sch{\"o}lkopf, B.},
  title = {Feature selection and transduction for prediction of molecular bioactivity
	for drug design},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {764--771},
  number = {6},
  abstract = {Motivation: {I}n drug discovery a key task is to identify characteristics
	that separate active (binding) compounds from inactive (non-binding)
	ones. {A}n automated prediction system can help reduce resources
	necessary to carry out this task. {R}esults: {T}wo methods for prediction
	of molecular bioactivity for drug design are introduced and shown
	to perform well in a data set previously studied as part of the {KDD}
	({K}nowledge {D}iscovery and {D}ata {M}ining) {C}up 2001. {T}he data
	is characterized by very few positive examples, a very large number
	of features (describing three-dimensional properties of the molecules)
	and rather different distributions between training and test data.
	{T}wo techniques are introduced specifically to tackle these problems:
	a feature selection method for unbalanced data and a classifier which
	adapts to the distribution of the unlabeled test data (a so-called
	transductive method). {W}e show both techniques improve identification
	performance and in conjunction provide an improvement over using
	only one of the techniques. {O}ur results suggest the importance
	of taking into account the characteristics in this data which may
	also be relevant in other problems of a similar type. {A}vailability:
	{M}atlab source code is available at http://www.kyb.tuebingen.mpg.de/bs/people/weston/kdd/kdd.html
	{C}ontact: jason.weston@tuebingen.mpg.de {S}upplementary information:
	{S}upplementary material is available at http://www.kyb.tuebingen.mpg.de/bs/people/weston/kdd/kdd.html.},
  pdf = {../local/Weston2003Feature.pdf},
  file = {Weston2003Feature.pdf:local/Weston2003Feature.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/6/764}
}

@article{Wheeler2008Complete,
  author = {David A Wheeler and Maithreyan Srinivasan and Michael Egholm and
	Yufeng Shen and Lei Chen and Amy McGuire and Wen He and Yi-Ju Chen
	and Vinod Makhijani and G. Thomas Roth and Xavier Gomes and Karrie
	Tartaro and Faheem Niazi and Cynthia L Turcotte and Gerard P Irzyk
	and James R Lupski and Craig Chinault and Xing-zhi Song and Yue Liu
	and Ye Yuan and Lynne Nazareth and Xiang Qin and Donna M Muzny and
	Marcel Margulies and George M Weinstock and Richard A Gibbs and Jonathan
	M Rothberg},
  title = {The complete genome of an individual by massively parallel {DNA} sequencing.},
  journal = {Nature},
  year = {2008},
  volume = {452},
  pages = {872--876},
  number = {7189},
  month = {Apr},
  abstract = {The association of genetic variation with disease and drug response,
	and improvements in nucleic acid technologies, have given great optimism
	for the impact of 'genomic medicine'. However, the formidable size
	of the diploid human genome, approximately 6 gigabases, has prevented
	the routine application of sequencing methods to deciphering complete
	individual human genomes. To realize the full potential of genomics
	for human health, this limitation must be overcome. Here we report
	the DNA sequence of a diploid genome of a single individual, James
	D. Watson, sequenced to 7.4-fold redundancy in two months using massively
	parallel sequencing in picolitre-size reaction vessels. This sequence
	was completed in two months at approximately one-hundredth of the
	cost of traditional capillary electrophoresis methods. Comparison
	of the sequence to the reference genome led to the identification
	of 3.3 million single nucleotide polymorphisms, of which 10,654 cause
	amino-acid substitution within the coding sequence. In addition,
	we accurately identified small-scale (2-40,000 base pair (bp)) insertion
	and deletion polymorphism as well as copy number variation resulting
	in the large-scale gain and loss of chromosomal segments ranging
	from 26,000 to 1.5 million base pairs. Overall, these results agree
	well with recent results of sequencing of a single individual by
	traditional methods. However, in addition to being faster and significantly
	less expensive, this sequencing technology avoids the arbitrary loss
	of genomic sequences inherent in random shotgun sequencing by bacterial
	cloning because it amplifies DNA in a cell-free system. As a result,
	we further demonstrate the acquisition of novel human sequence, including
	novel genes not previously identified by traditional genomic sequencing.
	This is the first genome sequenced by next-generation technologies.
	Therefore it is a pilot for the future challenges of 'personalized
	genome sequencing'.},
  doi = {10.1038/nature06884},
  institution = {Human Genome Sequencing Center, Baylor College of Medicine, One Baylor
	Plaza, Houston, Texas 77030, USA.},
  keywords = {Alleles; Computational Biology; Genetic Predisposition to Disease,
	genetics; Genetic Variation, genetics; Genome, Human, genetics; Genomics,
	economics/methods/trends; Genotype; Humans; Individuality; Male;
	Oligonucleotide Array Sequence Analysis; Polymorphism, Single Nucleotide,
	genetics; Reproducibility of Results; Sensitivity and Specificity;
	Sequence Alignment; Sequence Analysis, DNA, economics/methods; Software},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nature06884},
  pmid = {18421352},
  timestamp = {2010.07.28},
  url = {http://dx.doi.org/10.1038/nature06884}
}

@article{Wheeler2008Homologene,
  author = {Wheeler, D. L. and Barrett, T. and Benson, D. A. and Bryant, S. H.},
  title = {Database resources of the {National Center for Biotechnology Information}},
  journal = {Nucleic {A}cids {R}es.},
  year = {2006},
  volume = {31},
  pages = {28--33}
}

@article{Whitfield2006Common,
  author = {Whitfield, M. L. and George, L. K. and Grant, G. D. and Perou, C. M.},
  title = {Common markers of proliferation},
  journal = {Nature Reviews Cancer},
  year = {2006},
  volume = {6},
  pages = {99--106},
  number = {2},
  publisher = {Nature Publishing Group}
}

@article{Whitney1932Congruent,
  author  = {H. Whitney},
  title   = {Congruent graphs and the connectivity of graphs},
  journal = {Amer. J. Math.},
  volume  = {54},
  pages   = {150--168},
  year    = {1932}
}

@article{Whitney1930Nonseparable,
  author  = {H. Whitney},
  title   = {Non-Separable and Planar Graphs},
  journal = {Proc. Natl. Acad. Sci.},
  volume  = {93},
  pages   = {415--443},
  year    = {1930}
}

@misc{wiki:tsp,
  author = {{Wikipedia}},
  title  = {{Travelling Salesman Problem --- Wikipedia{,} The Free Encyclopedia}},
  note   = {[Online; accessed 5-May-2009]},
  year   = {2009}
}

@article{Wilbur2000Boosting,
  author = {W. J. Wilbur},
  title = {Boosting naive {B}ayesian learning on a large subset of {MEDLINE}.},
  journal = {Proc {AMIA} {S}ymp},
  year = {2000},
  pages = {918--922},
  abstract = {We are concerned with the rating of new documents that appear in a
	large database ({MEDLINE}) and are candidates for inclusion in a
	small specialty database ({REBASE}). {T}he requirement is to rank
	the new documents as nearly in order of decreasing potential to be
	added to the smaller database as possible, so as to improve the coverage
	of the smaller database without increasing the effort of those who
	manage this specialty database. {T}o perform this ranking task we
	have considered several machine learning approaches based on the
	na{\"i}ve {B}ayesian algorithm. {W}e find that adaptive boosting outperforms
	na{\"i}ve {B}ayes, but that a new form of boosting which we term staged
	{B}ayesian retrieval outperforms adaptive boosting. {S}taged {B}ayesian
	retrieval involves two stages of {B}ayesian retrieval and we further
	find that if the second stage is replaced by a support vector machine
	we again obtain a significant improvement over the strictly {B}ayesian
	approach.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial
	Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding
	Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation,
	Chemistry, Child, Chromosome Aberrations, Classification, Codon,
	Colonic Neoplasms, Comparative Study, Computational Biology, Computer
	Simulation, Computer-Assisted, DNA, Data Interpretation, Databases,
	Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis,
	Discrimination Learning, Electric Conductivity, Electrophysiology,
	Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric
	Emptying, Gene Expression Profiling, Gene Expression Regulation,
	Genes, Genetic, Genetic Markers, Genetic Predisposition to Disease,
	Genomics, Hemolysins, Humans, Indians, Information Storage and Retrieval,
	Initiator, Ion Channels, Kinetics, Leukemia, Likelihood Functions,
	Lipid Bilayers, Logistic Models, Lymphocytic, MEDLINE, Male, Markov
	Chains, Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplasms,
	Neoplastic, Neural Networks (Computer), Neurological, Nevus, Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution, North American,
	Nucleic Acid Conformation, Oligonucleotide Array Sequence Analysis,
	Organ Specificity, Organelles, Ovarian Neoplasms, Ovary, P.H.S.,
	Pattern Recognition, Physical, Pigmented, Predictive Value of Tests,
	Promoter Regions (Genetics), Protein Biosynthesis, Protein Folding,
	Protein Structure, Proteins, Proteome, RNA, Reproducibility of Results,
	Research Support, Saccharomyces cerevisiae, Secondary, Sensitivity
	and Specificity, Sequence Alignment, Sequence Analysis, Sex Characteristics,
	Skin Diseases, Skin Neoplasms, Skin Pigmentation, Software, Sound
	Spectrography, Statistical, Stomach Diseases, T-Lymphocytes, Thermodynamics,
	Transcription, Transcription Factors, Tumor Markers, Type 2, U.S.
	Gov't, Vertebrates},
  pii = {D200250},
  pmid = {11080018}
}

@article{Wilcoxon1945Individual,
  author    = {Wilcoxon, F.},
  title     = {Individual comparisons by ranking methods},
  journal   = {Biometrics Bulletin},
  publisher = {JSTOR},
  year      = {1945},
  volume    = {1},
  number    = {6},
  pages     = {80--83}
}

@article{Wilhelm2004Analysis,
  author      = {Wilhelm, T. and Behre, J. and Schuster, S.},
  title       = {Analysis of structural robustness of metabolic networks.},
  journal     = {Syst Biol},
  year        = {2004},
  month       = {Jun},
  volume      = {1},
  number      = {1},
  pages       = {114--120},
  pmid        = {17052121},
  abstract    = {We study the structural robustness of metabolic networks on the basis
	of the concept of elementary flux modes. It is shown that the number
	of elementary modes itself is not an appropriate measure of structural
	robustness. Instead, we introduce three new robustness measures.
	These are based on the relative number of elementary modes remaining
	after the knockout of enzymes. We discuss the relevance of these
	measures with the help of simple examples, as well as with larger,
	realistic metabolic networks. Thereby we demonstrate quantitatively
	that the metabolism of Escherichia coli, which must be able to adapt
	to varying conditions, is more robust than the metabolism of the
	human erythrocyte, which lives under much more homeostatic conditions.},
  institution = {Institute of Molecular Biotechnology, Theoretical Systems Biology
	Group, Jena, Germany.},
  language    = {eng},
  medline-pst = {ppublish},
  pdf         = {../local/Wilhelm2004Analysis.pdf},
  file        = {Wilhelm2004Analysis.pdf:Wilhelm2004Analysis.pdf:PDF},
  owner       = {jp},
  timestamp   = {2013.01.25}
}

@article{Wilkins1996From,
  author = {M. R. Wilkins and C. Pasquali and R. D. Appel and K. Ou and O. Golaz
	and J. C. Sanchez and J. X. Yan and A. A. Gooley and G. Hughes and
	I. Humphery-Smith and K. L. Williams and D. F. Hochstrasser},
  title = {From proteins to proteomes: large scale protein identification by
	two-dimensional electrophoresis and amino acid analysis.},
  journal = {Biotechnology (N Y)},
  year = {1996},
  volume = {14},
  pages = {61--65},
  number = {1},
  month = {Jan},
  abstract = {Separation and identification of proteins by two-dimensional (2-D)
	electrophoresis can be used for protein-based gene expression analysis.
	In this report single protein spots, from polyvinylidene difluoride
	blots of micropreparative E. coli 2-D gels, were rapidly and economically
	identified by matching their amino acid composition, estimated pI
	and molecular weight against all E. coli entries in the SWISS-PROT
	database. Thirty proteins from an E. coli 2-D map were analyzed and
	identities assigned. Three of the proteins were unknown. By protein
	sequencing analysis, 20 of the 27 proteins were correctly identified.
	Importantly, correct identifications showed unambiguous "correct"
	score patterns. While incorrect protein identifications also showed
	distinctive score patterns, indicating that protein must be identified
	by other means. These techniques allow large-scale screening of the
	protein complement of simple organisms, or tissues in normal and
	disease states. The computer program described here is accessible
	via the World Wide Web at URL address (http://expasy.hcuge.ch/).},
  institution = {Macquarie University Centre for Analytical Biotechnology, Macquarie
	University, Sydney, NSW, Australia.},
  keywords = {Amino Acids; Bacterial Proteins; Blood Proteins; Databases, Factual;
	Electrophoresis, Gel, Two-Dimensional; Escherichia coli; Humans;
	Microchemistry; Molecular Weight; Multienzyme Complexes; Proteins;
	Reproducibility of Results; Software; Time Factors},
  owner = {ljacob},
  pmid = {9636313},
  timestamp = {2009.09.14}
}

@article{Willems1996Coding,
  author = {Willems, F. M. J.},
  title = {Coding for a {B}inary {I}ndependent {P}iecewise-{I}dentically {D}istributed
	{S}ource},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1996},
  volume = {42},
  pages = {2210--2217},
  month = {Nov},
  pdf = {../local/will96b.pdf},
  file = {will96b.pdf:local/will96b.pdf:PDF},
  subject = {it},
  url = {http://ei1.ei.ele.tue.nl/~frans/bipid.ps}
}

@article{Willems1989Universal,
  author = {Willems, F. M. J.},
  title = {Universal data compression and repetition times},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1989},
  volume = {35},
  pages = {54--58},
  number = {1},
  month = {Jan},
  abstract = {A novel universal data compression algorithm is described. {T}his
	algorithm encodes {L} source symbols at a time. {A}n upper limit
	for the number of bits per source symbol is given for the class of
	binary stationary sources. {I}n the author's analysis, a property
	of repetition times turns out to be of crucial importance },
  doi = {10.1109/18.42176},
  pdf = {../local/Willems1989Universal.pdf},
  file = {Willems1989Universal.pdf:local/Willems1989Universal.pdf:PDF},
  owner = {vert},
  url = {http://dx.doi.org/10.1109/18.42176}
}

@article{Willems1996Context,
  author  = {Willems, F. M. J. and Shtarkov, Y. M. and Tjalkens, T. J.},
  title   = {Context {W}eighting for {G}eneral {F}inite {C}ontext {S}ources},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year    = {1996},
  volume  = {42},
  number  = {5},
  pages   = {1514--1520},
  url     = {http://ei1.ei.ele.tue.nl/~frans/gcw.ps},
  subject = {it},
  pdf     = {../local/Willems1996Context.pdf},
  file    = {Willems1996Context.pdf:local/Willems1996Context.pdf:PDF}
}

@article{Willems1995Context,
  author = {Willems, F. M. J. and Shtarkov, Y. M. and Tjalkens, T. J.},
  title = {The {C}ontext {T}ree {W}eighting {M}ethod: {B}asic {P}roperties},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1995},
  volume = {41},
  pages = {653--664},
  number = {3},
  month = {May},
  abstract = {Describes a sequential universal data compression procedure for binary
	tree sources that performs the ``double mixture.'' {U}sing a context
	tree, this method weights in an efficient recursive way the coding
	distributions corresponding to all bounded memory tree sources, and
	achieves a desirable coding distribution for tree sources with an
	unknown model and unknown parameters. {C}omputational and storage
	complexity of the proposed procedure are both linear in the source
	sequence length. {T}he authors derive a natural upper bound on the
	cumulative redundancy of the method for individual sequences. {T}he
	three terms in this bound can be identified as coding, parameter,
	and model redundancy. {T}he bound holds for all source sequence lengths,
	not only for asymptotically large lengths. {T}he analysis that leads
	to this bound is based on standard techniques and turns out to be
	extremely simple. {T}he upper bound on the redundancy shows that
	the proposed context-tree weighting procedure is optimal in the sense
	that it achieves the {R}issanen (1984) lower bound },
  pdf = {../local/Willems1995Context.pdf},
  file = {Willems1995Context.pdf:local/Willems1995Context.pdf:PDF},
  subject = {it},
  url = {http://ei1.ei.ele.tue.nl/~frans/ctw1.ps}
}

@article{Willett2008From,
  author = {Willett, P.},
  title = {From chemical documentation to chemoinformatics: 50 years of chemical
	information science},
  journal = {J. Inf. Sci.},
  year = {2008},
  volume = {34},
  pages = {477--499},
  number = {4},
  address = {Thousand Oaks, CA, USA},
  doi = {10.1177/0165551507084631},
  issn = {0165-5515},
  publisher = {Sage Publications, Inc.}
}

@article{Willett2006Similarity-based,
  author = {Willett, P.},
  title = {Similarity-based virtual screening using {2D} fingerprints.},
  journal = {Drug Discov Today},
  year = {2006},
  volume = {11},
  pages = {1046--1053},
  number = {23-24},
  month = {Dec},
  abstract = {This paper summarizes recent work at the University of Sheffield on
	virtual screening methods that use 2D fingerprint measures of structural
	similarity. A detailed comparison of a large number of similarity
	coefficients demonstrates that the well-known Tanimoto coefficient
	remains the method of choice for the computation of fingerprint-based
	similarity, despite possessing some inherent biases related to the
	sizes of the molecules that are being sought. Group fusion involves
	combining the results of similarity searches based on multiple reference
	structures and a single similarity measure. We demonstrate the effectiveness
	of this approach to screening, and also describe an approximate form
	of group fusion, turbo similarity searching, that can be used when
	just a single reference structure is available.},
  doi = {10.1016/j.drudis.2006.10.005},
  pdf = {../local/Willett2006Similarity-based.pdf},
  file = {Willett2006Similarity-based.pdf:Willett2006Similarity-based.pdf:PDF},
  institution = {Krebs Institute for Biomolecular Research and Department of Information
	Studies, University of Sheffield, 211 Portobello, Sheffield S1 4DP,
	UK. p.willett@sheffield.ac.uk},
  keywords = {PUlearning, chemoinformatics},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S1359-6446(06)00419-3},
  pmid = {17129822},
  timestamp = {2010.04.01},
  url = {http://dx.doi.org/10.1016/j.drudis.2006.10.005}
}

@article{Willett1998Chemical,
  author = {P. Willett},
  title = {Chemical {S}imilarity {S}earching},
  journal = {J Chem Inf Comput Sci},
  year = {1998},
  volume = {38},
  pages = {983--996},
  owner = {mahe},
  timestamp = {2006.09.01}
}

@article{Willett1986Implementation,
  author = {P. Willett and V. Winterman and D. Bawden},
  title = {Implementation of nearest-neighbor searching in an online chemical
	structure search system},
  journal = {J. Chem. Inform. Comput. Sci.},
  year = {1986},
  volume = {26},
  pages = {36--41},
  number = {1},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://pubs.acs.org/cgi-bin/archive.cgi/jcisd8/1986/26/i01/f-pdf/f_ci00049a008.pdf}
}

@incollection{Williams1998Prediction,
  author = {Williams, C. K. I.},
  title = {Prediction with {G}aussian {P}rocesses: {F}rom {L}inear {R}egression
	to {L}inear {P}rediction and {B}eyond},
  booktitle = {Learning and {I}nference in {G}raphical {M}odels},
  publisher = {Kluwer Academic Press},
  year = {1998},
  editor = {Jordan, M. I.},
  owner = {vert}
}

@article{Williams2004Prognostic,
  author = {Williams, R. D. and Hing, S. N. and Greer, B. T. and Whiteford, C. C.
	and Wei, J. S. and Natrajan, R. and Kelsey, A. and Rogers, S. and
	Campbell, C. and Pritchard-Jones, K. and Khan, J.},
  title = {Prognostic classification of relapsing favorable histology {W}ilms
	tumor using c{DNA} microarray expression profiling and support vector
	machines.},
  journal = {Genes {C}hromosomes {C}ancer},
  year = {2004},
  volume = {41},
  pages = {65--79},
  number = {1},
  month = {Sep},
  abstract = {Treatment of {W}ilms tumor has a high success rate, with some 85\%
	of patients achieving long-term survival. {H}owever, late effects
	of treatment and management of relapse remain significant clinical
	problems. {I}f accurate prognostic methods were available, effective
	risk-adapted therapies could be tailored to individual patients at
	diagnosis. {F}ew molecular prognostic markers for {W}ilms tumor are
	currently defined, though previous studies have linked allele loss
	on 1p or 16q, genomic gain of 1q, and overexpression from 1q with
	an increased risk of relapse. {T}o identify specific patterns of
	gene expression that are predictive of relapse, we used high-density
	(30 k) c{DNA} microarrays to analyze {RNA} samples from 27 favorable
	histology {W}ilms tumors taken from primary nephrectomies at the
	time of initial diagnosis. {T}hirteen of these tumors relapsed within
	2 years. {G}enes differentially expressed between the relapsing and
	nonrelapsing tumor classes were identified by statistical scoring
	(t test). {T}hese genes encode proteins with diverse molecular functions,
	including transcription factors, developmental regulators, apoptotic
	factors, and signaling molecules. {U}se of a support vector machine
	classifier, feature selection, and test evaluation using cross-validation
	led to identification of a generalizable expression signature, a
	small subset of genes whose expression potentially can be used to
	predict tumor outcome in new samples. {S}imilar methods were used
	to identify genes that are differentially expressed between tumors
	with and without genomic 1q gain. {T}his set of discriminators was
	highly enriched in genes on 1q, indicating close agreement between
	data obtained from expression profiling with data from genomic copy
	number analyses.},
  doi = {10.1002/gcc.20060},
  pdf = {../local/Williams2004Prognostic.pdf},
  file = {Williams2004Prognostic.pdf:local/Williams2004Prognostic.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/gcc.20060}
}

@inproceedings{Williamson2000Entropy,
  author = {R. C. Williamson and A. J. Smola and B. Sch{\"o}lkopf},
  title = {Entropy {N}umbers of {L}inear {F}unction {C}lasses},
  booktitle = {Proc. 13th {A}nnu. {C}onference on {C}omput. {L}earning {T}heory},
  year = {2000},
  pages = {309--319},
  publisher = {Morgan Kaufmann},
  address = {San Francisco},
  pdf = {../local/Williamson2000Entropy.pdf},
  file = {Williamson2000Entropy.pdf:local/Williamson2000Entropy.pdf:PDF}
}

@article{Wilton2003Comparison,
  author = {D. Wilton and P. Willett and K. Lawson and G. Mullier},
  title = {Comparison of ranking methods for virtual screening in lead-discovery
	programs.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2003},
  volume = {43},
  pages = {469--474},
  number = {2},
  abstract = {This paper discusses the use of several rank-based virtual screening
	methods for prioritizing compounds in lead-discovery programs, given
	a training set for which both structural and bioactivity data are
	available. {S}tructures from the {NCI} {AIDS} data set and from the
	{S}yngenta corporate database were represented by two types of fragment
	bit-string and by sets of high-level molecular features. {T}hese
	representations were processed using binary kernel discrimination,
	similarity searching, substructural analysis, support vector machine,
	and trend vector analysis, with the effectiveness of the methods
	being judged by the extent to which active test set molecules were
	clustered toward the top of the resultant rankings. {T}he binary
	kernel discrimination approach yielded consistently superior rankings
	and would appear to have considerable potential for chemical screening
	applications.},
  doi = {10.1021/ci025586i},
  pdf = {../local/Wilton2003Comparison.pdf},
  file = {Wilton2003Comparison.pdf:local/Wilton2003Comparison.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci025586i}
}

@article{Winters-Hilt2004Nanopore,
  author = {Stephen Winters-Hilt and Mark Akeson},
  title = {Nanopore cheminformatics.},
  journal = {D{NA} {C}ell {B}iol},
  year = {2004},
  volume = {23},
  pages = {675--683},
  number = {10},
  month = {Oct},
  abstract = {A cheminformatics method is described for classification, and biophysical
	examination, of individual molecules. {A} novel molecular detector
	is used--one based on current blockade measurements through a nanometer-scale
	ion channel (alpha-hemolysin). {C}lassification results are described
	for blockades caused by {DNA} molecules in the alpha-hemolysin nanopore
	detector, with signal analysis and pattern recognition performed
	using a combination of methods from bioinformatics and machine learning.
	{D}ue to the size of the alpha-hemolysin protein channel, the blockade
	events report on one {DNA} molecule at a time, which enables a variety
	of reproducible, single-molecule biophysical experiments. {T}o capture
	the full sensitivity of the nanopore detector's blockade signal,
	{H}idden {M}arkov {M}odels ({HMM}s) were used with {E}xpectation/{M}aximization
	for denoising and for associating a feature vector with the ionic
	current blockade of each captured {DNA} molecule. {S}upport {V}ector
	{M}achines ({SVM}s) that employ novel kernel designs were then used
	as discriminators. {W}ith {SVM} training performed off-line, and
	economical {HMM} processing on-line, blockade classification was
	possible during capture. {HMM}s were also used in conjunction with
	a time-domain finite state automaton (off-line) for feature discovery
	and kinetics analysis. {A}nalysis of the {DNA} data indicates a variety
	of binding ({DNA}-protein), fraying, and conformational shifts that
	are consistent with data obtained from thermodynamic analyses (melting
	curves), {X}-ray crystallography, and {NMR} studies. {T}he software
	tools are designed for analysis of generic blockades in ionic channels,
	including those in other biological pore-forming toxins, other biological
	channels in general, and semiconductor-based channels.},
  doi = {10.1089/1044549042476893},
  keywords = {Algorithms, Artificial Intelligence, Ascomycota, Automated, Base Sequence,
	Chromosome Mapping, Codon, Comparative Study, Crystallography, DNA,
	DNA Primers, Hordeum, Host-Parasite Relations, Informatics, Kinetics,
	Magnetic Resonance Spectroscopy, Nanotechnology, Non-U.S. Gov't,
	Pattern Recognition, Plant, Plants, Research Support, Sequence Alignment,
	Sequence Analysis, Thermodynamics, X-Ray},
  pmid = {15585125},
  url = {http://dx.doi.org/10.1089/1044549042476893}
}

@article{Winters-Hilt2003Highly,
  author = {Winters-Hilt, S. and Vercoutere, W. and DeGuzman, V. S. and Deamer,
	D. and Akeson, M. and Haussler, D.},
  title = {Highly accurate classification of {W}atson-{C}rick basepairs on termini
	of single {DNA} molecules.},
  journal = {Biophys. {J}.},
  year = {2003},
  volume = {84},
  pages = {967--976},
  number = {2},
  abstract = {We introduce a computational method for classification of individual
	{DNA} molecules measured by an alpha-hemolysin channel detector.
	{W}e show classification with better than 99\% accuracy for {DNA}
	hairpin molecules that differ only in their terminal {W}atson-{C}rick
	basepairs. {S}ignal classification was done in silico to establish
	performance metrics (i.e., where train and test data were of known
	type, via single-species data files). {I}t was then performed in
	solution to assay real mixtures of {DNA} hairpins. {H}idden {M}arkov
	{M}odels ({HMM}s) were used with {E}xpectation/{M}aximization for
	denoising and for associating a feature vector with the ionic current
	blockade of the {DNA} molecule. {S}upport {V}ector {M}achines ({SVM}s)
	were used as discriminators, and were the focus of off-line training.
	{A} multiclass {SVM} architecture was designed to place less discriminatory
	load on weaker discriminators, and novel {SVM} kernels were used
	to boost discrimination strength. {T}he tuning on {HMM}s and {SVM}s
	enabled biophysical analysis of the captured molecule states and
	state transitions; structure revealed in the biophysical analysis
	was used for better feature selection.},
  pdf = {../local/Winters-Hilt2003Highly.pdf},
  file = {Winters-Hilt2003Highly.pdf:local/Winters-Hilt2003Highly.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.biophysj.org/cgi/content/abstract/84/2/967}
}

@article{Winzeler2006Applied,
  author = {Winzeler, E. A.},
  title = {{A}pplied systems biology and malaria.},
  journal = {Nat. Rev. Microbiol.},
  year = {2006},
  volume = {4},
  pages = {145--151},
  number = {2},
  month = {Feb},
  abstract = {One of the goals of systems-biology research is to discover networks
	and interactions by integrating diverse data sets. So far, systems-biology
	research has focused on model organisms, which are well characterized
	and therefore suited to testing new methods. Systems biology has
	great potential for use in the search for therapies for disease.
	Here, the potential of systems-biology approaches in the search for
	new drugs and vaccines to treat malaria is examined.},
  doi = {10.1038/nrmicro1327},
  pdf = {../local/Winzeler2006Applied.pdf},
  file = {Winzeler2006Applied.pdf:local/Winzeler2006Applied.pdf:PDF},
  keywords = {plasmodium},
  pii = {nrmicro1327},
  pmid = {16362033},
  timestamp = {2006.04.13},
  url = {http://dx.doi.org/10.1038/nrmicro1327}
}

@article{Wirapati2008Meta-analysis,
  author = {Wirapati, P. and Sotiriou, C. and Kunkel, S. and Farmer, P. and Pradervand,
	S. and Haibe-Kains, B. and Desmedt, C. and Ignatiadis, M. and Sengstag,
	T. and Sch{\"u}tz, F. and Goldstein, D. R. and Piccart, M. and Delorenzi,
	M.},
  title = {Meta-analysis of gene expression profiles in breast cancer: toward
	a unified understanding of breast cancer subtyping and prognosis
	signatures.},
  journal = {Breast Cancer Res.},
  year = {2008},
  volume = {10},
  pages = {R65},
  number = {4},
  abstract = {INTRODUCTION: Breast cancer subtyping and prognosis have been studied
	extensively by gene expression profiling, resulting in disparate
	signatures with little overlap in their constituent genes. Although
	a previous study demonstrated a prognostic concordance among gene
	expression signatures, it was limited to only one dataset and did
	not fully elucidate how the different genes were related to one another
	nor did it examine the contribution of well-known biological processes
	of breast cancer tumorigenesis to their prognostic performance. METHOD:
	To address the above issues and to further validate these initial
	findings, we performed the largest meta-analysis of publicly available
	breast cancer gene expression and clinical data, which are comprised
	of 2,833 breast tumors. Gene coexpression modules of three key biological
	processes in breast cancer (namely, proliferation, estrogen receptor
	[ER], and HER2 signaling) were used to dissect the role of constituent
	genes of nine prognostic signatures. RESULTS: Using a meta-analytical
	approach, we consolidated the signatures associated with ER signaling,
	ERBB2 amplification, and proliferation. Previously published expression-based
	nomenclature of breast cancer 'intrinsic' subtypes can be mapped
	to the three modules, namely, the ER-/HER2- (basal-like), the HER2+
	(HER2-like), and the low- and high-proliferation ER+/HER2- subtypes
	(luminal A and B). We showed that all nine prognostic signatures
	exhibited a similar prognostic performance in the entire dataset.
	Their prognostic abilities are due mostly to the detection of proliferation
	activity. Although ER- status (basal-like) and ERBB2+ expression
	status correspond to bad outcome, they seem to act through elevated
	expression of proliferation genes and thus contain only indirect
	information about prognosis. Clinical variables measuring the extent
	of tumor progression, such as tumor size and nodal status, still
	add independent prognostic information to proliferation genes. CONCLUSION:
	This meta-analysis unifies various results of previous gene expression
	studies in breast cancer. It reveals connections between traditional
	prognostic factors, expression-based subtyping, and prognostic signatures,
	highlighting the important role of proliferation in breast cancer
	prognosis.},
  doi = {10.1186/bcr2124},
  pdf = {../local/Wirapati2008Meta-analysis.pdf},
  file = {Wirapati2008Meta-analysis.pdf:Wirapati2008Meta-analysis.pdf:PDF},
  institution = {Swiss Institute of Bioinformatics, 'Batiment Genopode', University
	of Lausanne, 1015 Lausanne, Switzerland. Pratyaksha.Wirapati@isb-sib.ch},
  keywords = {microarray, breastcancer},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {bcr2124},
  pmid = {18662380},
  timestamp = {2010.10.13},
  url = {http://dx.doi.org/10.1186/bcr2124}
}

@article{Witten2011New,
  author = {Witten, D. M. and Friedman, J. H. and Simon, N.},
  title = {New insights and faster computations for the graphical lasso},
  journal = {J. Comput. Graph. Stat.},
  year = {2011},
  volume = {20},
  pages = {892--900},
  number = {4},
  abstract = {We consider the graphical lasso formulation for estimating a Gaussian
	graphical model in the high-dimensional setting. This approach entails
	estimating the inverse covariance matrix under a multivariate normal
	model by maximizing the $\ell_1$-penalized log-likelihood. We present a
	very simple necessary and sufficient condition that can be used to
	identify the connected components in the graphical lasso solution.
	The condition can be employed to determine whether the estimated
	inverse covariance matrix will be block diagonal, and if so, then
	to identify the blocks. This in turn can lead to drastic speed improvements,
	since one can simply apply a standard graphical lasso algorithm to
	each block separately. Moreover, the necessary and sufficient condition
	provides insight into the graphical lasso solution: the set of connected
	nodes at any given tuning parameter value is a superset of the set
	of connected nodes at any larger tuning parameter value. This article
	has supplementary material online.},
  doi = {10.1198/jcgs.2011.11051a},
  pdf = {../local/Witten2011New.pdf},
  file = {Witten2011New.pdf:Witten2011New.pdf:PDF},
  owner = {jp},
  timestamp = {2012.04.14},
  url = {http://dx.doi.org/10.1198/jcgs.2011.11051a}
}

@article{Witten2009Covariance-regularized,
  author = {Witten, D. M. and Tibshirani, R.},
  title = {Covariance-regularized regression and classification for high dimensional
	problems},
  journal = {J. R. Stat. Soc. Ser. B},
  year = {2009},
  volume = {71},
  pages = {615--636},
  number = {3},
  doi = {10.1111/j.1467-9868.2009.00699.x},
  pdf = {../local/Witten2009Covariance-regularized.pdf},
  file = {Witten2009Covariance-regularized.pdf:Witten2009Covariance-regularized.pdf:PDF},
  owner = {jp},
  timestamp = {2009.03.12},
  url = {http://dx.doi.org/10.1111/j.1467-9868.2009.00699.x}
}

@article{Witten2009penalized,
  author = {Witten, Daniela M. and Tibshirani, Robert and Hastie, Trevor},
  title = {A penalized matrix decomposition, with applications to sparse principal
	components and canonical correlation analysis.},
  journal = {Biostatistics},
  year = {2009},
  volume = {10},
  pages = {515--534},
  number = {3},
  month = {Jul},
  abstract = {We present a penalized matrix decomposition (PMD), a new framework
	for computing a rank-$K$ approximation for a matrix. We approximate
	the matrix $X$ as $\hat{X} = \sum_{k=1}^{K} d_k u_k v_k^T$, where
	$d_k$, $u_k$, and $v_k$ minimize the squared Frobenius norm of $X - \hat{X}$,
	subject to penalties on $u_k$ and $v_k$. This results in a regularized
	version of the singular value decomposition. Of particular interest
	is the use of $L_1$-penalties on $u_k$ and $v_k$, which yields a decomposition
	of $X$ using sparse vectors. We show that when the PMD is applied using
	an $L_1$-penalty on $v_k$ but not on $u_k$, a method for sparse principal
	components results. In fact, this yields an efficient algorithm for
	the ``SCoTLASS'' proposal (Jolliffe and others 2003) for obtaining
	sparse principal components. This method is demonstrated on a publicly
	available gene expression data set. We also establish connections
	between the SCoTLASS method for sparse principal component analysis
	and the method of Zou and others (2006). In addition, we show that
	when the PMD is applied to a cross-products matrix, it results in
	a method for penalized canonical correlation analysis (CCA). We apply
	this penalized CCA method to simulated data and to a genomic data
	set consisting of gene expression and DNA copy number measurements
	on the same set of samples.},
  doi = {10.1093/biostatistics/kxp008},
  pdf = {../local/Witten2009penalized.pdf},
  file = {Witten2009penalized.pdf:Witten2009penalized.pdf:PDF},
  institution = {Department of Statistics, Stanford University, Stanford, CA 94305,
	USA. dwitten@stanford.edu},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {kxp008},
  pmid = {19377034},
  timestamp = {2012.02.28},
  url = {http://dx.doi.org/10.1093/biostatistics/kxp008}
}

@article{Wolf2003Learning,
  author = {Wolf, L. and Shashua, A.},
  title = {Learning over {S}ets using {K}ernel {P}rincipal {A}ngles},
  journal = {J. {M}ach. {L}earn. {R}es.},
  year = {2003},
  volume = {4},
  pages = {913--931},
  keywords = {kernel-theory},
  owner = {mahe},
  timestamp = {2006.08.09},
  url = {http://jmlr.csail.mit.edu/papers/v4/wolf03a.html}
}

@article{Wu2003Comparison,
  author = {Wu, B. and Abbott, T. and Fishman, D. and McMurray, W. and Mor, G.
	and Stone, K. and Ward, D. and Williams, K. and Zhao, H.},
  title = {Comparison of statistical methods for classification of ovarian cancer
	using mass spectrometry data},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1636--1643},
  number = {13},
  abstract = {Motivation: {N}ovel methods, both molecular and statistical, are urgently
	needed to take advantage of recent advances in biotechnology and
	the human genome project for disease diagnosis and prognosis. {M}ass
	spectrometry ({MS}) holds great promise for biomarker identification
	and genome-wide protein profiling. {I}t has been demonstrated in
	the literature that biomarkers can be identified to distinguish normal
	individuals from cancer patients using {MS} data. {S}uch progress
	is especially exciting for the detection of early-stage ovarian cancer
	patients. {A}lthough various statistical methods have been utilized
	to identify biomarkers from {MS} data, there has been no systematic
	comparison among these approaches in their relative ability to analyze
	{MS} data. {R}esults: {W}e compare the performance of several classes
	of statistical methods for the classification of cancer based on
	{MS} spectra. {T}hese methods include: linear discriminant analysis,
	quadratic discriminant analysis, k-nearest neighbor classifier, bagging
	and boosting classification trees, support vector machine, and random
	forest ({RF}). {T}he methods are applied to ovarian cancer and control
	serum samples from the {N}ational {O}varian {C}ancer {E}arly {D}etection
	{P}rogram clinic at {N}orthwestern {U}niversity {H}ospital. {W}e
	found that {RF} outperforms other methods in the analysis of {MS}
	data. {S}upplementary information: http://bioinformatics.med.yale.edu/proteomics/{B}io{S}upp1.html},
  pdf = {../local/Wu2003Comparison.pdf},
  file = {Wu2003Comparison.pdf:local/Wu2003Comparison.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/13/1636}
}

@article{Wu2002Natural,
  author = {Wu, Jiann-Ming},
  title = {Natural discriminant analysis using interactive {P}otts models.},
  journal = {Neural {C}omput},
  year = {2002},
  volume = {14},
  pages = {689--713},
  number = {3},
  month = {Mar},
  abstract = {Natural discriminant analysis based on interactive {P}otts models
	is developed in this work. {A} generative model composed of piece-wise
	multivariate gaussian distributions is used to characterize the input
	space, exploring the embedded clustering and mixing structures and
	developing proper internal representations of input parameters. {T}he
	maximization of a log-likelihood function measuring the fitness of
	all input parameters to the generative model, and the minimization
	of a design cost summing up square errors between posterior outputs
	and desired outputs constitutes a mathematical framework for discriminant
	analysis. {W}e apply a hybrid of the mean-field annealing and the
	gradient-descent methods to the optimization of this framework and
	obtain multiple sets of interactive dynamics, which realize coupled
	{P}otts models for discriminant analysis. {T}he new learning process
	is a whole process of component analysis, clustering analysis, and
	labeling analysis. {I}ts major improvement compared to the radial
	basis function and the support vector machine is described by using
	some artificial examples and a real-world application to breast cancer
	diagnosis.},
  doi = {10.1162/089976602317250951},
  url = {http://dx.doi.org/10.1162/089976602317250951}
}

@article{Wu2008Network-based,
  author = {Wu, X. and Jiang, R. and Zhang, M. Q. and Li, S.},
  title = {Network-based global inference of human disease genes.},
  journal = {Mol. Syst. Biol.},
  year = {2008},
  volume = {4},
  pages = {189},
  abstract = {Deciphering the genetic basis of human diseases is an important goal
	of biomedical research. On the basis of the assumption that phenotypically
	similar diseases are caused by functionally related genes, we propose
	a computational framework that integrates human protein-protein interactions,
	disease phenotype similarities, and known gene-phenotype associations
	to capture the complex relationships between phenotypes and genotypes.
	We develop a tool named CIPHER to predict and prioritize disease
	genes, and we show that the global concordance between the human
	protein network and the phenotype network reliably predicts disease
	genes. Our method is applicable to genetically uncharacterized phenotypes,
	effective in the genome-wide scan of disease genes, and also extendable
	to explore gene cooperativity in complex diseases. The predicted
	genetic landscape of over 1000 human phenotypes, which reveals the
	global modular organization of phenotype-genotype relationships.
	The genome-wide prioritization of candidate genes for over 5000 human
	phenotypes, including those with under-characterized disease loci
	or even those lacking known association, is publicly released to
	facilitate future discovery of disease genes.},
  doi = {10.1038/msb.2008.27},
  institution = {MOE Key Laboratory of Bioinformatics and Bioinformatics Division,
	TNLIST/Department of Automation, Tsinghua University, Beijing, China.},
  keywords = {BRCA1 Protein; Bias (Epidemiology); Breast Neoplasms; Disease; Female;
	Gene Regulatory Networks; Genes; Genome, Human; Genotype; Humans;
	Linkage (Genetics); Phenotype; Software},
  owner = {mordelet},
  pii = {msb200827},
  pmid = {18463613},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1038/msb.2008.27}
}

@techreport{Wu2003Model,
  author = {Wu, Z. and Irizarry, R. A. and Gentleman, R. and Murillo, F. M. and
	Spencer, F.},
  title = {A Model Based Background Adjustment for Oligonucleotide Expression
	Arrays},
  institution = {Johns Hopkins University, Department of Biostatistics Working Papers,
	Baltimore, MD},
  year = {2003},
  keywords = {csbcbook, csbcbook-ch2}
}

@article{Wyner1989Some,
  author = {Wyner, A. D. and Ziv, J.},
  title = {Some asymptotic properties of the entropy of a stationary ergodic
	data source with applications to data compression},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1989},
  volume = {35},
  pages = {1250--1258},
  number = {6},
  month = {Nov},
  abstract = {Theorems concerning the entropy of a stationary ergodic information
	source are derived and used to obtain insight into the workings of
	certain data-compression coding schemes, in particular the {L}empel-{Z}iv
	data compression algorithm},
  pdf = {../local/Wyner1989Some.pdf},
  file = {Wyner1989Some.pdf:local/Wyner1989Some.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Xenarios2002DIP,
  author = {Xenarios, I. and Salw{\'\i}nski, L. and Duan, X. J. and Higney, P. and
	Kim, S.-M. and Eisenberg, D.},
  title = {{DIP}, the {Database} of {Interacting} {Proteins}: a research tool for studying
	cellular networks of protein interactions.},
  journal = {Nucleic Acids Res},
  year = {2002},
  volume = {30},
  pages = {303--305},
  number = {1},
  month = {Jan},
  abstract = {The Database of Interacting Proteins (DIP: http://dip.doe-mbi.ucla.edu)
	is a database that documents experimentally determined protein-protein
	interactions. It provides the scientific community with an integrated
	set of tools for browsing and extracting information about protein
	interaction networks. As of September 2001, the DIP catalogs approximately
	11 000 unique interactions among 5900 proteins from >80 organisms;
	the vast majority from yeast, Helicobacter pylori and human. Tools
	have been developed that allow users to analyze, visualize and integrate
	their own experimental data with the information about protein-protein
	interactions available in the DIP database.},
  institution = {UCLA-DOE Laboratory of Structural Biology and Molecular Medicine,
	Molecular Biology Institute, PO Box 951570, UCLA, Los Angeles, CA
	90095-1570, USA.},
  owner = {fantine},
  pmid = {11752321},
  timestamp = {2010.10.21}
}

@article{Xia2004RNAi,
  author = {Xia, H. and Mao, Q. and Eliason, S. L. and Harper, S. Q. and Martins,
	I. H. and Orr, H. T. and Paulson, H. L. and Yang, L. and Kotin, R.
	M. and Davidson, B. L.},
  title = {{RNA}i suppresses polyglutamine-induced neurodegeneration in a model
	of spinocerebellar ataxia.},
  journal = {Nat. Med.},
  year = {2004},
  volume = {10},
  pages = {816--820},
  number = {8},
  month = {Aug},
  abstract = {The dominant polyglutamine expansion diseases, which include spinocerebellar
	ataxia type 1 (SCA1) and Huntington disease, are progressive, untreatable,
	neurodegenerative disorders. In inducible mouse models of SCA1 and
	Huntington disease, repression of mutant allele expression improves
	disease phenotypes. Thus, therapies designed to inhibit expression
	of the mutant gene would be beneficial. Here we evaluate the ability
	of RNA interference (RNAi) to inhibit polyglutamine-induced neurodegeneration
	caused by mutant ataxin-1 in a mouse model of SCA1. Upon intracerebellar
	injection, recombinant adeno-associated virus (AAV) vectors expressing
	short hairpin RNAs profoundly improved motor coordination, restored
	cerebellar morphology and resolved characteristic ataxin-1 inclusions
	in Purkinje cells of SCA1 mice. Our data demonstrate in vivo the
	potential use of RNAi as therapy for dominant neurodegenerative disease.},
  doi = {10.1038/nm1076},
  keywords = {Adenoviridae, Animal, Animals, Blotting, Brain, Cells, Comparative
	Study, Cultured, Disease Models, Gene Expression, Genetic, Glutamine,
	Immunohistochemistry, Messenger, Mice, Nerve Degeneration, Nerve
	Tissue Proteins, Non-U.S. Gov't, Northern, Nuclear Proteins, P.H.S.,
	Plasmids, Psychomotor Performance, Purkinje Cells, RNA, RNA Interference,
	Research Support, Reverse Transcriptase Polymerase Chain Reaction,
	Small Interfering, Spinocerebellar Ataxias, Transduction, Transgenic,
	U.S. Gov't},
  owner = {vert},
  pii = {nm1076},
  pmid = {15286770},
  timestamp = {2006.03.28},
  url = {http://dx.doi.org/10.1038/nm1076}
}

@article{Xia2006IntNetDB,
  author = {Xia, K. and Dong, D. and Han, J.-D. J.},
  title = {{IntNetDB} v1.0: an integrated protein-protein interaction network
	database generated by a probabilistic model.},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {508},
  abstract = {BACKGROUND: Although protein-protein interaction (PPI) networks have
	been explored by various experimental methods, the maps so built
	are still limited in coverage and accuracy. To further expand the
	PPI network and to extract more accurate information from existing
	maps, studies have been carried out to integrate various types of
	functional relationship data. A frequently updated database of computationally
	analyzed potential PPIs to provide biological researchers with rapid
	and easy access to analyze original data as a biological network
	is still lacking. RESULTS: By applying a probabilistic model, we
	integrated 27 heterogeneous genomic, proteomic and functional annotation
	datasets to predict PPI networks in human. In addition to previously
	studied data types, we show that phenotypic distances and genetic
	interactions can also be integrated to predict PPIs. We further built
	an easy-to-use, updatable integrated PPI database, the Integrated
	Network Database (IntNetDB) online, to provide automatic prediction
	and visualization of PPI network among genes of interest. The networks
	can be visualized in SVG (Scalable Vector Graphics) format for zooming
	in or out. IntNetDB also provides a tool to extract topologically
	highly connected network neighborhoods from a specific network for
	further exploration and research. Using the MCODE (Molecular Complex
	Detections) algorithm, 190 such neighborhoods were detected among
	all the predicted interactions. The predicted PPIs can also be mapped
	to worm, fly and mouse interologs. CONCLUSION: IntNetDB includes
	180,010 predicted protein-protein interactions among 9,901 human
	proteins and represents a useful resource for the research community.
	Our study has increased prediction coverage by five-fold. IntNetDB
	also provides easy-to-use network visualization and analysis tools
	that allow biological researchers unfamiliar with computational biology
	to access and analyze data over the internet. The web interface of
	IntNetDB is freely accessible at http://hanlab.genetics.ac.cn/IntNetDB.htm.
	Visualization requires Mozilla version 1.8 (or higher) or Internet
	Explorer with installation of SVGviewer.},
  doi = {10.1186/1471-2105-7-508},
  institution = {Chinese Academy of Sciences Key Laboratory of Developmental Biology,
	Center for Molecular Systems Biology, Institute of Genetics and Developmental
	Biology, Chinese Academy of Sciences, Beijing, China. kxia@genetics.ac.cn},
  owner = {fantine},
  pii = {1471-2105-7-508},
  pmid = {17112386},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1186/1471-2105-7-508}
}

@article{Xia1998Thermodynamic,
  author = {Xia, T. and SantaLucia, J. and Burkard, M. E. and Kierzek, R. and
	Schroeder, S. J. and Jiao, X. and Cox, C. and Turner, D. H.},
  title = {Thermodynamic parameters for an expanded nearest-neighbor model for
	formation of {RNA} duplexes with {W}atson-{C}rick base pairs.},
  journal = {Biochemistry},
  year = {1998},
  volume = {37},
  pages = {14719--14735},
  number = {42},
  month = {Oct},
  abstract = {Improved thermodynamic parameters for prediction of {RNA} duplex formation
	are derived from optical melting studies of 90 oligoribonucleotide
	duplexes containing only {W}atson-{C}rick base pairs. {T}o test end
	or base composition effects, new sets of duplexes are included that
	have identical nearest neighbors, but different base compositions
	and therefore different ends. {D}uplexes with terminal {GC} pairs
	are more stable than duplexes with the same nearest neighbors but
	terminal {AU} pairs. {P}enalizing terminal {AU} base pairs by 0.45
	kcal/mol relative to terminal {GC} base pairs significantly improves
	predictions of $\Delta G^{\circ}_{37}$ from a nearest-neighbor model.
	{A} physical model is suggested in which the differential treatment
	of {AU} and {GC} ends accounts for the dependence of the total number
	of {W}atson-{C}rick hydrogen bonds on the base composition of a duplex.
	{O}n average, the new parameters predict $\Delta G^{\circ}_{37}$, $\Delta H^{\circ}$,
	$\Delta S^{\circ}$, and $T_M$ within 3.2\%, 6.0\%, 6.8\%,
	and 1.3~$^{\circ}$C, respectively. {T}hese predictions are within
	the limit of the model, based on experimental results for duplexes
	predicted to have identical thermodynamic parameters.},
  doi = {10.1021/bi9809425},
  pii = {bi9809425},
  url = {http://dx.doi.org/10.1021/bi9809425}
}

@article{Xia2004one-layer,
  author = {Xia, Youshen and Wang, Jun},
  title = {A one-layer recurrent neural network for support vector machine learning.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2004},
  volume = {34},
  pages = {1261--1269},
  number = {2},
  month = {Apr},
  abstract = {This paper presents a one-layer recurrent neural network for support
	vector machine ({SVM}) learning in pattern classification and regression.
	{T}he {SVM} learning problem is first converted into an equivalent
	formulation, and then a one-layer recurrent neural network for {SVM}
	learning is proposed. {T}he proposed neural network is guaranteed
	to obtain the optimal solution of support vector classification and
	regression. {C}ompared with the existing two-layer neural network
	for the {SVM} classification, the proposed neural network has a low
	complexity for implementation. {M}oreover, the proposed neural network
	can converge exponentially to the optimal solution of {SVM} learning.
	{T}he rate of the exponential convergence can be made arbitrarily
	high by simply turning up a scaling parameter. {S}imulation examples
	based on benchmark problems are discussed to show the good performance
	of the proposed neural network for {SVM} learning.}
}

@article{Xia2011NSMAP,
  author = {Xia, Z. and Wen, J. and Chang, C.-C. and Zhou, X.},
  title = {{NSMAP}: a method for spliced isoforms identification and quantification
	from {RNA-Seq}.},
  journal = {BMC Bioinformatics},
  year = {2011},
  volume = {12},
  pages = {162},
  abstract = {The development of techniques for sequencing the messenger RNA (RNA-Seq)
	enables it to study the biological mechanisms such as alternative
	splicing and gene expression regulation more deeply and accurately.
	Most existing methods employ RNA-Seq to quantify the expression levels
	of already annotated isoforms from the reference genome. However,
	the current reference genome is very incomplete due to the complexity
	of the transcriptome which hinders the comprehensive investigation
	of transcriptome using RNA-Seq. Novel study on isoform inference
	and estimation purely from RNA-Seq without annotation information
	is desirable. A Nonnegativity and Sparsity constrained Maximum APosteriori
	(NSMAP) model has been proposed to estimate the expression levels
	of isoforms from RNA-Seq data without the annotation information.
	In contrast to previous methods, NSMAP performs identification of
	the structures of expressed isoforms and estimation of the expression
	levels of those expressed isoforms simultaneously, which enables
	better identification of isoforms. In the simulations parameterized
	by two real RNA-Seq data sets, more than 77\% expressed isoforms
	are correctly identified and quantified. Then, we apply NSMAP on
	two RNA-Seq data sets of myelodysplastic syndromes (MDS) samples
	and one normal sample in order to identify differentially expressed
	known and novel isoforms in MDS disease. NSMAP provides a good strategy
	to identify and quantify novel isoforms without the knowledge of
	annotated reference genome which can further realize the potential
	of RNA-Seq technique in transcriptome analysis. NSMAP package is
	freely available at https://sites.google.com/site/nsmapforrnaseq.},
  doi = {10.1186/1471-2105-12-162},
  pdf = {../local/Xia2011NSMAP.pdf},
  file = {Xia2011NSMAP.pdf:Xia2011NSMAP.pdf:PDF},
  institution = {Department of Radiology, The Methodist Hospital Research Institute,
	Houston, TX 77030, USA.},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-12-162},
  pmid = {21575225},
  timestamp = {2012.03.06},
  url = {http://dx.doi.org/10.1186/1471-2105-12-162}
}

@article{Xie2009CNV-seq,
  author = {Xie, Chao and Tammi, Martti T.},
  title = {{CNV-seq}, a new method to detect copy number variation using high-throughput
	sequencing.},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {80},
  abstract = {BACKGROUND: DNA copy number variation (CNV) has been recognized as
	an important source of genetic variation. Array comparative genomic
	hybridization (aCGH) is commonly used for CNV detection, but the
	microarray platform has a number of inherent limitations. RESULTS:
	Here, we describe a method to detect copy number variation using
	shotgun sequencing, CNV-seq. The method is based on a robust statistical
	model that describes the complete analysis procedure and allows the
	computation of essential confidence values for detection of CNV.
	Our results show that the number of reads, not the length of the
	reads is the key factor determining the resolution of detection.
	This favors the next-generation sequencing methods that rapidly produce
	large amount of short reads. CONCLUSION: Simulation of various sequencing
	methods with coverage between 0.1x to 8x show overall specificity
	between 91.7 - 99.9\%, and sensitivity between 72.2 - 96.5\%. We
	also show the results for assessment of CNV between two individual
	human genomes.},
  doi = {10.1186/1471-2105-10-80},
  pdf = {../local/Xie2009CNV-seq.pdf},
  file = {Xie2009CNV-seq.pdf:Xie2009CNV-seq.pdf:PDF},
  institution = {Department of Biological Sciences, National University of Singapore,
	Singapore. xie@nus.edu.sg},
  keywords = {ngs},
  owner = {jp},
  pii = {1471-2105-10-80},
  pmid = {19267900},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1186/1471-2105-10-80}
}

@article{Xie2005LOCSVMPSI,
  author = {Dan Xie and Ao Li and Minghui Wang and Zhewen Fan and Huanqing Feng},
  title = {L{OCSVMPSI}: a web server for subcellular localization of eukaryotic
	proteins using {SVM} and profile of {PSI}-{BLAST}.},
  journal = {Nucleic {A}cids {R}es.},
  year = {2005},
  volume = {33},
  pages = {W105--W110},
  number = {Web Server issue},
  month = {Jul},
  abstract = {Subcellular location of a protein is one of the key functional characters
	as proteins must be localized correctly at the subcellular level
	to have normal biological function. {I}n this paper, a novel method
	named {LOCSVMPSI} has been introduced, which is based on the support
	vector machine ({SVM}) and the position-specific scoring matrix generated
	from profiles of {PSI}-{BLAST}. {W}ith a jackknife test on the {RH}2427
	data set, {LOCSVMPSI} achieved a high overall prediction accuracy
	of 90.2\%, which is higher than the prediction results by {S}ub{L}oc
	and {ESL}pred on this data set. {I}n addition, prediction performance
	of {LOCSVMPSI} was evaluated with 5-fold cross validation test on
	the {PK}7579 data set and the prediction results were consistently
	better than the previous method based on several {SVM}s using composition
	of both amino acids and amino acid pairs. {F}urther test on the {SWISSPROT}
	new-unique data set showed that {LOCSVMPSI} also performed better
	than some widely used prediction methods, such as {PSORTII}, {T}arget{P}
	and {LOC}net. {A}ll these results indicate that {LOCSVMPSI} is a
	powerful tool for the prediction of eukaryotic protein subcellular
	localization. {A}n online web server (current version is 1.3) based
	on this method has been developed and is freely available to both
	academic and commercial users, which can be accessed by at http://{B}ioinformatics.ustc.edu.cn/{LOCSVMPSI}/{LOCSVMPSI}.php.},
  doi = {10.1093/nar/gki359},
  pdf = {../local/Xie2005LOCSVMPSI.pdf},
  file = {Xie2005LOCSVMPSI.pdf:local/Xie2005LOCSVMPSI.pdf:PDF},
  keywords = {biosvm},
  pii = {33/suppl_2/W105},
  url = {http://dx.doi.org/10.1093/nar/gki359}
}

@article{Xie2009Unified,
  author = {Xie, Lei and Xie, Li and Bourne, Philip E},
  title = {A unified statistical model to support local sequence order independent
	similarity searching for ligand-binding sites and its application
	to genome-based drug discovery.},
  journal = {Bioinformatics},
  year = {2009},
  month = {Jun},
  volume = {25},
  number = {12},
  pages = {i305--i312},
  doi = {10.1093/bioinformatics/btp220},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp220},
  pmid = {19478004},
  pii = {btp220},
  abstract = {Functional relationships between proteins that do not share global
	structure similarity can be established by detecting their ligand-binding-site
	similarity. For a large-scale comparison, it is critical to accurately
	and efficiently assess the statistical significance of this similarity.
	Here, we report an efficient statistical model that supports local
	sequence order independent ligand-binding-site similarity searching.
	Most existing statistical models only take into account the matching
	vertices between two sites that are defined by a fixed number of
	points. In reality, the boundary of the binding site is not known
	or is dependent on the bound ligand making these approaches limited.
	To address these shortcomings and to perform binding-site mapping
	on a genome-wide scale, we developed a sequence-order independent
	profile-profile alignment (SOIPPA) algorithm that is able to detect
	local similarity between unknown binding sites a priori. The SOIPPA
	scoring integrates geometric, evolutionary and physical information
	into a unified framework. However, this imposes a significant challenge
	in assessing the statistical significance of the similarity because
	the conventional probability model that is based on fixed-point matching
	cannot be applied. Here we find that scores for binding-site matching
	by SOIPPA follow an extreme value distribution (EVD). Benchmark studies
	show that the EVD model performs at least two-orders faster and is
	more accurate than the non-parametric statistical method in the previous
	SOIPPA version. Efficient statistical analysis makes it possible
	to apply SOIPPA to genome-based drug discovery. Consequently, we
	have applied the approach to the structural genome of Mycobacterium
	tuberculosis to construct a protein-ligand interaction network. The
	network reveals highly connected proteins, which represent suitable
	targets for promiscuous drugs.},
  institution = {San Diego Supercomputer Center, University of California, San Diego,
	La Jolla, CA 92093, USA. lxie@sdsc.edu},
  keywords = {Binding Sites; Computational Biology, methods; Drug Discovery, methods;
	Genome; Ligands; Models, Statistical; Mycobacterium tuberculosis,
	genetics/metabolism; Proteins, chemistry},
  language = {eng},
  medline-pst = {ppublish},
  owner = {bricehoffmann},
  timestamp = {2009.07.27}
}

@article{Xie2000Asymptotic,
  author = {Xie, Q. and Barron, A. R.},
  title = {Asymptotic minimax regret for data compression, gambling, and prediction},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {2000},
  volume = {46},
  pages = {431--445},
  number = {2},
  month = {Mar},
  abstract = {For problems of data compression, gambling, and prediction of individual
	sequences $x_1, \ldots, x_n$ the following questions arise. {G}iven a
	target family of probability mass functions $p(x_1, \ldots, x_n|\theta)$,
	how do we choose a probability mass function $q(x_1, \ldots, x_n)$ so
	that it approximately minimizes the maximum regret
	$\max (\log 1/q(x_1, \ldots, x_n) - \log 1/p(x_1, \ldots, x_n|\hat{\theta}))$ and
	so that it achieves the best constant $C$ in the asymptotics of the
	minimax regret, which is of the form $(d/2)\log(n/2\pi)+C+o(1)$, where
	$d$ is the parameter dimension? {A}re there easily implementable strategies
	$q$ that achieve those asymptotics? {A}nd how does the solution to
	the worst case sequence problem relate to the solution to the corresponding
	expectation version $\min_q \max_\theta E_\theta(\log 1/q(x_1, \ldots, x_n) - \log 1/p(x_1,
	\ldots, x_n|\theta))$? {I}n the discrete memoryless case, with a
	given alphabet of size $m$, the {B}ayes procedure with the {D}irichlet$(1/2,
	\ldots, 1/2)$ prior is asymptotically maximin. {S}imple modifications
	of it are shown to be asymptotically minimax. {T}he best constant
	is $C_m=\log(\Gamma(1/2)^m/\Gamma(m/2))$ which agrees with the logarithm of the
	integral of the square root of the determinant of the {F}isher information.
	{M}oreover, our asymptotically optimal strategies for the worst case
	problem are also asymptotically optimal for the expectation version.
	{A}nalogous conclusions are given for the case of prediction, gambling,
	and compression when, for each observation, one has access to side
	information from an alphabet of size $k$. {I}n this setting the minimax
	regret is shown to be $(k(m-1)/2)\log(n/(2\pi k))+kC_m+o(1)$.},
  pdf = {../local/Xie2000Asymptotic.pdf},
  file = {Xie2000Asymptotic.pdf:local/Xie2000Asymptotic.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Xie1997Minimax,
  author = {Xie, Q. and Barron, A. R.},
  title = {Minimax redundancy for the class of memoryless sources},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1997},
  volume = {43},
  pages = {646--657},
  number = {2},
  month = {Mar},
  abstract = {Let $X^n=(X_1,\ldots,X_n)$ be a memoryless source with unknown distribution
	on a finite alphabet of size $k$. {W}e identify the asymptotic minimax
	coding redundancy for this class of sources, and provide a sequence
	of asymptotically minimax codes. {E}quivalently, we determine the
	limiting behavior of the minimax relative entropy $\min_{Q_{X^n}} \max_{P_{X^n}}
	D(P_{X^n}\|Q_{X^n})$, where the maximum is over all independent and
	identically distributed (i.i.d.) source distributions and the minimum
	is over all joint distributions. {W}e show in this paper that the
	minimax redundancy minus $((k-1)/2) \log(n/(2\pi e))$ converges to
	$\log\int\sqrt{\det I(\theta)}\,d\theta=\log(\Gamma(1/2)^k/\Gamma(k/2))$, where $I(\theta)$
	is the {F}isher information and the integral is over the whole probability
	simplex. {T}he {B}ayes strategy using {J}effreys' prior is shown
	to be asymptotically maximin but not asymptotically minimax in our
	setting. {T}he boundary risk using {J}effreys' prior is higher than
	that of interior points. {W}e provide a sequence of modifications
	of {J}effreys' prior that put some prior mass near the boundaries
	of the probability simplex to pull down that risk to the asymptotic
	minimax level in the limit.},
  pdf = {../local/Xie1997Minimax.pdf},
  file = {Xie1997Minimax.pdf:local/Xie1997Minimax.pdf:PDF},
  keywords = {information-theory},
  owner = {vert}
}

@article{Xing2004MotifPrototyper,
  author = {Xing, E. P. and Karp, R. M.},
  title = {{MotifPrototyper}: A {B}ayesian profile model for motif families},
  journal = {PNAS},
  year = {2004},
  volume = {101},
  pages = {10523--10528},
  number = {29}
}

@inproceedings{Xing2003Distance,
  author = {E. P. Xing and A. Y. Ng and M. I. Jordan and S. Russell},
  title = {Distance Metric Learning with Application to Clustering with Side-Information},
  booktitle = {Adv. Neural. Inform. Process Syst.},
  year = {2003},
  editor = {S. Becker and S. Thrun and K. Obermayer},
  volume = {15},
  pages = {505--512},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  owner = {vert},
  timestamp = {2006.06.09}
}

@article{Xing2004LOGOS,
  author = {E. P. Xing and W. Wu and M. I. Jordan and R. M. Karp},
  title = {L{OGOS}: {A} modular {B}ayesian model for de novo motif detection},
  journal = {J. {B}ioinform. {C}omput. {B}iol.},
  year = {2004},
  volume = {2},
  pages = {127--154},
  doi = {10.1142/S0219720004000508},
  url = {http://dx.doi.org/10.1142/S0219720004000508},
  abstract = {The complexity of the global organization and internal structure of
	motifs in higher eukaryotic organisms raises significant challenges
	for motif detection techniques. {T}o achieve successful de novo motif
	detection, it is necessary to model the complex dependencies within
	and among motifs and to incorporate biological prior knowledge. {I}n
	this paper, we present {LOGOS}, an integrated {LO}cal and {G}l{O}bal
	motif {S}equence model for biopolymer sequences, which provides a
	principled framework for developing, modularizing, extending and
	computing expressive motif models for complex biopolymer sequence
	analysis. {LOGOS} consists of two interacting submodels: {HMDM},
	a local alignment model capturing biological prior knowledge and
	positional dependency within the motif local structure; and {HMM},
	a global motif distribution model modeling frequencies and dependencies
	of motif occurrences. {M}odel parameters can be fit using training
	motifs within an empirical {B}ayesian framework. {A} variational
	{EM} algorithm is developed for de novo motif detection. {LOGOS}
	improves over existing models that ignore biological priors and dependencies
	in motif structures and motif occurrences, and demonstrates superior
	performance on both semi-realistic test data and cis-regulatory sequences
	from yeast and {D}rosophila genomes with regard to sensitivity, specificity,
	flexibility and extensibility.},
  pdf = {../local/Xing2004LOGOS.pdf},
  file = {Xing2004LOGOS.pdf:Xing2004LOGOS.pdf:PDF},
  keywords = {biogm},
  owner = {vert},
  timestamp = {2006.01.18}
}

@article{Xiong2001Biomarker,
  author = {Xiong, M. and Fang, X. and Zhao, J.},
  title = {Biomarker {I}dentification by {F}eature {W}rappers},
  journal = {Genome {R}es.},
  year = {2001},
  volume = {11},
  pages = {1878--1887},
  number = {11},
  abstract = {Gene expression studies bridge the gap between {DNA} information and
	trait information by dissecting biochemical pathways into intermediate
	components between genotype and phenotype. {T}hese studies open new
	avenues for identifying complex disease genes and biomarkers for
	disease diagnosis and for assessing drug efficacy and toxicity. {H}owever,
	the majority of analytical methods applied to gene expression data
	are not efficient for biomarker identification and disease diagnosis.
	{I}n this paper, we propose a general framework to incorporate feature
	(gene) selection into pattern recognition in the process to identify
	biomarkers. {U}sing this framework, we develop three feature wrappers
	that search through the space of feature subsets using the classification
	error as measure of goodness for a particular feature subset being
	"wrapped around": linear discriminant analysis, logistic regression,
	and support vector machines. {T}o effectively carry out this computationally
	intensive search process, we employ sequential forward search and
	sequential forward floating search algorithms. {T}o evaluate the
	performance of feature selection for biomarker identification we
	have applied the proposed methods to three data sets. {T}he preliminary
	results demonstrate that very high classification accuracy can be
	attained by identified composite classifiers with several biomarkers.},
  pdf = {../local/Xiong2001Biomarker.pdf},
  file = {Xiong2001Biomarker.pdf:local/Xiong2001Biomarker.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.genome.org/cgi/content/abstract/11/11/1878}
}

@article{Xu2002Chemoinformatics,
  author = {J. Xu and A. Hagler},
  title = {Chemoinformatics and {D}rug {Discovery}},
  journal = {Molecules},
  year = {2002},
  volume = {7},
  pages = {566--600},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.08.12},
  url = {http://www.mdpi.org/molecules/papers/70800566.pdf}
}

@inproceedings{Xu94pca,
  author = {L. Xu and I. King},
  title = {A {PCA} Approach for Fast Retrieval of Structural Patterns in Attributed
	Graphs},
  booktitle = {Humboldt University Berlin},
  year = {1994}
}

@article{Xu2004Molecular,
  author = {Xiu-Qin Xu and Chon K Leow and Xin Lu and Xuegong Zhang and Jun S
	Liu and Wing-Hung Wong and Arndt Asperger and S{\"o}ren Deininger and
	Hon-Chiu Eastwood Leung},
  title = {Molecular classification of liver cirrhosis in a rat model by proteomics
	and bioinformatics.},
  journal = {Proteomics},
  year = {2004},
  volume = {4},
  pages = {3235--3245},
  number = {10},
  month = {Oct},
  abstract = {Liver cirrhosis is a worldwide health problem. {R}eliable, noninvasive
	methods for early detection of liver cirrhosis are not available.
	{U}sing a three-step approach, we classified sera from rats with
	liver cirrhosis following different treatment insults. {T}he approach
	consisted of: (i) protein profiling using surface-enhanced laser
	desorption/ionization ({SELDI}) technology; (ii) selection of a statistically
	significant serum biomarker set using machine learning algorithms;
	and (iii) identification of selected serum biomarkers by peptide
	sequencing. {W}e generated serum protein profiles from three groups
	of rats: (i) normal (n=8), (ii) thioacetamide-induced liver cirrhosis
	(n=22), and (iii) bile duct ligation-induced liver fibrosis (n=5)
	using a weak cation exchanger surface. {P}rofiling data were further
	analyzed by a recursive support vector machine algorithm to select
	a panel of statistically significant biomarkers for class prediction.
	{S}ensitivity and specificity of classification using the selected
	protein marker set were higher than 92\%. {A} consistently down-regulated
	3495 {D}a protein in cirrhosis samples was one of the selected significant
	biomarkers. {T}his 3495 {D}a protein was purified on-chip and trypsin
	digested. {F}urther structural characterization of this biomarkers
	candidate was done by using cross-platform matrix-assisted laser
	desorption/ionization mass spectrometry ({MALDI}-{MS}) peptide mass
	fingerprinting ({PMF}) and matrix-assisted laser desorption/ionization
	time of flight/time of flight ({MALDI}-{TOF}/{TOF}) tandem mass spectrometry
	({MS}/{MS}). {C}ombined data from {PMF} and {MS}/{MS} spectra of
	two tryptic peptides suggested that this 3495 {D}a protein shared
	homology to a histidine-rich glycoprotein. {T}hese results demonstrated
	a novel approach to discovery of new biomarkers for early detection
	of liver cirrhosis and classification of liver diseases.},
  doi = {10.1002/pmic.200400839},
  pdf = {../local/Xu2004Molecular.pdf},
  file = {Xu2004Molecular.pdf:Xu2004Molecular.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1002/pmic.200400839}
}

@article{Xue2004Support,
  author = {C. X. Xue and R. S. Zhang and H. X. Liu and M. C. Liu and Z. D. Hu
	and B. T. Fan},
  title = {Support vector machines-based quantitative structure-property relationship
	for the prediction of heat capacity.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {1267--1274},
  number = {4},
  abstract = {The support vector machine ({SVM}), as a novel type of learning machine,
	for the first time, was used to develop a {Q}uantitative {S}tructure-{P}roperty
	{R}elationship ({QSPR}) model of the heat capacity of a diverse set
	of 182 compounds based on the molecular descriptors calculated from
	the structure alone. {M}ultiple linear regression ({MLR}) and radial
	basis function networks ({RBFNN}s) were also utilized to construct
	quantitative linear and nonlinear models to compare with the results
	obtained by {SVM}. {T}he root-mean-square (rms) errors in heat capacity
	predictions for the whole data set given by {MLR}, {RBFNN}s, and
	{SVM} were 4.648, 4.337, and 2.931 heat capacity units, respectively.
	{T}he prediction results are in good agreement with the experimental
	value of heat capacity; also, the results reveal the superiority
	of the {SVM} over {MLR} and {RBFNN}s models.},
  doi = {10.1021/ci049934n},
  pdf = {../local/Xue2004Support.pdf},
  file = {Xue2004Support.pdf:local/Xue2004Support.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci049934n}
}

@article{Xue2004accurate,
  author = {C. X. Xue and R. S. Zhang and H. X. Liu and X. J. Yao and M. C. Liu
	and Z. D. Hu and B. T. Fan},
  title = {An accurate {QSPR} study of {O}-{H} bond dissociation energy in substituted
	phenols based on support vector machines.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {669--677},
  number = {2},
  abstract = {The support vector machine ({SVM}), as a novel type of learning machine,
	was used to develop a {Q}uantitative {S}tructure-{P}roperty {R}elationship
	({QSPR}) model of the {O}-{H} bond dissociation energy ({BDE}) of
	78 substituted phenols. {T}he six descriptors calculated solely from
	the molecular structures of compounds selected by forward stepwise
	regression were used as inputs for the {SVM} model. {T}he root-mean-square
	(rms) errors in {BDE} predictions for the training, test, and overall
	data sets were 3.808, 3.320, and 3.713 {BDE} units (k{J} mol(-1)),
	respectively. {T}he results obtained by {G}aussian-kernel {SVM} were
	much better than those obtained by multiple linear regression, radial
	basis function neural networks, linear-kernel {SVM}, and other {QSPR}
	approaches.},
  doi = {10.1021/ci034248u},
  pdf = {../local/Xue2004accurate.pdf},
  file = {Xue2004accurate.pdf:local/Xue2004accurate.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci034248u}
}

@article{Xue2004QSAR,
  author = {C. X. Xue and R. S. Zhang and H. X. Liu and X. J. Yao and M. C. Liu
	and Z. D. Hu and B. T. Fan},
  title = {Q{SAR} models for the prediction of binding affinities to human serum
	albumin using the heuristic method and a support vector machine.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {1693--1700},
  number = {5},
  abstract = {The binding affinities to human serum albumin for 94 diverse drugs
	and drug-like compounds were modeled with the descriptors calculated
	from the molecular structure alone using a quantitative structure-activity
	relationship ({QSAR}) technique. {T}he heuristic method ({HM}) and
	support vector machine ({SVM}) were utilized to construct the linear
	and nonlinear prediction models, leading to a good correlation coefficient
	({R}2) of 0.86 and 0.94 and root-mean-square errors (rms) of 0.212
	and 0.134 albumin drug binding affinity units, respectively. {F}urthermore,
	the models were evaluated by a 10 compound external test set, yielding
	{R}2 of 0.71 and 0.89 and rms error of 0.430 and 0.222. {T}he specific
	information described by the heuristic linear model could give some
	insights into the factors that are likely to govern the binding affinity
	of the compounds and be used as an aid to the drug design process;
	however, the prediction results of the nonlinear {SVM} model seem
	to be better than that of the {HM}.},
  doi = {10.1021/ci049820b},
  pdf = {../local/Xue2004QSAR.pdf},
  file = {Xue2004QSAR.pdf:local/Xue2004QSAR.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci049820b}
}

@article{Xue2004Study,
  author = {C. X. Xue and R. S. Zhang and M. C. Liu and Z. D. Hu and B. T. Fan},
  title = {Study of the quantitative structure-mobility relationship of carboxylic
	acids in capillary electrophoresis based on support vector machines.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {950--957},
  number = {3},
  abstract = {The support vector machines ({SVM}), as a novel type of learning machine,
	were used to develop a quantitative structure-mobility relationship
	({QSMR}) model of 58 aliphatic and aromatic carboxylic acids based
	on molecular descriptors calculated from the structure alone. {M}ultiple
	linear regression ({MLR}) and radial basis function neural networks
	({RBFNN}s) were also utilized to construct the linear and the nonlinear
	model to compare with the results obtained by {SVM}. {T}he root-mean-square
	errors in absolute mobility predictions for the whole data set given
	by {MLR}, {RBFNN}s, and {SVM} were 1.530, 1.373, and 0.888 mobility
	units (10(-5) cm(2) {S}(-1) {V}(-1)), respectively, which indicated
	that the prediction result agrees well with the experimental values
	of these compounds and also revealed the superiority of {SVM} over
	{MLR} and {RBFNN}s models for the prediction of the absolute mobility
	of carboxylic acids. {M}oreover, the models we proposed could also
	provide some insight into what structural features are related to
	the absolute mobility of aliphatic and aromatic carboxylic acids.},
  doi = {10.1021/ci034280o},
  pdf = {../local/Xue2004Study.pdf},
  file = {Xue2004Study.pdf:local/Xue2004Study.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci034280o}
}

@article{Xue2000Molecular,
  author = {Xue, L. and Bajorath, J.},
  title = {Molecular descriptors in chemoinformatics, computational combinatorial
	chemistry, and virtual screening.},
  journal = {Comb. {C}hem. {H}igh. {T}hroughput {S}creen.},
  year = {2000},
  month = {Oct},
  volume = {3},
  number = {5},
  pages = {363--372},
  pmid = {11032954},
  abstract = {Many contemporary applications in computer-aided drug discovery and
	chemoinformatics depend on representations of molecules by descriptors
	that capture their structural characteristics and properties. {S}uch
	applications include, among others, diversity analysis, library design,
	and virtual screening. {H}undreds of molecular descriptors have been
	reported in the literature, ranging from simple bulk properties to
	elaborate three-dimensional formulations and complex molecular fingerprints,
	which sometimes consist of thousands of bit positions. {K}nowledge-based
	selection of descriptors that are suitable for specific applications
	is an important task in chemoinformatics research. {I}f descriptors
	are to be selected on rational grounds, rather than guesses or chemical
	intuition, detailed evaluation of their performance is required.
	{A} number of studies have been reported that investigate the performance
	of molecular descriptors in specific applications and/or introduce
	novel types of descriptors. {P}rogress made in this area is reviewed
	here in the context of other computational developments in combinatorial
	chemistry and compound screening.},
  keywords = {chemoinformatics},
  owner = {mahe},
  timestamp = {2006.02.03}
}

@article{Xue2001Fingerprint,
  author = {Xue, L. and Stahura, F. L. and Godden, J. W. and Bajorath, J.},
  title = {{F}ingerprint scaling increases the probability of identifying molecules
	with similar activity in virtual screening calculations.},
  journal = {J Chem Inf Comput Sci},
  year = {2001},
  volume = {41},
  number = {3},
  pages = {746--753},
  pmid = {11410055},
  pii = {ci000311t},
  abstract = {Results of systematic virtual screening calculations using a structural
	key-type fingerprint are reported for compounds belonging to 14 activity
	classes added to randomly selected synthetic molecules. For each
	class, a fingerprint profile was calculated to monitor the relative
	occupancy of fingerprint bit positions. Consensus bit patterns were
	determined consisting of all bits that were always set on in compounds
	belonging to a specific activity class. In virtual screening calculations,
	scale factors were applied to each consensus bit position in fingerprints
	of query molecules. This technique, called "fingerprint scaling",
	effectively increases the weight of consensus bit positions in fingerprint
	comparisons. Although overall prediction accuracy was satisfactory
	using unscaled calculations, scaling significantly increased the
	number of correct predictions but only slightly increased the rate
	of false positives. These observations suggest that fingerprint scaling
	is an attractive approach to increase the probability of identifying
	molecules with similar activity by virtual screening. It requires
	the availability of a series of related compounds and can be easily
	applied to any keyed fingerprint representation that associates bit
	positions with specific molecular features.},
  keywords = {16S, Algae, Algorithms, Animals, Archaeal, Automation, Bacteria, Biodiversity,
	Chemical, Colorimetry, Computational Biology, Computer Terminals,
	DNA, DNA Fingerprinting, Daphnia, Databases, Ecosystem, Euryarchaeota,
	Factual, Fresh Water, Hazardous Substances, Humans, Information Storage
	and Retrieval, Methane, Models, Non-U.S. Gov't, Oxidoreductases,
	Perciformes, Photic Stimulation, Photometry, Polymorphism, Quantitative
	Structure-Activity Relationship, RNA, Research Support, Restriction
	Fragment Length, Ribosomal, Seasons, Soil Microbiology, Spain, Sulfur,
	Theoretical, Time Factors, Toxicity Tests, Water Microbiology, Water
	Pollutants, 11410055},
  owner = {mahe},
  timestamp = {2006.09.03}
}

@article{Xue2001Mini-fingerprints,
  author = {Xue, L. and Stahura, F. L. and Godden, J. W. and Bajorath, J.},
  title = {{M}ini-fingerprints detect similar activity of receptor ligands previously
	recognized only by three-dimensional pharmacophore-based methods.},
  journal = {J Chem Inf Comput Sci},
  year = {2001},
  volume = {41},
  number = {2},
  pages = {394--401},
  pmid = {11277728},
  pii = {ci000305x},
  abstract = {Mini-fingerprints (MFPs) are short binary bit string representations
	of molecular structure and properties, composed of few selected two-dimensional
	(2D) descriptors and a number of structural keys. MFPs were specifically
	designed to recognize compounds with similar activity. Here we report
	that MFPs are capable of detecting similar activities of some druglike
	molecules, including endothelin A antagonists and alpha(1)-adrenergic
	receptor ligands, the recognition of which was previously thought
	to depend on the use of multiple point three-dimensional (3D) pharmacophore
	methods. Thus, in these cases, MFPs and pharmacophore fingerprints
	produce similar results, although they define, in terms of their
	complexity, opposite ends of the spectrum of methods currently used
	to study molecular similarity or diversity. For each of the studied
	compound classes, comparison of MFP bit settings identified a consensus
	or signature pattern. Scaling factors can be applied to these bits
	in order to increase the probability of finding compounds with similar
	activity by virtual screening.},
  keywords = {Adrenergic, Angiotensin II, Cell Surface, Combinatorial Chemistry
	Techniques, Databases, Drug Evaluation, Endothelins, Environmental
	Pollutants, Factual, Information Management, Ligands, Molecular Structure,
	Pharmaceutical Preparations, Platelet Glycoprotein GPIIb-IIIa Complex,
	Preclinical, Receptors, Serine Proteinase Inhibitors, Structure-Activity
	Relationship, User-Computer Interface, alpha-1, 11277728},
  owner = {mahe},
  timestamp = {2006.08.22}
}

@inproceedings{Xue2007The,
  author = {Xue, Ya and Dunson, David and Carin, Lawrence},
  title = {The matrix stick-breaking process for flexible multi-task learning},
  booktitle = {ICML '07: Proceedings of the 24th international conference on Machine
	learning},
  year = {2007},
  pages = {1063--1070},
  address = {New York, NY, USA},
  month = {June},
  publisher = {ACM},
  doi = {10.1145/1273496.1273630},
  isbn = {978-1-59593-793-3},
  location = {Corvallis, Oregon}
}

@article{Xue2004Effect,
  author = {Y. Xue and Z. R. Li and C. W. Yap and L. Z. Sun and X. Chen and Y.
	Z. Chen},
  title = {Effect of molecular descriptor feature selection in support vector
	machine classification of pharmacokinetic and toxicological properties
	of chemical agents.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {1630--1638},
  number = {5},
  abstract = {Statistical-learning methods have been developed for facilitating
	the prediction of pharmacokinetic and toxicological properties of
	chemical agents. {T}hese methods employ a variety of molecular descriptors
	to characterize structural and physicochemical properties of molecules.
	{S}ome of these descriptors are specifically designed for the study
	of a particular type of properties or agents, and their use for other
	properties or agents might generate noise and affect the prediction
	accuracy of a statistical learning system. {T}his work examines to
	what extent the reduction of this noise can improve the prediction
	accuracy of a statistical learning system. {A} feature selection
	method, recursive feature elimination ({RFE}), is used to automatically
	select molecular descriptors for support vector machines ({SVM})
	prediction of {P}-glycoprotein substrates ({P}-gp), human intestinal
	absorption of molecules ({HIA}), and agents that cause torsades de
	pointes ({T}d{P}), a rare but serious side effect. {RFE} significantly
	reduces the number of descriptors for each of these properties thereby
	increasing the computational speed for their classification. {T}he
	{SVM} prediction accuracies of {P}-gp and {HIA} are substantially
	increased and that of {T}d{P} remains unchanged by {RFE}. {T}hese
	prediction accuracies are comparable to those of earlier studies
	derived from a selective set of descriptors. {O}ur study suggests
	that molecular feature selection is useful for improving the speed
	and, in some cases, the accuracy of statistical learning methods
	for the prediction of pharmacokinetic and toxicological properties
	of chemical agents.},
  doi = {10.1021/ci049869h},
  pdf = {../local/Xue2004Effect.pdf},
  file = {Xue2004Effect.pdf:local/Xue2004Effect.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci049869h}
}

@article{Xue2007Multi-task,
  author = {Ya Xue and Xuejun Liao and Lawrence Carin and Balaji Krishnapuram},
  title = {Multi-task learning for classification with {D}irichlet process priors},
  journal = {Journal of Machine Learning Research},
  year = {2007},
  volume = {8},
  pages = {35--63},
  month = {January}
}

@article{Xue2004Prediction,
  author = {Y. Xue and C. W. Yap and L. Z. Sun and Z. W. Cao and J. F. Wang and
	Y. Z. Chen},
  title = {Prediction of {P}-glycoprotein substrates by a support vector machine
	approach.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {1497--1505},
  number = {4},
  abstract = {P-glycoproteins ({P}-gp) actively transport a wide variety of chemicals
	out of cells and function as drug efflux pumps that mediate multidrug
	resistance and limit the efficacy of many drugs. {M}ethods for facilitating
	early elimination of potential {P}-gp substrates are useful for facilitating
	new drug discovery. {A} computational ensemble pharmacophore model
	has recently been used for the prediction of {P}-gp substrates with
	a promising accuracy of 63\%. {I}t is desirable to extend the prediction
	range beyond compounds covered by the known pharmacophore models.
	{F}or such a purpose, a machine learning method, support vector machine
	({SVM}), was explored for the prediction of {P}-gp substrates. {A}
	set of 201 chemical compounds, including 116 substrates and 85 nonsubstrates
	of {P}-gp, was used to train and test a {SVM} classification system.
	{T}his {SVM} system gave a prediction accuracy of at least 81.2\%
	for {P}-gp substrates based on two different evaluation methods,
	which is substantially improved against that obtained from the multiple-pharmacophore
	model. {T}he prediction accuracy for nonsubstrates of {P}-gp is 79.2\%
	using 5-fold cross-validation. {T}hese accuracies are slightly better
	than those obtained from other statistical classification methods,
	including k-nearest neighbor (k-{NN}), probabilistic neural networks
	({PNN}), and {C}4.5 decision tree, that use the same sets of data
	and molecular descriptors. {O}ur study indicates the potential of
	{SVM} in facilitating the prediction of {P}-gp substrates.},
  doi = {10.1021/ci049971e},
  pdf = {../local/Xue2004Prediction.pdf},
  file = {Xue2004Prediction.pdf:local/Xue2004Prediction.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1021/ci049971e}
}

@article{Yabuki2005GRIFFIN,
  author = {Yabuki, Y. and Muramatsu, T. and Hirokawa, T. and Mukai, H. and Suwa,
	M.},
  title = {G{RIFFIN}: a system for predicting {GPCR}-{G}-protein coupling selectivity
	using a support vector machine and a hidden {M}arkov model.},
  journal = {Nucleic {A}cids {R}es.},
  year = {2005},
  volume = {33},
  pages = {W148--W153},
  number = {Web Server issue},
  month = {Jul},
  abstract = {We describe a novel system, {GRIFFIN} ({G}-protein and {R}eceptor
	{I}nteraction {F}eature {F}inding {IN}strument), that predicts {G}-protein
	coupled receptor ({GPCR}) and {G}-protein coupling selectivity based
	on a support vector machine ({SVM}) and a hidden {M}arkov model ({HMM})
	with high sensitivity and specificity. {B}ased on our assumption
	that whole structural segments of ligands, {GPCR}s and {G}-proteins
	are essential to determine {GPCR} and {G}-protein coupling, various
	quantitative features were selected for ligands, {GPCR}s and {G}-protein
	complex structures, and those parameters that are the most effective
	in selecting {G}-protein type were used as feature vectors in the
	{SVM}. {T}he main part of {GRIFFIN} includes a hierarchical {SVM}
	classifier using the feature vectors, which is useful for {C}lass
	{A} {GPCR}s, the major family. {F}or the opsins and olfactory subfamilies
	of {C}lass {A} and other minor families ({C}lasses {B}, {C}, frizzled
	and smoothened), the binding {G}-protein is predicted with high accuracy
	using the {HMM}. {A}pplying this system to known {GPCR} sequences,
	each binding {G}-protein is predicted with high sensitivity and specificity
	(>85\% on average). {GRIFFIN} (http://griffin.cbrc.jp/) is freely
	available and allows users to easily execute this reliable prediction
	of {G}-proteins.},
  doi = {10.1093/nar/gki495},
  pdf = {../local/Yabuki2005GRIFFIN.pdf},
  file = {Yabuki2005GRIFFIN.pdf:local/Yabuki2005GRIFFIN.pdf:PDF},
  keywords = {biosvm},
  pii = {33/suppl_2/W148},
  url = {http://dx.doi.org/10.1093/nar/gki495}
}

@article{Yaffe2011Probabilistic,
  author   = {Yaffe, E. and Tanay, A.},
  title    = {Probabilistic modeling of {Hi-C} contact maps eliminates systematic
	biases to characterize global chromosomal architecture},
  journal  = {Nat. Genet.},
  year     = {2011},
  volume   = {43},
  pages    = {1059--1065},
  number   = {11},
  abstract = {Hi-C experiments measure the probability of physical proximity between
	pairs of chromosomal loci on a genomic scale. We report on several
	systematic biases that substantially affect the Hi-C experimental
	procedure, including the distance between restriction sites, the
	GC content of trimmed ligation junctions and sequence uniqueness.
	To address these biases, we introduce an integrated probabilistic
	background model and develop algorithms to estimate its parameters
	and renormalize Hi-C data. Analysis of corrected human lymphoblast
	contact maps provides genome-wide evidence for interchromosomal aggregation
	of active chromatin marks, including DNase-hypersensitive sites and
	transcriptionally active foci. We observe extensive long-range (up
	to 400 kb) cis interactions at active promoters and derive asymmetric
	contact profiles next to transcription start sites and CTCF binding
	sites. Clusters of interacting chromosomal domains suggest physical
	separation of centromere-proximal and centromere-distal regions.
	These results provide a computational basis for the inference of
	chromosomal architectures from Hi-C experiments.},
  doi      = {10.1038/ng.947},
  pdf      = {../local/Yaffe2011Probabilistic.pdf},
  file     = {Yaffe2011Probabilistic.pdf:Yaffe2011Probabilistic.pdf:PDF},
  issn     = {1061-4036},
  keywords = {hic, ngs},
  owner    = {nelle},
  url      = {http://dx.doi.org/10.1038/ng.947},
  urldate  = {2012-01-11}
}

@article{Yakoby2008combinatorial,
  author = {Yakoby, Nir and Bristow, Christopher A. and Gong, Danielle and Schafer,
	Xenia and Lembong, Jessica and Zartman, Jeremiah J. and Halfon, Marc
	S. and Sch{\"u}pbach, Trudi and Shvartsman, Stanislav Y.},
  title = {A combinatorial code for pattern formation in {D}rosophila oogenesis.},
  journal = {Dev Cell},
  year = {2008},
  volume = {15},
  pages = {725--737},
  number = {5},
  month = {Nov},
  abstract = {Two-dimensional patterning of the follicular epithelium in Drosophila
	oogenesis is required for the formation of three-dimensional eggshell
	structures. Our analysis of a large number of published gene expression
	patterns in the follicle cells suggests that they follow a simple
	combinatorial code based on six spatial building blocks and the operations
	of union, difference, intersection, and addition. The building blocks
	are related to the distribution of inductive signals, provided by
	the highly conserved epidermal growth factor receptor and bone morphogenetic
	protein signaling pathways. We demonstrate the validity of the code
	by testing it against a set of patterns obtained in a large-scale
	transcriptional profiling experiment. Using the proposed code, we
	distinguish 36 distinct patterns for 81 genes expressed in the follicular
	epithelium and characterize their joint dynamics over four stages
	of oogenesis. The proposed combinatorial framework allows systematic
	analysis of the diversity and dynamics of two-dimensional transcriptional
	patterns and guides future studies of gene regulation.},
  doi = {10.1016/j.devcel.2008.09.008},
  pdf = {../local/Yakoby2008combinatorial.pdf},
  file = {Yakoby2008combinatorial.pdf:Yakoby2008combinatorial.pdf:PDF},
  institution = {Lewis-Sigler Institute for Integrative Genomics and Department of
	Chemical Engineering, Princeton University, Princeton, NJ 08544,
	USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {S1534-5807(08)00390-0},
  pmid = {19000837},
  timestamp = {2012.10.23},
  url = {http://dx.doi.org/10.1016/j.devcel.2008.09.008}
}

@article{Yamada2005Accelerated,
  author = {Yamada, T. and Morishita, S.},
  title = {Accelerated off-target search algorithm for si{RNA}.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1316--1324},
  number = {8},
  month = {Apr},
  abstract = {M{OTIVATION}: {D}esigning highly effective short interfering {RNA}
	(si{RNA}) sequences with maximum target-specificity for mammalian
	{RNA} interference ({RNA}i) is one of the hottest topics in molecular
	biology. {T}he relationship between si{RNA} sequences and {RNA}i
	activity has been studied extensively to establish rules for selecting
	highly effective sequences. {H}owever, there is a pressing need to
	compute si{RNA} sequences that minimize off-target silencing effects
	efficiently and to match any non-targeted sequences with mismatches.
	{RESULTS}: {T}he enumeration of potential cross-hybridization candidates
	is non-trivial, because si{RNA} sequences are short, ca. 19 nt in
	length, and at least three mismatches with non-targets are required.
	{W}ith at least three mismatches, there are typically four or five
	contiguous matches, so that a {BLAST} search frequently overlooks
	off-target candidates. {B}y contrast, existing accurate approaches
	are expensive to execute; thus we need to develop an accurate, efficient
	algorithm that uses seed hashing, the pigeonhole principle, and combinatorics
	to identify mismatch patterns. {T}ests show that our method can list
	potential cross-hybridization candidates for any si{RNA} sequence
	of selected human gene rapidly, outperforming traditional methods
	by orders of magnitude in terms of computational performance. {AVAILABILITY}:
	http://design.{RNA}i.jp {CONTACT}: yamada@cb.k.u-tokyo.ac.jp.},
  doi = {10.1093/bioinformatics/bti155},
  keywords = {sirna},
  pii = {bti155},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti155}
}

@article{Yamanishi2008Prediction,
  author      = {Yamanishi, Y. and Araki, M. and Gutteridge, A. and Honda, W. and
	Kanehisa, M.},
  title       = {Prediction of drug-target interaction networks from the integration
	of chemical and genomic spaces},
  journal     = {Bioinformatics},
  year        = {2008},
  volume      = {24},
  pages       = {i232--i240},
  number      = {13},
  month       = {Jul},
  abstract    = {MOTIVATION: The identification of interactions between drugs and target
	proteins is a key area in genomic drug discovery. Therefore, there
	is a strong incentive to develop new methods capable of detecting
	these potential drug-target interactions efficiently. RESULTS: In
	this article, we characterize four classes of drug-target interaction
	networks in humans involving enzymes, ion channels, G-protein-coupled
	receptors (GPCRs) and nuclear receptors, and reveal significant correlations
	between drug structure similarity, target sequence similarity and
	the drug-target interaction network topology. We then develop new
	statistical methods to predict unknown drug-target interaction networks
	from chemical structure and genomic sequence information simultaneously
	on a large scale. The originality of the proposed method lies in
	the formalization of the drug-target interaction inference as a supervised
	learning problem for a bipartite graph, the lack of need for 3D structure
	information of the target proteins, and in the integration of chemical
	and genomic spaces into a unified space that we call 'pharmacological
	space'. In the results, we demonstrate the usefulness of our proposed
	method for the prediction of the four classes of drug-target interaction
	networks. Our comprehensively predicted drug-target interaction networks
	enable us to suggest many potential drug-target interactions and
	to increase research productivity toward genomic drug discovery.
	AVAILABILITY: Softwares are available upon request. SUPPLEMENTARY
	INFORMATION: Datasets and all prediction results are available at
	http://web.kuicr.kyoto-u.ac.jp/supp/yoshi/drugtarget/.},
  doi         = {10.1093/bioinformatics/btn162},
  pdf         = {../local/Yamanishi2008Prediction.pdf},
  file        = {Yamanishi2008Prediction.pdf:Yamanishi2008Prediction.pdf:PDF},
  institution = {Bioinformatics Center, Institute for Chemical Research, Kyoto University,
	Gokasho, Uji, Kyoto 611-0011, Japan. Yoshihiro.Yamanishi@ensmp.fr},
  owner       = {jp},
  pii         = {btn162},
  pmid        = {18586719},
  timestamp   = {2008.12.24},
  url         = {http://dx.doi.org/10.1093/bioinformatics/btn162}
}

@article{Yamanishi2007Glycan,
  author      = {Yamanishi, Y. and Bach, F. and Vert, J.-P.},
  title       = {Glycan classification with tree kernels},
  journal     = {Bioinformatics},
  year        = {2007},
  volume      = {23},
  pages       = {1211--1216},
  number      = {10},
  month       = {May},
  abstract    = {MOTIVATION: Glycans are covalent assemblies of sugar that play crucial
	roles in many cellular processes. Recently, comprehensive data about
	the structure and function of glycans have been accumulated, therefore
	the need for methods and algorithms to analyze these data is growing
	fast. RESULTS: This article presents novel methods for classifying
	glycans and detecting discriminative glycan motifs with support vector
	machines (SVM). We propose a new class of tree kernels to measure
	the similarity between glycans. These kernels are based on the comparison
	of tree substructures, and take into account several glycan features
	such as the sugar type, the sugar bound type or layer depth. The
	proposed methods are tested on their ability to classify human glycans
	into four blood components: leukemia cells, erythrocytes, plasma
	and serum. They are shown to outperform a previously published method.
	We also applied a feature selection approach to extract glycan motifs
	which are characteristic of each blood component. We confirmed that
	some leukemia-specific glycan motifs detected by our method corresponded
	to several results in the literature. AVAILABILITY: Softwares are
	available upon request. SUPPLEMENTARY INFORMATION: Datasets are available
	at the following website: http://web.kuicr.kyoto-u.ac.jp/supp/yoshi/glycankernel/},
  doi         = {10.1093/bioinformatics/btm090},
  institution = {Bioinformatics Center, Institute for Chemical Research, Kyoto University,
	Gokasho, Uji, Kyoto 611-0011, Japan. yoshi@kuicr.kyoto-u.ac.jp},
  owner       = {jp},
  pii         = {btm090},
  pmid        = {17344232},
  timestamp   = {2008.11.30},
  url         = {http://dx.doi.org/10.1093/bioinformatics/btm090}
}

@incollection{Yamanishi2004Heterogeneous,
  author = {Yamanishi, Y. and Vert, J.-P. and Kanehisa, M.},
  title = {Heterogeneous data comparison and gene selection with kernel canonical
	correlation analysis},
  booktitle = {Kernel {M}ethods in {C}omputational {B}iology},
  publisher = {MIT Press},
  year = {2004},
  editor = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J.-P.},
  pages = {209--230},
  pdf = {../local/heterogeneous.pdf},
  file = {heterogeneous.pdf:http\://cg.ensmp.fr/~vert/publi/04kmcbbook/heterogeneous.pdf:PDF},
  keywords = {biosvm},
  owner = {vert}
}

@article{Yamanishi2005Supervised,
  author = {Yamanishi, Y. and Vert, J.-P. and Kanehisa, M.},
  title = {Supervised enzyme network inference from the integration of genomic
	data and chemical information},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {i468--i477},
  number = {Suppl. 1},
  abstract = {Motivation: {T}he metabolic network is an important biological network
	which relates enzyme proteins and chemical compounds. {A} large number
	of metabolic pathways remain unknown nowadays, and many enzymes are
	missing even in known metabolic pathways. {T}here is, therefore,
	an incentive to develop methods to reconstruct the unknown parts
	of the metabolic network and to identify genes coding for missing
	enzymes. {R}esults: {T}his paper presents new methods to infer enzyme
	networks from the integration of multiple genomic data and chemical
	information, in the framework of supervised graph inference. {T}he
	originality of the methods is the introduction of chemical compatibility
	as a constraint for refining the network predicted by the network
	inference engine. {T}he chemical compatibility between two enzymes
	is obtained automatically from the information encoded by their {E}nzyme
	{C}ommission ({EC}) numbers. {T}he proposed methods are tested and
	compared on their ability to infer the enzyme network of the yeast
	{S}accharomyces cerevisiae from four datasets for enzymes with assigned
	{EC} numbers: gene expression data, protein localization data, phylogenetic
	profiles and chemical compatibility information. {I}t is shown that
	the prediction accuracy of the network reconstruction consistently
	improves owing to the introduction of chemical constraints, the use
	of a supervised approach and the weighted integration of multiple
	datasets. {F}inally, we conduct a comprehensive prediction of a global
	enzyme network consisting of all enzyme candidate proteins of the
	yeast to obtain new biological findings.},
  doi = {10.1093/bioinformatics/bti1012},
  pdf = {../local/Yamanishi2005Supervised.pdf},
  file = {Yamanishi2005Supervised.pdf:local/Yamanishi2005Supervised.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1012}
}

@article{Yamanishi2004Protein,
  author = {Yamanishi, Y. and Vert, J.-P. and Kanehisa, M.},
  title = {Protein network inference from multiple genomic data: a supervised
	approach},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {i363--i370},
  number = {Suppl. 1},
  abstract = {Motivation: {A}n increasing number of observations support the hypothesis
	that most biological functions involve the interactions between many
	proteins, and that the complexity of living systems arises as a result
	of such interactions. {I}n this context, the problem of inferring
	a global protein network for a given organism, using all available
	genomic data about the organism, is quickly becoming one of the main
	challenges in current computational biology. {R}esults: {T}his paper
	presents a new method to infer protein networks from multiple types
	of genomic data. {B}ased on a variant of kernel canonical correlation
	analysis, its originality is in the formalization of the protein
	network inference problem as a supervised learning problem, and in
	the integration of heterogeneous genomic data within this framework.
	{W}e present promising results on the prediction of the protein network
	for the yeast {S}accharomyces cerevisiae from four types of widely
	available data: gene expressions, protein interactions measured by
	yeast two-hybrid systems, protein localizations in the cell and protein
	phylogenetic profiles. {T}he method is shown to outperform other
	unsupervised protein network inference methods. {W}e finally conduct
	a comprehensive prediction of the protein network for all proteins
	of the yeast, which enables us to propose protein candidates for
	missing enzymes in a biosynthesis pathway. {A}vailability: {S}oftwares
	are available upon request.},
  pdf = {../local/Yamanishi2004Protein.pdf},
  file = {Yamanishi2004Protein.pdf:local/Yamanishi2004Protein.pdf:PDF},
  keywords = {biosvm},
  owner = {vert},
  url = {http://bioinformatics.oupjournals.org/cgi/reprint/20/suppl\_1/i363}
}

@article{Yamanishi2003Extraction,
  author = {Yamanishi, Y. and Vert, J.-P. and Nakaya, A. and Kanehisa, M.},
  title = {Extraction of correlated gene clusters from multiple genomic data
	by generalized kernel canonical correlation analysis},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {i323--i330},
  number = {Suppl. 1},
  abstract = {Motivation: {A} major issue in computational biology is the reconstruction
	of pathways from several genomic datasets, such as expression data,
	protein interaction data and phylogenetic profiles. {A}s a first
	step toward this goal, it is important to investigate the amount
	of correlation which exists between these data. {R}esults: {T}hese
	methods are successfully tested on their ability to recognize operons
	in the {E}scherichia coli genome, from the comparison of three datasets
	corresponding to functional relationships between genes in metabolic
	pathways, geometrical relationships along the chromosome, and co-expression
	relationships as observed by gene expression data. {C}ontact: yoshi@kuicr.kyoto-u.ac.jp},
  pdf = {../local/Yamanishi2003Extraction.pdf},
  file = {Yamanishi2003Extraction.pdf:local/Yamanishi2003Extraction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/suppl_1/i323}
}

@article{Yan2004two-stage,
  author = {Yan, C. and Dobbs, D. and Honavar, V.},
  title = {A two-stage classifier for identification of protein-protein interface
	residues},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {i371--i378},
  number = {Suppl. 1},
  abstract = {Motivation: {T}he ability to identify protein-protein interaction
	sites and to detect specific amino acid residues that contribute
	to the specificity and affinity of protein interactions has important
	implications for problems ranging from rational drug design to analysis
	of metabolic and signal transduction networks. {R}esults: {W}e have
	developed a two-stage method consisting of a support vector machine
	({SVM}) and a {B}ayesian classifier for predicting surface residues
	of a protein that participate in protein-protein interactions. {T}his
	approach exploits the fact that interface residues tend to form clusters
	in the primary amino acid sequence. {O}ur results show that the proposed
	two-stage classifier outperforms previously published sequence-based
	methods for predicting interface residues. {W}e also present results
	obtained using the two-stage classifier on an independent test set
	of seven {CAPRI} ({C}ritical {A}ssessment of {PR}edicted {I}nteractions)
	targets. {T}he success of the predictions is validated by examining
	the predictions in the context of the three-dimensional structures
	of protein complexes. {S}upplementary information: http://www.public.iastate.edu/~chhyan/{ISMB}2004/list.html},
  pdf = {../local/Yan2004two-stage.pdf},
  file = {Yan2004two-stage.pdf:local/Yan2004two-stage.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/suppl_1/i371}
}

@article{Yan2004Identification,
  author = {Yan, C. and Honavar, V. and Dobbs, D.},
  title = {Identification of interface residues in protease-inhibitor and antigen-antibody
	complexes: a support vector machine approach},
  journal = {Neural {C}omput. \& {A}pplic.},
  year = {2004},
  volume = {13},
  pages = {123--129},
  doi = {10.1007/s00521-004-0414-3},
  pdf = {../local/Yan2004Identification.pdf},
  file = {Yan2004Identification.pdf:local/Yan2004Identification.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Yan2007Determining,
  author      = {Yan, Mingjin and Ye, Keying},
  title       = {Determining the number of clusters using the weighted gap statistic.},
  journal     = {Biometrics},
  year        = {2007},
  volume      = {63},
  pages       = {1031--1037},
  number      = {4},
  month       = {Dec},
  abstract    = {Estimating the number of clusters in a data set is a crucial step
	in cluster analysis. In this article, motivated by the gap method
	(Tibshirani, Walther, and Hastie, 2001, Journal of the Royal Statistical
	Society B63, 411-423), we propose the weighted gap and the difference
	of difference-weighted (DD-weighted) gap methods for estimating the
	number of clusters in data using the weighted within-clusters sum
	of errors: a measure of the within-clusters homogeneity. In addition,
	we propose a "multilayer" clustering approach, which is shown to
	be more accurate than the original gap method, particularly in detecting
	the nested cluster structure of the data. The methods are applicable
	when the input data contain continuous measurements and can be used
	with any clustering method. Simulation studies and real data are
	investigated and compared among these proposed methods as well as
	with the original gap method.},
  doi         = {10.1111/j.1541-0420.2007.00784.x},
  institution = {Medtronic Sofamor Danek, 1800 Pyramid Place, Memphis, Tennessee 38132,
	USA. mingjin.yan@medtronic.com},
  keywords    = {Algorithms; Biometry, methods; Cluster Analysis; Computer Simulation;
	Data Interpretation, Statistical; Models, Biological; Models, Statistical;
	Pattern Recognition, Automated, methods},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {BIOM784},
  pmid        = {17425640},
  timestamp   = {2011.12.29},
  url         = {http://dx.doi.org/10.1111/j.1541-0420.2007.00784.x}
}

@inproceedings{Yan2007Machine,
  author    = {Yan, R. J. and Ling, C. X.},
  title     = {Machine learning for stock selection},
  booktitle = {KDD '07 Proceedings of the 13th ACM SIGKDD international conference
	on Knowledge discovery and data mining},
  year      = {2007},
  pages     = {1038--1042},
  address   = {New York, NY, USA},
  publisher = {ACM Press},
  abstract  = {In this paper, we propose a new method called Prototype Ranking (PR)
	designed for the stock selection problem. PR takes into account the
	huge size of real-world stock data and applies a modified competitive
	learning technique to predict the ranks of stocks. The primary target
	of PR is to select the top performing stocks among many ordinary
	stocks. PR is designed to perform the learning and testing in a noisy
	stocks sample set where the top performing stocks are usually the
	minority. The performance of PR is evaluated by a trading simulation
	of the real stock data. Each week the stocks with the highest predicted
	ranks are chosen to construct a portfolio. In the period of 1978-2004,
	PR's portfolio earns a much higher average return as well as a higher
	risk-adjusted return than Cooper's method, which shows that the PR
	method leads to a clear profit improvement.},
  doi       = {10.1145/1281192.1281307},
  pdf       = {../local/Yan2007Machine.pdf},
  file      = {Yan2007Machine.pdf:Yan2007Machine.pdf:PDF},
  owner     = {jp},
  timestamp = {2011.04.08},
  url       = {http://dx.doi.org/10.1145/1281192.1281307}
}

@inproceedings{Yang2010Fast,
  author = {Yang, A. and Ganesh, A. and Sastry, S. and Ma, Y.},
  title = {Fast {$\ell_1$}-minimization algorithms and an application in robust face
	recognition: a review},
  booktitle = {Proceedings of the International Conference on Image Processing},
  year = {2010},
  pages = {1849--1852}
}

@article{Yang1999Minimax,
  author = {Yang, Y.},
  title = {Minimax nonparametric classification --- {P}art {I}: rates of convergence},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1999},
  volume = {45},
  pages = {2271--2284},
  number = {7},
  doi = {10.1109/18.796368},
  pdf = {../local/Yang1999Minimax.pdf},
  file = {Yang1999Minimax.pdf:local/Yang1999Minimax.pdf:PDF},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1109/18.796368}
}

@article{Yang2002Normalization,
  author = {Yang, Y. H. and Dudoit, S. and Luu, P. and Lin, D. M. and Peng, V.
	and Ngai, J. and Speed, T. P.},
  title = {Normalization for {cDNA} microarray data: a robust composite method
	addressing single and multiple slide systematic variation.},
  journal = {Nucleic Acids Res.},
  year = {2002},
  volume = {30},
  pages = {e15},
  number = {4},
  month = {February},
  abstract = {There are many sources of systematic variation in cDNA microarray
	experiments which affect the measured gene expression levels (e.g.
	differences in labeling efficiency between the two fluorescent dyes).
	The term normalization refers to the process of removing such variation.
	A constant adjustment is often used to force the distribution of
	the intensity log ratios to have a median of zero for each slide.
	However, such global normalization approaches are not adequate in
	situations where dye biases can depend on spot overall intensity
	and/or spatial location within the array. This article proposes normalization
	methods that are based on robust local regression and account for
	intensity and spatial dependence in dye biases for different types
	of cDNA microarray experiments. The selection of appropriate controls
	for normalization is discussed and a novel set of controls (microarray
	sample pool, MSP) is introduced to aid in intensity-dependent normalization.
	Lastly, to allow for comparisons of expression levels across slides,
	a robust method based on maximum likelihood estimation is proposed
	to adjust for scale differences among slides.},
  doi = {10.1093/nar/30.4.e15},
  institution = {Department of Statistics, Helen Wills Neuroscience Institute, University
	of California, Berkeley, CA 94720-3860, USA.},
  issn = {1362-4962},
  url = {http://dx.doi.org/10.1093/nar/30.4.e15}
}

@article{Yang2002Design,
  author = {Yee Hwa Yang and Terry Speed},
  title = {Design issues for {cDNA} microarray experiments.},
  journal = {Nat Rev Genet},
  year = {2002},
  volume = {3},
  pages = {579--588},
  number = {8},
  month = {Aug},
  doi = {10.1038/nrg863},
  institution = {Department of Statistics and Program in Biostatistics, 367 Evans
	Hall, 3860, University of California, Berkeley, California 94720-3860,
	USA.},
  keywords = {Gene Expression Profiling; Gene Expression Regulation; Humans; Oligonucleotide
	Array Sequence Analysis, methods; Research Design; Time Factors},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {nrg863},
  pmid = {12154381},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/nrg863}
}

@article{Yang2005Prediction,
  author = {Zheng Rong Yang},
  title = {Prediction of caspase cleavage sites using {B}ayesian bio-basis function
	neural networks.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1831--1837},
  number = {9},
  month = {May},
  abstract = {M{OTIVATION}: {A}poptosis has drawn the attention of researchers because
	of its importance in treating some diseases through finding a proper
	way to block or slow down the apoptosis process. {H}aving understood
	that caspase cleavage is the key to apoptosis, we find novel methods
	or algorithms are essential for studying the specificity of caspase
	cleavage activity and this helps the effective drug design. {A}s
	bio-basis function neural networks have proven to outperform some
	conventional neural learning algorithms, there is a motivation, in
	this study, to investigate the application of bio-basis function
	neural networks for the prediction of caspase cleavage sites. {RESULTS}:
	{T}hirteen protein sequences with experimentally determined caspase
	cleavage sites were downloaded from {NCBI}. {B}ayesian bio-basis
	function neural networks are investigated and the comparisons with
	single-layer perceptrons, multilayer perceptrons, the original bio-basis
	function neural networks and support vector machines are given. {T}he
	impact of the sliding window size used to generate sub-sequences
	for modelling on prediction accuracy is studied. {T}he results show
	that the {B}ayesian bio-basis function neural network with two {G}aussian
	distributions for model parameters (weights) performed the best and
	the highest prediction accuracy is 97.15 +/- 1.13\%. {AVAILABILITY}:
	{T}he package of {B}ayesian bio-basis function neural network can
	be obtained by request to the author.},
  doi = {10.1093/bioinformatics/bti281},
  pdf = {../local/Yang2005Prediction.pdf},
  file = {Yang2005Prediction.pdf:local/Yang2005Prediction.pdf:PDF},
  pii = {bti281},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti281}
}

@article{Yang2004Biological,
  author = {Zheng Rong Yang},
  title = {Biological applications of support vector machines.},
  journal = {Brief {B}ioinform},
  year = {2004},
  volume = {5},
  pages = {328--338},
  number = {4},
  month = {Dec}
}

@article{Yang2004Bio-support,
  author = {Yang, Z. R. and Chou, K.-C.},
  title = {Bio-support vector machines for computational proteomics},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {735--741},
  number = {5},
  abstract = {Motivation: {O}ne of the most important issues in computational proteomics
	is to produce a prediction model for the classification or annotation
	of biological function of novel protein sequences. {I}n order to
	improve the prediction accuracy, much attention has been paid to
	the improvement of the performance of the algorithms used, few is
	for solving the fundamental issue, namely, amino acid encoding as
	most existing pattern recognition algorithms are unable to recognize
	amino acids in protein sequences. {I}mportantly, the most commonly
	used amino acid encoding method has the flaw that leads to large
	computational cost and recognition bias. {R}esults: {B}y replacing
	kernel functions of support vector machines ({SVM}s) with amino acid
	similarity measurement matrices, we have modified {SVM}s, a new type
	of pattern recognition algorithm for analysing protein sequences,
	particularly for proteolytic cleavage site prediction. {W}e refer
	to the modified {SVM}s as bio-support vector machine. {W}hen applied
	to the prediction of {HIV} protease cleavage sites, the new method
	has shown a remarkable advantage in reducing the model complexity
	and enhancing the model robustness.},
  pdf = {../local/Yang2004Bio-support.pdf},
  file = {Yang2004Bio-support.pdf:local/Yang2004Bio-support.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/20/5/735}
}

@article{Yano2001Evaluating,
  author = {Yano, Y. and Beal, S. L. and Sheiner, L. B.},
  title = {Evaluating pharmacokinetic/pharmacodynamic models using the {P}osterior
	{P}redictive {C}heck.},
  journal = {J Pharmacokin Pharmacodynam},
  year = {2001},
  volume = {28},
  pages = {171--192},
  number = {2},
  abstract = {The posterior predictive check (PPC) is a model evaluation tool. It
	assigns a value (p PPC) to the probability that the value of a given
	statistic computed from data arising under an analysis model is as
	or more extreme than the value computed from the real data themselves.
	If this probability is too small, the analysis model is regarded
	as invalid for the given statistic. Properties of the PPC for pharmacokinetic
	(PK) and pharmacodynamic (PD) model evaluation are examined herein
	for a particularly simple simulation setting: extensive sampling
	of a single individual's data arising from simple PK/PD and error
	models. To test the performance characteristics of the PPC, repeatedly,
	``real'' data are simulated and for a variety of statistics,
	the PPC is applied to an analysis model, which may (null hypothesis)
	or may not (alternative hypothesis) be identical to the simulation
	model. Five models are used here: (PK1) mono-exponential with proportional
	error, (PK2) biexponential with proportional error, (PK2$\epsilon$) biexponential
	with additive error, (PD1) E max model with additive error under
	the logit transform, and (PD2) sigmoid E max model with additive
	error under the logit transform. Six simulation/analysis settings
	are studied. The first three, (PK1/PK1), (PK2/PK2), and (PD1/PD1)
	evaluate whether the PPC has appropriate type-I error level, whereas
	the second three (PK2/PK1), (PK2$\epsilon$/PK2), and (PD2/PD1) evaluate
	whether the PPC has adequate power. For a set of 100 data sets simulated/analyzed
	under each model pair according to a stipulated extensive sampling
	design, the p PPC is computed for a number of statistics in three
	different ways (each way uses a different approximation to the posterior
	distribution on the model parameters). We find that in general; (i)
	The PPC is conservative under the null in the sense that for many
	statistics, prob(p PPC $\le \alpha$) $< \alpha$ for small $\alpha$. With respect to such
	statistics, this means that useful models will rarely be regarded
	incorrectly as invalid. A high correlation of a statistic with the
	parameter estimates obtained from the same data used to compute the
	statistic (a measure of statistical ``sufficiency'') tends
	to identify the most conservative statistics. (ii) Power is not very
	great, at least for the alternative models we tested, and it is especially
	poor with ``statistics'' that are in part a function of parameters
	as well as data. Although there is a tendency for nonsufficient statistics
	(as we have measured this) to have greater power, this is by no means
	an infallible diagnostic. (iii) No clear advantage for one or another
	method of approximating the posterior distribution on model parameters
	is found.},
  owner = {kb}
}

@inproceedings{Yanover2005Predicting,
  author = {Chen Yanover and Tomer Hertz},
  title = {Predicting Protein-Peptide Binding Affinity by Learning Peptide-Peptide
	Distance Functions.},
  booktitle = {RECOMB},
  year = {2005},
  pages = {456--471},
  doi = {10.1007/11415770_34},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://dx.doi.org/10.1007/11415770_34}
}

@article{Yao2006Coupling,
  author    = {Yao, X. and Parnot, C. and Deupi, X. and Ratnala, V. R. P. and
               Swaminath, G. and Farrens, D. and Kobilka, B.},
  title     = {Coupling ligand structure to specific conformational switches
               in the beta2-adrenoceptor.},
  journal   = {Nat. Chem. Biol.},
  year      = {2006},
  month     = {Aug},
  volume    = {2},
  number    = {8},
  pages     = {417--422},
  doi       = {10.1038/nchembio801},
  url       = {http://dx.doi.org/10.1038/nchembio801},
  pii       = {nchembio801},
  pmid      = {16799554},
  abstract  = {G protein-coupled receptors (GPCRs) regulate a wide variety of
               physiological functions in response to structurally diverse ligands
               ranging from cations and small organic molecules to peptides and
               glycoproteins. For many GPCRs, structurally related ligands can have
               diverse efficacy profiles. To investigate the process of ligand
               binding and activation, we used fluorescence spectroscopy to study
               the ability of ligands having different efficacies to induce a
               specific conformational change in the human beta2-adrenoceptor
               (beta2-AR). The 'ionic lock' is a molecular switch found in
               rhodopsin-family GPCRs that has been proposed to link the
               cytoplasmic ends of transmembrane domains 3 and 6 in the inactive
               state. We found that most partial agonists were as effective as full
               agonists in disrupting the ionic lock. Our results show that
               disruption of this important molecular switch is necessary, but not
               sufficient, for full activation of the beta2-AR.},
  keywords  = {chemogenomics},
  owner     = {laurent},
  timestamp = {2008.07.16}
}

@article{Yao2004Comparative,
  author = {X. J. Yao and A. Panaye and J. P. Doucet and R. S. Zhang and H. F.
	Chen and M. C. Liu and Z. D. Hu and B. T. Fan},
  title = {Comparative study of {QSAR}/{QSPR} correlations using support vector
	machines, radial basis function neural networks, and multiple linear
	regression.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2004},
  volume = {44},
  pages = {1257--1266},
  number = {4},
  abstract = {Support vector machines ({SVM}s) were used to develop {QSAR} models
	that correlate molecular structures to their toxicity and bioactivities.
	{T}he performance and predictive ability of {SVM} are investigated
	and compared with other methods such as multiple linear regression
	and radial basis function neural network methods. {I}n the present
	study, two different data sets were evaluated. {T}he first one involves
	an application of {SVM} to the development of a {QSAR} model for
	the prediction of toxicities of 153 phenols, and the second investigation
	deals with the {QSAR} model between the structures and the activities
	of a set of 85 cyclooxygenase 2 ({COX}-2) inhibitors. {F}or each
	application, the molecular structures were described using either
	the physicochemical parameters or molecular descriptors. {I}n both
	studied cases, the predictive ability of the {SVM} model is comparable
	or superior to those obtained by {MLR} and {RBFNN}. {T}he results
	indicate that {SVM} can be used as an alternative powerful modeling
	tool for {QSAR} studies.},
  doi = {10.1021/ci049965i},
  pdf = {../local/Yao2004Comparative.pdf},
  file = {Yao2004Comparative.pdf:local/Yao2004Comparative.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci049965i}
}

@article{Yao1988Estimating,
  author = {Yao, Y.-C.},
  title = {Estimating the number of change-points via {Schwarz} criterion},
  journal = {Stat. Probab. Lett.},
  year = {1988},
  volume = {6},
  pages = {181--189},
  owner = {jp},
  timestamp = {2010.06.02}
}

@article{Yao1989Least,
  author    = {Yao, Y.-C. and Au, S. T.},
  title     = {Least-Squares Estimation of a Step Function},
  journal   = {Sankhy\={a}: The Indian Journal of Statistics, Series A},
  year      = {1989},
  volume    = {51},
  number    = {3},
  pages     = {370--381},
  url       = {http://www.jstor.org/stable/25050759},
  owner     = {jp},
  timestamp = {2012.10.03}
}

@article{Yap2004Prediction,
  author = {C. W. Yap and C. Z. Cai and Y. Xue and Y. Z. Chen},
  title = {Prediction of torsade-causing potential of drugs by support vector
	machine approach.},
  journal = {Toxicol {S}ci},
  year = {2004},
  volume = {79},
  pages = {170--177},
  number = {1},
  month = {May},
  abstract = {In an effort to facilitate drug discovery, computational methods for
	facilitating the prediction of various adverse drug reactions ({ADR}s)
	have been developed. {S}o far, attention has not been sufficiently
	paid to the development of methods for the prediction of serious
	{ADR}s that occur less frequently. {S}ome of these {ADR}s, such as
	torsade de pointes ({T}d{P}), are important issues in the approval
	of drugs for certain diseases. {T}hus there is a need to develop
	tools for facilitating the prediction of these {ADR}s. {T}his work
	explores the use of a statistical learning method, support vector
	machine ({SVM}), for {T}d{P} prediction. {T}d{P} involves multiple
	mechanisms and {SVM} is a method suitable for such a problem. {O}ur
	{SVM} classification system used a set of linear solvation energy
	relationship ({LSER}) descriptors and was optimized by leave-one-out
	cross validation procedure. {I}ts prediction accuracy was evaluated
	by using an independent set of agents and by comparison with results
	obtained from other commonly used classification methods using the
	same dataset and optimization procedure. {T}he accuracies for the
	{SVM} prediction of {T}d{P}-causing agents and non-{T}d{P}-causing
	agents are 97.4 and 84.6\% respectively; one is substantially improved
	against and the other is comparable to the results obtained by other
	classification methods useful for multiple-mechanism prediction problems.
	{T}his indicates the potential of {SVM} in facilitating the prediction
	of {T}d{P}-causing risk of small molecules and perhaps other {ADR}s
	that involve multiple mechanisms.},
  doi = {10.1093/toxsci/kfh082},
  pdf = {../local/Yap2004Prediction.pdf},
  file = {Yap2004Prediction.pdf:local/Yap2004Prediction.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  pii = {kfh082},
  url = {http://dx.doi.org/10.1093/toxsci/kfh082}
}

@article{Yap2005Prediction,
  author = {C. W. Yap and Y. Z. Chen},
  title = {Prediction of {C}ytochrome {P}450 3{A}4, 2{D}6, and 2{C}9 {I}nhibitors
	and {S}ubstrates by {U}sing {S}upport {V}ector {M}achines.},
  journal = {J {C}hem {I}nf {M}odel},
  year = {2005},
  volume = {45},
  pages = {982--992},
  number = {4},
  abstract = {Statistical learning methods have been used in developing filters
	for predicting inhibitors of two {P}450 isoenzymes, {CYP}3{A}4 and
	{CYP}2{D}6. {T}his work explores the use of different statistical
	learning methods for predicting inhibitors of these enzymes and an
	additional {P}450 enzyme, {CYP}2{C}9, and the substrates of the three
	{P}450 isoenzymes. {T}wo consensus support vector machine ({CSVM})
	methods, "positive majority" ({PM}-{CSVM}) and "positive probability"
	({PP}-{CSVM}), were used in this work. {T}hese methods were first
	tested for the prediction of inhibitors of {CYP}3{A}4 and {CYP}2{D}6
	by using a significantly higher number of inhibitors and noninhibitors
	than that used in earlier studies. {T}hey were then applied to the
	prediction of inhibitors of {CYP}2{C}9 and substrates of the three
	enzymes. {B}oth methods predict inhibitors of {CYP}3{A}4 and {CYP}2{D}6
	at a similar level of accuracy as those of earlier studies. {F}or
	classification of inhibitors of {CYP}2{C}9, the best {CSVM} method
	gives an accuracy of 88.9\% for inhibitors and 96.3\% for noninhibitors.
	{T}he accuracies for classification of substrates and nonsubstrates
	of {CYP}3{A}4, {CYP}2{D}6, and {CYP}2{C}9 are 98.2 and 90.9\%, 96.6
	and 94.4\%, and 85.7 and 98.8\%, respectively. {B}oth {CSVM} methods
	are potentially useful as filters for predicting inhibitors and substrates
	of {P}450 isoenzymes. {T}hese methods generally give better accuracies
	than single {SVM} classification systems, and the performance of
	the {PP}-{CSVM} method is slightly better than that of the {PM}-{CSVM}
	method.},
  doi = {10.1021/ci0500536},
  pdf = {../local/Yap2005Prediction.pdf},
  file = {Yap2005Prediction.pdf:local/Yap2005Prediction.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci0500536}
}

@article{Yeang2001Molecular,
  author = {Yeang, C. H. and Ramaswamy, S. and Tamayo, P. and Mukherjee, S. and
	Rifkin, R. M. and Angelo, M. and Reich, M. and Lander, E. and Mesirov,
	J. and Golub, T.},
  title = {Molecular classification of multiple tumor types},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  pages = {S316--S322},
  number = {Suppl. 1},
  abstract = {Using gene expression data to classify tumor types is a very promising
	tool in cancer diagnosis. {P}revious works show several pairs of
	tumor types can be successfully distinguished by their gene expression
	patterns ({G}olub et al. 1999, {B}en-{D}or et al. 2000, {A}lizadeh
	et al. 2000). {H}owever, the simultaneous classification across a
	heterogeneous set of tumor types has not been well studied yet. {W}e
	obtained 190 samples from 14 tumor classes and generated a combined
	expression dataset containing 16063 genes for each of those samples.
	{W}e performed multi-class classification by combining the outputs
	of binary classifiers. {T}hree binary classifiers (k-nearest neighbors,
	weighted voting, and support vector machines) were applied in conjunction
	with three combination scenarios (one-vs-all, all-pairs, hierarchical
	partitioning). {W}e achieved the best cross validation error rate
	of 18.75\% and the best test error rate of 21.74\% by using the one-vs-all
	support vector machine algorithm. {T}he results demonstrate the feasibility
	of performing clinically useful classification from samples of multiple
	tumor types.},
  pdf = {../local/Yeang2001Molecular.pdf},
  file = {Yeang2001Molecular.pdf:local/Yeang2001Molecular.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/17/suppl_1/S316}
}

@article{Yeh2005Liver,
  author = {Wen-Chun Yeh and Yung-Ming Jeng and Cheng-Han Li and Po-Huang Lee
	and Pai-Chi Li},
  title = {Liver steatosis classification using high-frequency ultrasound.},
  journal = {Ultrasound {M}ed {B}iol},
  year = {2005},
  volume = {31},
  pages = {599--605},
  number = {5},
  month = {May},
  abstract = {High-frequency {B}-mode images of 19 fresh human liver samples were
	obtained to evaluate their usefulness in determining the steatosis
	grade. {T}he images were acquired by a mechanically controlled single-crystal
	probe at 25 {MH}z. {I}mage features derived from gray-level concurrence
	and nonseparable wavelet transform were extracted to classify steatosis
	grade using a classifier known as the support vector machine. {A}
	subsequent histologic examination of each liver sample graded the
	steatosis from 0 to 3. {T}he four grades were then combined into
	two, three and four classes. {T}he classification results were correlated
	with histology. {T}he best classification accuracies of the two,
	three and four classes were 90.5\%, 85.8\% and 82.6\%, respectively,
	which were markedly better than those at 7 {MH}z. {T}hese results
	indicate that liver steatosis can be more accurately characterized
	using high-frequency {B}-mode ultrasound. {L}imitations and their
	potential solutions of applying high-frequency ultrasound to liver
	imaging are also discussed.},
  doi = {10.1016/j.ultrasmedbio.2005.01.009},
  pdf = {../local/Yeh2005Liver.pdf},
  file = {Yeh2005Liver.pdf:local/Yeh2005Liver.pdf:PDF},
  pii = {S0301-5629(05)00050-5},
  url = {http://dx.doi.org/10.1016/j.ultrasmedbio.2005.01.009}
}

@article{Yeung2002Reverse,
  author = {Yeung, M. K. Stephen and Tegn{\'e}r, Jesper and Collins, James J.},
  title = {Reverse engineering gene networks using singular value decomposition
	and robust regression},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year = {2002},
  volume = {99},
  pages = {6163--6168},
  number = {9},
  abstract = {We propose a scheme to reverse-engineer gene networks on a genome-wide
	scale using a relatively small amount of gene expression data from
	microarray experiments. Our method is based on the empirical observation
	that such networks are typically large and sparse. It uses singular
	value decomposition to construct a family of candidate solutions
	and then uses robust regression to identify the solution with the
	smallest number of connections as the most likely solution. Our algorithm
	has $O(\log N)$ sampling complexity and $O(N^4)$ computational complexity.
	We test and validate our approach in a series of in numero experiments
	on model gene networks.},
  doi = {10.1073/pnas.092576199},
  eprint = {http://www.pnas.org/content/99/9/6163.full.pdf+html},
  url = {http://www.pnas.org/content/99/9/6163.abstract}
}

@article{Yewdell1999Immunodominance,
  author    = {Yewdell, J. W. and Bennink, J. R.},
  title     = {{I}mmunodominance in major histocompatibility complex class
               {I}-restricted {T} lymphocyte responses.},
  journal   = {Annu. Rev. Immunol.},
  year      = {1999},
  volume    = {17},
  pages     = {51--88},
  doi       = {10.1146/annurev.immunol.17.1.51},
  url       = {http://dx.doi.org/10.1146/annurev.immunol.17.1.51},
  pmid      = {10358753},
  abstract  = {Of the many thousands of peptides encoded by a complex foreign
               antigen that can potentially be presented to CD8+ T cells (TCD8+),
               only a small fraction induce measurable responses in association
               with any given major histocompatibility complex class I allele. To
               design vaccines that elicit optimal TCD8+ responses, a thorough
               understanding of this phenomenon, known as immunodominance, is
               imperative. Here we review recent progress in unraveling the
               molecular and cellular basis for immunodominance. Of foremost
               importance is peptide binding to class I molecules; only
               approximately 1/200 of potential determinants bind at greater than
               the threshold affinity (Kd > 500 nM) associated with immunogenicity.
               Limitations in the TCD8+ repertoire render approximately half of
               these peptides nonimmunogenic, and inefficient antigen processing
               further thins the ranks by approximately four fifths. As a result,
               only approximately 1/2000 of the peptides in a foreign antigen
               expressed by an appropriate antigen presenting cell achieve
               immunodominant status with a given class I allele. A roughly equal
               fraction of peptides have subdominant status, i.e. they induce
               weak-to-nondetectable primary TCD8+ responses in the context of
               their natural antigen. Subdominant determinants may be expressed at
               or above levels of immunodominant determinants, at least on antigen
               presenting cells in vitro. The immunogenicity of subdominant
               determinants is often limited by immunodomination: suppression
               mediated by TCD8+ specific for immunodominant determinants.
               Immunodomination is a central feature of TCD8+ responses, as it even
               occurs among clones responding to the same immunodominant
               determinant. Little is known about how immunodominant and
               subdominant determinants are distinguished by the TCD8+ repertoire,
               or how (and why) immunodomination occurs, but new tools are
               available to address these questions.},
  keywords  = {immunoinformatics},
  timestamp = {2007.01.25}
}

@article{Yi2007Strategy,
  author      = {Yi, Y. and Li, C. and Miller, C. and George, A. L.},
  title       = {Strategy for encoding and comparison of gene expression signatures.},
  journal     = {Genome Biol.},
  year        = {2007},
  volume      = {8},
  number      = {7},
  pages       = {R133},
  doi         = {10.1186/gb-2007-8-7-r133},
  url         = {http://dx.doi.org/10.1186/gb-2007-8-7-r133},
  pii         = {gb-2007-8-7-r133},
  pmid        = {17612401},
  institution = {Department of Medicine, Vanderbilt University, Nashville,
                 Tennessee 37232-0275, USA.},
  abstract    = {EXALT (EXpression signature AnaLysis Tool) is a computational
                 system enabling comparisons of microarray data across experimental
                 platforms and different laboratories
                 http://seq.mc.vanderbilt.edu/exalt/. An essential feature of EXALT
                 is a database holding thousands of gene expression signatures
                 extracted from the Gene Expression Omnibus, and encoded in a
                 searchable format. This novel approach to performing global
                 comparisons of shared microarray data may have enormous value when
                 coupled directly with a shared data repository.},
  owner       = {jp},
  timestamp   = {2008.12.09}
}

@article{Yin2008Activity,
  author = {Yin, J. and Yang, Q. and Shen, D. and Li, Z.-N.},
  title = {Activity recognition via user-trace segmentation},
  journal = {ACM Trans. Sen. Netw.},
  year = {2008},
  volume = {4},
  pages = {19:1--19:34},
  number = {4},
  month = {Sep},
  doi = {10.1145/1387663.1387665},
  pdf = {../local/Yin2008Activity.pdf},
  file = {Yin2008Activity.pdf:Yin2008Activity.pdf:PDF},
  owner = {jp},
  timestamp = {2010.12.07},
  url = {http://dx.doi.org/10.1145/1387663.1387665}
}

@article{Yiu2005Filtering,
  author = {Yiu, S. M. and Wong, Prudence W. H. and Lam, T. W. and Mui, Y. C. and
	Kung, H. F. and Lin, Marie and Cheung, Y. T.},
  title = {Filtering of {I}neffective si{RNA}s and {I}mproved si{RNA} {D}esign
	{T}ool},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {144--151},
  number = {2},
  month = {Jan},
  abstract = {Motivation: {S}hort interfering {RNA}s (si{RNA}s) can be used to suppress
	gene expression and possess many potential applications in therapy,
	but how to design an effective si{RNA} is still not clear. {B}ased
	on the {MPI} ({M}ax-{P}lanck-{I}nstitute) basic principles, a number
	of si{RNA} design tools have been developed recently. {T}he set of
	candidates reported by these tools is usually large and often contains
	ineffective si{RNA}s. {I}n view of this, we initiate the study of
	filtering ineffective si{RNA}s. {R}esults: {T}he contribution of
	this paper is 2-fold. {F}irst, we propose a fair scheme to compare
	existing design tools based on real data in the literature. {S}econd,
	we attempt to improve the {MPI} principles and existing tools by
	an algorithm that can filter ineffective si{RNA}s. {T}he algorithm
	is based on some new observations on the secondary structure, which
	we have verified by {AI} techniques (decision trees and support vector
	machines). {W}e have tested our algorithm together with the {MPI}
	principles and the existing tools. {T}he results show that our filtering
	algorithm is effective. {A}vailability: {T}he si{RNA} design software
	tool can be found in the website http://www.cs.hku.hk/~sirna/ {C}ontact:
	smyiu@cs.hku.hk},
  doi = {10.1093/bioinformatics/bth498},
  pdf = {../local/Yiu2005Filtering.pdf},
  file = {Yiu2005Filtering.pdf:local/Yiu2005Filtering.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/21/2/144}
}

@article{Ylstra2006BAC,
  author = {Ylstra, Bauke and Van den Ijssel, Paul and Carvalho, Beatriz and
	Brakenhoff, Ruud H and Meijer, Gerrit A},
  title = {{BAC} to the future! or oligonucleotides: a perspective for micro
	array comparative genomic hybridization (array {CGH})},
  journal = {Nucleic Acids Res.},
  year = {2006},
  volume = {34},
  pages = {445--450},
  keywords = {csbcbook, csbcbook-ch2}
}

@article{Yook2001Weighted,
  author = {S. H. Yook and H. Jeong and A.-L. Barab{\'a}si and Y. Tu},
  title = {Weighted evolving networks},
  journal = {Phys. {R}ev. {L}ett.},
  year = {2001},
  volume = {86},
  pages = {5835--5838},
  number = {25},
  pdf = {../local/yook01.pdf},
  file = {yook01.pdf:local/yook01.pdf:PDF},
  subject = {compnet},
  url = {http://www.nd.edu/~networks/Papers/weighted.pdf}
}

@article{Yoon2009Sensitive,
  author      = {Seungtai Yoon and Zhenyu Xuan and Vladimir Makarov and Kenny Ye
                 and Jonathan Sebat},
  title       = {Sensitive and accurate detection of copy number variants using
                 read depth of coverage.},
  journal     = {Genome Res.},
  year        = {2009},
  month       = {Sep},
  volume      = {19},
  number      = {9},
  pages       = {1586--1592},
  doi         = {10.1101/gr.092981.109},
  url         = {http://dx.doi.org/10.1101/gr.092981.109},
  pii         = {gr.092981.109},
  pmid        = {19657104},
  institution = {Cold Spring Harbor Laboratory, Cold Spring Harbor, New York
                 11724, USA.},
  abstract    = {Methods for the direct detection of copy number variation (CNV)
                 genome-wide have become effective instruments for identifying
                 genetic risk factors for disease. The application of
                 next-generation sequencing platforms to genetic studies promises
                 to improve sensitivity to detect CNVs as well as inversions,
                 indels, and SNPs. New computational approaches are needed to
                 systematically detect these variants from genome sequence data.
                 Existing sequence-based approaches for CNV detection are primarily
                 based on paired-end read mapping (PEM) as reported previously by
                 Tuzun et al. and Korbel et al. Due to limitations of the PEM
                 approach, some classes of CNVs are difficult to ascertain,
                 including large insertions and variants located within complex
                 genomic regions. To overcome these limitations, we developed a
                 method for CNV detection using read depth of coverage. Event-wise
                 testing (EWT) is a method based on significance testing. In
                 contrast to standard segmentation algorithms that typically
                 operate by performing likelihood evaluation for every point in the
                 genome, EWT works on intervals of data points, rapidly searching
                 for specific classes of events. Overall false-positive rate is
                 controlled by testing the significance of each possible event and
                 adjusting for multiple testing. Deletions and duplications
                 detected in an individual genome by EWT are examined across
                 multiple genomes to identify polymorphism between individuals. We
                 estimated error rates using simulations based on real data, and we
                 applied EWT to the analysis of chromosome 1 from paired-end
                 shotgun sequence data (30x) on five individuals. Our results
                 suggest that analysis of read depth is an effective approach for
                 the detection of CNVs, and it captures structural variants that
                 are refractory to established PEM-based methods.},
  pdf         = {../local/Yoon2009Sensitive.pdf},
  file        = {Yoon2009Sensitive.pdf:Yoon2009Sensitive.pdf:PDF},
  keywords    = {ngs},
  owner       = {jp},
  timestamp   = {2009.10.09}
}

@article{Yoon2003Analysis,
  author = {Yoon, Y. and Song, J. and Hong, S. H. and Kim, J. Q.},
  title = {Analysis of multiple single nucleotide polymorphisms of candidate
	genes related to coronary heart disease susceptibility by using support
	vector machines},
  journal = {Clin. {C}hem. {L}ab. {M}ed.},
  year = {2003},
  volume = {41},
  pages = {529--534},
  number = {4},
  abstract = {Coronary heart disease ({CHD}) is a complex genetic disease involving
	gene-environment interaction. {M}any association studies between
	single nucleotide polymorphisms ({SNP}s) of candidate genes and {CHD}
	have been reported. {W}e have applied a new method to analyze such
	relationships using support vector machines ({SVM}s), which is one
	of the methods for artificial neuronal network. {W}e assumed that
	common haplotype implicit in genotypes will differ between cases
	and controls, and that this will allow {SVM}-derived patterns to
	be classifiable according to subject genotypes. {F}ourteen {SNP}s
	of ten candidate genes in 86 {CHD} patients and 119 controls were
	investigated. {G}enotypes were transformed to a numerical vector
	by giving scores based on difference between the genotypes of each
	subject and the reference genotypes, which represent the healthy
	normal population. {O}verall classification accuracy by {SVM}s was
	64.4\% with a receiver operating characteristic ({ROC}) area of 0.639.
	{B}y conventional analysis using the $\chi^2$ test, the association between
	{CHD} and the {SNP} of the scavenger receptor {B}1 gene was most
	significant in terms of allele frequencies in cases vs. controls
	(p = 0.0001). {I}n conclusion, we suggest that the application of
	{SVM}s for association studies of {SNP}s in candidate genes shows
	considerable promise and that further work could be usefully performed
	upon the estimation of {CHD} susceptibility in individuals of high
	risk.},
  pdf = {../local/Yoon2003Analysis.pdf},
  file = {Yoon2003Analysis.pdf:local/Yoon2003Analysis.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.degruyter.de/journals/cclm/abs/10592.html}
}

@article{Yosef2008Improved,
  author      = {N. Yosef and R. Sharan and W. S. Noble},
  title       = {Improved network-based identification of protein orthologs},
  journal     = {Bioinformatics},
  year        = {2008},
  month       = {Aug},
  volume      = {24},
  number      = {16},
  pages       = {i200--i206},
  doi         = {10.1093/bioinformatics/btn277},
  url         = {http://dx.doi.org/10.1093/bioinformatics/btn277},
  pii         = {btn277},
  pmid        = {18689825},
  institution = {School of Computer Science, Tel-Aviv University, Tel-Aviv,
                 Israel. niryosef@post.tau.ac.il},
  abstract    = {MOTIVATION: Identifying protein orthologs is an important task
                 that is receiving growing attention in the bioinformatics
                 literature. Orthology detection provides a fundamental tool
                 towards understanding protein evolution, predicting protein
                 functions and interactions, aligning protein-protein interaction
                 (PPI) networks of different species and detecting conserved
                 modules within these networks. RESULTS: Here, we present a novel
                 diffusion-based framework that builds on the Rankprop algorithm
                 for protein orthology detection and enhances it in several
                 important ways. Specifically, we enhance the Rankprop algorithm to
                 account for the presence of multiple paralogs, utilize PPI, and
                 consider multiple (>2) species in parallel. We comprehensively
                 benchmarked our framework using a variety of training datasets and
                 experimental settings. The results, based on the yeast, fly and
                 human proteomes, show that the novel enhancements of Rankprop
                 provide substantial improvements over its original formulation as
                 well as over a number of state of the art methods for
                 network-based orthology detection. AVAILABILITY: datasets and
                 source code are available upon request.},
  pdf         = {../local/Yosef2008Improved.pdf},
  file        = {Yosef2008Improved.pdf:local/Yosef2008Improved.pdf:PDF},
  owner       = {jp},
  timestamp   = {2008.10.02}
}

@article{Young2005Plasmodium,
  author = {Young, Jason A and Fivelman, Quinton L and Blair, Peter L and
	de la Vega, Patricia and Le Roch, Karine G and Zhou, Yingyao and
	Carucci, Daniel J and Baker, David A and Winzeler, Elizabeth A},
  title = {The {Plasmodium} falciparum sexual development transcriptome: a
	microarray analysis using ontology-based pattern identification.},
  journal = {Mol. Biochem. Parasitol.},
  year = {2005},
  volume = {143},
  pages = {67--79},
  number = {1},
  month = {Sep},
  abstract = {The sexual stages of malarial parasites are essential for the mosquito
	transmission of the disease and therefore are the focus of transmission-blocking
	drug and vaccine development. In order to better understand genes
	important to the sexual development process, the transcriptomes of
	high-purity stage I-V Plasmodium falciparum gametocytes were comprehensively
	profiled using a full-genome high-density oligonucleotide microarray.
	The interpretation of this transcriptional data was aided by applying
	a novel knowledge-based data-mining algorithm termed ontology-based
	pattern identification (OPI) using current information regarding
	known sexual stage genes as a guide. This analysis resulted in the
	identification of a sexual development cluster containing 246 genes,
	of which approximately 75\% were hypothetical, exhibiting highly-correlated,
	gametocyte-specific expression patterns. Inspection of the upstream
	promoter regions of these 246 genes revealed putative cis-regulatory
	elements for sexual development transcriptional control mechanisms.
	Furthermore, OPI analysis was extended using current annotations
	provided by the Gene Ontology Consortium to identify 380 statistically
	significant clusters containing genes with expression patterns characteristic
	of various biological processes, cellular components, and molecular
	functions. Collectively, these results, available as part of a web-accessible
	OPI database (http://carrier.gnf.org/publications/Gametocyte), shed
	light on the components of molecular mechanisms underlying parasite
	sexual development and other areas of malarial parasite biology.},
  doi = {10.1016/j.molbiopara.2005.05.007},
  pdf = {../local/Young2005Plasmodium.pdf},
  file = {Young2005Plasmodium.pdf:local/Young2005Plasmodium.pdf:PDF},
  keywords = {plasmodium},
  pii = {S0166-6851(05)00162-3},
  pmid = {16005087},
  timestamp = {2006.04.13},
  url = {http://dx.doi.org/10.1016/j.molbiopara.2005.05.007}
}

@article{Young2005Using,
  author = {Young, J. A. and Winzeler, E. A.},
  title = {Using expression information to discover new drug and vaccine targets
	in the malaria parasite {Plasmodium} falciparum.},
  journal = {Pharmacogenomics},
  year = {2005},
  volume = {6},
  pages = {17--26},
  number = {1},
  month = {Jan},
  abstract = {The recent completion of the malaria parasite Plasmodium falciparum
	genome has opened the door for applying a variety of genomic-based
	systems biology approaches that complement existing gene-by-gene
	methods of investigation. Transcriptomic analyses of P. falciparum
	using DNA microarrays has allowed for the rapid elucidation of gene
	function, parasite drug response, and in vivo expression profiles,
	as well as general mechanisms guiding the parasite life cycle that
	are vital to disease pathogenesis. The results of these studies have
	identified promising novel gene targets for the development of new
	drug and vaccine therapies.},
  keywords = {plasmodium},
  pii = {PGS060105},
  pmid = {15723602},
  timestamp = {2006.04.13}
}

@article{Yu1996Lower,
  author = {Yu, B.},
  title = {Lower bounds on expected redundancy for nonparametric classes},
  journal = {{IEEE} Trans. Inform. Theory},
  year = {1996},
  volume = {42},
  pages = {272--275},
  number = {1},
  month = {Jan},
  abstract = {The article focuses on lower bound results on expected redundancy
	for universal coding of independent and identically distributed data
	on [0, 1] from parametric and nonparametric families. {A}fter reviewing
	existing lower bounds, we provide a new proof for minimax lower bounds
	on expected redundancy over nonparametric density classes. {T}his
	new proof is based on the calculation of a mutual information quantity,
	or it utilizes the relationship between redundancy and {S}hannon
	capacity. {I}t therefore unifies the minimax redundancy lower bound
	proofs in the parametric and nonparametric cases.},
  doi = {10.1109/18.481802},
  pdf = {../local/Yu1996Lower.pdf},
  file = {Yu1996Lower.pdf:local/Yu1996Lower.pdf:PDF},
  keywords = {information-theory},
  owner = {vert},
  url = {http://dx.doi.org/10.1109/18.481802}
}

@article{Yu2003Fine-grained,
  author = {Yu, C. S. and Wang, J. Y. and Yang, J. M. and Lyu, P. C. and Lin, C. J.
	and Hwang, J. K.},
  title = {Fine-grained protein fold assignment by support vector machines using
	generalized npeptide coding schemes and jury voting from multiple-parameter
	sets.},
  journal = {Proteins},
  year = {2003},
  volume = {50},
  pages = {531},
  number = {4},
  month = {6},
  abstract = {In the coarse-grained fold assignment of major protein classes, such
	as all-alpha, all-beta, alpha + beta, alpha/beta proteins, one can
	easily achieve high prediction accuracy from primary amino acid sequences.
	{H}owever, the fine-grained assignment of folds, such as those defined
	in the {S}tructural {C}lassification of {P}roteins ({SCOP}) database,
	presents a challenge due to the larger amount of folds available.
	{R}ecent study yielded reasonable prediction accuracy of 56.0\% on
	an independent set of 27 most populated folds. {I}n this communication,
	we apply the support vector machine ({SVM}) method, using a combination
	of protein descriptors based on the properties derived from the composition
	of n-peptide and jury voting, to the fine-grained fold prediction,
	and are able to achieve an overall prediction accuracy of 69.6\% on
	the same independent set-significantly higher than the previous results.
	{O}n 10-fold cross-validation, we obtained a prediction accuracy
	of 65.3\%. {O}ur results show that {SVM} coupled with suitable global
	sequence-coding schemes can significantly improve the fine-grained
	fold prediction. {O}ur approach should be useful in structure prediction
	and modeling.},
  doi = {10.1002/prot.10313},
  pdf = {../local/Yu2003Fine-grained.pdf},
  file = {Yu2003Fine-grained.pdf:local/Yu2003Fine-grained.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/prot.10313}
}

@article{Yu2005Classifying,
  author = {Yu, C. and Zavaljevski, N. and Stevens, F. J. and Yackovich, K. and
	Reifman, J.},
  title = {Classifying noisy protein sequence data: a case study of immunoglobulin
	light chains.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {i495--i501},
  number = {Supp 1},
  month = {Jun},
  abstract = {S{UMMARY}: {T}he classification of protein sequences obtained from
	patients with various immunoglobulin-related conformational diseases
	may provide insight into structural correlates of pathogenicity.
	{H}owever, clinical data are very sparse and, in the case of antibody-related
	proteins, the collected sequences have large variability with only
	a small subset of variations relevant to the protein pathogenicity
	(function). {O}n this basis, these sequences represent a model system
	for development of strategies to recognize the small subset of function-determining
	variations among the much larger number of primary structure diversifications
	introduced during evolution. {U}nder such conditions, most protein
	classification algorithms have limited accuracy. {T}o address this
	problem, we propose a support vector machine ({SVM})-based classifier
	that combines sequence and 3{D} structural averaging information.
	{E}ach amino acid in the sequence is represented by a set of six
	physicochemical properties: hydrophobicity, hydrophilicity, volume,
	surface area, bulkiness and refractivity. {E}ach position in the
	sequence is described by the properties of the amino acid at that
	position and the properties of its neighbors in 3{D} space or in
	the sequence. {A} structure template is selected to determine neighbors
	in 3{D} space and a window size is used to determine the neighbors
	in the sequence. {T}he test data consist of 209 proteins of human
	antibody immunoglobulin light chains, each represented by aligned
	sequences of 120 amino acids. {T}he methodology is applied to the
	classification of protein sequences collected from patients with
	and without amyloidosis, and indicates that the proposed modified
	classifiers are more robust to sequence variability than standard
	{SVM} classifiers, improving classification error between 5 and 25\%
	and sensitivity between 9 and 17\%. {T}he classification results
	might also suggest possible mechanisms for the propensity of immunoglobulin
	light chains to amyloid formation. {CONTACT}: cyu@bioanalysis.org.},
  doi = {10.1093/bioinformatics/bti1024},
  pdf = {../local/Yu2005Classifying.pdf},
  file = {Yu2005Classifying.pdf:local/Yu2005Classifying.pdf:PDF},
  keywords = {biosvm},
  pii = {21/suppl_1/i495},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1024}
}

@article{Yu2004Predicting,
  author = {Yu, C.-S. and Lin, C.-J. and Hwang, J.-K.},
  title = {Predicting subcellular localization of proteins for {Gram}-negative
	bacteria by support vector machines based on n-peptide compositions},
  journal = {Protein Sci.},
  year = {2004},
  volume = {13},
  pages = {1402--1406},
  number = {5},
  abstract = {Gram-negative bacteria have five major subcellular localization sites:
	the cytoplasm, the periplasm, the inner membrane, the outer membrane,
	and the extracellular space. {T}he subcellular location of a protein
	can provide valuable information about its function. {W}ith the rapid
	increase of sequenced genomic data, the need for an automated and
	accurate tool to predict subcellular localization becomes increasingly
	important. {W}e present an approach to predict subcellular localization
	for {G}ram-negative bacteria. {T}his method uses the support vector
	machines trained by multiple feature vectors based on n-peptide compositions.
	{F}or a standard data set comprising 1443 proteins, the overall prediction
	accuracy reaches 89\%, which, to the best of our knowledge, is the
	highest prediction rate ever reported. {O}ur prediction is 14\% higher
	than that of the recently developed multimodular {PSORT}-{B}. {B}ecause
	of its simplicity, this approach can be easily extended to other
	organisms and should be a useful tool for the high-throughput and
	large-scale analysis of proteomic and genomic data.},
  doi = {10.1110/ps.03479604},
  pdf = {../local/Yu2004Predicting.pdf},
  file = {Yu2004Predicting.pdf:local/Yu2004Predicting.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.proteinscience.org/cgi/content/abstract/13/5/1402}
}

@article{Yu2008High-quality,
  author = {Yu, H. and Braun, P. and Yildirim, M. A. and Lemmens, I. and Venkatesan,
	K. and Sahalie, J. and Hirozane-Kishikawa, T. and Gebreab, F. and
	Li, N. and Simonis, N. and Hao, T. and Rual, J.-F. and Dricot, A.
	and Vazquez, A. and Murray, R. R. and Simon, C. and Tardivo, L. and
	Tam, S. and Svrzikapa, N. and Fan, C. and de Smet, A.-S. and Motyl,
	A. and Hudson, M. E. and Park, J. and Xin, X. and Cusick, M. E. and
	Moore, T. and Boone, C. and Snyder, M. and Roth, F. R. and Barab{\'a}si,
	A.-L. and Tavernier, J. and Hill, D. E. and Vidal, M.},
  title = {High-quality binary protein interaction map of the yeast interactome
	network},
  journal = {Science},
  year = {2008},
  volume = {322},
  pages = {104--110},
  number = {5898},
  month = {Oct},
  abstract = {Current yeast interactome network maps contain several hundred molecular
	complexes with limited and somewhat controversial representation
	of direct binary interactions. We carried out a comparative quality
	assessment of current yeast interactome data sets, demonstrating
	that high-throughput yeast two-hybrid (Y2H) screening provides high-quality
	binary interaction information. Because a large fraction of the yeast
	binary interactome remains to be mapped, we developed an empirically
	controlled mapping framework to produce a ``second-generation'' high-quality,
	high-throughput Y2H data set covering approximately 20\% of all yeast
	binary interactions. Both Y2H and affinity purification followed
	by mass spectrometry (AP/MS) data are of equally high quality but
	of a fundamentally different and complementary nature, resulting
	in networks with different topological and biological properties.
	Compared to co-complex interactome models, this binary map is enriched
	for transient signaling interactions and intercomplex connections
	with a highly significant clustering between essential proteins.
	Rather than correlating with essentiality, protein connectivity correlates
	with genetic pleiotropy.},
  doi = {10.1126/science.1158684},
  pdf = {../local/Yu2008High-quality.pdf},
  file = {Yu2008High-quality.pdf:Yu2008High-quality.pdf:PDF},
  institution = {Center for Cancer Systems Biology (CCSB), Dana-Farber Cancer Institute,
	Boston, MA 02115, USA.},
  owner = {jp},
  pii = {1158684},
  pmid = {18719252},
  timestamp = {2009.02.13},
  url = {http://dx.doi.org/10.1126/science.1158684}
}

@article{Yu2004PEBL,
  author    = {Yu, H. and Han, J. and Chang, K. C.-C.},
  title     = {{PEBL}: Web Page Classification without Negative Examples},
  journal   = {IEEE Trans. Knowl. Data Eng.},
  year      = {2004},
  volume    = {16},
  pages     = {70--81},
  number    = {1},
  doi       = {10.1109/TKDE.2004.1264823},
  pdf       = {../local/Yu2004PEBL.pdf},
  file      = {Yu2004PEBL.pdf:Yu2004PEBL.pdf:PDF},
  keywords  = {PUlearning},
  owner     = {fantine},
  timestamp = {2009.06.09},
  url       = {http://dx.doi.org/10.1109/TKDE.2004.1264823}
}

@article{Yu2004integrated,
  author = {Yu, J. K. and Chen, Y. D. and Zheng, S.},
  title = {An integrated approach to the detection of colorectal cancer utilizing
	proteomics and bioinformatics},
  journal = {World J. Gastroenterol.},
  year = {2004},
  volume = {10},
  pages = {3127--3131},
  number = {21},
  abstract = {A{IM}: {T}o find new potential biomarkers and to establish patterns
	for early detection of colorectal cancer. {METHODS}: {O}ne hundred
	and eighty-two serum samples including 55 from colorectal cancer
	({CRC}) patients, 35 from colorectal adenoma ({CRA}) patients and
	92 from healthy persons ({HP}) were detected by surface-enhanced
	laser desorption/ionization mass spectrometry ({SELDI}-{MS}). {T}he
	data of spectra were analyzed by bioinformatics tools like artificial
	neural network ({ANN}) and support vector machine ({SVM}). {RESULTS}:
	{T}he diagnostic pattern combined with 7 potential biomarkers could
	differentiate {CRC} patients from {CRA} patients with a specificity
	of 83\%, sensitivity of 89\% and positive predictive value of 89\%.
	{T}he diagnostic pattern combined with 4 potential biomarkers could
	differentiate {CRC} patients from {HP} with a specificity of 92\%,
	sensitivity of 89\% and positive predictive value of 86\%. {CONCLUSION}:
	{T}he combination of {SELDI} with bioinformatics tools could help
	find new biomarkers and establish patterns with high sensitivity
	and specificity for the detection of {CRC}.},
  pdf = {../local/Yu2004integrated.pdf},
  file = {Yu2004integrated.pdf:local/Yu2004integrated.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert}
}

@article{Yu2007Pathway,
  author = {Yu, J. and Sieuwerts, A. and Zhang, Y. and Martens, J. and Smid,
	M. and Klijn, J. and Wang, Y. and Foekens, J.},
  title = {Pathway analysis of gene signatures predicting metastasis of node-negative
	primary breast cancer},
  journal = {BMC Cancer},
  year = {2007},
  volume = {7},
  pages = {182},
  number = {1},
  publisher = {BioMed Central Ltd}
}

@article{Yu2004Advances,
  author = {Yu, J. and Smith, V. A. and Wang, P. P. and Hartemink, A. J. and Jarvis,
	E. D.},
  title = {Advances to Bayesian network inference for generating causal networks
	from observational biological data.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {3594--3603},
  number = {18},
  month = {Dec},
  abstract = {MOTIVATION: Network inference algorithms are powerful computational
	tools for identifying putative causal interactions among variables
	from observational data. Bayesian network inference algorithms hold
	particular promise in that they can capture linear, non-linear, combinatorial,
	stochastic and other types of relationships among variables across
	multiple levels of biological organization. However, challenges remain
	when applying these algorithms to limited quantities of experimental
	data collected from biological systems. Here, we use a simulation
	approach to make advances in our dynamic Bayesian network (DBN) inference
	algorithm, especially in the context of limited quantities of biological
	data. RESULTS: We test a range of scoring metrics and search heuristics
	to find an effective algorithm configuration for evaluating our methodological
	advances. We also identify sampling intervals and levels of data
	discretization that allow the best recovery of the simulated networks.
	We develop a novel influence score for DBNs that attempts to estimate
	both the sign (activation or repression) and relative magnitude of
	interactions among variables. When faced with limited quantities
	of observational data, combining our influence score with moderate
	data interpolation reduces a significant portion of false positive
	interactions in the recovered networks. Together, our advances allow
	DBN inference algorithms to be more effective in recovering biological
	networks from experimentally collected data. AVAILABILITY: Source
	code and simulated data are available upon request. SUPPLEMENTARY
	INFORMATION: http://www.jarvislab.net/Bioinformatics/BNAdvances/},
  doi = {10.1093/bioinformatics/bth448},
  keywords = {Algorithms; Bayes Theorem; Computer Simulation; Gene Expression Profiling;
	Gene Expression Regulation; Models, Genetic; Models, Statistical;
	Oligonucleotide Array Sequence Analysis; Signal Transduction; Software},
  owner = {fantine},
  pii = {bth448},
  pmid = {15284094},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth448}
}

@article{Yu2005Ovarian,
  author = {Yu, J. S. and Ongarello, S. and Fiedler, R. and Chen, X. W. and
	Toffolo, G. and Cobelli, C. and Trajanoski, Z.},
  title = {Ovarian cancer identification based on dimensionality reduction for
	high-throughput mass spectrometry data.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2200--2209},
  number = {10},
  month = {May},
  abstract = {M{OTIVATION}: {H}igh-throughput and high-resolution mass spectrometry
	instruments are increasingly used for disease classification and
	therapeutic guidance. {H}owever, the analysis of immense amount of
	data poses considerable challenges. {W}e have therefore developed
	a novel method for dimensionality reduction and tested on a published
	ovarian high-resolution {SELDI}-{TOF} dataset. {RESULTS}: {W}e have
	developed a four-step strategy for data preprocessing based on: (1)
	binning, (2) {K}olmogorov-{S}mirnov test, (3) restriction of coefficient
	of variation and (4) wavelet analysis. {S}ubsequently, support vector
	machines were used for classification. {T}he developed method achieves
	an average sensitivity of 97.38\% (sd = 0.0125) and an average specificity
	of 93.30\% (sd = 0.0174) in 1000 independent k-fold cross-validations,
	where k = 2, ..., 10. {AVAILABILITY}: {T}he software is available
	for academic and non-commercial institutions.},
  doi = {10.1093/bioinformatics/bti370},
  pdf = {../local/Yu2005Ovarian.pdf},
  file = {Yu2005Ovarian.pdf:local/Yu2005Ovarian.pdf:PDF},
  keywords = {biosvm proteomics},
  pii = {bti370},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti370}
}

@article{Yu2005integrated,
  author = {Yu, J.-k. and Zheng, S. and Tang, Y. and Li, L.},
  title = {An integrated approach utilizing proteomics and bioinformatics to
	detect ovarian cancer.},
  journal = {J Zhejiang Univ Sci B},
  year = {2005},
  volume = {6},
  pages = {227--231},
  number = {4},
  month = {Apr},
  abstract = {O{BJECTIVE}: {T}o find new potential biomarkers and establish the
	patterns for the detection of ovarian cancer. {METHODS}: {S}ixty
	one serum samples including 32 ovarian cancer patients and 29 healthy
	people were detected by surface-enhanced laser desorption/ionization
	mass spectrometry ({SELDI}-{MS}). {T}he protein fingerprint data
	were analyzed by bioinformatics tools. {T}en folds cross-validation
	support vector machine ({SVM}) was used to establish the diagnostic
	pattern. {RESULTS}: {F}ive potential biomarkers were found (2085
	{D}a, 5881 {D}a, 7564 {D}a, 9422 {D}a, 6044 {D}a), combined with
	which the diagnostic pattern separated the ovarian cancer from the
	healthy samples with a sensitivity of 96.7\%, a specificity of 96.7\%
	and a positive predictive value of 96.7\%. {CONCLUSIONS}: {T}he combination
	of {SELDI} with bioinformatics tools could find new biomarkers and
	establish patterns with high sensitivity and specificity for the
	detection of ovarian cancer.},
  doi = {10.1631/jzus.2005.B0227},
  pdf = {../local/Yu2005integrated.pdf},
  file = {Yu2005integrated.pdf:local/Yu2005integrated.pdf:PDF},
  keywords = {biosvm},
  url = {http://dx.doi.org/10.1631/jzus.2005.B0227}
}

@article{Yu2002Methods,
  author = {Yu, Kun and Petrovsky, Nikolai and Sch{\"o}nbach, Christian and
	Koh, Judice Y L and Brusic, Vladimir},
  title = {Methods for prediction of peptide binding to {MHC} molecules: a comparative
	study.},
  journal = {Mol Med},
  year = {2002},
  volume = {8},
  pages = {137--148},
  number = {3},
  month = {Mar},
  abstract = {BACKGROUND: A variety of methods for prediction of peptide binding
	to major histocompatibility complex (MHC) have been proposed. These
	methods are based on binding motifs, binding matrices, hidden Markov
	models (HMM), or artificial neural networks (ANN). There has been
	little prior work on the comparative analysis of these methods. MATERIALS
	AND METHODS: We performed a comparison of the performance of six
	methods applied to the prediction of two human MHC class I molecules,
	including binding matrices and motifs, ANNs, and HMMs. RESULTS: The
	selection of the optimal prediction method depends on the amount
	of available data (the number of peptides of known binding affinity
	to the MHC molecule of interest), the biases in the data set and
	the intended purpose of the prediction (screening of a single protein
	versus mass screening). When little or no peptide data are available,
	binding motifs are the most useful alternative to random guessing
	or use of a complete overlapping set of peptides for selection of
	candidate binders. As the number of known peptide binders increases,
	binding matrices and HMM become more useful predictors. ANN and HMM
	are the predictive methods of choice for MHC alleles with more than
	100 known binding peptides. CONCLUSION: The ability of bioinformatic
	methods to reliably predict MHC binding peptides, and thereby potential
	T-cell epitopes, has major implications for clinical immunology,
	particularly in the area of vaccine design.},
  keywords = {Amino Acid Motifs; Computational Biology; Histocompatibility Antigens
	Class I; Humans; Models, Molecular; Peptides; Protein Binding},
  owner = {laurent},
  pii = {S152836580230137X},
  pmid = {12142545},
  timestamp = {2007.01.27}
}

@inproceedings{Yu2005Learning,
  author = {Yu, Kai and Tresp, Volker and Schwaighofer, Anton},
  title = {Learning Gaussian processes from multiple tasks},
  booktitle = {ICML '05: Proceedings of the 22nd international conference on Machine
	learning},
  year = {2005},
  pages = {1012--1019},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1102351.1102479},
  isbn = {1-59593-180-5},
  location = {Bonn, Germany}
}

@inproceedings{Yu2008Stable,
  author = {Yu, L. and Ding, C. and Loscalzo, S.},
  title = {Stable feature selection via dense feature groups},
  booktitle = {Proceedings of the 14th ACM SIGKDD international conference on Knowledge
	discovery and data mining},
  year = {2008},
  pages = {803--811},
  organization = {ACM}
}

@article{Yu2004Efficient,
  author = {Yu, L. and Liu, H.},
  title = {Efficient feature selection via analysis of relevance and redundancy},
  journal = {Journal of Machine Learning Research},
  year = {2004},
  volume = {5},
  pages = {1205--1224},
  publisher = {JMLR.org}
}

@article{Yu2010L2-norm,
  author = {Yu, S. and Falck, T. and Daemen, A. and Tranchevent, L.-C. and Suykens,
	J. A. K. and De Moor, B. and Moreau, Y.},
  title = {L2-norm multiple kernel learning and its application to biomedical
	data fusion.},
  journal = {BMC Bioinformatics},
  year = {2010},
  volume = {11},
  pages = {309},
  abstract = {BACKGROUND: This paper introduces the notion of optimizing different
	norms in the dual problem of support vector machines with multiple
	kernels. The selection of norms yields different extensions of multiple
	kernel learning (MKL) such as L(infinity), L1, and L2 MKL. In particular,
	L2 MKL is a novel method that leads to non-sparse optimal kernel
	coefficients, which is different from the sparse kernel coefficients
	optimized by the existing L(infinity) MKL method. In real biomedical
	applications, L2 MKL may have more advantages over sparse integration
	method for thoroughly combining complementary information in heterogeneous
	data sources. RESULTS: We provide a theoretical analysis of the relationship
	between the L2 optimization of kernels in the dual problem with the
	L2 coefficient regularization in the primal problem. Understanding
	the dual L2 problem grants a unified view on MKL and enables us to
	extend the L2 method to a wide range of machine learning problems.
	We implement L2 MKL for ranking and classification problems and compare
	its performance with the sparse L(infinity) and the averaging L1
	MKL methods. The experiments are carried out on six real biomedical
	data sets and two large scale UCI data sets. L2 MKL yields better
	performance on most of the benchmark data sets. In particular, we
	propose a novel L2 MKL least squares support vector machine (LSSVM)
	algorithm, which is shown to be an efficient and promising classifier
	for large scale data sets processing. CONCLUSIONS: This paper extends
	the statistical framework of genomic data fusion based on MKL. Allowing
	non-sparse weights on the data sources is an attractive option in
	settings where we believe most data sources to be relevant to the
	problem at hand and want to avoid a ``winner-takes-all'' effect seen
	in L(infinity) MKL, which can be detrimental to the performance in
	prospective studies. The notion of optimizing L2 kernels can be straightforwardly
	extended to ranking, classification, regression, and clustering algorithms.
	To tackle the computational burden of MKL, this paper proposes several
	novel LSSVM based MKL algorithms. Systematic comparison on real data
	sets shows that LSSVM MKL has comparable performance as the conventional
	SVM MKL algorithms. Moreover, large scale numerical experiments indicate
	that when cast as semi-infinite programming, LSSVM MKL can be solved
	more efficiently than SVM MKL. AVAILABILITY: The MATLAB code of algorithms
	implemented in this paper is downloadable from http://homes.esat.kuleuven.be/~sistawww/bioi/syu/l2lssvm.html.},
  doi = {10.1186/1471-2105-11-309},
  institution = {Bioinformatics Group, Department of Electrical Engineering, Katholieke
	Universiteit Leuven, Kasteelpark Arenberg 10, Heverlee B-3001, Belgium.
	shee.yu@gmail.com},
  owner = {mordelet},
  pii = {1471-2105-11-309},
  pmid = {20529363},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1186/1471-2105-11-309}
}

@inproceedings{Yu2007Robust,
  author = {Yu, Shipeng and Tresp, Volker and Yu, Kai},
  title = {Robust multi-task learning with t-processes},
  booktitle = {ICML '07: Proceedings of the 24th international conference on Machine
	learning},
  year = {2007},
  pages = {1103--1110},
  address = {New York, NY, USA},
  publisher = {ACM},
  doi = {10.1145/1273496.1273635},
  isbn = {978-1-59593-793-3},
  location = {Corvallis, Oregon}
}

@article{Yuan2007Model,
  author = {Yuan, Ming and Lin, Yi},
  title = {Model selection and estimation in the Gaussian graphical model},
  journal = {Biometrika},
  year = {2007},
  volume = {94},
  pages = {19--35},
  number = {1},
  pdf = {../local/Yuan2007Model.pdf},
  file = {Yuan2007Model.pdf:Yuan2007Model.pdf:PDF},
  url = {http://ideas.repec.org/a/oup/biomet/v94y2007i1p19-35.html}
}

@article{Yuan2007On,
  author = {Yuan, Ming and Lin, Yi},
  title = {On the non-negative garrotte estimator},
  journal = {Journal of the Royal Statistical Society Series B},
  year = {2007},
  volume = {69},
  pages = {143--161},
  number = {2},
  abstract = {We study the non-negative garrotte estimator from three different
	aspects: consistency, computation and flexibility. We argue that
	the non-negative garrotte is a general procedure that can be used
	in combination with estimators other than the original least squares
	estimator as in its original form. In particular, we consider using
	the lasso, the elastic net and ridge regression along with ordinary
	least squares as the initial estimate in the non-negative garrotte.
	We prove that the non-negative garrotte has the nice property that,
	with probability tending to 1, the solution path contains an estimate
	that correctly identifies the set of important variables and is consistent
	for the coefficients of the important variables, whereas such a property
	may not be valid for the initial estimators. In general, we show
	that the non-negative garrotte can turn a consistent estimate into
	an estimate that is not only consistent in terms of estimation but
	also in terms of variable selection. We also show that the non-negative
	garrotte has a piecewise linear solution path. Using this fact, we
	propose an efficient algorithm for computing the whole solution path
	for the non-negative garrotte. Simulations and a real example demonstrate
	that the non-negative garrotte is very effective in improving on
	the initial estimator in terms of variable selection and estimation
	accuracy. Copyright 2007 Royal Statistical Society.},
  url = {http://ideas.repec.org/a/bla/jorssb/v69y2007i2p143-161.html}
}

@article{Yuan2006Model,
  author    = {Yuan, M. and Lin, Y.},
  title     = {Model selection and estimation in regression with grouped variables},
  journal   = {J. R. Stat. Soc. Ser. B},
  year      = {2006},
  volume    = {68},
  pages     = {49--67},
  number    = {1},
  pdf       = {../local/Yuan2006Model.pdf},
  file      = {Yuan2006Model.pdf:Yuan2006Model.pdf:PDF},
  owner     = {jp},
  timestamp = {2008.12.05}
}

@article{Yuan2007Predicting,
  author    = {Yuan, Y. and Guo, L. and Shen, L. and Liu, J. S.},
  title     = {Predicting Gene Expression from Sequence: A Reexamination},
  journal   = {PLoS Comput. Biol.},
  year      = {2007},
  volume    = {3},
  pages     = {e243},
  number    = {11},
  doi       = {10.1371/journal.pcbi.0030243},
  pdf       = {../local/Yuan2007Predicting.pdf},
  file      = {Yuan2007Predicting.pdf:Yuan2007Predicting.pdf:PDF},
  owner     = {jp},
  timestamp = {2009.01.05},
  url       = {http://dx.doi.org/10.1371/journal.pcbi.0030243}
}

@article{Yuan2002Prediction,
  author = {Yuan, Z. and Burrage, K. and Mattick, J. S.},
  title = {Prediction of protein solvent accessibility using support vector
	machines},
  journal = {Proteins},
  year = {2002},
  volume = {48},
  pages = {566--570},
  number = {3},
  abstract = {A {S}upport {V}ector {M}achine learning system has been trained to
	predict protein solvent accessibility from the primary structure.
	{D}ifferent kernel functions and sliding window sizes have been explored
	to find how they affect the prediction performance. {U}sing a cut-off
	threshold of 15\% that splits the dataset evenly (an equal number
	of exposed and buried residues), this method was able to achieve
	a prediction accuracy of 70.1\% for single sequence input and 73.9\%
	for multiple alignment sequence input, respectively. {T}he prediction
	of three and more states of solvent accessibility was also studied
	and compared with other methods. {T}he prediction accuracies are
	better than, or comparable to, those obtained by other methods such
	as neural networks, {B}ayesian classification, multiple linear regression,
	and information theory. {I}n addition, our results further suggest
	that this system may be combined with other prediction methods to
	achieve more reliable results, and that the {S}upport {V}ector {M}achine
	method is a very useful tool for biological sequence analysis.},
  doi = {10.1002/prot.10176},
  pdf = {../local/Yuan2002Prediction.pdf},
  file = {Yuan2002Prediction.pdf:local/Yuan2002Prediction.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/prot.10176}
}

@article{Yuan2004SVMtm,
  author = {Yuan, Z. and Mattick, J. S. and Teasdale, R. D.},
  title = {{{SVM}tm}: support vector machines to predict transmembrane segments.},
  journal = {J. {C}omput. {C}hem.},
  year = {2004},
  volume = {25},
  pages = {632--636},
  number = {5},
  month = apr,
  abstract = {A new method has been developed for prediction of transmembrane helices
	using support vector machines. {D}ifferent coding schemes of protein
	sequences were explored, and their performances were assessed by
	crossvalidation tests. {T}he best performance method can predict
	the transmembrane helices with sensitivity of 93.4\% and precision
	of 92.0\%. {F}or each predicted transmembrane segment, a score is
	given to show the strength of transmembrane signal and the prediction
	reliability. {I}n particular, this method can distinguish transmembrane
	proteins from soluble proteins with an accuracy of approximately
	99\%. {T}his method can be used to complement current transmembrane
	helix prediction methods and can be used for consensus analysis of
	entire proteomes. {T}he predictor is located at http://genet.imb.uq.edu.au/predictors/{SVM}tm.},
  doi = {10.1002/jcc.10411},
  pdf = {../local/Yuan2004SVMtm.pdf},
  file = {Yuan2004SVMtm.pdf:local/Yuan2004SVMtm.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1002/jcc.10411}
}

@article{Zheng2006Robust,
  author = {Zheng, Yefeng and Doermann, D.},
  title = {Robust point matching for nonrigid shapes by preserving local neighborhood
	structures},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = {2006},
  volume = {28},
  pages = {643--649},
  number = {4},
  month = apr,
  doi = {10.1109/TPAMI.2006.81},
  owner = {michael},
  timestamp = {2009.11.17}
}

@article{Zaki2005Application,
  author = {Zaki, N. M. and Deris, S. and Illias, R.},
  title = {Application of string kernels in protein sequence classification.},
  journal = {Appl. {B}ioinformatics},
  year = {2005},
  volume = {4},
  pages = {45--52},
  number = {1},
  abstract = {I{NTRODUCTION}: {T}he production of biological information has become
	much greater than its consumption. {T}he key issue now is how to
	organise and manage the huge amount of novel information to facilitate
	access to this useful and important biological information. {O}ne
	core problem in classifying biological information is the annotation
	of new protein sequences with structural and functional features.
	{METHOD}: {T}his article introduces the application of string kernels
	in classifying protein sequences into homogeneous families. {A} string
	kernel approach used in conjunction with support vector machines
	has been shown to achieve good performance in text categorisation
	tasks. {W}e evaluated and analysed the performance of this approach,
	and we present experimental results on three selected families from
	the {SCOP} ({S}tructural {C}lassification of {P}roteins) database.
	{W}e then compared the overall performance of this method with the
	existing protein classification methods on benchmark {SCOP} datasets.
	{RESULTS}: {A}ccording to the {F}1 performance measure and the rate
	of false positive ({RFP}) measure, the string kernel method performs
	well in classifying protein sequences. {T}he method outperformed
	all the generative-based methods and is comparable with the {SVM}-{F}isher
	method. {DISCUSSION}: {A}lthough the string kernel approach makes
	no use of prior biological knowledge, it still captures sufficient
	biological information to enable it to outperform some of the state-of-the-art
	methods.},
  keywords = {biosvm},
  pii = {415}
}

@article{Zamore2000RNAi,
  author = {Zamore, P. D. and Tuschl, T. and Sharp, P. A. and Bartel, D. P.},
  title = {R{NA}i: double-stranded {RNA} directs the {ATP}-dependent cleavage
	of m{RNA} at 21 to 23 nucleotide intervals.},
  journal = {Cell},
  year = {2000},
  volume = {101},
  pages = {25--33},
  number = {1},
  month = mar,
  abstract = {Double-stranded {RNA} (ds{RNA}) directs the sequence-specific degradation
	of m{RNA} through a process known as {RNA} interference ({RNA}i).
	{U}sing a recently developed {D}rosophila in vitro system, we examined
	the molecular mechanism underlying {RNA}i. {W}e find that {RNA}i
	is {ATP} dependent yet uncoupled from m{RNA} translation. {D}uring
	the {RNA}i reaction, both strands of the ds{RNA} are processed to
	{RNA} segments 21-23 nucleotides in length. {P}rocessing of the ds{RNA}
	to the small {RNA} fragments does not require the targeted m{RNA}.
	{T}he m{RNA} is cleaved only within the region of identity with the
	ds{RNA}. {C}leavage occurs at sites 21-23 nucleotides apart, the
	same interval observed for the ds{RNA} itself, suggesting that the
	21-23 nucleotide fragments from the ds{RNA} are guiding m{RNA} cleavage.},
  doi = {10.1016/S0092-8674(00)80620-0},
  keywords = {sirna},
  pii = {S0092-8674(00)80620-0},
  url = {http://dx.doi.org/10.1016/S0092-8674(00)80620-0}
}

@article{Zanella2010High,
  author = {Fabian Zanella and James B Lorens and Wolfgang Link},
  title = {High content screening: seeing is believing.},
  journal = {Trends Biotechnol},
  year = {2010},
  volume = {28},
  pages = {237--245},
  number = {5},
  month = may,
  abstract = {High content screening (HCS) combines the efficiency of high-throughput
	techniques with the ability of cellular imaging to collect quantitative
	data from complex biological systems. HCS technology is integrated
	into all aspects of contemporary drug discovery, including primary
	compound screening, post-primary screening capable of supporting
	structure-activity relationships, and early evaluation of ADME (absorption,
	distribution, metabolism and excretion)/toxicity properties and complex
	multivariate drug profiling. Recently, high content approaches have
	been used extensively to interrogate stem cell biology. Despite these
	dramatic advances, a number of significant challenges remain related
	to the use of more biology- and disease-relevant cell systems, the
	development of informative reagents to measure and manipulate cellular
	events, and the integration of data management and informatics.},
  doi = {10.1016/j.tibtech.2010.02.005},
  institution = {Experimental Therapeutics Program, Centro Nacional de Investigaciones
	Oncologicas, Melchor Fernandez Almagro 3, 28029 Madrid, Spain.},
  keywords = {Animals; Drug Evaluation, Preclinical, instrumentation/methods; High-Throughput
	Screening Assays, instrumentation/methods; Humans; Stem Cells, drug
	effects; Structure-Activity Relationship},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {S0167-7799(10)00035-1},
  pmid = {20346526},
  timestamp = {2010.07.26},
  url = {http://dx.doi.org/10.1016/j.tibtech.2010.02.005}
}

@article{Zangwill2004Heidelberg,
  author = {Linda M Zangwill and Kwokleung Chan and Christopher Bowd and Jicuang
	Hao and Te-Won Lee and Robert N Weinreb and Terrence J Sejnowski
	and Michael H Goldbaum},
  title = {Heidelberg retina tomograph measurements of the optic disc and parapapillary
	retina for detecting glaucoma analyzed by machine learning classifiers.},
  journal = {Invest {O}phthalmol {V}is {S}ci},
  year = {2004},
  volume = {45},
  pages = {3144--3151},
  number = {9},
  month = sep,
  abstract = {P{URPOSE}: {T}o determine whether topographical measurements of the
	parapapillary region analyzed by machine learning classifiers can
	detect early to moderate glaucoma better than similarly processed
	measurements obtained within the disc margin and to improve methods
	for optimization of machine learning classifier feature selection.
	{METHODS}: {O}ne eye of each of 95 patients with early to moderate
	glaucomatous visual field damage and of each of 135 normal subjects
	older than 40 years participating in the longitudinal {D}iagnostic
	{I}nnovations in {G}laucoma {S}tudy ({DIGS}) were included. {H}eidelberg
	{R}etina {T}omograph ({HRT}; {H}eidelberg {E}ngineering, {D}ossenheim,
	{G}ermany) mean height contour was measured in 36 equal sectors,
	both along the disc margin and in the parapapillary region (at a
	mean contour line radius of 1.7 mm). {E}ach sector was evaluated
	individually and in combination with other sectors. {G}aussian support
	vector machine ({SVM}) learning classifiers were used to interpret
	{HRT} sector measurements along the disc margin and in the parapapillary
	region, to differentiate between eyes with normal and glaucomatous
	visual fields and to compare the results with global and regional
	{HRT} parameter measurements. {T}he area under the receiver operating
	characteristic ({ROC}) curve was used to measure diagnostic performance
	of the {HRT} parameters and to evaluate the cross-validation strategies
	and forward selection and backward elimination optimization techniques
	that were used to generate the reduced feature sets. {RESULTS}: {T}he
	area under the {ROC} curve for mean height contour of the 36 sectors
	along the disc margin was larger than that for the mean height contour
	in the parapapillary region (0.97 and 0.85, respectively). {O}f the
	36 individual sectors along the disc margin, those in the inferior
	region between 240 degrees and 300 degrees, had the largest area
	under the {ROC} curve (0.85-0.91). {W}ith {SVM} {G}aussian techniques,
	the regional parameters showed the best ability to discriminate between
	normal eyes and eyes with glaucomatous visual field damage, followed
	by the global parameters, mean height contour measures along the
	disc margin, and mean height contour measures in the parapapillary
	region. {T}he area under the {ROC} curve was 0.98, 0.94, 0.93, and
	0.85, respectively. {C}ross-validation and optimization techniques
	demonstrated that good discrimination (99\% of peak area under the
	{ROC} curve) can be obtained with a reduced number of {HRT} parameters.
	{CONCLUSIONS}: {M}ean height contour measurements along the disc
	margin discriminated between normal and glaucomatous eyes better
	than measurements obtained in the parapapillary region.},
  doi = {10.1167/iovs.04-0202},
  pdf = {../local/Zangwill2004Heidelberg.pdf},
  file = {Zangwill2004Heidelberg.pdf:local/Zangwill2004Heidelberg.pdf:PDF},
  pii = {45/9/3144},
  url = {http://dx.doi.org/10.1167/iovs.04-0202}
}

@inproceedings{Zaslavskiy2008path,
  author = {Zaslavskiy, M. and Bach, F. and Vert, J.-P.},
  title = {A path following algorithm for graph matching},
  booktitle = {Image and Signal Processing, Proceedings of the 3rd International
	Conference, ICISP 2008},
  year = {2008},
  editor = {Elmoataz, A. and Lezoray, O. and Nouboud, F. and Mammass, D.},
  volume = {5099},
  series = {Lecture Notes in Computer Science},
  pages = {329--337},
  publisher = {Springer Berlin / Heidelberg},
  abstract = {We propose a convex-concave programming approach for the labelled
	weighted graph matching problem. The convex-concave programming formulation
	is obtained by rewriting the graph matching problem as a least-square
	problem on the set of permutation matrices and relaxing it to two
	different optimization problems: a quadratic convex and a quadratic
	concave optimization problem on the set of doubly stochastic matrices.
	The concave relaxation has the same global minimum as the initial
	graph matching problem, but the search for its global minimum is
	also a complex combinatorial problem. We therefore construct an approximation
	of the concave problem solution by following a solution path of the
	convex-concave problem obtained by linear interpolation of the convex
	and concave formulations, starting from the convex relaxation. The
	algorithm is compared with some of the best performing graph matching
	methods on three datasets: simulated graphs, QAPLib and handwritten
	Chinese characters.},
  doi = {10.1007/978-3-540-69905-7_38},
  owner = {jp},
  timestamp = {2008.10.03},
  url = {http://dx.doi.org/10.1007/978-3-540-69905-7_38}
}

@misc{Zaslavskiy2008GraphM,
  author = {Zaslavskiy, M. and Bach, F. and Vert, J.-P.},
  title = {{GRAPHM}: Graph matching package},
  year = {2008},
  note = {Available at \texttt{http://cbio.ensmp.fr/graphm}},
  owner = {michael},
  timestamp = {2008.10.02},
  url = {http://cbio.ensmp.fr/graphm/}
}

@inproceedings{Zaslavskiy2010Many-to-Many,
  author = {Zaslavskiy, M. and Bach, F. and Vert, J.-P.},
  title = {Many-to-Many Graph Matching: A Continuous Relaxation Approach},
  booktitle = {Machine Learning and Knowledge Discovery in Databases},
  year = {2010},
  editor = {Balc{\'a}zar, J. and Bonchi, F. and Gionis, A. and Sebag, M.},
  volume = {6323},
  series = {Lecture Notes in Computer Science},
  pages = {515--530},
  publisher = {Springer Berlin / Heidelberg},
  doi = {10.1007/978-3-642-15939-8_33},
  owner = {jp},
  timestamp = {2010.09.22},
  url = {http://dx.doi.org/10.1007/978-3-642-15939-8_33}
}

@article{Zaslavskiy2009Path,
  author = {Zaslavskiy, M. and Bach, F. and Vert, J.-P.},
  title = {A Path Following Algorithm for the Graph Matching Problem},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  volume = {31},
  number = {12},
  pages = {2227--2242},
  year = {2009},
  doi = {10.1109/TPAMI.2008.245},
  url = {http://dx.doi.org/10.1109/TPAMI.2008.245},
  abstract = {We propose a convex-concave programming approach for the labeled weighted
	graph matching problem. The convex-concave programming formulation
	is obtained by rewriting the weighted graph matching problem as a
	least-square problem on the set of permutation matrices and relaxing
	it to two different optimization problems: a quadratic convex and
	a quadratic concave optimization problem on the set of doubly stochastic
	matrices. The concave relaxation has the same global minimum as the
	initial graph matching problem, but the search for its global minimum
	is also a hard combinatorial problem. We, therefore, construct an
	approximation of the concave problem solution by following a solution
	path of a convex-concave problem obtained by linear interpolation
	of the convex and concave formulations, starting from the convex
	relaxation. This method allows to easily integrate the information
	on graph label similarities into the optimization problem, and therefore,
	perform labeled weighted graph matching. The algorithm is compared
	with some of the best performing graph matching methods on four data
	sets: simulated graphs, QAPLib, retina vessel images, and handwritten
	Chinese characters. In all cases, the results are competitive with
	the state of the art.},
  file = {Zaslavskiy2009Path.pdf:Zaslavskiy2009Path.pdf:PDF},
  pdf = {../local/Zaslavskiy2009Path.pdf},
  timestamp = {2009.10.22},
  owner = {jp}
}

@techreport{Zaslavskiy2008patha,
  author = {Zaslavskiy, M. and Bach, F. and Vert, J.-P.},
  title = {A path following algorithm for the graph matching problem},
  institution = {HAL},
  year = {2008},
  number = {00232851},
  note = {To appear in IEEE Trans. Pattern Anal. Mach. Intell.},
  abstract = {We propose a convex-concave programming approach for the labeled weighted
	graph matching problem. The convex-concave programming formulation
	is obtained by rewriting the weighted graph matching problem as a
	least-square problem on the set of permutation matrices and relaxing
	it to two different optimization problems: a quadratic convex and
	a quadratic concave optimization problem on the set of doubly stochastic
	matrices. The concave relaxation has the same global minimum as the
	initial graph matching problem, but the search for its global minimum
	is also a hard combinatorial problem. We therefore construct an approximation
	of the concave problem solution by following a solution path of a
	convex-concave problem obtained by linear interpolation of the convex
	and concave formulations, starting from the convex relaxation. This
	method allows to easily integrate the information on graph label
	similarities into the optimization problem, and therefore to perform
	labeled weighted graph matching. The algorithm is compared with some
	of the best performing graph matching methods on four datasets: simulated
	graphs, QAPLib, retina vessel images and handwritten Chinese characters.
	In all cases, the results are competitive with the state-of-the-art.},
  timestamp = {2008.07.29},
  url = {http://hal.archives-ouvertes.fr/hal-00232851}
}

@inproceedings{Zaslavskiy2009Phrase,
  author = {Zaslavskiy, M. and Dymetman, M. and Cancedda, N.},
  title = {Phrase-Based Statistical Machine Translation as a Traveling Salesman
	Problem},
  booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of
	the ACL and the 4th International Joint Conference on Natural Language
	Processing of the AFNLP},
  year = {2009},
  pages = {333--341},
  address = {Suntec, Singapore},
  month = aug,
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/P/P09/P09-1038}
}

@article{Zaslavskiy2009Global,
  author = {Zaslavskiy, M. and Bach, F. and Vert, J.-P.},
  title = {Global alignment of protein-protein interaction networks by graph
	matching methods},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  pages = {i259--i267},
  number = {12},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  doi = {10.1093/bioinformatics/btp196},
  ee = {http://dx.doi.org/10.1093/bioinformatics/btp196},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp196}
}

@article{Zavaljevski2002Support,
  author = {Zavaljevski, N. and Stevens, F. J. and Reifman, J.},
  title = {Support vector machines with selective kernel scaling for protein
	classification and identification of key amino acid positions},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {689--696},
  number = {5},
  abstract = {Motivation: {D}ata that characterize primary and tertiary structures
	of proteins are now accumulating at a rapid and accelerating rate
	and require automated computational tools to extract critical information
	relating amino acid changes with the spectrum of functionally attributes
	exhibited by a protein. {W}e propose that immunoglobulin-type beta-domains,
	which are found in approximate 400 functionally distinct forms in
	humans alone, provide the immense genetic variation within limited
	conformational changes that might facilitate the development of new
	computational tools. {A}s an initial step, we describe here an approach
	based on {S}upport {V}ector {M}achine ({SVM}) technology to identify
	amino acid variations that contribute to the functional attribute
	of pathological self-assembly by some human antibody light chains
	produced during plasma cell diseases. {R}esults: {W}e demonstrate
	that {SVM}s with selective kernel scaling are an effective tool in
	discriminating between benign and pathologic human immunoglobulin
	light chains. {I}nitial results compare favorably against manual
	classification performed by experts and indicate the capability of
	{SVM}s to capture the underlying structure of the data. {T}he data
	set consists of 70 proteins of human antibody 1 light chains, each
	represented by aligned sequences of 120 amino acids. {W}e perform
	feature selection based on a first-order adaptive scaling algorithm,
	which confirms the importance of changes in certain amino acid positions
	and identifies other positions that are key in the characterization
	of protein function.},
  pdf = {../local/zava02.pdf},
  file = {zava02.pdf:local/zava02.pdf:PDF},
  keywords = {biosvm},
  subject = {biokernel},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/18/5/689}
}

@article{Zellner1962An,
  author = {Zellner, Arnold},
  title = {An Efficient Method of Estimating Seemingly Unrelated Regressions
	and Tests for Aggregation Bias},
  journal = {J. Am. Stat. Assoc.},
  year = {1962},
  volume = {57},
  pages = {348--368},
  number = {298},
  abstract = {In this paper a method of estimating the parameters of a set of regression
	equations is reported which involves application of Aitken's generalized
	least-squares [1] to the whole system of equations. Under conditions
	generally encountered in practice, it is found that the regression
	coefficient estimators so obtained are at least asymptotically more
	efficient than those obtained by an equation-by-equation application
	of least squares. This gain in efficiency can be quite large if "independent"
	variables in different equations are not highly correlated and if
	disturbance terms in different equations are highly correlated. Further,
	tests of the hypothesis that all regression equation coefficient
	vectors are equal, based on "micro" and "macro" data, are described.
	If this hypothesis is accepted, there will be no aggregation bias.
	Finally, the estimation procedure and the "micro-test" for aggregation
	bias are applied in the analysis of annual investment data, 1935-1954,
	for two firms.},
  citeulike-article-id = {1211699},
  citeulike-linkout-0 = {http://dx.doi.org/10.2307/2281644},
  citeulike-linkout-1 = {http://www.jstor.org/stable/2281644},
  doi = {10.2307/2281644},
  keywords = {480-3, econometrics, sur},
  posted-at = {2007-04-06 06:07:30},
  priority = {2},
  url = {http://dx.doi.org/10.2307/2281644}
}

@article{Zernov2003Drug,
  author = {V. V. Zernov and K. V. Balakin and A. A. Ivaschenko and N. P. Savchuk
	and I. V. Pletnev},
  title = {Drug discovery using support vector machines. {T}he case studies
	of drug-likeness, agrochemical-likeness, and enzyme inhibition predictions.},
  journal = {J {C}hem {I}nf {C}omput {S}ci},
  year = {2003},
  volume = {43},
  pages = {2048--2056},
  number = {6},
  abstract = {Support {V}ector {M}achines ({SVM}) is a powerful classification and
	regression tool that is becoming increasingly popular in various
	machine learning applications. {W}e tested the ability of {SVM},
	in comparison with well-known neural network techniques, to predict
	drug-likeness and agrochemical-likeness for large compound collections.
	{F}or both kinds of data, {SVM} outperforms various neural networks
	using the same set of descriptors. {W}e also used {SVM} for estimating
	the activity of {C}arbonic {A}nhydrase {II} ({CA} {II}) enzyme inhibitors
	and found that the prediction quality of our {SVM} model is better
	than that reported earlier for conventional {QSAR}. {M}odel characteristics
	and data set features were studied in detail.},
  doi = {10.1021/ci0340916},
  pdf = {../local/Zernov2003Drug.pdf},
  file = {Zernov2003Drug.pdf:local/Zernov2003Drug.pdf:PDF},
  keywords = {biosvm chemoinformatics},
  url = {http://dx.doi.org/10.1021/ci0340916}
}

@article{Zhang2009Penalized,
  author = {Zhang, D. and Lin, Y. and Zhang, M.},
  title = {Penalized orthogonal-components regression for large p small n data},
  journal = {Electron. J. Statist.},
  volume = {3},
  pages = {781--796},
  year = {2009},
  doi = {10.1214/09-EJS354},
  url = {http://dx.doi.org/10.1214/09-EJS354},
  abstract = {Here we propose a penalized orthogonal-components regression (POCRE)
	for large p small n data. Orthogonal components are sequentially
	constructed to maximize, upon standardization, their correlation
	to the response residuals. A new penalization framework, implemented
	via empirical Bayes thresholding, is presented to effectively identify
	sparse predictors of each component. POCRE is computationally efficient
	owing to its sequential construction of leading sparse principal
	components. In addition, such construction offers other properties
	such as grouping highly correlated predictors and allowing for collinear
	or nearly collinear predictors. With multivariate responses, POCRE
	can construct common components and thus build up latent-variable
	models for large p small n data.},
  review = {A Bayesian formulation of sparse PLS},
  timestamp = {2010.01.10},
  owner = {jp}
}

@article{Zhang2005MULTIPRED,
  author = {Zhang, G. L. and Khan, A. M. and Srinivasan, K. N. and August, J.
	T. and Brusic, V.},
  title = {{MULTIPRED}: a computational system for prediction of promiscuous
	{HLA} binding peptides.},
  journal = {Nucleic Acids Res.},
  year = {2005},
  volume = {33},
  pages = {W172--W179},
  number = {Web Server issue},
  month = jul,
  abstract = {MULTIPRED is a web-based computational system for the prediction of
	peptide binding to multiple molecules (proteins) belonging to human
	leukocyte antigens (HLA) class I A2, A3 and class II DR supertypes.
	It uses hidden Markov models and artificial neural network methods
	as predictive engines. A novel data representation method enables
	MULTIPRED to predict peptides that promiscuously bind multiple HLA
	alleles within one HLA supertype. Extensive testing was performed
	for validation of the prediction models. Testing results show that
	MULTIPRED is both sensitive and specific and it has good predictive
	ability (area under the receiver operating characteristic curve A(ROC)
	> 0.80). MULTIPRED can be used for the mapping of promiscuous T-cell
	epitopes as well as the regions of high concentration of these targets--termed
	T-cell epitope hotspots. MULTIPRED is available at http://antigen.i2r.a-star.edu.sg/multipred/.},
  doi = {10.1093/nar/gki452},
  keywords = {Algorithms, Amino Acid Sequence, Antigen-Antibody Complex, Automated,
	Binding Sites, Computational Biology, Drug Delivery Systems, Drug
	Design, Epitopes, HLA Antigens, HLA-A Antigens, HLA-DR Antigens,
	Humans, Internet, Markov Chains, Molecular Sequence Data, Neural
	Networks (Computer), Pattern Recognition, Peptides, Protein, Protein
	Binding, Protein Interaction Mapping, Sequence Analysis, Software,
	T-Lymphocyte, User-Computer Interface, Viral Vaccines, 15980449},
  pii = {33/suppl_2/W172},
  pmid = {15980449},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1093/nar/gki452}
}

@article{Zhang2008Variable,
  author = {Zhang, H. H. and Liu, Y. and Wu, Y. and Zhu, J.},
  title = {Variable selection for multicategory {SVM} via adaptive sup-norm
	regularization},
  journal = {Electron. J. Statist.},
  year = {2008},
  volume = {2},
  pages = {149--167},
  doi = {10.1214/08-EJS122},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2009.01.05},
  url = {http://dx.doi.org/10.1214/08-EJS122}
}

@article{Zhang2009Maximum,
  author = {Zhang, K. and Tsang, I. W. and Kwok, J. T.},
  title = {Maximum Margin Clustering Made Practical},
  journal = {IEEE T. Neural Networ.},
  year = {2009},
  volume = {20},
  pages = {583--596},
  number = {4},
  doi = {10.1109/TNN.2008.2010620},
  issn = {1045-9227},
  keywords = {learning (artificial intelligence), optimisation, pattern clustering,
	Laplacian/square loss, alternating optimization, maximum margin clustering,
	nonconvex optimization problem, nonconvex problem, semidefinite programs,
	supervised learning, Large margin methods, maximum margin clustering
	(MMC), scalability, unsupervised learning},
  owner = {mordelet},
  timestamp = {2009.10.22}
}

@article{Zhang2004Intrusion,
  author = {Lian-hua Zhang and Guan-hua Zhang and Jie Zhang and Ying-cai Bai},
  title = {Intrusion detection using rough set classification.},
  journal = {J {Z}hejiang {U}niv {S}ci},
  year = {2004},
  volume = {5},
  pages = {1076--1086},
  number = {9},
  month = sep,
  abstract = {Recently machine learning-based intrusion detection approaches have
	been subjected to extensive researches because they can detect both
	misuse and anomaly. {I}n this paper, rough set classification ({RSC}),
	a modern learning algorithm, is used to rank the features extracted
	for detecting intrusions and generate intrusion detection models.
	{F}eature ranking is a very critical step when building the model.
	{RSC} performs feature ranking before generating rules, and converts
	the feature ranking to minimal hitting set problem addressed by using
	genetic algorithm ({GA}). {T}his is done in classical approaches
	using {S}upport {V}ector {M}achine ({SVM}) by executing many iterations,
	each of which removes one useless feature. {C}ompared with those
	methods, our method can avoid many iterations. {I}n addition, a hybrid
	genetic algorithm is proposed to increase the convergence speed and
	decrease the training time of {RSC}. {T}he models generated by {RSC}
	take the form of "{IF}-{THEN}" rules, which have the advantage of
	explication. {T}ests and comparison of {RSC} with {SVM} on {DARPA}
	benchmark data showed that for {P}robe and {D}o{S} attacks both {RSC}
	and {SVM} yielded highly accurate results (greater than 99\% accuracy
	on testing set).},
  doi = {10.1631/jzus.2004.1076},
  pdf = {../local/Zhang2004Intrusion.pdf},
  file = {Zhang2004Intrusion.pdf:local/Zhang2004Intrusion.pdf:PDF},
  url = {http://dx.doi.org/10.1631/jzus.2004.1076}
}

@article{Zhang2004Hidden,
  author = {Li Zhang and Weida Zhou and Licheng Jiao},
  title = {Hidden space support vector machines.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2004},
  volume = {15},
  pages = {1424--1434},
  number = {6},
  month = nov,
  abstract = {Hidden space support vector machines ({HSSVM}s) are presented in this
	paper. {T}he input patterns are mapped into a high-dimensional hidden
	space by a set of hidden nonlinear functions and then the structural
	risk is introduced into the hidden space to construct {HSSVM}s. {M}oreover,
	the conditions for the nonlinear kernel function in {HSSVM}s are
	more relaxed, and even differentiability is not required. {C}ompared
	with support vector machines ({SVM}s), {HSSVM}s can adopt more kinds
	of kernel functions because the positive definite property of the
	kernel function is not a necessary condition. {T}he performance of
	{HSSVM}s for pattern recognition and regression estimation is also
	analyzed. {E}xperiments on artificial and real-world domains confirm
	the feasibility and the validity of our algorithms.},
  doi = {10.1109/TNN.2004.831161},
  pdf = {../local/Zhang2004Hidden.pdf},
  file = {Zhang2004Hidden.pdf:local/Zhang2004Hidden.pdf:PDF},
  url = {http://dx.doi.org/10.1109/TNN.2004.831161}
}

@article{Zhang2004Wavelet,
  author = {Li Zhang and Weida Zhou and Licheng Jiao},
  title = {Wavelet support vector machine.},
  journal = {I{EEE} {T}rans {S}yst {M}an {C}ybern {B} {C}ybern},
  year = {2004},
  volume = {34},
  pages = {34--39},
  number = {1},
  month = feb,
  abstract = {An admissible support vector ({SV}) kernel (the wavelet kernel), by
	which we can construct a wavelet support vector machine ({SVM}),
	is presented. {T}he wavelet kernel is a kind of multidimensional
	wavelet function that can approximate arbitrary nonlinear functions.
	{T}he existence of wavelet kernels is proven by results of theoretic
	analysis. {C}omputer simulations show the feasibility and validity
	of wavelet support vector machines ({WSVM}s) in regression and pattern
	recognition.},
  doi = {10.1109/TSMCB.2003.811113},
  pdf = {../local/Zhang2004Wavelet.pdf},
  file = {Zhang2004Wavelet.pdf:local/Zhang2004Wavelet.pdf:PDF},
  url = {http://dx.doi.org/10.1109/TSMCB.2003.811113}
}

@article{Zhang2005Study,
  author = {Zhang, Lu-Da and Su, Shi-Guang and Wang, Lai-Sheng and Li, Jun-Hui and
	Yang, Li-Ming},
  title = {Study on application of {Fourier} transformation near-infrared
	spectroscopy analysis with support vector machine ({SVM})},
  journal = {Guang Pu Xue Yu Guang Pu Fen Xi},
  year = {2005},
  volume = {25},
  pages = {33--35},
  number = {1},
  month = {Jan},
  abstract = {Support {V}ector {M}achine ({SVM}) is a method for the research on
	identifying two types of problem. {I}t is the latest branch in the
	statistics study theories, and the identification model has a strict
	mathematics foundation. {I}n this paper, the basic principle and
	method of {SVM} are not only introduced, but also applied to chemometrics.
	{O}ne hundred and three rhubarb samples were used as experimental
	materials. {T}he identification models were established with near-infrared
	spectroscopy and {SVM} training method with the intention of identifying
	whether the rhubarb samples are true or false. {T}he thirty-three
	samples in training set were identified by the identifying models
	with the accurate rate of 100\%, while seventy estimate samples had
	an accurate rate of 96.77\%. {T}he research result provided the method
	of identifying the traditional {C}hinese medicine rhubarb quickly.
	{S}o, it shows the feasibility of establishing the models with near-infrared
	spectroscopy and {SVM} method to identify biological samples. {T}his
	paper introduced the theme of {SVM} training method in order to beget
	the attention of the research members who deal with chemometrics.}
}

@article{Zhang2010Detecting,
  author    = {Zhang, N. R. and Siegmund, D. O. and Ji, H. and Li, J.},
  title     = {Detecting Simultaneous Change-points in Multiple Sequences},
  journal   = {Biometrika},
  year      = {2010},
  volume    = {97},
  number    = {3},
  pages     = {631--645},
  doi       = {10.1093/biomet/asq025},
  url       = {http://dx.doi.org/10.1093/biomet/asq025},
  keywords  = {segmentation},
  pdf       = {../local/Zhang2010Detecting.pdf},
  file      = {Zhang2010Detecting.pdf:Zhang2010Detecting.pdf:PDF},
  owner     = {jp},
  timestamp = {2010.06.01}
}

@article{Zhang2005Improved,
  author = {Zhang, Qidong and Yoon, Sukjoon and Welsh, William J.},
  title = {Improved method for predicting beta-turn using support vector machine},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {2370--2374},
  number = {10},
  month = {May},
  abstract = {{MOTIVATION}: {N}umerous methods for predicting beta-turns in proteins
	have been developed based on various computational schemes. {H}ere,
	we introduce a new method of beta-turn prediction that uses the support
	vector machine ({SVM}) algorithm together with predicted secondary
	structure information. {V}arious parameters from the {SVM} have been
	adjusted to achieve optimal prediction performance. {RESULTS}: {T}he
	{SVM} method achieved excellent performance as measured by the {M}atthews
	correlation coefficient ({MCC} = 0.45) using a 7-fold cross validation
	on a database of 426 non-homologous protein chains. {T}o our best
	knowledge, this {MCC} value is the highest achieved so far for predicting
	beta-turn. {T}he overall prediction accuracy {Q}total was 77.3\%,
	which is the best among the existing prediction methods. {A}mong
	its unique attractive features, the present {SVM} method avoids overtraining
	and compresses information and provides a predicted reliability index.},
  doi = {10.1093/bioinformatics/bti358},
  pdf = {../local/Zhang2005Improved.pdf},
  file = {Zhang2005Improved.pdf:local/Zhang2005Improved.pdf:PDF},
  keywords = {biosvm},
  pii = {bti358},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti358}
}

@article{Zhang2003Classification,
  author = {Zhang, S.-W. and Pan, Q. and Zhang, H.-C. and Zhang, Y.-L. and Wang,
	H.-Y.},
  title = {Classification of protein quaternary structure with support vector
	machine},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {2390--2396},
  number = {18},
  abstract = {Motivation: {S}ince the gap between sharply increasing known sequences
	and slow accumulation of known structures is becoming large, an automatic
	classification process based on the primary sequences and known three-dimensional
	structure becomes indispensable. {T}he classification of protein
	quaternary structure based on the primary sequences can provide some
	useful information for the biologists. {S}o a fully automatic and
	reliable classification system is needed. {T}his work tries to look
	for the effective methods of extracting attribute and the algorithm
	for classifying the quaternary structure from the primary sequences.
	{R}esults: {B}oth of the support vector machine ({SVM}) and the covariant
	discriminant algorithms have been first introduced to predict quaternary
	structure properties from the protein primary sequences. {T}he amino
	acid composition and the auto-correlation functions based on the
	amino acid index profile of the primary sequence have been taken
	into account in the algorithms. {W}e have analyzed 472 amino acid
	indices and selected the four amino acid indices as the examples,
	which have the best performance. {T}hus the five attribute parameter
	data sets ({COMP}, {FASG}, {NISK}, {WOLS} and {KYTJ}) were established
	from the protein primary sequences. {T}he {COMP} attribute data set
	is composed of amino acid composition, and the {FASG}, {NISK}, {WOLS}
	and {KYTJ} attribute data sets are composed of the amino acid composition
	and the auto-correlation functions of the corresponding amino acid
	residue index. {T}he overall accuracies of {SVM} are 78.5, 87.5,
	83.2, 81.7 and 81.9\%, respectively, for {COMP}, {FASG}, {NISK}, {WOLS}
	and {KYTJ} data sets in jackknife test, which are 19.6, 7.8, 15.5,
	13.1 and 15.8\%, respectively, higher than that of the covariant discriminant
	algorithm in the same test. {T}he results show that {SVM} may be
	applied to discriminate between the primary sequences of homodimers
	and non-homodimers and the two protein sequence descriptors can reflect
	the quaternary structure information. {C}ompared with previous {R}obert
	{G}arian's investigation, the performance of {SVM} is almost equal
	to that of the {D}ecision tree models, and the methods of extracting
	feature vector from the primary sequences are superior to {R}obert's
	binning function method. {A}vailability: {P}rograms are available
	on request from the authors.},
  pdf = {../local/Zhang2003Classification.pdf},
  file = {Zhang2003Classification.pdf:local/Zhang2003Classification.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/18/2390}
}

@article{Zhang2004Statistical,
  author = {Zhang, T.},
  title = {Statistical behavior and consistency of classification methods based
	on convex risk minimization},
  journal = {Ann. Stat.},
  year = {2004},
  volume = {32},
  pages = {56--134},
  abstract = {We study how closely the optimal {B}ayes error rate can be approximately
	reached using a classification algorithm that computes a classifier
	by minimizing a convex upper bound of the classification error function.
	{T}he measurement of closeness is characterized by the loss function
	used in the estimation. {W}e show that such a classification scheme
	can be generally regarded as a (nonmaximum-likelihood) conditional
	in-class probability estimate, and we use this analysis to compare
	various convex loss functions that have appeared in the literature.
	{F}urthermore, the theoretical insight allows us to design good loss
	functions with desirable properties. {A}nother aspect of our analysis
	is to demonstrate the consistency of certain classification methods
	using convex risk minimization. {T}his study sheds light on the good
	performance of some recently proposed linear classification methods
	including boosting and support vector machines. {I}t also shows their
	limitations and suggests possible improvements.},
  doi = {10.1214/aos/1079120130},
  pdf = {../local/Zhang2004Statistical.pdf},
  file = {Zhang2004Statistical.pdf:local/Zhang2004Statistical.pdf:PDF},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1214/aos/1079120130}
}

@article{Zhang2003Sequence,
  author = {Zhang, X. H.-F. and Heller, K. A. and Hefter, I. and Leslie, C. S.
	and Chasin, L. A.},
  title = {Sequence Information for the Splicing of Human Pre-{mRNA}
	Identified by Support Vector Machine Classification},
  journal = {Genome Res.},
  year = {2003},
  volume = {13},
  pages = {2637--2650},
  number = {12},
  abstract = {Vertebrate pre-m{RNA} transcripts contain many sequences that resemble
	splice sites on the basis of agreement to the consensus, yet these
	more numerous false splice sites are usually completely ignored by
	the cellular splicing machinery. {E}ven at the level of exon definition,
	pseudo exons defined by such false splices sites outnumber real exons
	by an order of magnitude. {W}e used a support vector machine to discover
	sequence information that could be used to distinguish real exons
	from pseudo exons. {T}his machine learning tool led to the definition
	of potential branch points, an extended polypyrimidine tract, and
	{C}-rich and {TG}-rich motifs in a region limited to 50 nt upstream
	of constitutively spliced exons. {C}-rich sequences were also found
	in a region extending to 80 nt downstream of exons, along with {G}-triplet
	motifs. {I}n addition, it was shown that combinations of three bases
	within the splice donor consensus sequence were more effective than
	consensus values in distinguishing real from pseudo splice sites;
	two-way base combinations were optimal for distinguishing 3' splice
	sites. {T}hese data also suggest that interactions between two or
	more of these elements may contribute to exon recognition, and provide
	candidate sequences for assessment as intronic splicing enhancers.},
  doi = {10.1101/gr.1679003},
  pdf = {../local/Zhang2003Sequence.pdf},
  file = {Zhang2003Sequence.pdf:local/Zhang2003Sequence.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://www.genome.org/cgi/content/abstract/13/12/2637}
}

@article{Zhang2008Progress,
  author = {Zhang, Yang},
  title = {Progress and challenges in protein structure prediction},
  journal = {Curr. Opin. Struct. Biol.},
  year = {2008},
  volume = {18},
  pages = {342--348},
  number = {3},
  month = {Jun},
  abstract = {Depending on whether similar structures are found in the PDB library,
	the protein structure prediction can be categorized into template-based
	modeling and free modeling. Although threading is an efficient tool
	to detect the structural analogs, the advancements in methodology
	development have come to a steady state. Encouraging progress is
	observed in structure refinement which aims at drawing template structures
	closer to the native; this has been mainly driven by the use of multiple
	structure templates and the development of hybrid knowledge-based
	and physics-based force fields. For free modeling, exciting examples
	have been witnessed in folding small proteins to atomic resolutions.
	However, predicting structures for proteins larger than 150 residues
	still remains a challenge, with bottlenecks from both force field
	and conformational search.},
  booktitle = {Nucleic acids / Sequences and topology},
  doi = {10.1016/j.sbi.2008.02.004},
  keywords = {casp8, modeling, zhang},
  url = {http://dx.doi.org/10.1016/j.sbi.2008.02.004}
}

@article{Zhang2009Copy,
  author = {Zhang, Y. and Martens, J. W. M. and Yu, J. X. and Jiang, J. and Sieuwerts,
	A. M. and Smid, M. and Klijn, J. G. M. and Wang, Y. and Foekens,
	J. A.},
  title = {Copy number alterations that predict metastatic capability of human
	breast cancer},
  journal = {Cancer Res},
  year = {2009},
  volume = {69},
  pages = {3795--3801},
  number = {9},
  month = {May},
  abstract = {We have analyzed the DNA copy numbers for over 100,000 single-nucleotide
	polymorphism loci across the human genome in genomic DNA from 313
	lymph node-negative primary breast tumors for which genome-wide gene
	expression data were also available. Combining these two data sets
	allowed us to identify the genomic loci and their mapped genes, having
	high correlation with distant metastasis. An estimation of the likely
	response based on published predictive signatures was performed in
	the identified prognostic subgroups defined by gene expression and
	DNA copy number data. In the training set of 200 patients, we constructed
	an 81-gene prognostic copy number signature (CNS) that identified
	a subgroup of patients with increased probability of distant metastasis
	in the independent validation set of 113 patients [hazard ratio (HR),
	2.8; 95\% confidence interval (95\% CI), 1.4-5.6] and in an external
	data set of 116 patients (HR, 3.7; 95\% CI, 1.3-10.6). These high-risk
	patients constituted a subset of the high-risk patients predicted
	by our previously established 76-gene gene expression signature (GES).
	This very poor prognostic group identified by CNS and GES was putatively
	more resistant to preoperative paclitaxel and 5-fluorouracil-doxorubicin-cyclophosphamide
	combination chemotherapy (P = 0.0048), particularly against the doxorubicin
	compound, while potentially benefiting from etoposide. Our study
	shows the feasibility of using copy number alterations to predict
	patient prognostic outcome. When combined with gene expression-based
	signatures for prognosis, the CNS refines risk classification and
	can help identify those breast cancer patients who have a significantly
	worse outlook in prognosis and a potential differential response
	to chemotherapeutic drugs.},
  doi = {10.1158/0008-5472.CAN-08-4596},
  pdf = {../local/Zhang2009Copy.pdf},
  file = {Zhang2009Copy.pdf:Zhang2009Copy.pdf:PDF},
  institution = {Veridex LLC, a Johnson \& Johnson Company, San Diego, California, USA.},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {0008-5472.CAN-08-4596},
  pmid = {19336569},
  timestamp = {2011.05.13},
  url = {http://dx.doi.org/10.1158/0008-5472.CAN-08-4596}
}

@article{Zhang2012Spatial,
  author = {Zhang, Y. and McCord, R. A. and Ho, Y.-J. and Lajoie, B. R. and Hildebrand,
	D. G. and Simon, A. C. and Becker, M. S. and Alt, F. W. and Dekker,
	J.},
  title = {Spatial Organization of the Mouse Genome and Its Role in Recurrent
	Chromosomal Translocations},
  journal = {Cell},
  year = {2012},
  volume = {148},
  pages = {908--921},
  number = {5},
  abstract = {The extent to which the three-dimensional organization of
	the genome contributes to chromosomal translocations is an important
	question in cancer genomics. We generated a high-resolution Hi-C
	spatial organization map of the G1-arrested mouse pro-B cell genome
	and used high-throughput genome-wide translocation sequencing to
	map translocations from target DNA double-strand breaks (DSBs) within
	it. RAG endonuclease-cleaved antigen-receptor loci are dominant translocation
	partners for target DSBs regardless of genomic position, reflecting
	high-frequency DSBs at these loci and their colocalization in a fraction
	of cells. To directly assess spatial proximity contributions, we
	normalized genomic DSBs via ionizing radiation. Under these conditions,
	translocations were highly enriched in cis along single chromosomes
	containing target DSBs and within other chromosomes and subchromosomal
	domains in a manner directly related to pre-existing spatial proximity.
	By combining two high-throughput genomic methods in a genetically
	tractable system, we provide a new lens for viewing cancer genomes.},
  doi = {10.1016/j.cell.2012.02.002},
  pdf = {../local/Zhang2012Spatial.pdf},
  file = {Zhang2012Spatial.pdf:Zhang2012Spatial.pdf:PDF},
  issn = {0092-8674},
  keywords = {hic, ngs},
  owner = {nelle},
  url = {http://www.sciencedirect.com/science/article/pii/S0092867412001584}
}

@book{Zhang2008Machine,
  title = {Machine learning in bioinformatics},
  publisher = {Wiley},
  year = {2008},
  author = {Zhang, Y. and Rajapakse, J. C.},
  volume = {4}
}

@techreport{Zhang1992Iterative,
  author = {Zhang, Z.},
  title = {Iterative Point Matching for Registration of Free-form Curves},
  institution = {Institut National de Recherche en Informatique et en Automatique
	(INRIA)},
  year = {1992}
}

@article{Zhang2006Similarity,
  author = {Zhang, Z. and Grigorov, M. G.},
  title = {Similarity networks of protein binding sites},
  journal = {Proteins},
  year = {2006},
  volume = {62},
  pages = {470--478},
  number = {2},
  month = {Feb},
  abstract = {An increasing attention has been dedicated to the characterization
	of complex networks within the protein world. This work is reporting
	how we uncovered networked structures that reflected the structural
	similarities among protein binding sites. First, a 211 binding sites
	dataset has been compiled by removing the redundant proteins in the
	Protein Ligand Database (PLD) (http://www-mitchell.ch.cam.ac.uk/pld/).
	Using a clique detection algorithm we have performed all-against-all
	binding site comparisons among the 211 available ones. Within the
	set of nodes representing each binding site an edge was added whenever
	a pair of binding sites had a similarity higher than a threshold
	value. The generated similarity networks revealed that many nodes
	had few links and only few were highly connected, but due to the
	limited data available it was not possible to definitively prove
	a scale-free architecture. Within the same dataset, the binding site
	similarity networks were compared with the networks of sequence and
	fold similarity networks. In the protein world, indications were
	found that structure is better conserved than sequence, but on its
	own, sequence was better conserved than the subset of functional
	residues forming the binding site. Because a binding site is strongly
	linked with protein function, the identification of protein binding
	site similarity networks could accelerate the functional annotation
	of newly identified genes. In view of this we have discussed several
	potential applications of binding site similarity networks, such
	as the construction of novel binding site classification databases,
	as well as the implications for protein molecular design in general
	and computational chemogenomics in particular.},
  keywords = {complex network, binding site, small world, scale free, sequence,
	fold, drug design},
  owner = {vero},
  pmid = {16299776},
  timestamp = {2009.02.04}
}

@article{Zhang2005Descriptor-based,
  author = {Zhang, Z. and Kochhar, S. and Grigorov, M. G.},
  title = {Descriptor-based protein remote homology identification},
  journal = {Protein Sci.},
  year = {2005},
  volume = {14},
  pages = {431--444},
  number = {2},
  abstract = {Here, we report a novel protein sequence descriptor-based remote homology
	identification method, able to infer fold relationships without the
	explicit knowledge of structure. {I}n a first phase, we have individually
	benchmarked 13 different descriptor types in fold identification
	experiments in a highly diverse set of protein sequences. {T}he relevant
	descriptors were related to the fold class membership by using simple
	similarity measures in the descriptor spaces, such as the cosine
	angle. {O}ur results revealed that the three best-performing sets
	of descriptors were the sequence-alignment-based descriptor using
	{PSI}-{BLAST} e-values, the descriptors based on the alignment of
	secondary structural elements ({SSEA}), and the descriptors based
	on the occurrence of {PROSITE} functional motifs. {I}n a second phase,
	the three top-performing descriptors were combined to obtain a final
	method with improved performance, which we named {D}esc{F}old. {C}lass
	membership was predicted by {S}upport {V}ector {M}achine ({SVM})
	learning. {I}n comparison with the individual {PSI}-{BLAST}-based
	descriptor, the rate of remote homology identification increased
	from 33.7\% to 46.3\%. {W}e found out that the composite set of descriptors
	was able to identify the true remote homolog for nearly every sixth
	sequence at the 95\% confidence level, or some 10\% more than a single
	{PSI}-{BLAST} search. {W}e have benchmarked the {D}esc{F}old method
	against several other state-of-the-art fold recognition algorithms
	for the 172 {L}ive{B}ench-8 targets, and we concluded that it was
	able to add value to the existing techniques by providing a confident
	hit for at least 10\% of the sequences not identifiable by the previously
	known methods.},
  doi = {10.1110/ps.041035505},
  pdf = {../local/Zhang2005Descriptor-based.pdf},
  file = {Zhang2005Descriptor-based.pdf:local/Zhang2005Descriptor-based.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1110/ps.041035505}
}

@article{Zhao2004Diagnosing,
  author = {Zhao, C. Y. and Zhang, R. S. and Liu, H. X. and Xue, C. X. and Zhao,
	S. G. and Zhou, X. F. and Liu, M. C. and Fan, B. T.},
  title = {Diagnosing anorexia based on partial least squares, back propagation
	neural network, and support vector machines},
  journal = {J Chem Inf Comput Sci},
  year = {2004},
  volume = {44},
  pages = {2040--2046},
  number = {6},
  abstract = {Support vector machine ({SVM}), as a novel type of learning machine,
	for the first time, was used to develop a predictive model for early
	diagnosis of anorexia. {I}t was based on the concentration of six
	elements ({Z}n, {F}e, {M}g, {C}u, {C}a, and {M}n) and the age extracted
	from 90 cases. {C}ompared with the results obtained from two other
	classifiers, partial least squares ({PLS}) and back-propagation neural
	network ({BPNN}), the {SVM} method exhibited the best whole performance.
	{T}he accuracies for the test set by {PLS}, {BPNN}, and {SVM} methods
	were 52\%, 65\%, and 87\%, respectively. {M}oreover, the models we
	proposed could also provide some insight into what factors were related
	to anorexia.},
  doi = {10.1021/ci049877y},
  pdf = {../local/Zhao2004Diagnosing.pdf},
  file = {Zhao2004Diagnosing.pdf:local/Zhao2004Diagnosing.pdf:PDF},
  url = {http://dx.doi.org/10.1021/ci049877y}
}

% NOTE(review): same publication as Zhao2009composite (Ann. Stat. 37(6A), 3468--3497).
% Both keys are kept so that existing citations continue to resolve.
@article{Zhao2008Grouped,
  author = {Zhao, P. and Rocha, G. and Yu, B.},
  title = {The composite absolute penalties family for grouped and hierarchical
	variable selection},
  journal = {Ann. Stat.},
  year = {2009},
  volume = {37},
  pages = {3468--3497},
  number = {6A},
  owner = {jp}
}

@article{Zhao2009composite,
  author    = {Zhao, P. and Rocha, G. and Yu, B.},
  title     = {The composite absolute penalties family for grouped and hierarchical
	variable selection},
  journal   = {The Annals of Statistics},
  publisher = {Institute of Mathematical Statistics},
  year      = {2009},
  volume    = {37},
  number    = {6A},
  pages     = {3468--3497}
}

@article{Zhao2006model,
  author = {Zhao, P. and Yu, B.},
  title = {On model selection consistency of {Lasso}},
  journal = {J. Mach. Learn. Res.},
  year = {2006},
  volume = {7},
  pages = {2541--2563},
  abstract = {Sparsity or parsimony of statistical models is crucial for their proper
	interpretations, as in sciences and social sciences. Model selection
	is a commonly used method to find such models, but usually involves
	a computationally heavy combinatorial search. Lasso (Tibshirani,
	1996) is now being used as a computationally feasible alternative
	to model selection. Therefore it is important to study Lasso for
	model selection purposes.
	
	
	In this paper, we prove that a single condition, which we call the
	Irrepresentable Condition, is almost necessary and sufficient for
	Lasso to select the true model both in the classical fixed p setting
	and in the large p setting as the sample size n gets large. Based
	on these results, sufficient conditions that are verifiable in practice
	are given to relate to previous works and help applications of Lasso
	for feature selection and sparse representation.
	
	
	This Irrepresentable Condition, which depends mainly on the covariance
	of the predictor variables, states that Lasso selects the true model
	consistently if and (almost) only if the predictors that are not
	in the true model are ``irrepresentable'' (in a sense to be clarified)
	by predictors that are in the true model. Furthermore, simulations
	are carried out to provide insights and understanding of this result.},
  pdf = {../local/Zhao2006model.pdf},
  file = {Zhao2006model.pdf:local/Zhao2006model.pdf:PDF},
  keywords = {lasso},
  owner = {jp},
  timestamp = {2008.12.21},
  url = {http://jmlr.csail.mit.edu/papers/v7/zhao06a.html}
}

@article{Zhao2003Application,
  author = {Zhao, Y. and Pinilla, C. and Valmori, D. and Martin, R. and Simon,
	R.},
  title = {Application of support vector machines for {T}-cell epitopes prediction},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1978--1984},
  number = {15},
  abstract = {Motivation: {T}he {T}-cell receptor, a major histocompatibility complex
	({MHC}) molecule, and a bound antigenic peptide, play major roles
	in the process of antigen-specific {T}-cell activation. {T}-cell
	recognition was long considered exquisitely specific. {R}ecent data
	also indicate that it is highly flexible, and one receptor may recognize
	thousands of different peptides. {D}eciphering the patterns of peptides
	that elicit a {MHC} restricted {T}-cell response is critical for
	vaccine development. {R}esults: {F}or the first time we develop a
	support vector machine ({SVM}) for {T}-cell epitope prediction with
	an {MHC} type {I} restricted {T}-cell clone. {U}sing cross-validation,
	we demonstrate that {SVM}s can be trained on relatively small data
	sets to provide prediction more accurate than those based on previously
	published methods or on {MHC} binding. {S}upplementary information:
	{D}ata for 203 synthesized peptides is available at http://linus.nci.nih.gov/Data/LAU203\_Peptide.pdf},
  pdf = {../local/Zhao2003Application.pdf},
  file = {Zhao2003Application.pdf:local/Zhao2003Application.pdf:PDF},
  keywords = {biosvm, immunoinformatics},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/19/15/1978}
}

% NOTE(review): same publication as Zhao2003Application (Bioinformatics 19(15), 1978--1984).
% Both keys are kept so that existing citations continue to resolve.
@article{Zhao2003Applicationa,
  author = {Zhao, Y. and Pinilla, C. and Valmori, D. and Martin, R. and Simon,
	R.},
  title = {Application of support vector machines for {T}-cell epitopes prediction},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1978--1984},
  number = {15},
  month = {Oct},
  abstract = {MOTIVATION: The T-cell receptor, a major histocompatibility complex
	(MHC) molecule, and a bound antigenic peptide, play major roles in
	the process of antigen-specific T-cell activation. T-cell recognition
	was long considered exquisitely specific. Recent data also indicate
	that it is highly flexible, and one receptor may recognize thousands
	of different peptides. Deciphering the patterns of peptides that
	elicit a MHC restricted T-cell response is critical for vaccine development.
	RESULTS: For the first time we develop a support vector machine (SVM)
	for T-cell epitope prediction with an MHC type I restricted T-cell
	clone. Using cross-validation, we demonstrate that SVMs can be trained
	on relatively small data sets to provide prediction more accurate
	than those based on previously published methods or on MHC binding.
	SUPPLEMENTARY INFORMATION: Data for 203 synthesized peptides is available
	at http://linus.nci.nih.gov/Data/LAU203\_Peptide.pdf},
  keywords = {Algorithms, Amino Acid Sequence, Antigen, Antigen Presentation, Antigen-Antibody
	Complex, Artificial Intelligence, Autoimmune Diseases, Autoimmunity,
	Bacterial Proteins, CD4-Positive T-Lymphocytes, Cell Proliferation,
	Cells, Clone Cells, Cluster Analysis, Conserved Sequence, Cross Reactions,
	Cultured, Cytokines, Databases, Epitope Mapping, Epitopes, Gene Products,
	Genetic, HIV-1, HLA-DQ Antigens, HLA-DR2 Antigen, Haplotypes, Helper-Inducer,
	Hemagglutination, Histocompatibility Antigens Class I, Humans, K562
	Cells, Molecular Mimicry, Molecular Sequence Data, Multiple Sclerosis,
	Myelin Proteins, Neural Networks (Computer), Orthomyxoviridae, Peptide
	Library, Peptides, Protein, Protein Binding, Protein Interaction
	Mapping, ROC Curve, Receptors, Relapsing-Remitting, Reproducibility
	of Results, Reverse Transcriptase Polymerase Chain Reaction, Sensitivity
	and Specificity, Sequence Analysis, Structure-Activity Relationship,
	T-Cell, T-Lymphocyte, T-Lymphocytes, Torque teno virus, Viral, Viral
	Proteins, gag},
  pmid = {14555632},
  timestamp = {2007.01.25}
}

@article{Zheng2005Foley-Sammon,
  author = {Zheng, Wenming and Zhao, Li and Zou, Cairong},
  title = {{Foley-Sammon} optimal discriminant vectors using kernel approach},
  journal = {{IEEE} Trans Neural Netw},
  year = {2005},
  volume = {16},
  pages = {1--9},
  number = {1},
  month = {Jan},
  abstract = {A new nonlinear feature extraction method called kernel {F}oley-{S}ammon
	optimal discriminant vectors ({KFSODV}s) is presented in this paper.
	{T}his new method extends the well-known {F}oley-{S}ammon optimal
	discriminant vectors ({FSODV}s) from linear domain to a nonlinear
	domain via the kernel trick that has been used in support vector
	machine ({SVM}) and other commonly used kernel-based learning algorithms.
	{T}he proposed method also provides an effective technique to solve
	the so-called small sample size ({SSS}) problem which exists in many
	classification problems such as face recognition. {W}e give the derivation
	of {KFSODV} and conduct experiments on both simulated and real data
	sets to confirm that the {KFSODV} method is superior to the previous
	commonly used kernel-based learning algorithms in terms of the performance
	of discrimination.}
}

@article{Zhou2002covering,
  author = {Zhou, D.},
  title = {The covering number in learning theory},
  journal = {J. Complexity},
  year = {2002},
  volume = {18},
  pages = {739--767},
  abstract = {The covering number of a ball of a reproducing kernel {H}ilbert space
	as a subset of the continuous function space plays an important role
	in {L}earning {T}heory. {W}e give estimates for this covering number
	by means of the regularity of the {M}ercer kernel $K$. {F}or convolution
	type kernels $K(x, t) = k(x - t)$ on $[0, 1]^n$, we provide estimates
	depending on the decay of $\hat{k}$, the {F}ourier transform of $k$. {I}n particular,
	when $\hat{k}$ decays exponentially, our estimate for this covering number
	is better than all the previous results and covers many important
	{M}ercer kernels. {A} counter example is presented to show that the
	eigenfunctions of the {H}ilbert-{S}chmidt operator $L_K$ associated
	with a {M}ercer kernel $K$ may not be uniformly bounded. {H}ence
	some previous methods used for estimating the covering number in
	{L}earning {T}heory are not valid. {W}e also provide an example of
	a {M}ercer kernel to show that $L_K^{1/2}$ may not be generated by a {M}ercer
	kernel.},
  doi = {10.1006/jcom.2002.0635},
  pdf = {../local/Zhou2002covering.pdf},
  file = {Zhou2002covering.pdf:local/Zhou2002covering.pdf:PDF},
  owner = {jeanphilippevert},
  url = {http://dx.doi.org/10.1006/jcom.2002.0635}
}

@article{Zhou2005Recognition,
  author = {Zhou, GuoDong and Shen, Dan and Zhang, Jie and Su, Jian and Tan,
	SoonHeng},
  title = {Recognition of protein/gene names from text using an ensemble of
	classifiers},
  journal = {{BMC} Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {S7},
  number = {Suppl 1},
  abstract = {This paper proposes an ensemble of classifiers for biomedical name
	recognition in which three classifiers, one {S}upport {V}ector {M}achine
	and two discriminative {H}idden {M}arkov {M}odels, are combined effectively
	using a simple majority voting strategy. {I}n addition, we incorporate
	three post-processing modules, including an abbreviation resolution
	module, a protein/gene name refinement module and a simple dictionary
	matching module, into the system to further improve the performance.
	{E}valuation shows that our system achieves the best performance
	from among 10 systems with a balanced {F}-measure of 82.58 on the
	closed evaluation of the {B}io{C}reative protein/gene name recognition
	task ({T}ask 1{A}).},
  doi = {10.1186/1471-2105-6-S1-S7},
  pdf = {../local/Zhou2005Recognition.pdf},
  file = {Zhou2005Recognition.pdf:local/Zhou2005Recognition.pdf:PDF},
  keywords = {biosvm, nlp},
  pii = {1471-2105-6-S1-S7},
  url = {http://dx.doi.org/10.1186/1471-2105-6-S1-S7}
}

@article{Zhou2004Recognizing,
  author = {GuoDong Zhou and Jie Zhang and Jian Su and Dan Shen and ChewLim Tan},
  title = {Recognizing names in biomedical texts: a machine learning approach.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {1178--1190},
  number = {7},
  month = {May},
  abstract = {M{OTIVATION}: {W}ith an overwhelming amount of textual information
	in molecular biology and biomedicine, there is a need for effective
	and efficient literature mining and knowledge discovery that can
	help biologists to gather and make use of the knowledge encoded in
	text documents. {I}n order to make organized and structured information
	available, automatically recognizing biomedical entity names becomes
	critical and is important for information retrieval, information
	extraction and automated knowledge acquisition. {RESULTS}: {I}n this
	paper, we present a named entity recognition system in the biomedical
	domain, called {P}ower{B}io{NE}. {I}n order to deal with the special
	phenomena of naming conventions in the biomedical domain, we propose
	various evidential features: (1) word formation pattern; (2) morphological
	pattern, such as prefix and suffix; (3) part-of-speech; (4) head
	noun trigger; (5) special verb trigger and (6) name alias feature.
	{A}ll the features are integrated effectively and efficiently through
	a hidden {M}arkov model ({HMM}) and a {HMM}-based named entity recognizer.
	{I}n addition, a k-{N}earest {N}eighbor (k-{NN}) algorithm is proposed
	to resolve the data sparseness problem in our system. {F}inally,
	we present a pattern-based post-processing to automatically extract
	rules from the training data to deal with the cascaded entity name
	phenomenon. {F}rom our best knowledge, {P}ower{B}io{NE} is the first
	system which deals with the cascaded entity name phenomenon. {E}valuation
	shows that our system achieves the {F}-measure of 66.6 and 62.2 on
	the 23 classes of {GENIA} {V}3.0 and {V}1.1, respectively. {I}n particular,
	our system achieves the {F}-measure of 75.8 on the "protein" class
	of {GENIA} {V}3.0. {F}or comparison, our system outperforms the best
	published result by 7.8 on {GENIA} {V}1.1, without help of any dictionaries.
	{I}t also shows that our {HMM} and the k-{NN} algorithm outperform
	other models, such as back-off {HMM}, linear interpolated {HMM},
	support vector machines, {C}4.5, {C}4.5 rules and {RIPPER}, by effectively
	capturing the local context dependency and resolving the data sparseness
	problem. {M}oreover, evaluation on {GENIA} {V}3.0 shows that the
	post-processing for the cascaded entity name phenomenon improves
	the {F}-measure by 3.9. {F}inally, error analysis shows that about
	half of the errors are caused by the strict annotation scheme and
	the annotation inconsistency in the {GENIA} corpus. {T}his suggests
	that our system achieves an acceptable {F}-measure of 83.6 on the
	23 classes of {GENIA} {V}3.0 and in particular 86.2 on the "protein"
	class, without help of any dictionaries. {W}e think that a {F}-measure
	of 90 on the 23 classes of {GENIA} {V}3.0 and in particular 92 on
	the "protein" class, can be achieved through refining of the annotation
	scheme in the {GENIA} corpus, such as flexible annotation scheme
	and annotation consistency, and inclusion of a reasonable biomedical
	dictionary. {AVAILABILITY}: {A} demo system is available at http://textmining.i2r.a-star.edu.sg/{NLS}/demo.htm.
	{T}echnology license is available upon the bilateral agreement.},
  doi = {10.1093/bioinformatics/bth060},
  pdf = {../local/Zhou2004Recognizing.pdf},
  file = {Zhou2004Recognizing.pdf:Zhou2004Recognizing.pdf:PDF},
  pii = {bth060},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth060}
}

@article{Zhou2005LS,
  author = {Xin Zhou and K. Z. Mao},
  title = {L{S} {B}ound based gene selection for {DNA} microarray data.},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {1559--1564},
  number = {8},
  month = {Apr},
  abstract = {M{OTIVATION}: {O}ne problem with discriminant analysis of {DNA} microarray
	data is that each sample is represented by quite a large number of
	genes, and many of them are irrelevant, insignificant or redundant
	to the discriminant problem at hand. {M}ethods for selecting important
	genes are, therefore, of much significance in microarray data analysis.
	{I}n the present study, a new criterion, called {LS} {B}ound measure,
	is proposed to address the gene selection problem. {T}he {LS} {B}ound
	measure is derived from leave-one-out procedure of {LS}-{SVM}s (least
	squares support vector machines), and as the upper bound for leave-one-out
	classification results it reflects to some extent the generalization
	performance of gene subsets. {RESULTS}: {W}e applied this {LS} {B}ound
	measure for gene selection on two benchmark microarray datasets:
	colon cancer and leukemia. {W}e also compared the {LS} {B}ound measure
	with other evaluation criteria, including the well-known {F}isher's
	ratio and {M}ahalanobis class separability measure, and other published
	gene selection algorithms, including {W}eighting factor and {SVM}
	{R}ecursive {F}eature {E}limination. {T}he strength of the {LS} {B}ound
	measure is that it provides gene subsets leading to more accurate
	classification results than the filter method while its computational
	complexity is at the level of the filter method. {AVAILABILITY}:
	{A} companion website can be accessed at http://www.ntu.edu.sg/home5/pg02776030/lsbound/.
	{T}he website contains: (1) the source code of the gene selection
	algorithm; (2) the complete set of tables and figures regarding the
	experimental study; (3) proof of the inequality (9). {CONTACT}: ekzmao@ntu.edu.sg.},
  doi = {10.1093/bioinformatics/bti216},
  pdf = {../local/Zhou2005LS.pdf},
  file = {Zhou2005LS.pdf:local/Zhou2005LS.pdf:PDF},
  keywords = {biosvm featureselection microarray},
  pii = {bti216},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti216}
}

@article{Zhu2000Two,
  author  = {Zhu, G. and Spellman, P. T. and Volpe, T. and Brown, P. O. and Botstein,
	D. and Davis, T. N. and Futcher, B.},
  title   = {Two yeast forkhead genes regulate the cell cycle and pseudohyphal
	growth},
  journal = {Nature},
  volume  = {406},
  pages   = {90--94},
  year    = {2000},
  url     = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v406/n6791/full/406090a0_fs.html&content_filetype=pdf},
  subject = {microarray},
  pdf     = {../local/zhu00.pdf},
  file    = {zhu00.pdf:local/zhu00.pdf:PDF}
}

@article{Zhu2001Global,
  author = {H. Zhu and M. Bilgin and R. Bangham and D. Hall and A. Casamayor
	and P. Bertone and N. Lan and R. Jansen and S. Bidlingmaier and T.
	Houfek and T. Mitchell and P. Miller and R. A. Dean and M. Gerstein
	and M. Snyder},
  title = {Global analysis of protein activities using proteome chips.},
  journal = {Science},
  year = {2001},
  volume = {293},
  pages = {2101--2105},
  number = {5537},
  month = {Sep},
  abstract = {To facilitate studies of the yeast proteome, we cloned 5800 open reading
	frames and overexpressed and purified their corresponding proteins.
	{T}he proteins were printed onto slides at high spatial density to
	form a yeast proteome microarray and screened for their ability to
	interact with proteins and phospholipids. {W}e identified many new
	calmodulin- and phospholipid-interacting proteins; a common potential
	binding motif was identified for many of the calmodulin-binding proteins.
	{T}hus, microarrays of an entire eukaryotic proteome can be prepared
	and screened for diverse biochemical activities. {T}he microarrays
	can also be used to screen protein-drug interactions and to detect
	posttranslational modifications.},
  doi = {10.1126/science.1062191},
  pdf = {../local/zhu01.pdf},
  file = {zhu01.pdf:local/zhu01.pdf:PDF},
  keywords = {Amino Acid Motifs, Amino Acid Sequence, Calmodulin, Calmodulin-Binding
	Proteins, Cell Membrane, Cloning, Fungal Proteins, Glucose, Liposomes,
	Membrane Proteins, Molecular, Molecular Sequence Data, Non-U.S. Gov't,
	Open Reading Frames, P.H.S., Peptide Library, Phosphatidylcholines,
	Phosphatidylinositols, Phospholipids, Protein Binding, Proteome,
	Recombinant Fusion Proteins, Research Support, Saccharomyces cerevisiae,
	Signal Transduction, Streptavidin, U.S. Gov't, 11474067},
  pii = {1062191},
  url = {http://dx.doi.org/10.1126/science.1062191}
}

@article{Zhu2005Kernel,
  author = {Zhu, J. and Hastie, T.},
  title = {Kernel {L}ogistic {R}egression and the {I}mport {V}ector {M}achine},
  journal = {J. Comput. Graph. Stat.},
  year = {2005},
  volume = {14},
  pages = {185--205},
  number = {1},
  month = {Mar},
  abstract = {The support vector machine ({SVM}) is known for its good performance
	in two-class classification, but its extension to multiclass classification
	is still an ongoing research issue. {I}n this article, we propose
	a new approach for classification, called the import vector machine
	({IVM}), which is built on kernel logistic regression ({KLR}). {W}e
	show that the {IVM} not only performs as well as the {SVM} in two-class
	classification, but also can naturally be generalized to the multiclass
	case. {F}urthermore, the {IVM} provides an estimate of the underlying
	probability. {S}imilar to the support points of the {SVM}, the {IVM}
	model uses only a fraction of the training data to index kernel basis
	functions, typically a much smaller fraction than the {SVM}. {T}his
	gives the {IVM} a potential computational advantage over the {SVM}.},
  doi = {10.1198/106186005X25619},
  pdf = {../local/Zhu2005Kernel.pdf},
  file = {Zhu2005Kernel.pdf:local/Zhu2005Kernel.pdf:PDF},
  url = {http://dx.doi.org/10.1198/106186005X25619}
}

@article{Zhu2004Classification,
  author = {Zhu, J. and Hastie, T.},
  title = {Classification of gene microarrays by penalized logistic regression},
  journal = {Biostatistics},
  year = {2004},
  volume = {5},
  pages = {427--443},
  number = {3},
  month = {Jul},
  abstract = {Classification of patient samples is an important aspect of cancer
	diagnosis and treatment. {T}he support vector machine ({SVM}) has
	been successfully applied to microarray cancer diagnosis problems.
	{H}owever, one weakness of the {SVM} is that given a tumor sample,
	it only predicts a cancer class label but does not provide any estimate
	of the underlying probability. {W}e propose penalized logistic regression
	({PLR}) as an alternative to the {SVM} for the microarray cancer
	diagnosis problem. {W}e show that when using the same set of genes,
	{PLR} and the {SVM} perform similarly in cancer classification, but
	{PLR} has the advantage of additionally providing an estimate of
	the underlying probability. {O}ften a primary goal in microarray
	cancer diagnosis is to identify the genes responsible for the classification,
	rather than class prediction. {W}e consider two gene selection methods
	in this paper, univariate ranking ({UR}) and recursive feature elimination
	({RFE}). {E}mpirical results indicate that {PLR} combined with {RFE}
	tends to select fewer genes than other methods and also performs
	well in both cross-validation and test samples. {A} fast algorithm
	for solving {PLR} is also described.},
  doi = {10.1093/biostatistics/5.3.427},
  pdf = {../local/Zhu2004Classification.pdf},
  file = {Zhu2004Classification.pdf:Zhu2004Classification.pdf:PDF},
  pii = {5/3/427},
  url = {http://dx.doi.org/10.1093/biostatistics/5.3.427}
}

@inproceedings{Zhu20041norm,
  author = {Zhu, J. and Rosset, S. and Hastie, T. and Tibshirani, R.},
  title = {1-norm Support Vector Machines},
  booktitle = {Adv. Neural. Inform. Process Syst.},
  year = {2004},
  editor = {Thrun, S. and Saul, L. and Sch{\"o}lkopf, B.},
  volume = {16},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  timestamp = {2008.01.17}
}

@article{Zhu2003Introduction,
  author = {Lingyun Zhu and Baoming Wu and Changxiu Cao},
  title = {Introduction to medical data mining},
  journal = {Sheng {W}u {Y}i {X}ue {G}ong {C}heng {X}ue {Z}a {Z}hi},
  year = {2003},
  volume = {20},
  pages = {559--562},
  number = {3},
  month = {Sep},
  abstract = {Modern medicine generates a great deal of information stored in the
	medical database. {E}xtracting useful knowledge and providing scientific
	decision-making for the diagnosis and treatment of disease from the
	database increasingly becomes necessary. {D}ata mining in medicine
	can deal with this problem. {I}t can also improve the management
	level of hospital information and promote the development of telemedicine
	and community medicine. {B}ecause the medical information is characteristic
	of redundancy, multi-attribution, incompletion and closely related
	with time, medical data mining differs from other one. {I}n this
	paper we have discussed the key techniques of medical data mining
	involving pretreatment of medical data, fusion of different pattern
	and resource, fast and robust mining algorithms and reliability of
	mining results. {T}he methods and applications of medical data mining
	based on computation intelligence such as artificial neural network,
	fuzzy system, evolutionary algorithms, rough set, and support vector
	machine have been introduced. {T}he features and problems in data
	mining are summarized in the last section.},
  keywords = {Algorithms, Anion Exchange Resins, Automatic Data Processing, Chemical,
	Chromatography, Computational Biology, Computer-Assisted, Data Interpretation,
	Databases, Decision Making, Decision Trees, English Abstract, Factual,
	Fuzzy Logic, Humans, Indicators and Reagents, Information Storage
	and Retrieval, Ion Exchange, Models, Neural Networks (Computer),
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, P.H.S., Proteins,
	Quantitative Structure-Activity Relationship, RNA, ROC Curve, Research
	Support, Sequence Analysis, Statistical, Transfer, U.S. Gov't, 14565039}
}

@article{Zhu2006Improving,
  author = {Shanfeng Zhu and Keiko Udaka and John Sidney and Alessandro Sette
	and Kiyoko F. Aoki-Kinoshita and Hiroshi Mamitsuka},
  title = {Improving {MHC} binding peptide prediction by incorporating binding
	data of auxiliary {MHC} molecules.},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {1648--1655},
  number = {13},
  doi = {10.1093/bioinformatics/btl141},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://dx.doi.org/10.1093/bioinformatics/btl141},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl141}
}

@article{Zhu1998FRAME:,
  author = {Zhu, S. C. and Wu, Y. and Mumford, D.},
  title = {F{RAME}: {F}ilters, {R}andom field {A}nd {M}aximum {E}ntropy: ---
	{T}owards a {U}nified {T}heory for {T}exture {M}odeling},
  journal = {Int. J. Comput. Vision},
  year = {1998},
  volume = {27},
  pages = {1--20},
  number = {2},
  pdf = {../local/zhu98.pdf},
  file = {zhu98.pdf:local/zhu98.pdf:PDF},
  subject = {stat},
  url = {http://www.cis.ohio-state.edu/~szhu/frame_ijcv.ps.gz}
}

@article{Zhu1997Minimax,
  author = {Zhu, S. C. and Wu, Z. N. and Mumford, D.},
  title = {Minimax {E}ntropy {P}rinciple and {I}ts {A}pplication to {T}exture
	{M}odeling},
  journal = {Neural {C}omput.},
  year = {1997},
  volume = {9},
  pages = {1627--1660},
  number = {8},
  pdf = {../local/zhu97.pdf},
  file = {zhu97.pdf:local/zhu97.pdf:PDF},
  subject = {stat},
  url = {http://www.cis.ohio-state.edu/~szhu/frame_neuro.ps.gz}
}

@inproceedings{Zien2007Multiclass,
  author = {A. Zien and C. Ong},
  title = {Multiclass multiple kernel learning},
  booktitle = {Proceedings of the 24th Annual International Conference on Machine
	Learning (ICML 2007)},
  publisher = {Omnipress},
  year = {2007},
  editor = {Zoubin Ghahramani},
  pages = {1191--1198},
  location = {Corvallis, OR}
}

@article{Zien2000Engineering,
  author = {Zien, A. and R{\"a}tsch, G. and Mika, S. and Sch{\"o}lkopf, B. and
	Lengauer, T. and M{\"u}ller, K.-R.},
  title = {Engineering support vector machine kernels that recognize translation
	initiation sites},
  journal = {Bioinformatics},
  year = {2000},
  volume = {16},
  pages = {799--807},
  number = {9},
  abstract = {Motivation: {I}n order to extract protein sequences from nucleotide
	sequences, it is an important step to recognize points at which regions
	start that code for proteins. {T}hese points are called translation
	initiation sites ({TIS}). {R}esults: {T}he task of finding {TIS}
	can be modeled as a classification problem. {W}e demonstrate the
	applicability of support vector machines for this task, and show
	how to incorporate prior biological knowledge by engineering an appropriate
	kernel function. {W}ith the described techniques the recognition
	performance can be improved by 26\% over leading existing approaches.
	{W}e provide evidence that existing related methods (e.g. {ESTS}can)
	could profit from advanced {TIS} recognition.},
  pdf = {../local/Zien2000Engineering.pdf},
  file = {Zien2000Engineering.pdf:local/Zien2000Engineering.pdf:PDF},
  keywords = {biosvm},
  owner = {jeanphilippevert},
  url = {http://bioinformatics.oupjournals.org/cgi/content/abstract/16/9/799}
}

@article{Zinovyev2008Bioinformatics,
  author = {Zinovyev, A. and Viara, E. and Calzone, L. and Barillot, E.},
  title = {{BiNoM}: a {C}ytoscape plugin for manipulating and analyzing biological
	networks},
  journal = {Bioinformatics},
  year = {2008},
  volume = {24},
  pages = {876--877},
  number = {6},
  abstract = {BiNoM (Biological Network Manager) is a new bioinformatics software
	that significantly facilitates the usage and the analysis of biological
	networks in standard systems biology formats (SBML, SBGN, BioPAX).
	BiNoM implements a full-featured BioPAX editor and a method of `interfaces'
	for accessing BioPAX content. BiNoM is able to work with huge BioPAX
	files such as whole pathway databases. In addition, BiNoM allows
	the analysis of networks created with CellDesigner software and their
	conversion into BioPAX format. BiNoM comes as a library and as a
	Cytoscape plugin which adds a rich set of operations to Cytoscape
	such as path and cycle analysis, clustering sub-networks, decomposition
	of network into modules, clipboard operations and others. Availability:
	Last version of BiNoM distributed under the LGPL licence together
	with documentation, source code and API are available at http://bioinfo.curie.fr/projects/binom
	Contact: andrei.zinovyev@curie.fr},
  doi = {10.1093/bioinformatics/btm553},
  eprint = {http://bioinformatics.oxfordjournals.org/cgi/reprint/24/6/876.pdf},
  keywords = {csbcbook},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/24/6/876}
}

@article{Ziv1978Compression,
  author = {Ziv, J. and Lempel, A.},
  title = {Compression of individual sequences via variable-rate coding},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {1978},
  volume = {24},
  pages = {530--536},
  number = {5},
  month = {Sep},
  pdf = {../local/Ziv1978Compression.pdf},
  file = {Ziv1978Compression.pdf:local/Ziv1978Compression.pdf:PDF},
  owner = {vert}
}

@article{Zomer2004Toxicological,
  author = {Simeone Zomer and Christelle Guillo and Richard G Brereton and Melissa
	Hanna-Brown},
  title = {Toxicological classification of urine samples using pattern recognition
	techniques and capillary electrophoresis.},
  journal = {Anal {B}ioanal {C}hem},
  year = {2004},
  volume = {378},
  pages = {2008--2020},
  number = {8},
  month = {Apr},
  abstract = {In toxicology, hazardous substances detected in organisms may often
	lead to different pathological conditions depending on the type of
	exposure and level of dosage; hence, further analysis on this can
	suggest the best cure. {U}rine profiling may serve the purpose because
	samples typically contain hundreds of compounds representing an effective
	metabolic fingerprint. {T}his paper proposes a pattern recognition
	procedure for determining the type of cadmium dosage, acute or chronic,
	administrated to laboratory rats, where urinary profiles are detected
	using capillary electrophoresis. {T}he procedure is based on the
	composition of a sample data matrix consisting of areas of common
	peaks, with appropriate pre-processing aimed at reducing the lack
	of reproducibility and enhancing the potential contribution of low-level
	metabolites in discrimination. {T}he matrix is then used for pattern
	recognition including principal components analysis, cluster analysis,
	discriminant analysis and support vector machines. {A}ttention is
	particularly focussed on the last of these techniques, because of
	its novelty and some attractive features such as its suitability
	to work with datasets that are small and/or have low samples/variable
	ratios. {T}he type of cadmium administration is detected as a relevant
	feature that contributes to the structure of the sample matrix, and
	samples are classified according to the class membership, with discriminant
	analysis and support vector machines performing complementarily on
	a training and on a test set.},
  doi = {10.1007/s00216-004-2518-0},
  pdf = {../local/Zomer2004Toxicological.pdf},
  file = {Zomer2004Toxicological.pdf:local/Zomer2004Toxicological.pdf:PDF},
  keywords = {Algorithms, Ambergris, Animals, Automated, Cadmium, Candida, Candida
	albicans, Capillary, Cluster Analysis, Combinatorial Chemistry Techniques,
	Electrophoresis, Eye Enucleation, Humans, Magnetic Resonance Spectroscopy,
	Melanoma, Models, Molecular, Molecular Conformation, Non-U.S. Gov't,
	Odors, P.H.S., Pattern Recognition, Perfume, Predictive Value of
	Tests, Prognosis, Prospective Studies, Quantitative Structure-Activity
	Relationship, Rats, Research Support, U.S. Gov't, Uveal Neoplasms,
	15007590},
  url = {http://dx.doi.org/10.1007/s00216-004-2518-0}
}

@article{Zoppoli2010TimeDelay,
  author = {Zoppoli, P. and Morganella, S. and Ceccarelli, M.},
  title = {{TimeDelay-ARACNE}: Reverse engineering of gene networks from time-course
	data by an information theoretic approach},
  journal = {BMC Bioinformatics},
  year = {2010},
  volume = {11},
  pages = {154},
  number = {1},
  publisher = {BioMed Central Ltd}
}

@article{Zou2006adaptive,
  author = {Zou, Hui},
  title = {The Adaptive Lasso and Its Oracle Properties},
  journal = {J. Am. Stat. Assoc.},
  year = {2006},
  volume = {101},
  pages = {1418--1429},
  month = {Dec},
  pdf = {../local/Zou2006adaptive.pdf},
  file = {Zou2006adaptive.pdf:Zou2006adaptive.pdf:PDF},
  url = {http://ideas.repec.org/a/bes/jnlasa/v101y2006p1418-1429.html}
}

@article{Zou2005Regularization,
  author = {Zou, H. and Hastie, T.},
  title = {Regularization and variable selection via the {E}lastic {N}et},
  journal = {J. R. Stat. Soc. Ser. B},
  year = {2005},
  volume = {67},
  pages = {301--320},
  abstract = {Summary. We propose the elastic net, a new regularization and variable
	selection method. Real world data and a simulation study show that
	the elastic net often outperforms the lasso, while enjoying a similar
	sparsity of representation. In addition, the elastic net encourages
	a grouping effect, where strongly correlated predictors tend to be
	in or out of the model together. The elastic net is particularly useful
	when the number of predictors (p) is much bigger than the number
	of observations (n). By contrast, the lasso is not a very satisfactory
	variable selection method in the $p \gg n$ case. An algorithm called LARS-EN
	is proposed for computing elastic net regularization paths efficiently,
	much like algorithm LARS does for the lasso.},
  keywords = {elastic-net, feature-selection, lars, lasso},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.1596}
}

@article{Zou2006Sparse,
  author    = {Zou, H. and Hastie, T. and Tibshirani, R.},
  title     = {Sparse Principal Component Analysis},
  journal   = {J. Comput. Graph. Stat.},
  volume    = {15},
  number    = {2},
  pages     = {265--286},
  year      = {2006},
  doi       = {10.1198/106186006X113430},
  url       = {http://dx.doi.org/10.1198/106186006X113430},
  owner     = {jp},
  timestamp = {2011.12.30},
  abstract  = {Principal component analysis (PCA) is widely used in data processing
	and dimensionality reduction. However, PCA suffers from the fact
	that each principal component is a linear combination of all the
	original variables, thus it is often difficult to interpret the results.
	We introduce a new method called sparse principal component analysis
	(SPCA) using the lasso (elastic net) to produce modified principal
	components with sparse loadings. We first show that PCA can be formulated
	as a regression-type optimization problem; sparse loadings are then
	obtained by imposing the lasso (elastic net) constraint on the regression
	coefficients. Efficient algorithms are proposed to fit our SPCA models
	for both regular multivariate data and gene expression arrays. We
	also give a new formula to compute the total variance of modified
	principal components. As illustrations, SPCA is applied to real and
	simulated data with encouraging results.}
}

@article{Zucknick2008Comparing,
  author = {Zucknick, M. and Richardson, S. and Stronach, E. A. and others},
  title = {Comparing the characteristics of gene expression profiles derived
	by univariate and multivariate classification methods},
  journal = {Stat Appl Genet Mol Biol},
  year = {2008},
  volume = {7},
  pages = {7},
  number = {1}
}

@article{Zuker1989finding,
  author    = {Zuker, M.},
  title     = {{O}n finding all suboptimal foldings of an {RNA} molecule.},
  journal   = {Science},
  volume    = {244},
  number    = {4900},
  pages     = {48--52},
  month     = {Apr},
  year      = {1989},
  pmid      = {2468181},
  keywords  = {sirna},
  owner     = {vert},
  timestamp = {2006.04.27},
  abstract  = {An algorithm and a computer program have been prepared for determining
	RNA secondary structures within any prescribed increment of the computed
	global minimum free energy. The mathematical problem of determining
	how well defined a minimum energy folding is can now be solved. All
	predicted base pairs that can participate in suboptimal structures
	may be displayed and analyzed graphically. Representative suboptimal
	foldings are generated by selecting these base pairs one at a time
	and computing the best foldings that contain them. A distance criterion
	that ensures that no two structures are "too close" is used to avoid
	multiple generation of similar structures. Thermodynamic parameters,
	including free-energy increments for single-base stacking at the
	ends of helices and for terminal mismatched pairs in interior and
	hairpin loops, are incorporated into the underlying folding model
	of the above algorithm.}
}

@book{Zupan1999Neural,
  author    = {J. Zupan and J. Gasteiger},
  title     = {Neural Networks in Chemistry and Drug Design},
  publisher = {Wiley-VCH},
  year      = {1999},
  owner     = {mahe},
  timestamp = {2006.09.06}
}

@book{Bottou2007Large-scale,
  title = {Large-Scale Kernel Machines},
  publisher = {MIT Press},
  year = {2007},
  editor = {Bottou, L. and Chapelle, O. and DeCoste, D. and Weston, J.},
  timestamp = {2007.10.25}
}

@proceedings{Callison-Burch08Proceedings,
  title = {Proceedings of the Third Workshop on Statistical Machine Translation},
  publisher = {ACL},
  year = {2008},
  editor = {C. Callison-Burch and P. Koehn and C. Monz and J. Schroeder and C.
	S. Fordyce},
  address = {Columbus, Ohio},
  month = {Jun},
  url = {http://www.aclweb.org/anthology/W/W08/W08-03}
}

@proceedings{Cohen2008Machine,
  editor    = {William W. Cohen and Andrew McCallum and Sam T. Roweis},
  title     = {Machine Learning, Proceedings of the Twenty-Fifth International Conference
	(ICML 2008), Helsinki, Finland, June 5-9, 2008},
  booktitle = {ICML},
  series    = {ACM International Conference Proceeding Series},
  volume    = {307},
  publisher = {ACM},
  year      = {2008},
  isbn      = {978-1-60558-205-4},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@book{Gasteiger2003Chemoinformatics,
  title = {Chemoinformatics: A Textbook},
  publisher = {Wiley},
  year = {2003},
  editor = {Gasteiger, J. and Engel, T.},
  address = {New York, NY, USA},
  keywords = {chemoinformatics},
  location = {Sheffield, United Kingdom},
  owner = {mahe},
  timestamp = {2006.08.09}
}

@book{Guyon2006Feature,
  editor    = {Guyon, I. and Gunn, S. and Nikravesh, M. and Zadeh, L.},
  title     = {Feature Extraction, Foundations and Applications},
  publisher = {Springer},
  year      = {2006},
  owner     = {jp},
  timestamp = {2008.11.27}
}

@book{Jacoby2006Chemogenomics,
  editor    = {Jacoby, E.},
  title     = {Chemogenomics: Knowledge-based Approaches to Drug Discovery},
  publisher = {Imperial College Press},
  year      = {2006},
  owner     = {vert},
  timestamp = {2007.08.02}
}

@book{2006Chemical,
  editor    = {Jaroch, S. E. and Weinmann, H.},
  title     = {Chemical Genomics: Small Molecule Probes to Study Cellular Function},
  series    = {Ernst Schering Research Foundation Workshop},
  publisher = {Springer},
  address   = {Berlin},
  year      = {2006},
  keywords  = {chemogenomics},
  owner     = {vert},
  timestamp = {2007.08.02}
}

@book{Johnson1990Concepts,
  editor    = {M. A. Johnson and G. M. Maggiora},
  title     = {Concepts and Applications of Molecular Similarity},
  publisher = {Wiley},
  year      = {1990},
  owner     = {mahe},
  timestamp = {2006.09.01}
}

@book{Jordan2001Learning,
  editor    = {M. Jordan},
  title     = {Learning in Graphical Models},
  publisher = {The MIT Press},
  year      = {2001},
  owner     = {michael},
  timestamp = {2008.10.02}
}

@book{Kubinyi2004Chemogenomics,
  editor    = {Kubinyi, H. and M{\"u}ller, G. and Mannhold, R. and Folkers, G.},
  title     = {Chemogenomics in Drug Discovery: A Medicinal Chemistry Perspective},
  series    = {Methods and Principles in Medicinal Chemistry},
  publisher = {Wiley-VCH},
  address   = {New York},
  year      = {2004},
  owner     = {vert},
  timestamp = {2007.08.02}
}

@book{Scherer2009Batch,
  editor    = {Scherer, A.},
  title     = {Batch Effects and Noise in Microarray Experiments: Sources and Solutions},
  publisher = {John Wiley and Sons},
  year      = {2009},
  owner     = {jp},
  timestamp = {2012.02.29}
}

@book{Thrun1998Learning,
  title = {Learning to Learn},
  publisher = {Kluwer Academic Publishers},
  year = {1998},
  editor = {Thrun, Sebastian and Pratt, Lorien},
  address = {Norwell, MA, USA},
  isbn = {0-7923-8047-9}
}

@comment{{jabref-meta: selector_author:}}

@comment{{jabref-meta: selector_journal:Adv. Drug Deliv. Rev.;Am. J. Hu
m. Genet.;Am. J. Pathol.;Ann. Appl. Stat.;Ann. Math. Statist.;Ann. N. 
Y. Acad. Sci.;Ann. Probab.;Ann. Stat.;Artif. Intell. Med.;Bernoulli;Bi
ochim. Biophys. Acta;Bioinformatics;Biometrika;BMC Bioinformatics;Br. 
J. Pharmacol.;Breast Cancer Res.;Cell;Cell. Signal.;Chem. Res. Toxicol
.;Clin. Cancer Res.;Combinator. Probab. Comput.;Comm. Pure Appl. Math.
;Comput. Chem.;Comput. Comm. Rev.;Comput. Stat. Data An.;Curr. Genom.;
Curr. Opin. Chem. Biol.;Curr. Opin. Drug Discov. Devel.;Data Min. Know
l. Discov.;Electron. J. Statist.;Eur. J. Hum. Genet.;FEBS Lett.;Found.
 Comput. Math.;Genome Biol.;IEEE T. Neural Networ.;IEEE T. Pattern. An
al.;IEEE T. Signal. Proces.;IEEE Trans. Inform. Theory;IEEE Trans. Kno
wl. Data Eng.;IEEE/ACM Trans. Comput. Biol. Bioinf.;Int. J. Comput. Vi
sion;Int. J. Data Min. Bioinform.;Int. J. Qantum Chem.;J Biol Syst;J. 
ACM;J. Am. Soc. Inf. Sci. Technol.;J. Am. Stat. Assoc.;J. Bioinform. C
omput. Biol.;J. Biol. Chem.;J. Biomed. Inform.;J. Cell. Biochem.;J. Ch
em. Inf. Comput. Sci.;J. Chem. Inf. Model.;J. Clin. Oncol.;J. Comput. 
Biol.;J. Comput. Graph. Stat.;J. Eur. Math. Soc.;J. Intell. Inform. Sy
st.;J. Mach. Learn. Res.;J. Med. Chem.;J. Mol. BIol.;J. R. Stat. Soc. 
Ser. B;Journal of Statistical Planning and Inference;Mach. Learn.;Math
. Program.;Meth. Enzymol.;Mol. Biol. Cell;Mol. Biol. Evol.;Mol. Cell. 
Biol.;Mol. Syst. Biol.;N. Engl. J. Med.;Nat. Biotechnol.;Nat. Genet.;N
at. Med.;Nat. Methods;Nat. Rev. Cancer;Nat. Rev. Drug Discov.;Nat. Rev
. Genet.;Nature;Neural Comput.;Neural Network.;Neurocomputing;Nucleic 
Acids Res.;Pattern Anal. Appl.;Pattern Recognit.;Phys. Rev. E;Phys. Re
v. Lett.;PLoS Biology;PLoS Comput. Biol.;Probab. Theory Relat. Fields;
Proc. IEEE;Proc. Natl. Acad. Sci. USA;Protein Eng.;Protein Eng. Des. S
el.;Protein Sci.;Protein. Struct. Funct. Genet.;Random Struct. Algorit
hm.;Rev. Mod. Phys.;Science;Stat. Probab. Lett.;Statistica Sinica;Theo
r. Comput. Sci.;Trans. Am. Math. Soc.;Trends Genet.;}}

@comment{{jabref-meta: selector_keywords:biogm;biosvm;breastcancer;cgh;
chemogenomics;chemoinformatics;csbcbook;csbcbook-ch1;csbcbook-ch2;csbc
book-ch3;csbcbook-ch4;csbcbook-ch5;csbcbook-ch6;csbcbook-ch7;csbcbook-
ch8;csbcbook-ch9;csbcbook-mustread;dimred;featureselection;glycans;her
g;hic;highcontentscreening;image;immunoinformatics;kernel-theory;kerne
lbook;lasso;microarray;ngs;nlp;plasmodium;proteomics;PUlearning;rnaseq
;segmentation;sirna;}}

@comment{{jabref-meta: selector_booktitle:Adv. Neural. Inform. Process 
Syst.;}}

This file was generated by bibtex2html 1.97.