stat.bib

@comment{{This file has been generated by bib2bib 1.97}}

@comment{{Command line: bib2bib ../bibli.bib -c 'subject:"stat" or keywords:"stat"' -ob tmp.bib}}

@article{Tayrac2009Simultaneous,
  author = {{de Tayrac}, M. and L{\^e}, S. and Aubry, M. and Mosser, J. and Husson,
	F.},
  title = {Simultaneous analysis of distinct Omics data sets with integration
	of biological knowledge: Multiple Factor Analysis approach.},
  journal = {BMC Genomics},
  year = {2009},
  volume = {10},
  pages = {32},
  abstract = {Genomic analysis will greatly benefit from considering in a global
	way various sources of molecular data with the related biological
	knowledge. It is thus of great importance to provide useful integrative
	approaches dedicated to ease the interpretation of microarray data. Here,
	we introduce a data-mining approach, Multiple Factor Analysis (MFA),
	to combine multiple data sets and to add formalized knowledge. MFA
	is used to jointly analyse the structure emerging from genomic and
	transcriptomic data sets. The common structures are underlined and
	graphical outputs are provided such that biological meaning becomes
	easily retrievable. Gene Ontology terms are used to build gene modules
	that are superimposed on the experimentally interpreted plots. Functional
	interpretations are then supported by a step-by-step sequence of
	graphical representations. When applied to genomic and transcriptomic
	data and associated Gene Ontology annotations, our method prioritize
	the biological processes linked to the experimental settings. Furthermore,
	it reduces the time and effort to analyze large amounts of 'Omics'
	data.},
  doi = {10.1186/1471-2164-10-32},
  institution = {CNRS UMR 6061, Universit{\'e} de Rennes 1, IFR 140, Facult{\'e} de M{\'e}decine,
	CS 34317, 35043 Rennes, France. marie.de-tayrac@univ-rennes1.fr},
  keywords = {Animals; Comparative Genomic Hybridization; Factor Analysis, Statistical;
	Gene Expression Profiling, methods; Genomics, methods; Glioma, genetics;
	Humans; Mice; Models, Biological; Oligonucleotide Array Sequence
	Analysis, methods},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2164-10-32},
  pmid = {19154582},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1186/1471-2164-10-32}
}

@article{Amari2001Information,
  author = {Amari, S.-I.},
  title = {Information geometry on hierarchy of probability distributions},
  journal = {I{EEE} {T}rans. {I}nform. {T}heory},
  year = {2001},
  volume = {47},
  pages = {1701--1711},
  number = {5},
  month = jul,
  pdf = {../local/amar01.pdf},
  file = {amar01.pdf:local/amar01.pdf:PDF},
  subject = {stat},
  url = {http://www.islab.brain.riken.go.jp/~amari/pub/IGHI.ps.gz}
}

@article{Bagga2005Quantitative,
  author = {Harmohina Bagga and David S Greenfield and William J Feuer},
  title = {Quantitative assessment of atypical birefringence images using scanning
	laser polarimetry with variable corneal compensation.},
  journal = {Am {J} {O}phthalmol},
  year = {2005},
  volume = {139},
  pages = {437--446},
  number = {3},
  month = mar,
  abstract = {P{URPOSE}: {T}o define the clinical characteristics of atypical birefringence
	images and to describe a quantitative method for their identification.
	{DESIGN}: {P}rospective, comparative, clinical observational study.
	{METHODS}: {N}ormal and glaucomatous eyes underwent complete examination,
	standard automated perimetry, scanning laser polarimetry with variable
	corneal compensation ({GD}x-{VCC}), and optical coherence tomography
	({OCT}) of the macula, peripapillary retinal nerve fiber layer ({RNFL}),
	and optic disk. {E}yes were classified into two groups: normal birefringence
	pattern ({NBP}) and atypical birefringence pattern ({ABP}). {C}linical,
	functional, and structural characteristics were assessed separately.
	{A} multiple logistic regression model was used to predict eyes with
	{ABP} on the basis of a quantitative scan score generated by a support
	vector machine ({SVM}) with {GD}x-{VCC}. {RESULTS}: {S}ixty-five
	eyes of 65 patients were enrolled. {ABP} images were observed in
	5 of 20 (25\%) normal eyes and 23 of 45 (51\%) glaucomatous eyes.
	{C}ompared with eyes with {NBP}, glaucomatous eyes with {ABP} demonstrated
	significantly lower {SVM} scores ({P} < .0001, < 0.0001, 0.008, 0.03,
	and 0.03, respectively) and greater temporal, mean, inferior, and
	nasal {RNFL} thickness using {GD}x-{VCC}; and a weaker correlation
	with {OCT} generated {RNFL} thickness ({R}(2) = .75 vs .27). {ABP}
	images were significantly correlated with older age ({R}(2) = .16,
	{P} = .001). {T}he {SVM} score was the only significant ({P} < .0001)
	predictor of {ABP} images and provided high discriminating power
	between eyes with {NBP} and {ABP} (area under the receiver operator
	characteristic curve = 0.98). {CONCLUSIONS}: {ABP} images exist in
	a subset of normal and glaucomatous eyes, are associated with older
	patient age, and produce an artifactual increase in {RNFL} thickness
	using {GD}x-{VCC}. {T}he {SVM} score is highly predictive of {ABP}
	images.},
  doi = {10.1016/j.ajo.2004.10.019},
  pdf = {../local/Bagga2005Quantitative.pdf},
  file = {Bagga2005Quantitative.pdf:local/Bagga2005Quantitative.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under
	Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain
	Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic
	Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic,
	Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis,
	Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male,
	Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical
	Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence,
	P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15767051},
  pii = {S0002-9394(04)01265-6},
  url = {http://dx.doi.org/10.1016/j.ajo.2004.10.019}
}

@article{Bagirov2003New,
  author = {A. M. Bagirov and B. Ferguson and S. Ivkovic and G. Saunders and
	J. Yearwood},
  title = {New algorithms for multi-class cancer diagnosis using tumor gene
	expression signatures.},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {1800--1807},
  number = {14},
  month = sep,
  abstract = {M{OTIVATION}: {T}he increasing use of {DNA} microarray-based tumor
	gene expression profiles for cancer diagnosis requires mathematical
	methods with high accuracy for solving clustering, feature selection
	and classification problems of gene expression data. {RESULTS}: {N}ew
	algorithms are developed for solving clustering, feature selection
	and classification problems of gene expression data. {T}he clustering
	algorithm is based on optimization techniques and allows the calculation
	of clusters step-by-step. {T}his approach allows us to find as many
	clusters as a data set contains with respect to some tolerance. {F}eature
	selection is crucial for a gene expression database. {O}ur feature
	selection algorithm is based on calculating overlaps of different
	genes. {T}he database used, contains over 16 000 genes and this number
	is considerably reduced by feature selection. {W}e propose a classification
	algorithm where each tissue sample is considered as the center of
	a cluster which is a ball. {T}he results of numerical experiments
	confirm that the classification algorithm in combination with the
	feature selection algorithm perform slightly better than the published
	results for multi-class classifiers based on support vector machines
	for this data set. {AVAILABILITY}: {A}vailable on request from the
	authors.},
  pdf = {../local/Bagirov2003New.pdf},
  file = {Bagirov2003New.pdf:local/Bagirov2003New.pdf:PDF},
  keywords = {Algorithms, Amino Acid Sequence, Anion Exchange Resins, Antigen-Antibody
	Complex, Artificial Intelligence, Automated, Automatic Data Processing,
	Biological, Blood Cells, Chemical, Chromatography, Cluster Analysis,
	Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted,
	DNA, Data Interpretation, Databases, Decision Making, Decision Trees,
	Diffusion Magnetic Resonance Imaging, English Abstract, Epitopes,
	Expert Systems, Factual, Fuzzy Logic, Gene Expression Profiling,
	Gene Expression Regulation, Gene Targeting, Genetic, Genome, Histocompatibility
	Antigens Class I, Humans, Image Interpretation, Image Processing,
	In Vitro, Indicators and Reagents, Information Storage and Retrieval,
	Ion Exchange, Least-Squares Analysis, Liver Cirrhosis, Magnetic Resonance
	Imaging, Male, Models, Molecular Sequence Data, Neoplasms, Neoplastic,
	Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Nucleic
	Acid Conformation, Oligonucleotide Array Sequence Analysis, P.H.S.,
	Pattern Recognition, Proteome, Prostatic Neoplasms, Protein, Protein Binding,
	Protein Interaction Mapping, Proteins, Quantitative Structure-Activity
	Relationship, RNA, ROC Curve, Reproducibility of Results, Research
	Support, Sensitivity and Specificity, Sequence Alignment, Sequence
	Analysis, Severity of Illness Index, Statistical, Structure-Activity
	Relationship, Subtraction Technique, T-Lymphocyte, Transcription
	Factors, Transfer, Treatment Outcome, Tumor Markers, U.S. Gov't,
	User-Computer Interface, 14512351},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/19/14/1800}
}

@article{Bernardo2005Chemogenomica,
  author = {di Bernardo, D. and Thompson, M. J. and Gardner, T. S. and Chobot,
	S. E. and Eastwood, E. L. and Wojtovich, A. P. and Elliott, S. J. and
	Schaus, S. E. and Collins, J. J.},
  title = {Chemogenomic profiling on a genome-wide scale using reverse-engineered
	gene networks.},
  journal = {Nat Biotechnol},
  year = {2005},
  volume = {23},
  pages = {377--383},
  number = {3},
  month = mar,
  abstract = {A major challenge in drug discovery is to distinguish the molecular
	targets of a bioactive compound from the hundreds to thousands of
	additional gene products that respond indirectly to changes in the
	activity of the targets. Here, we present an integrated computational-experimental
	approach for computing the likelihood that gene products and associated
	pathways are targets of a compound. This is achieved by filtering
	the mRNA expression profile of compound-exposed cells using a reverse-engineered
	model of the cell's gene regulatory network. We apply the method
	to a set of 515 whole-genome yeast expression profiles resulting
	from a variety of treatments (compounds, knockouts and induced expression),
	and correctly enrich for the known targets and associated pathways
	in the majority of compounds examined. We demonstrate our approach
	with PTSB, a growth inhibitory compound with a previously unknown
	mode of action, by predicting and validating thioredoxin and thioredoxin
	reductase as its target.},
  doi = {10.1038/nbt1075},
  institution = {Telethon Institute for Genetics and Medicine, Naples, Italy.},
  keywords = {Algorithms; Artificial Intelligence; Computer Simulation; Drug Delivery
	Systems; Drug Design; Gene Expression Profiling; Gene Expression
	Regulation; Models, Biological; Models, Statistical; Protein Engineering;
	Protein Interaction Mapping; Saccharomyces cerevisiae; Saccharomyces
	cerevisiae Proteins; Signal Transduction; Thioredoxin-Disulfide Reductase;
	Thioredoxins},
  owner = {fantine},
  pii = {nbt1075},
  pmid = {15765094},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1038/nbt1075}
}

@article{Bernstein1977Protein,
  author = {F. C. Bernstein and T. F. Koetzle and G. J. Williams and E. F. Meyer
	and M. D. Brice and J. R. Rodgers and O. Kennard and T. Shimanouchi
	and M. Tasumi},
  title = {The Protein Data Bank: a computer-based archival file for macromolecular
	structures.},
  journal = {J. Mol. Biol.},
  year = {1977},
  volume = {112},
  pages = {535--542},
  number = {3},
  month = may,
  keywords = {Computers; Great Britain; Information Systems; Japan; Protein Conformation;
	Proteins; United States},
  owner = {bricehoffmann},
  pmid = {875032},
  timestamp = {2009.02.13}
}

@article{Bhavani2006Substructure-based,
  author    = {S. Bhavani and A. Nagargadde and A. Thawani and V. Sridhar and N.
	Chandra},
  title     = {Substructure-based support vector machine classifiers for prediction
	of adverse effects in diverse classes of drugs.},
  journal   = {J. Chem. Inform. Model.},
  year      = {2006},
  volume    = {46},
  number    = {6},
  pages     = {2478--2486},
  doi       = {10.1021/ci060128l},
  pmid      = {17125188},
  url       = {http://dx.doi.org/10.1021/ci060128l},
  abstract  = {Unforeseen adverse effects exhibited by drugs contribute heavily to
	late-phase failure and even withdrawal of marketed drugs. Torsade
	de pointes (TdP) is one such important adverse effect, which causes
	cardiac arrhythmia and, in some cases, sudden death, making it crucial
	for potential drugs to be screened for torsadogenicity. The need
	to tap the power of computational approaches for the prediction of
	adverse effects such as TdP is increasingly becoming evident. The
	availability of screening data including those in organized databases
	greatly facilitates exploration of newer computational approaches.
	In this paper, we report the development of a prediction method based
	on a support machine vector algorithm. The method uses a combination
	of descriptors, encoding both the type of toxicophore as well as
	the position of the toxicophore in the drug molecule, thus considering
	both the pharmacophore and the three-dimensional shape information
	of the molecule. For delineating toxicophores, a novel pattern-recognition
	method that utilizes substructures within a molecule has been developed.
	The results obtained using the hybrid approach have been compared
	with those available in the literature for the same data set. An
	improvement in prediction accuracy is clearly seen, with the accuracy
	reaching up to 97\% in predicting compounds that can cause TdP and
	90\% for predicting compounds that do not cause TdP. The generic
	nature of the method has been demonstrated with four data sets available
	for carcinogenicity, where prediction accuracies were significantly
	higher, with a best receiver operating characteristics (ROC) value
	of 0.81 as against a best ROC value of 0.7 reported in the literature
	for the same data set. Thus, the method holds promise for wide applicability
	in toxicity prediction.},
  keywords  = {Algorithms; Carcinogens; Chemistry, Pharmaceutical; Computational
	Biology; Drug Evaluation, Preclinical; Drug Industry; Humans; Models,
	Chemical; Models, Statistical; Neural Networks (Computer); Pattern
	Recognition, Automated; ROC Curve; Sequence Analysis, Protein; Software;
	Torsades de Pointes},
  owner     = {laurent},
  timestamp = {2007.09.18}
}

@article{Boucheron2000sharp,
  author  = {Boucheron, S. and Lugosi, G. and Massart, P.},
  title   = {A sharp concentration inequality with applications},
  journal = {Random {S}tructures and {A}lgorithms},
  volume  = {16},
  pages   = {277--292},
  year    = {2000},
  url     = {http://www.econ.upf.es/~lugosi/concentration.ps},
  pdf     = {../local/bouc00.pdf},
  file    = {bouc00.pdf:local/bouc00.pdf:PDF},
  subject = {stat}
}

@article{Bowd2002Comparing,
  author = {Christopher Bowd and Kwokleung Chan and Linda M Zangwill and Michael
	H Goldbaum and Te-Won Lee and Terrence J Sejnowski and Robert N Weinreb},
  title = {Comparing neural networks and linear discriminant functions for glaucoma
	detection using confocal scanning laser ophthalmoscopy of the optic
	disc.},
  journal = {Invest {O}phthalmol {V}is {S}ci},
  year = {2002},
  volume = {43},
  pages = {3444--3454},
  number = {11},
  month = nov,
  abstract = {P{URPOSE}: {T}o determine whether neural network techniques can improve
	differentiation between glaucomatous and nonglaucomatous eyes, using
	the optic disc topography parameters of the {H}eidelberg {R}etina
	{T}omograph ({HRT}; {H}eidelberg {E}ngineering, {H}eidelberg, {G}ermany).
	{METHODS}: {W}ith the {HRT}, one eye was imaged from each of 108
	patients with glaucoma (defined as having repeatable visual field
	defects with standard automated perimetry) and 189 subjects without
	glaucoma (no visual field defects with healthy-appearing optic disc
	and retinal nerve fiber layer on clinical examination) and the optic
	nerve topography was defined by 17 global and 66 regional {HRT} parameters.
	{W}ith all the {HRT} parameters used as input, receiver operating
	characteristic ({ROC}) curves were generated for the classification
	of eyes, by three neural network techniques: linear and {G}aussian
	support vector machines ({SVM} linear and {SVM} {G}aussian, respectively)
	and a multilayer perceptron ({MLP}), as well as four previously proposed
	linear discriminant functions ({LDF}s) and one {LDF} developed on
	the current data with all {HRT} parameters used as input. {RESULTS}:
	{T}he areas under the {ROC} curves for {SVM} linear and {SVM} {G}aussian
	were 0.938 and 0.945, respectively; for {MLP}, 0.941; for the current
	{LDF}, 0.906; and for the best previously proposed {LDF}, 0.890.
	{W}ith the use of forward selection and backward elimination optimization
	techniques, the areas under the {ROC} curves for {SVM} {G}aussian
	and the current {LDF} were increased to approximately 0.96. {CONCLUSIONS}:
	{T}rained neural networks, with global and regional {HRT} parameters
	used as input, improve on previously proposed {HRT} parameter-based
	{LDF}s for discriminating between glaucomatous and nonglaucomatous
	eyes. {T}he performance of both neural networks and {LDF}s can be
	improved with optimization of the features in the input. {N}eural
	network analyses show promise for increasing diagnostic accuracy
	of tests for glaucoma.},
  pdf = {../local/Bowd2002Comparing.pdf},
  file = {Bowd2002Comparing.pdf:local/Bowd2002Comparing.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric
	Acid Cycle, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases,
	Decision Making, Diagnosis, Differential, Discriminant Analysis,
	Drug, Drug Design, Electrostatics, Eukaryotic Cells, Factual, Feasibility
	Studies, Female, Gene Expression, Gene Expression Profiling, Gene
	Expression Regulation, Genes, Genetic, Genetic Heterogeneity, Genetic
	Markers, Glaucoma, Hemolysins, Humans, Internet, Intraocular Pressure,
	Ion Exchange, Lasers, Leukemia, Ligands, Likelihood Functions, Logistic
	Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics,
	Messenger, Models, Molecular, Molecular Probe Techniques, Molecular
	Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural
	Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't,
	Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Open-Angle, Ophthalmoscopy,
	Optic Disk, Ovarian Neoplasms, P.H.S., Pattern Recognition, Probability,
	Probability Learning, Protein Binding, Protein Conformation, Proteins,
	Quality Control, Quantum Theory, RNA, RNA Splicing, ROC Curve, Receptors,
	Reference Values, Regression Analysis, Reproducibility of Results,
	Research Support, Robotics, Saccharomyces cerevisiae Proteins, Sensitivity
	and Specificity, Sequence Analysis, Signal Processing, Software,
	Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship,
	Thermodynamics, Transcription, Tumor Markers, U.S. Gov't, 12407155},
  url = {http://www.iovs.org/cgi/content/abstract/43/11/3444}
}

@article{Bowd2004Confocal,
  author = {Christopher Bowd and Linda M Zangwill and Felipe A Medeiros and Jiucang
	Hao and Kwokleung Chan and Te-Won Lee and Terrence J Sejnowski and
	Michael H Goldbaum and Pamela A Sample and Jonathan G Crowston and
	Robert N Weinreb},
  title = {Confocal scanning laser ophthalmoscopy classifiers and stereophotograph
	evaluation for prediction of visual field abnormalities in glaucoma-suspect
	eyes.},
  journal = {Invest {O}phthalmol {V}is {S}ci},
  year = {2004},
  volume = {45},
  pages = {2255--2262},
  number = {7},
  month = jul,
  abstract = {P{URPOSE}: {T}o determine whether {H}eidelberg {R}etina {T}omograph
	({HRT}; {H}eidelberg {E}ngineering, {D}ossenheim, {G}ermany) classification
	techniques and investigational support vector machine ({SVM}) analyses
	can detect optic disc abnormalities in glaucoma-suspect eyes before
	the development of visual field abnormalities. {METHODS}: {G}laucoma-suspect
	eyes (n = 226) were classified as converts or nonconverts based on
	the development of repeatable (either two or three consecutive) standard
	automated perimetry ({SAP})-detected abnormalities over the course
	of the study (mean follow-up, approximately 4.5 years). {H}azard
	ratios for development of {SAP} abnormalities were calculated based
	on baseline classification results, follow-up time, and end point
	status (convert, nonconvert). {C}lassification techniques applied
	were {HRT} classification ({HRTC}), {M}oorfields {R}egression {A}nalysis,
	forward-selection optimized {SVM} ({SVM} fwd) and backward elimination-optimized
	{SVM} ({SVM} back) analysis of {HRT} data, and stereophotograph assessment.
	{RESULTS}: {U}nivariate analyses indicated that all classification
	techniques were predictors of the development of two repeatable abnormal
	{SAP} results, with hazards ratios (95\% confidence interval [{CI}])
	ranging from 1.32 (1.00-1.75) for {HRTC} to 2.0 (1.48-2.76) for stereophotograph
	assessment (all {P} < or = 0.05). {O}nly {SVM} ({SVM} fwd and {SVM}
	back) analysis of {HRT} data and stereophotograph assessment were
	univariate predictors of the development of three repeatable abnormal
	{SAP} results, with hazard ratios (95\% {CI}) ranging from 1.73 (1.16-2.82)
	for {SVM} fwd to 1.82 (1.19-3.12) for {SVM} back (both {P} < 0.007).
	{M}ultivariate analyses including each classification technique individually
	in a model with age, baseline {SAP} pattern standard deviation [{PSD}],
	and baseline {IOP} indicated that all classification techniques except
	{HRTC} ({P} = 0.06) were predictors of the development of two repeatable
	abnormal {SAP} results with hazards ratios ranging from 1.30 (0.99,
	1.73) for {HRTC} to 1.90 (1.37, 2.69) for stereophotograph assessment.
	{O}nly {SVM} ({SVM} fwd and {SVM} back) analysis of {HRT} data and
	stereophotograph assessment were significant predictors of the development
	of three repeatable abnormal {SAP} results in multivariate analyses;
	hazard ratios of 1.57 (1.03, 2.59) and 1.70 (1.18, 2.51), respectively.
	{SAP} {PSD} was a significant predictor of two repeatable abnormal
	{SAP} results in multivariate models with all classification techniques,
	with hazard ratios ranging from 3.31 (1.39, 7.89) to 4.70 (2.02,
	10.93) per 1-d{B} increase. {CONCLUSIONS}: {HRT} classifications
	techniques and stereophotograph assessment can detect optic disc
	topography abnormalities in glaucoma-suspect eyes before the development
	of {SAP} abnormalities. {T}hese data support strongly the importance
	of optic disc examination for early glaucoma diagnosis.},
  doi = {10.1167/iovs.03-1087},
  pdf = {../local/Bowd2004Confocal.pdf},
  file = {Bowd2004Confocal.pdf:local/Bowd2004Confocal.pdf:PDF},
  keywords = {80 and over, Adolescent, Adult, Aged, Algorithms, Artificial Intelligence,
	Auditory, Benchmarking, Binding Sites, Brain Stem, Breast Diseases,
	Chemical, Child, Chromosomes, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, Data Interpretation, Databases,
	Diagnosis, Diagnostic Errors, Differential, Drug Resistance, Electroencephalography,
	Epilepsy, Evoked Potentials, Female, Forecasting, Gene Expression,
	Gene Expression Profiling, Genetic, Genotype, Glaucoma, Greece, HIV
	Protease Inhibitors, HIV-1, Human, Humans, Infant, Information Management,
	Information Storage and Retrieval, Intraocular Pressure, Kinetics,
	Language Development Disorders, Lasers, Least-Squares Analysis, Linear
	Models, Male, Microbial Sensitivity Tests, Middle Aged, Models, Molecular,
	Monitoring, Nephroblastoma, Non-U.S. Gov't, Nonlinear Dynamics, Ocular
	Hypertension, Oligonucleotide Array Sequence Analysis, Ophthalmoscopy,
	Optic Disk, Optic Nerve Diseases, P.H.S., Pair 1, Perimetry, Periodicals,
	Phosphorylation, Phosphotransferases, Photography, Physiologic, Point
	Mutation, Preschool, Prognosis, Protein, Proteins, Pyrimidinones,
	Reaction Time, Recurrence, Reproducibility of Results, Research Support,
	Reverse Transcriptase Inhibitors, Sensitivity and Specificity, Sequence
	Alignment, Sequence Analysis, Signal Processing, Software, Sound
	Localization, Statistical, Stochastic Processes, Structure-Activity
	Relationship, Theoretical, Time Factors, U.S. Gov't, Viral, Vision
	Disorders, Visual Fields, 15223803},
  url = {http://dx.doi.org/10.1167/iovs.03-1087}
}

@article{Bui2006Structural,
  author = {Bui, H.-H. and Schiewe, A. J. and von Grafenstein, H. and Haworth,
	I. S.},
  title = {{S}tructural prediction of peptides binding to {MHC} class {I} molecules.},
  journal = {Proteins},
  year = {2006},
  volume = {63},
  pages = {43--52},
  number = {1},
  month = apr,
  abstract = {Peptide binding to class I major histocompatibility complex (MHCI)
	molecules is a key step in the immune response and the structural
	details of this interaction are of importance in the design of peptide
	vaccines. Algorithms based on primary sequence have had success in
	predicting potential antigenic peptides for MHCI, but such algorithms
	have limited accuracy and provide no structural information. Here,
	we present an algorithm, PePSSI (peptide-MHC prediction of structure
	through solvated interfaces), for the prediction of peptide structure
	when bound to the MHCI molecule, HLA-A2. The algorithm combines sampling
	of peptide backbone conformations and flexible movement of MHC side
	chains and is unique among other prediction algorithms in its incorporation
	of explicit water molecules at the peptide-MHC interface. In an initial
	test of the algorithm, PePSSI was used to predict the conformation
	of eight peptides bound to HLA-A2, for which X-ray data are available.
	Comparison of the predicted and X-ray conformations of these peptides
	gave RMSD values between 1.301 and 2.475 A. Binding conformations
	of 266 peptides with known binding affinities for HLA-A2 were then
	predicted using PePSSI. Structural analyses of these peptide-HLA-A2
	conformations showed that peptide binding affinity is positively
	correlated with the number of peptide-MHC contacts and negatively
	correlated with the number of interfacial water molecules. These
	results are consistent with the relatively hydrophobic binding nature
	of the HLA-A2 peptide binding interface. In summary, PePSSI is capable
	of rapid and accurate prediction of peptide-MHC binding conformations,
	which may in turn allow estimation of MHCI-peptide binding affinity.},
  doi = {10.1002/prot.20870},
  keywords = {Algorithms, Amino Acid Sequence, Antigens, Artificial Intelligence,
	Automated, Binding Sites, Chemical, Computational Biology, Computer
	Simulation, Crystallography, Electrostatics, Genes, Genetic,
	HLA Antigens, Histocompatibility Antigens Class I, Humans, Hydrogen
	Bonding, Ligands, MHC Class I, Major Histocompatibility Complex,
	Models, Molecular, Molecular Conformation, Molecular Sequence Data,
	Pattern Recognition, Peptides, Protein, Protein Binding, Protein
	Conformation, Proteomics, Quantitative Structure-Activity Relationship,
	Sequence Alignment, Sequence Analysis, Software, Structural Homology,
	Structure-Activity Relationship, Thermodynamics, Water, X-Ray, X-Rays,
	16447245},
  pmid = {16447245},
  timestamp = {2007.01.25},
  url = {http://dx.doi.org/10.1002/prot.20870}
}

@inproceedings{Catoni2002Data,
  author    = {Catoni, O.},
  title     = {Data {C}ompression and {A}daptive {H}istograms},
  booktitle = {Foundations of {C}omputational {M}athematics, {P}roceedings of {S}malefest
	2000},
  editor    = {Felipe Cucker and J. Maurice Rojas},
  publisher = {World Scientific},
  year      = {2002},
  url       = {http://www.proba.jussieu.fr/users/catoni/gibbsHist_doc/},
  pdf       = {../local/cato02.pdf},
  file      = {cato02.pdf:local/cato02.pdf:PDF},
  subject   = {stat}
}

@unpublished{CatoniGibbs,
  author  = {Catoni, O.},
  title   = {Gibbs estimators},
  note    = {Revised version},
  url     = {http://www.proba.jussieu.fr/users/catoni/homepage/gibbs5.dvi},
  pdf     = {../local/cato02.ps},
  file    = {cato02.ps:local/cato02.ps:PostScript},
  subject = {stat}
}

@article{Chan2003Detection,
  author = {Ian Chan and William Wells and Robert V Mulkern and Steven Haker
	and Jianqing Zhang and Kelly H Zou and Stephan E Maier and Clare
	M C Tempany},
  title = {Detection of prostate cancer by integration of line-scan diffusion,
	{T}2-mapping and {T}2-weighted magnetic resonance imaging; a multichannel
	statistical classifier.},
  journal = {Med {P}hys},
  year = {2003},
  volume = {30},
  pages = {2390--2398},
  number = {9},
  month = sep,
  abstract = {A multichannel statistical classifier for detecting prostate cancer
	was developed and validated by combining information from three different
	magnetic resonance ({MR}) methodologies: {T}2-weighted, {T}2-mapping,
	and line scan diffusion imaging ({LSDI}). {F}rom these {MR} sequences,
	four different sets of image intensities were obtained: {T}2-weighted
	({T}2{W}) from {T}2-weighted imaging, {A}pparent {D}iffusion {C}oefficient
	({ADC}) from {LSDI}, and proton density ({PD}) and {T}2 ({T}2 {M}ap)
	from {T}2-mapping imaging. {M}anually segmented tumor labels from
	a radiologist, which were validated by biopsy results, served as
	tumor "ground truth." {T}extural features were extracted from the
	images using co-occurrence matrix ({CM}) and discrete cosine transform
	({DCT}). {A}natomical location of voxels was described by a cylindrical
	coordinate system. {A} statistical jack-knife approach was used to
	evaluate our classifiers. {S}ingle-channel maximum likelihood ({ML})
	classifiers were based on 1 of the 4 basic image intensities. {O}ur
	multichannel classifiers: support vector machine ({SVM}) and {F}isher
	linear discriminant ({FLD}), utilized five different sets of derived
	features. {E}ach classifier generated a summary statistical map that
	indicated tumor likelihood in the peripheral zone ({PZ}) of the prostate
	gland. {T}o assess classifier accuracy, the average areas under the
	receiver operator characteristic ({ROC}) curves over all subjects
	were compared. {O}ur best {FLD} classifier achieved an average {ROC}
	area of 0.839(+/-0.064), and our best {SVM} classifier achieved an
	average {ROC} area of 0.761(+/-0.043). {T}he {T}2{W} {ML} classifier,
	our best single-channel classifier, only achieved an average {ROC}
	area of 0.599(+/-0.146). {C}ompared to the best single-channel {ML}
	classifier, our best multichannel {FLD} and {SVM} classifiers have
	statistically superior {ROC} performance ({P}=0.0003 and 0.0017,
	respectively) from pairwise two-sided t-test. {B}y integrating the
	information from multiple images and capturing the textural and anatomical
	features in tumor areas, summary statistical maps can potentially
	aid in image-guided prostate biopsy and assist in guiding and controlling
	delivery of localized therapy under image guidance.},
  pdf = {../local/Chan2003Detection.pdf},
  file = {Chan2003Detection.pdf:local/Chan2003Detection.pdf:PDF},
  keywords = {Algorithms, Anion Exchange Resins, Antigen-Antibody Complex, Artificial
	Intelligence, Automated, Automatic Data Processing, Biological, Blood
	Cells, Chemical, Chromatography, Cluster Analysis, Comparative Study,
	Computational Biology, Computer Simulation, Computer-Assisted, Data
	Interpretation, Databases, Decision Making, Decision Trees, Diffusion
	Magnetic Resonance Imaging, English Abstract, Epitopes, Expert Systems,
	Factual, Fuzzy Logic, Gene Expression Profiling, Gene Expression
	Regulation, Gene Targeting, Genome, Histocompatibility Antigens Class
	I, Humans, Image Interpretation, Image Processing, In Vitro, Indicators
	and Reagents, Information Storage and Retrieval, Ion Exchange, Least-Squares
	Analysis, Liver Cirrhosis, Magnetic Resonance Imaging, Male, Models,
	Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Nucleic
	Acid Conformation, P.H.S., Pattern Recognition, Proteome, Prostatic Neoplasms,
	Protein, Protein Binding, Protein Interaction Mapping, Proteins,
	Quantitative Structure-Activity Relationship, RNA, ROC Curve, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Sequence
	Analysis, Severity of Illness Index, Statistical, Structure-Activity
	Relationship, Subtraction Technique, T-Lymphocyte, Transcription
	Factors, Transfer, Treatment Outcome, U.S. Gov't, User-Computer Interface,
	14528961}
}

@article{Chan2002Comparison,
  author = {Kwokleung Chan and Te-Won Lee and Pamela A Sample and Michael H Goldbaum
	and Robert N Weinreb and Terrence J Sejnowski},
  title = {Comparison of machine learning and traditional classifiers in glaucoma
	diagnosis},
  journal = {{IEEE} Trans Biomed Eng},
  year = {2002},
  volume = {49},
  pages = {963--974},
  number = {9},
  month = sep,
  abstract = {Glaucoma is a progressive optic neuropathy with characteristic structural
	changes in the optic nerve head reflected in the visual field. {T}he
	visual-field sensitivity test is commonly used in a clinical setting
	to evaluate glaucoma. {S}tandard automated perimetry ({SAP}) is a
	common computerized visual-field test whose output is amenable to
	machine learning. {W}e compared the performance of a number of machine
	learning algorithms with {STATPAC} indexes mean deviation, pattern
	standard deviation, and corrected pattern standard deviation. {T}he
	machine learning algorithms studied included multilayer perceptron
	({MLP}), support vector machine ({SVM}), and linear ({LDA}) and quadratic
	discriminant analysis ({QDA}), {P}arzen window, mixture of {G}aussian
	({MOG}), and mixture of generalized {G}aussian ({MGG}). {MLP} and
	{SVM} are classifiers that work directly on the decision boundary
	and fall under the discriminative paradigm. {G}enerative classifiers,
	which first model the data probability density and then perform classification
	via {B}ayes' rule, usually give deeper insight into the structure
	of the data space. {W}e have applied {MOG}, {MGG}, {LDA}, {QDA},
	and {P}arzen window to the classification of glaucoma from {SAP}.
	{P}erformance of the various classifiers was compared by the areas
	under their receiver operating characteristic curves and by sensitivities
	(true-positive rates) at chosen specificities (true-negative rates).
	{T}he machine-learning-type classifiers showed improved performance
	over the best indexes from {STATPAC}. {F}orward-selection and backward-elimination
	methodology further improved the classification rate and also has
	the potential to reduce testing time by diminishing the number of
	visual-field location measurements.},
  doi = {10.1109/TBME.2002.802012},
  pdf = {../local/Chan2002Comparison.pdf},
  file = {Chan2002Comparison.pdf:local/Chan2002Comparison.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric
	Acid Cycle, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases,
	Decision Making, Diagnosis, Differential, Discriminant Analysis,
	Drug, Drug Design, Electrostatics, Epitopes, Eukaryotic Cells, Factual,
	False Negative Reactions, False Positive Reactions, Feasibility Studies,
	Female, Gene Expression, Gene Expression Profiling, Gene Expression
	Regulation, Genes, Genetic, Genetic Heterogeneity, Genetic Markers,
	Glaucoma, HLA Antigens, Hemolysins, Histocompatibility Antigens Class
	I, Humans, Internet, Intraocular Pressure, Ion Exchange, Lasers,
	Leukemia, Ligands, Likelihood Functions, Logistic Models, Lung Neoplasms,
	Lymphocytic, Lymphoma, Markov Chains, Mathematics, Messenger, Models,
	Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology,
	Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological,
	Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't, Nucleic Acid Conformation,
	Nucleic Acid Hybridization, Observer Variation, Oligonucleotide Array
	Sequence Analysis, Open-Angle, Ophthalmoscopy, Optic Disk, Optic
	Nerve Diseases, Ovarian Neoplasms, P.H.S., Pattern Recognition, Peptides,
	Perimetry, Predictive Value of Tests, Probability, Probability Learning,
	Protein, Protein Binding, Protein Conformation, Proteins, Quality
	Control, Quantum Theory, RNA, RNA Splicing, ROC Curve, Receptors,
	Reference Values, Regression Analysis, Reproducibility of Results,
	Research Support, Robotics, Saccharomyces cerevisiae Proteins, Sensitivity
	and Specificity, Sequence Analysis, Signal Processing, Software,
	Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship,
	T-Lymphocyte, Thermodynamics, Transcription, Tumor Markers, U.S.
	Gov't, 12214886},
  url = {http://dx.doi.org/10.1109/TBME.2002.802012}
}

@article{Chen2011Removing,
  author = {Chao Chen and Kay Grennan and Judith Badner and Dandan Zhang and
	Elliot Gershon and Li Jin and Chunyu Liu},
  title = {Removing batch effects in analysis of expression microarray data:
	an evaluation of six batch adjustment methods},
  journal = {PLoS One},
  year = {2011},
  volume = {6},
  pages = {e17238},
  number = {2},
  abstract = {The expression microarray is a frequently used approach to study gene
	expression on a genome-wide scale. However, the data produced by
	the thousands of microarray studies published annually are confounded
	by "batch effects," the systematic error introduced when samples
	are processed in multiple batches. Although batch effects can be
	reduced by careful experimental design, they cannot be eliminated
	unless the whole study is done in a single batch. A number of programs
	are now available to adjust microarray data for batch effects prior
	to analysis. We systematically evaluated six of these programs using
	multiple measures of precision, accuracy and overall performance.
	ComBat, an Empirical Bayes method, outperformed the other five programs
	by most metrics. We also showed that it is essential to standardize
	expression data at the probe level when testing for correlation of
	expression profiles, due to a sizeable probe effect in microarray
	data that can inflate the correlation among replicates and unrelated
	samples.},
  doi = {10.1371/journal.pone.0017238},
  institution = {National Ministry of Education Key Laboratory of Contemporary Anthropology,
	Fudan University, Shanghai, People's Republic of China.},
  keywords = {Bayes Theorem; Case-Control Studies; Data Interpretation, Statistical;
	Gene Expression Profiling, standards/statistics /&/ numerical data;
	Humans; Microarray Analysis, standards/statistics /&/ numerical data;
	ROC Curve; Reference Standards; Research Design; Sample Size; Selection
	Bias; Validation Studies as Topic},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pmid = {21386892},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1371/journal.pone.0017238}
}

@article{Churchill2002Fundamentals,
  author = {Churchill, G. A.},
  title = {Fundamentals of experimental design for {cDNA} microarrays},
  journal = {Nat. Genet.},
  year = {2002},
  volume = {32 Suppl},
  pages = {490--495},
  month = dec,
  abstract = {Microarray technology is now widely available and is being applied
	to address increasingly complex scientific questions. Consequently,
	there is a greater demand for statistical assessment of the conclusions
	drawn from microarray experiments. This review discusses fundamental
	issues of how to design an experiment to ensure that the resulting
	data are amenable to statistical analysis. The discussion focuses
	on two-color spotted cDNA microarrays, but many of the same issues
	apply to single-color gene-expression assays as well.},
  doi = {10.1038/ng1031},
  institution = {The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA.
	garyc@jax.org},
  keywords = {Animals; DNA, Complementary, analysis; Gene Expression; Gene Expression
	Profiling, methods; Mice; Models, Biological; Oligonucleotide Array
	Sequence Analysis, methods; Reference Standards; Reproducibility
	of Results; Research Design; Statistics as Topic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {ng1031},
  pmid = {12454643},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/ng1031}
}

@article{Cohen2004application,
  author = {Gilles Cohen and M{\'e}lanie Hilario and Hugo Sax and St{\'e}phane Hugonnet
	and Christian Pellegrini and Antoine Geissbuhler},
  title = {An application of one-class support vector machine to nosocomial
	infection detection},
  journal = {Medinfo},
  year = {2004},
  volume = {11},
  pages = {716--720},
  number = {Pt 1},
  abstract = {Nosocomial infections ({NI}s)---those acquired in health care settings---are
	among the major causes of increased mortality among hospitalized
	patients. {T}hey are a significant burden for patients and health
	authorities alike; it is thus important to monitor and detect them
	through an effective surveillance system. {T}his paper describes
	a retrospective analysis of a prevalence survey of {NI}s done in
	the {G}eneva {U}niversity {H}ospital. {O}ur goal is to identify patients
	with one or more {NI}s on the basis of clinical and other data collected
	during the survey. {I}n this two-class classification task, the main
	difficulty lies in the significant imbalance between positive or
	infected (11\%) and negative (89\%) cases. {T}o cope with class imbalance,
	we investigate one-class {SVM}s which can be trained to distinguish
	two classes on the basis of examples from a single class (in this
	case, only "normal" or non infected patients). {T}he infected ones
	are then identified as "abnormal" cases or outliers that deviate
	significantly from the normal profile. {E}xperimental results are
	encouraging: whereas standard 2-class {SVM}s scored a baseline sensitivity
	of 50.6\% on this problem, the one-class approach increased sensitivity
	to as much as 92.6\%. {T}hese results are comparable to those obtained
	by the authors in a previous study on asymmetrical soft margin {SVM}s;
	they suggest that one-class {SVM}s can provide an effective and efficient
	way of overcoming data imbalance in classification problems.},
  keywords = {Aged, Air, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts,
	Artificial Intelligence, Atrial, Automated, Canada, Carotid Stenosis,
	Cerebrovascular Accident, Cerebrovascular Circulation, Comparative
	Study, Computer-Assisted, Cross Infection, Cysteine, Data Collection,
	Decision Trees, Dementia, Diagnosis, Disulfides, Doppler, Embolism,
	Expert Systems, Extramural, Factor Analysis, Female, Gene Expression,
	Gene Expression Profiling, Health Status, Heart Septal Defects, Hospitals,
	Humans, Infection Control, Intracranial Embolism, Male, Models, Molecular,
	Myocardial Infarction, N.I.H., Neoplasms, Neural Networks (Computer),
	Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, Oxidation-Reduction,
	P.H.S., Pattern Recognition, Population Surveillance, Prevalence,
	Prognosis, Protein Binding, Protein Folding, Proteins, ROC Curve,
	Research Support, Retrospective Studies, Sensitivity and Specificity,
	Software, Statistical, Switzerland, Transcranial, Treatment Outcome,
	U.S. Gov't, Ultrasonography, University, 15360906},
  pii = {D040004219}
}

@article{Consortium2006MicroArray,
  author = {{MAQC Consortium} and Leming Shi and Laura H Reid and Wendell
	D Jones and Richard Shippy and Janet A Warrington and Shawn C Baker
	and Patrick J Collins and Francoise de Longueville and Ernest S Kawasaki
	and Kathleen Y Lee and Yuling Luo and Yongming Andrew Sun and James
	C Willey and Robert A Setterquist and Gavin M Fischer and Weida Tong
	and Yvonne P Dragan and David J Dix and Felix W Frueh and Frederico
	M Goodsaid and Damir Herman and Roderick V Jensen and Charles D Johnson
	and Edward K Lobenhofer and Raj K Puri and Uwe Scherf and Jean Thierry-Mieg
	and Charles Wang and Mike Wilson and Paul K Wolber and Lu Zhang and
	Shashi Amur and Wenjun Bao and Catalin C Barbacioru and Anne Bergstrom
	Lucas and Vincent Bertholet and Cecilie Boysen and Bud Bromley and
	Donna Brown and Alan Brunner and Roger Canales and Xiaoxi Megan Cao
	and Thomas A Cebula and James J Chen and Jing Cheng and Tzu-Ming
	Chu and Eugene Chudin and John Corson and J. Christopher Corton and
	Lisa J Croner and Christopher Davies and Timothy S Davison and Glenda
	Delenstarr and Xutao Deng and David Dorris and Aron C Eklund and
	Xiao-hui Fan and Hong Fang and Stephanie Fulmer-Smentek and James
	C Fuscoe and Kathryn Gallagher and Weigong Ge and Lei Guo and Xu
	Guo and Janet Hager and Paul K Haje and Jing Han and Tao Han and
	Heather C Harbottle and Stephen C Harris and Eli Hatchwell and Craig
	A Hauser and Susan Hester and Huixiao Hong and Patrick Hurban and
	Scott A Jackson and Hanlee Ji and Charles R Knight and Winston P
	Kuo and J. Eugene LeClerc and Shawn Levy and Quan-Zhen Li and Chunmei
	Liu and Ying Liu and Michael J Lombardi and Yunqing Ma and Scott
	R Magnuson and Botoul Maqsodi and Tim McDaniel and Nan Mei and Ola
	Myklebost and Baitang Ning and Natalia Novoradovskaya and Michael
	S Orr and Terry W Osborn and Adam Papallo and Tucker A Patterson
	and Roger G Perkins and Elizabeth H Peters and Ron Peterson and Kenneth
	L Philips and P. Scott Pine and Lajos Pusztai and Feng Qian and Hongzu
	Ren and Mitch Rosen and Barry A Rosenzweig and Raymond R Samaha and
	Mark Schena and Gary P Schroth and Svetlana Shchegrova and Dave D
	Smith and Frank Staedtler and Zhenqiang Su and Hongmei Sun and Zoltan
	Szallasi and Zivana Tezak and Danielle Thierry-Mieg and Karol L Thompson
	and Irina Tikhonova and Yaron Turpaz and Beena Vallanat and Van Hove, Christophe
	and Stephen J Walker and Sue Jane Wang and Yonghong Wang and
	Russ Wolfinger and Alex Wong and Jie Wu and Chunlin Xiao and Qian
	Xie and Jun Xu and Wen Yang and Liang Zhang and Sheng Zhong and Yaping
	Zong and William Slikker},
  title = {The {MicroArray Quality Control} ({MAQC}) project shows inter-
	and intraplatform reproducibility of gene expression measurements},
  journal = {Nat. Biotechnol.},
  year = {2006},
  volume = {24},
  pages = {1151--1161},
  number = {9},
  month = sep,
  abstract = {Over the last decade, the introduction of microarray technology has
	had a profound impact on gene expression research. The publication
	of studies with dissimilar or altogether contradictory results, obtained
	using different microarray platforms to analyze identical RNA samples,
	has raised concerns about the reliability of this technology. The
	MicroArray Quality Control (MAQC) project was initiated to address
	these concerns, as well as other performance and data analysis issues.
	Expression data on four titration pools from two distinct reference
	RNA samples were generated at multiple test sites using a variety
	of microarray-based and alternative technology platforms. Here we
	describe the experimental design and probe mapping efforts behind
	the MAQC project. We show intraplatform consistency across test sites
	as well as a high level of interplatform concordance in terms of
	genes identified as differentially expressed. This study provides
	a resource that represents an important first step toward establishing
	a framework for the use of microarrays in clinical and regulatory
	settings.},
  doi = {10.1038/nbt1239},
  institution = {National Center for Toxicological Research, US Food and Drug Administration,
	Jefferson, Arkansas 72079, USA.},
  keywords = {Equipment Design; Equipment Failure Analysis; Gene Expression Profiling,
	instrumentation/methods; Oligonucleotide Array Sequence Analysis,
	instrumentation; Quality Assurance, Health Care, methods; Quality
	Control; Reproducibility of Results; Sensitivity and Specificity;
	United States},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {nbt1239},
  pmid = {16964229},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/nbt1239}
}

@article{Diekman2003Hybrid,
  author = {Casey Diekman and Wei He and Nagabhushana Prabhu and Harvey Cramer},
  title = {Hybrid methods for automated diagnosis of breast tumors},
  journal = {Anal Quant Cytol Histol},
  year = {2003},
  volume = {25},
  pages = {183--190},
  number = {4},
  month = aug,
  abstract = {O{BJECTIVE}: {T}o design and analyze a new family of hybrid methods
	for the diagnosis of breast tumors using fine needle aspirates. {STUDY}
	{DESIGN}: {W}e present a radically new approach to the design of
	diagnosis systems. {I}n the new approach, a nonlinear classifier
	with high sensitivity but low specificity is hybridized with a linear
	classifier having low sensitivity but high specificity. {D}ata from
	the {W}isconsin {B}reast {C}ancer {D}atabase are used to evaluate,
	computationally, the performance of the hybrid classifiers. {RESULTS}:
	{T}he diagnosis scheme obtained by hybridizing the nonlinear classifier
	ellipsoidal multisurface method ({EMSM}) with the linear classifier
	proximal support vector machine ({PSVM}) was found to have a mean
	sensitivity of 97.36\% and a mean specificity of 95.14\% and was
	found to yield a 2.44\% improvement in the reliability of positive
	diagnosis over that of {EMSM} at the expense of 0.4\% degradation
	in the reliability of negative diagnosis, again compared to {EMSM}.
	{A}t the 95\% confidence level we can trust the hybrid method to
	be 96.19-98.53\% correct in its malignant diagnosis of new tumors
	and 93.57-96.71\% correct in its benign diagnosis. {CONCLUSION}:
	{H}ybrid diagnosis schemes represent a significant paradigm shift
	and provide a promising new technique to improve the specificity
	of nonlinear classifiers without seriously affecting the high sensitivity
	of nonlinear classifiers.},
  keywords = {Algorithms, Amino Acid Sequence, Amino Acids, Anion Exchange Resins,
	Antigen-Antibody Complex, Artificial Intelligence, Automated, Automatic
	Data Processing, Benchmarking, Biological, Biological Markers, Biopsy,
	Blood Cells, Blood Proteins, Breast Neoplasms, Cell Line, Cellular
	Structures, Chemical, Chromatography, Chromosome Aberrations, Cluster
	Analysis, Colonic Neoplasms, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, Computing Methodologies,
	DNA, Data Interpretation, Databases, Decision Making, Decision Trees,
	Diagnosis, Diffusion Magnetic Resonance Imaging, Disease, English
	Abstract, Epitopes, Expert Systems, Factual, Female, Fine-Needle,
	Fusion, Fuzzy Logic, Gene Expression Profiling, Gene Expression Regulation,
	Gene Targeting, Genetic, Genome, Histocompatibility Antigens Class
	I, Humans, Hydrogen Bonding, Hydrophobicity, Image Interpretation,
	Image Processing, In Vitro, Indicators and Reagents, Information
	Storage and Retrieval, Ion Exchange, Least-Squares Analysis, Leiomyosarcoma,
	Liver Cirrhosis, Lung Neoplasms, Magnetic Resonance Imaging, Male,
	Mass, Mathematical Computing, Matrix-Assisted Laser Desorption-Ionization,
	Models, Molecular, Molecular Sequence Data, Neoplasm Proteins, Neoplasms,
	Neoplastic, Nephroblastoma, Neural Networks (Computer), Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Nucleic Acid Conformation, Nucleic Acid Hybridization,
	Oligonucleotide Array Sequence Analysis, Oncogene Proteins, Ovarian
	Neoplasms, P.H.S., Pattern Recognition, Predictive Value of Tests,
	Proteome, Prostatic Neoplasms, Protein, Protein Binding, Protein Interaction
	Mapping, Protein Structure, Proteins, Quantitative Structure-Activity
	Relationship, RNA, ROC Curve, Reproducibility of Results, Research
	Support, Rhabdomyosarcoma, Secondary, Sensitivity and Specificity,
	Sequence Alignment, Sequence Analysis, Severity of Illness Index,
	Software, Solubility, Spectrometry, Statistical, Structure-Activity
	Relationship, Subcellular Fractions, Subtraction Technique, T-Lymphocyte,
	Tissue Distribution, Transcription Factors, Transfer, Treatment Outcome,
	Tumor, Tumor Markers, U.S. Gov't, User-Computer Interface, 12961824}
}

@article{DiMasi2003price,
  author = {J. A. DiMasi and R. W. Hansen and H. G. Grabowski},
  title = {The price of innovation: new estimates of drug development costs},
  journal = {J Health Econ},
  year = {2003},
  volume = {22},
  pages = {151--185},
  number = {2},
  month = mar,
  abstract = {The research and development costs of 68 randomly selected new drugs
	were obtained from a survey of 10 pharmaceutical firms. These data
	were used to estimate the average pre-tax cost of new drug development.
	The costs of compounds abandoned during testing were linked to the
	costs of compounds that obtained marketing approval. The estimated
	average out-of-pocket cost per new drug is 403 million US dollars
	(2000 dollars). Capitalizing out-of-pocket costs to the point of
	marketing approval at a real discount rate of 11\% yields a total
	pre-approval cost estimate of 802 million US dollars (2000 dollars).
	When compared to the results of an earlier study with a similar methodology,
	total capitalized costs were shown to have increased at an annual
	rate of 7.4\% above general price inflation.},
  keywords = {Capital Expenditures, Costs and Cost Analysis, Data Collection, Drug
	Approval, Drug Evaluation, Drug Industry, Drugs, Economic, Humans,
	Inflation, Investigational, Organizational Innovation, Preclinical,
	Research Support, United States, 16087260},
  owner = {mahe},
  pii = {S0167629602001261},
  pmid = {16087260},
  timestamp = {2006.08.12}
}

@article{Ding2005Minimum,
  author = {Chris Ding and Hanchuan Peng},
  title = {Minimum redundancy feature selection from microarray gene expression
	data},
  journal = {J Bioinform Comput Biol},
  year = {2005},
  volume = {3},
  pages = {185--205},
  number = {2},
  month = apr,
  abstract = {How to selecting a small subset out of the thousands of genes in microarray
	data is important for accurate classification of phenotypes. {W}idely
	used methods typically rank genes according to their differential
	expressions among phenotypes and pick the top-ranked genes. {W}e
	observe that feature sets so obtained have certain redundancy and
	study methods to minimize it. {W}e propose a minimum redundancy -
	maximum relevance ({MRMR}) feature selection framework. {G}enes selected
	via {MRMR} provide a more balanced coverage of the space and capture
	broader characteristics of phenotypes. {T}hey lead to significantly
	improved class predictions in extensive experiments on 6 gene expression
	data sets: {NCI}, {L}ymphoma, {L}ung, {C}hild {L}eukemia, {L}eukemia,
	and {C}olon. {I}mprovements are observed consistently among 4 classification
	methods: {N}aive {B}ayes, {L}inear discriminant analysis, {L}ogistic
	regression, and {S}upport vector machines. {SUPPLIMENTARY}: {T}he
	top 60 {MRMR} genes for each of the datasets are listed in http://crd.lbl.gov/~cding/{MRMR}/.
	{M}ore information related to {MRMR} methods can be found at http://www.hpeng.net/.},
  keywords = {Adult, Aged, Aging, Algorithms, Animals, Apoptosis, Artificial Intelligence,
	Automated, Biological, Bone Marrow, Breast Neoplasms, Classification,
	Cluster Analysis, Comparative Study, Computer Simulation, Computer-Assisted,
	Diagnosis, Dose-Response Relationship, Drug, Female, Foot, Gait,
	Gene Expression Profiling, Gene Expression Regulation, Gene Silencing,
	Genetic Vectors, Humans, Image Interpretation, Information Storage
	and Retrieval, Kidney, Liver, Logistic Models, Male, Messenger, Models,
	Myocardium, Neoplasms, Non-U.S. Gov't, Oligonucleotide Array Sequence
	Analysis, Pattern Recognition, Pharmaceutical Preparations, Polymerase
	Chain Reaction, Principal Component Analysis, Proteins, RNA, Rats,
	Reproducibility of Results, Research Support, Sensitivity and Specificity,
	Small Interfering, Sprague-Dawley, Statistical, Subcellular Fractions,
	Unknown Primary, 15852500},
  pii = {S0219720005001004}
}

@article{Driel2006text-mining,
  author = {van Driel, M. A. and Bruggeman, J. and Vriend, G. and Brunner, H. G.
	and Leunissen, J. A. M.},
  title = {A text-mining analysis of the human phenome},
  journal = {Eur. J. Hum. Genet.},
  year = {2006},
  volume = {14},
  pages = {535--542},
  number = {5},
  month = may,
  abstract = {A number of large-scale efforts are underway to define the relationships
	between genes and proteins in various species. But, few attempts
	have been made to systematically classify all such relationships
	at the phenotype level. Also, it is unknown whether such a phenotype
	map would carry biologically meaningful information. We have used
	text mining to classify over 5000 human phenotypes contained in the
	Online Mendelian Inheritance in Man database. We find that similarity
	between phenotypes reflects biological modules of interacting functionally
	related genes. These similarities are positively correlated with
	a number of measures of gene function, including relatedness at the
	level of protein sequence, protein motifs, functional annotation,
	and direct protein-protein interaction. Phenotype grouping reflects
	the modular nature of human disease genetics. Thus, phenotype mapping
	may be used to predict candidate genes for diseases as well as functional
	relations between genes and proteins. Such predictions will further
	improve if a unified system of phenotype descriptors is developed.
	The phenotype similarity data are accessible through a web interface
	at http://www.cmbi.ru.nl/MimMiner/.},
  doi = {10.1038/sj.ejhg.5201585},
  institution = {Centre for Molecular and Biomolecular Informatics, Radboud University
	Nijmegen, Toernooiveld 1, 6525ED Nijmegen, the Netherlands.},
  keywords = {Chromosome Mapping; Databases, Genetic; Genetic Predisposition to
	Disease; Genetic Vectors; Genome, Human; Genotype; Humans; Models,
	Genetic; Models, Statistical; Multigene Family; Phenotype},
  owner = {mordelet},
  pii = {5201585},
  pmid = {16493445},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1038/sj.ejhg.5201585}
}

@article{Ehlers2005NBS1,
  author = {Justis P Ehlers and J. William Harbour},
  title = {{NBS1} expression as a prognostic marker in uveal melanoma},
  journal = {Clin. Cancer Res.},
  year = {2005},
  volume = {11},
  pages = {1849--1853},
  number = {5},
  month = mar,
  abstract = {P{URPOSE}: {U}p to half of uveal melanoma patients die of metastatic
	disease. {T}reatment of the primary eye tumor does not improve survival
	in high-risk patients due to occult micrometastatic disease, which
	is present at the time of eye tumor diagnosis but is not detected
	and treated until months to years later. {H}ere, we use microarray
	gene expression data to identify a new prognostic marker. {EXPERIMENTAL}
	{DESIGN}: {M}icroarray gene expression profiles were analyzed in
	25 primary uveal melanomas. {T}umors were ranked by support vector
	machine ({SVM}) and by cytologic severity. {N}bs1 protein expression
	was assessed by quantitative immunohistochemistry in 49 primary uveal
	melanomas. {S}urvival was assessed using {K}aplan-{M}eier life-table
	analysis. {RESULTS}: {E}xpression of the {N}ijmegen breakage syndrome
	({NBS}1) gene correlated strongly with {SVM} and cytologic tumor
	rankings ({P} < 0.0001). {F}urther, immunohistochemistry expression
	of the {N}bs1 protein correlated strongly with both {SVM} and cytologic
	rankings ({P} < 0.0001). {T}he 6-year actuarial survival was 100\%
	in patients with low immunohistochemistry expression of {N}bs1 and
	22\% in those with high {N}bs1 expression ({P} = 0.01). {CONCLUSIONS}:
	{NBS}1 is a strong predictor of uveal melanoma survival and potentially
	could be used as a clinical marker for guiding clinical management.},
  doi = {10.1158/1078-0432.CCR-04-2054},
  pdf = {../local/Ehlers2005NBS1.pdf},
  file = {Ehlers2005NBS1.pdf:local/Ehlers2005NBS1.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acid Sequence, Amino Acids,
	Analysis of Variance, Animals, Area Under Curve, Artifacts, Automated,
	Bacteriophage T4, Base Sequence, Biological, Birefringence, Brain
	Chemistry, Brain Neoplasms, Cell Cycle Proteins, Comparative Study,
	Computational Biology, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Databases, Decision Trees, Diagnosis, Diagnostic Imaging,
	Diagnostic Techniques, Discriminant Analysis, Evolution, Extramural,
	Face, Female, Gene Expression Profiling, Genetic, Glaucoma, Humans,
	Immunohistochemistry, Intraocular Pressure, Lasers, Least-Squares
	Analysis, Likelihood Functions, Magnetic Resonance Imaging, Magnetic
	Resonance Spectroscopy, Male, Markov Chains, Melanoma, Middle Aged,
	Models, Molecular, Mutation, N.I.H., Nerve Fibers, Non-P.H.S., Non-U.S.
	Gov't, Nuclear Proteins, Nucleic Acid, Nucleic Acid Conformation,
	Numerical Analysis, Oligonucleotide Array Sequence Analysis, Ophthalmological,
	Optic Nerve Diseases, Optical Coherence, P.H.S., Pattern Recognition,
	Photic Stimulation, Polymorphism, Prognosis, Prospective Studies,
	Protein, Protein Structure, Proteins, RNA, ROC Curve, Regression
	Analysis, Reproducibility of Results, Research Support, Retinal Ganglion
	Cells, Secondary, Sensitivity and Specificity, Sequence Analysis,
	Single Nucleotide, Single-Stranded Conformational, Software, Statistics,
	Survival Analysis, Tertiary, Tomography, Tumor Markers, U.S. Gov't,
	Untranslated, Uveal Neoplasms, Visual Fields, beta-Lactamases, 15756009},
  pii = {11/5/1849},
  url = {http://clincancerres.aacrjournals.org/cgi/content/abstract/11/5/1849}
}

@article{Eroes2004Comparison,
  author = {D. Er{\"o}s and G. K{\'e}ri and I. K{\"o}vesdi and C. Sz{\'a}ntai-Kis and
	G. M{\'e}sz{\'a}ros and L. Orfi},
  title = {Comparison of predictive ability of water solubility {QSPR} models
	generated by {MLR}, {PLS} and {ANN} methods},
  journal = {Mini Rev Med Chem},
  year = {2004},
  volume = {4},
  pages = {167--177},
  number = {2},
  month = feb,
  abstract = {ADME/Tox computational screening is one of the most hot topics of
	modern drug research. About one half of the potential drug candidates
	fail because of poor ADME/Tox properties. Since the experimental
	determination of water solubility is time-consuming also, reliable
	computational predictions are needed for the pre-selection of acceptable
	"drug-like" compounds from diverse combinatorial libraries. Recently
	many successful attempts were made for predicting water solubility
	of compounds. A comprehensive review of previously developed water
	solubility calculation methods is presented here, followed by the
	description of the solubility prediction method designed and used
	in our laboratory. We have selected carefully 1381 compounds from
	scientific publications in a unified database and used this dataset
	in the calculations. The externally validated models were based on
	calculated descriptors only. The aim of model optimization was to
	improve repeated evaluations statistics of the predictions and effective
	descriptor scoring functions were used to facilitate quick generation
	of multiple linear regression analysis (MLR), partial least squares
	method (PLS) and artificial neural network (ANN) models with optimal
	predicting ability. Standard error of prediction of the best model
	generated with ANN (with 39-7-1 network structure) was 0.72 in logS
	units while the cross validated squared correlation coefficient (Q(2))
	was better than 0.85. These values give a good chance for successful
	pre-selection of screening compounds from virtual libraries, based
	on the predicted water solubility.},
  keywords = {Chemical, Chemistry, Comparative Study, Cytochrome P-450 Enzyme System,
	Estradiol, Least-Squares Analysis, Ligands, Linear Models, Models,
	Molecular, Naphthalenes, Neural Networks (Computer), Non-U.S. Gov't,
	Physical, Quantitative Structure-Activity Relationship, Reproducibility
	of Results, Research Support, Solubility, Spectrum Analysis, Statistical,
	Water, 14965289},
  owner = {mahe},
  pmid = {14965289},
  timestamp = {2006.09.07}
}

@article{Faugeras2004Variational,
  author = {Olivier Faugeras and Geoffray Adde and Guillaume Charpiat and Christophe
	Chefd'hotel and Maureen Clerc and Thomas Deneux and Rachid Deriche
	and Gerardo Hermosillo and Renaud Keriven and Pierre Kornprobst and
	Jan Kybic and Christophe Lenglet and Lucero Lopez-Perez and Th{\'e}o
	Papadopoulo and Jean-Philippe Pons and Florent Segonne and Bertrand
	Thirion and David Tschumperl{\'e} and Thierry Vi{\'e}ville and Nicolas
	Wotawa},
  title = {Variational, geometric, and statistical methods for modeling brain
	anatomy and function.},
  journal = {Neuroimage},
  year = {2004},
  volume = {23 Suppl 1},
  pages = {S46--S55},
  abstract = {We survey the recent activities of the {O}dyss{\'e}e {L}aboratory in
	the area of the application of mathematics to the design of models
	for studying brain anatomy and function. {W}e start with the problem
	of reconstructing sources in {MEG} and {EEG}, and discuss the variational
	approach we have developed for solving these inverse problems. {T}his
	motivates the need for geometric models of the head. {W}e present
	a method for automatically and accurately extracting surface meshes
	of several tissues of the head from anatomical magnetic resonance
	({MR}) images. {A}natomical connectivity can be extracted from diffusion
	tensor magnetic resonance images but, in the current state of the
	technology, it must be preceded by a robust estimation and regularization
	stage. {W}e discuss our work based on variational principles and
	show how the results can be used to track fibers in the white matter
	({WM}) as geodesics in some {R}iemannian space. {W}e then go to the
	statistical modeling of functional magnetic resonance imaging (f{MRI})
	signals from the viewpoint of their decomposition in a pseudo-deterministic
	and stochastic part that we then use to perform clustering of voxels
	in a way that is inspired by the theory of support vector machines
	and in a way that is grounded in information theory. {M}ultimodal
	image matching is discussed next in the framework of image statistics
	and partial differential equations ({PDE}s) with an eye on registering
	f{MRI} to the anatomy. {T}he paper ends with a discussion of a new
	theory of random shapes that may prove useful in building anatomical
	and functional atlases.},
  doi = {10.1016/j.neuroimage.2004.07.015},
  pdf = {../local/Faugeras2004Variational.pdf},
  file = {Faugeras2004Variational.pdf:local/Faugeras2004Variational.pdf:PDF},
  keywords = {Adolescent, Adult, Algorithms, Anatomic, Bacterial Proteins, Brain,
	Brain Mapping, Comparative Study, Computer Simulation, Computer-Assisted,
	Diffusion Magnetic Resonance Imaging, Facial Asymmetry, Facial Expression,
	Facial Paralysis, Female, Gene Expression Profiling, Gram-Negative
	Bacteria, Gram-Positive Bacteria, Humans, Image Interpretation, Magnetoencephalography,
	Male, Middle Aged, Models, Motion, Neural Pathways, Non-U.S. Gov't,
	Photography, Protein, Proteome, Research Support, Retina, Sequence
	Alignment, Sequence Analysis, Severity of Illness Index, Software,
	Statistical, Subcellular Fractions, 15501100},
  pii = {S1053-8119(04)00380-5},
  url = {http://dx.doi.org/10.1016/j.neuroimage.2004.07.015}
}

@article{Garrett2003Comparison,
  author = {D. Garrett and D. A. Peterson and C. Anderson and M. Thaut},
  title = {Comparison of linear, nonlinear, and feature selection methods for
	{EEG} signal classification.},
  journal = {I{EEE} {T}rans {N}eural {S}yst {R}ehabil {E}ng},
  year = {2003},
  volume = {11},
  pages = {141--144},
  number = {2},
  month = jun,
  abstract = {The reliable operation of brain-computer interfaces ({BCI}s) based
	on spontaneous electroencephalogram ({EEG}) signals requires accurate
	classification of multichannel {EEG}. {T}he design of {EEG} representations
	and classifiers for {BCI} are open research questions whose difficulty
	stems from the need to extract complex spatial and temporal patterns
	from noisy multidimensional time series obtained from {EEG} measurements.
	{T}he high-dimensional and noisy nature of {EEG} may limit the advantage
	of nonlinear classification methods over linear ones. {T}his paper
	reports the results of a linear (linear discriminant analysis) and
	two nonlinear classifiers (neural networks and support vector machines)
	applied to the classification of spontaneous {EEG} during five mental
	tasks, showing that nonlinear classifiers produce only slightly better
	classification results. {A}n approach to feature selection based
	on genetic algorithms is also presented with preliminary results
	of application to {EEG} during finger movement.},
  keywords = {80 and over, Adnexal Diseases, Adult, Aged, Algorithms, Artificial
	Intelligence, Automated, Bayes Theorem, Biological, Brain, Brain
	Mapping, Breast Neoplasms, Case-Control Studies, Chromatography,
	Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted,
	DNA, Diagnosis, Differential, Discriminant Analysis, Electroencephalography,
	Evoked Potentials, Feasibility Studies, Female, Fingers, Gene Expression
	Profiling, Gene Expression Regulation, Genetic, Genetic Markers,
	Genetic Predisposition to Disease, Genetic Screening, Habituation
	(Psychophysiology), High Pressure Liquid, Humans, Linear Models,
	Logistic Models, Male, Middle Aged, Migraine, Models, Movement, Neural
	Networks (Computer), Neurological, Non-P.H.S., Non-U.S. Gov't, Nonlinear
	Dynamics, Nucleosides, Ovarian Neoplasms, Pattern Recognition, Photic
	Stimulation, Predictive Value of Tests, ROC Curve, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Signal
	Processing, Software, Statistical, Thinking, Tumor Markers, U.S.
	Gov't, User-Computer Interface, Visual, 12899257}
}

@article{Ge2003Reducing,
  author = {Xijin Ge and Shuichi Tsutsumi and Hiroyuki Aburatani and Shuichi
	Iwata},
  title = {Reducing false positives in molecular pattern recognition.},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2003},
  volume = {14},
  pages = {34--43},
  abstract = {In the search for new cancer subtypes by gene expression profiling,
	it is essential to avoid misclassifying samples of unknown subtypes
	as known ones. {I}n this paper, we evaluated the false positive error
	rates of several classification algorithms through a 'null test'
	by presenting classifiers a large collection of independent samples
	that do not belong to any of the tumor types in the training dataset.
	{T}he benchmark dataset is available at www2.genome.rcast.u-tokyo.ac.jp/pm/.
	{W}e found that k-nearest neighbor ({KNN}) and support vector machine
	({SVM}) have very high false positive error rates when fewer genes
	(<100) are used in prediction. {T}he error rate can be partially
	reduced by including more genes. {O}n the other hand, prototype matching
	({PM}) method has a much lower false positive error rate. {S}uch
	robustness can be achieved without loss of sensitivity by introducing
	suitable measures of prediction confidence. {W}e also proposed a
	cluster-and-select technique to select genes for classification.
	{T}he nonparametric {K}ruskal-{W}allis {H} test is employed to select
	genes differentially expressed in multiple tumor types. {T}o reduce
	the redundancy, we then divided these genes into clusters with similar
	expression patterns and selected a given number of genes from each
	cluster. {T}he reliability of the new algorithm is tested on three
	public datasets.},
  keywords = {Amino Acid Sequence, Amino Acids, Animals, Automated, Base Sequence,
	Bayes Theorem, Biological, Carbohydrate Conformation, Carbohydrate
	Sequence, Cattle, Computational Biology, Computer Simulation, Crystallography,
	DNA, Databases, Factual, False Positive Reactions, Gene Expression
	Profiling, Genes, Genetic, Genetic Techniques, Genome, Histocompatibility
	Antigens Class I, Human, Humans, Introns, Least-Squares Analysis,
	MHC Class I, Major Histocompatibility Complex, Markov Chains, Messenger,
	Mice, Models, Monosaccharides, Neoplasms, Non-U.S. Gov't, Nonparametric,
	Pattern Recognition, Peptides, Phylogeny, Plants, Poly A, Polysaccharides,
	Predictive Value of Tests, Protein, Protein Structure, Proteins,
	RNA, Rats, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Secondary, Sequence Alignment, Software, Species Specificity,
	Statistics, Theoretical, X-Ray, 15706518}
}

@article{Girosi1998Equivalence,
  author = {Girosi, Federico},
  title = {An {E}quivalence {B}etween {S}parse {A}pproximation and {S}upport
	{V}ector {M}achines.},
  journal = {Neural {C}omput},
  year = {1998},
  volume = {10},
  pages = {1455--1480},
  number = {6},
  month = jul,
  abstract = {This article shows a relationship between two different approximation
	techniques: the support vector machines ({SVM}), proposed by {V}.
	{V}apnik (1995) and a sparse approximation scheme that resembles
	the basis pursuit denoising algorithm ({C}hen, 1995; {C}hen, {D}onoho,
	and {S}aunders, 1995). {SVM} is a technique that can be derived from
	the structural risk minimization principle ({V}apnik, 1982) and can
	be used to estimate the parameters of several different approximation
	schemes, including radial basis functions, algebraic and trigonometric
	polynomials, {B}-splines, and some forms of multilayer perceptrons.
	{B}asis pursuit denoising is a sparse approximation technique in
	which a function is reconstructed by using a small number of basis
	functions chosen from a large set (the dictionary). {W}e show that
	if the data are noiseless, the modified version of basis pursuit
	denoising proposed in this article is equivalent to {SVM} in the
	following sense: if applied to the same data set, the two techniques
	give the same solution, which is obtained by solving the same quadratic
	programming problem. {I}n the appendix, we present a derivation of
	the {SVM} technique in one framework of regularization theory, rather
	than statistical learning theory, establishing a connection between
	{SVM}, sparse approximation, and regularization theory.},
  keywords = {Algorithms, Automated, Biometry, Computers, DNA, Databases, Factual,
	Fungal, Fungal Proteins, GTP-Binding Proteins, Gene Expression, Genes,
	Learning, Markov Chains, Models, Neural Networks (Computer), Neurological,
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Hybridization, Open Reading
	Frames, P.H.S., Pattern Recognition, Protein, Protein Structure,
	Proteins, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical,
	Tertiary, U.S. Gov't, 9698353}
}

@article{Glotsos2004Automated,
  author = {Dimitris Glotsos and Panagiota Spyridonos and Dionisis Cavouras and
	Panagiota Ravazoula and Petroula-Arampantoni Dadioti and George Nikiforidis},
  title = {Automated segmentation of routinely hematoxylin-eosin-stained microscopic
	images by combining support vector machine clustering and active
	contour models.},
  journal = {Anal {Q}uant {C}ytol {H}istol},
  year = {2004},
  volume = {26},
  pages = {331--340},
  number = {6},
  month = dec,
  abstract = {O{BJECTIVE}: {T}o develop a method for the automated segmentation
	of images of routinely hematoxylin-eosin ({H}-{E})-stained microscopic
	sections to guarantee correct results in computer-assisted microscopy.
	{STUDY} {DESIGN}: {C}linical material was composed 50 {H}-{E}-stained
	biopsies of astrocytomas and 50 {H}-{E}-stained biopsies of urinary
	bladder cancer. {T}he basic idea was to use a support vector machine
	clustering ({SVMC}) algorithm to provide gross segmentation of regions
	holding nuclei and subsequently to refine nuclear boundary detection
	with active contours. {T}he initialization coordinates of the active
	contour model were defined using a {SVMC} pixel-based classification
	algorithm that discriminated nuclear regions from the surrounding
	tissue. {S}tarting from the boundaries of these regions, the snake
	fired and propagated until converging to nuclear boundaries. {RESULTS}:
	{T}he method was validated for 2 different types of {H}-{E}-stained
	images. {R}esults were evaluated by 2 histopathologists. {O}n average,
	94\% of nuclei were correctly delineated. {CONCLUSION}: {T}he proposed
	algorithm could be of value in computer-based systems for automated
	interpretation of microscopic images.},
  keywords = {Adenosinetriphosphatase, Adolescent, Adult, Algorithms, Amino Acid
	Sequence, Amino Acids, Animals, Astrocytoma, Automated, Automation,
	Base Sequence, Bayes Theorem, Biological, Biopsy, Bladder Neoplasms,
	Breast Neoplasms, Carbohydrate Conformation, Carbohydrate Sequence,
	Cattle, Cell Cycle Proteins, Cell Nucleus, Computational Biology,
	Computer Simulation, Computer-Assisted, Crystallography, DNA, Databases,
	Diagnosis, Differential, Eosine Yellowish-(YS), Exoribonucleases,
	Factual, False Negative Reactions, False Positive Reactions, Female,
	Gene Expression, Gene Expression Profiling, Genes, Genetic, Genetic
	Techniques, Genetic Vectors, Genome, Hematoxylin, Histocompatibility
	Antigens Class I, Human, Humans, Image Interpretation, Image Processing,
	Introns, Least-Squares Analysis, MHC Class I, Major Histocompatibility
	Complex, Markov Chains, Messenger, Mice, Middle Aged, Models, Molecular
	Structure, Monosaccharides, Multigene Family, Mutation, Neoplasms,
	Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonparametric,
	Nucleotidyltransferases, Observer Variation, Oligonucleotide Array
	Sequence Analysis, P.H.S., Pattern Recognition, Peptides, Phenotype,
	Phylogeny, Plants, Poly A, Polysaccharides, Predictive Value of Tests,
	Protein, Protein Biosynthesis, Protein Kinase Inhibitors, Protein
	Structure, Proteins, RNA, RNA Helicases, RNA Splicing, Rats, Reproducibility
	of Results, Research Support, Retrospective Studies, Saccharomyces
	cerevisiae, Saccharomyces cerevisiae Proteins, Secondary, Sensitivity
	and Specificity, Sequence Alignment, Software, Species Specificity,
	Staining and Labeling, Statistics, Theoretical, Transcription, U.S.
	Gov't, Ultrasonography, X-Ray, 15678615}
}

@article{Golland2005Detection,
  author = {Polina Golland and W. Eric L Grimson and Martha E Shenton and Ron
	Kikinis},
  title = {Detection and analysis of statistical differences in anatomical shape.},
  journal = {Med {I}mage {A}nal},
  year = {2005},
  volume = {9},
  pages = {69--86},
  number = {1},
  month = feb,
  abstract = {We present a computational framework for image-based analysis and
	interpretation of statistical differences in anatomical shape between
	populations. {A}pplications of such analysis include understanding
	developmental and anatomical aspects of disorders when comparing
	patients versus normal controls, studying morphological changes caused
	by aging, or even differences in normal anatomy, for example, differences
	between genders. {O}nce a quantitative description of organ shape
	is extracted from input images, the problem of identifying differences
	between the two groups can be reduced to one of the classical questions
	in machine learning of constructing a classifier function for assigning
	new examples to one of the two groups while making as few misclassifications
	as possible. {T}he resulting classifier must be interpreted in terms
	of shape differences between the two groups back in the image domain.
	{W}e demonstrate a novel approach to such interpretation that allows
	us to argue about the identified shape differences in anatomically
	meaningful terms of organ deformation. {G}iven a classifier function
	in the feature space, we derive a deformation that corresponds to
	the differences between the two classes while ignoring shape variability
	within each class. {B}ased on this approach, we present a system
	for statistical shape analysis using distance transforms for shape
	representation and the support vector machines learning algorithm
	for the optimal classifier estimation and demonstrate it on artificially
	generated data sets, as well as real medical studies.},
  doi = {10.1016/j.media.2004.07.003},
  keywords = {Algorithms, Amino Acid, Artificial Intelligence, Ascomycota, Automated,
	Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms, Comparative
	Study, Computer-Assisted, Crystallography, DNA, DNA Primers, Databases,
	Diagnostic Imaging, Gene Expression Profiling, Hordeum, Host-Parasite
	Relations, Humans, Image Interpretation, Informatics, Kinetics, Magnetic
	Resonance Spectroscopy, Models, Nanotechnology, Non-P.H.S., Non-U.S.
	Gov't, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern Recognition,
	Plant, Plants, Predictive Value of Tests, Protein, Research Support,
	Selection (Genetics), Sequence Alignment, Sequence Analysis, Sequence
	Homology, Skin, Software, Statistical, Theoretical, Thermodynamics,
	U.S. Gov't, Viral Proteins, X-Ray, 15581813},
  pii = {S1361-8415(04)00059-3},
  url = {http://dx.doi.org/10.1016/j.media.2004.07.003}
}

@article{Ifantis2003nonlinear,
  author = {A. Ifantis and S. Papadimitriou},
  title = {The nonlinear predictability of the electrotelluric field variations
	data analyzed with support vector machines as an earthquake precursor.},
  journal = {Int {J} {N}eural {S}yst},
  year = {2003},
  volume = {13},
  pages = {315--332},
  number = {5},
  month = oct,
  abstract = {This work investigates the nonlinear predictability of the {E}lectro
	{T}elluric {F}ield ({ETF}) variations data in order to develop new
	intelligent tools for the difficult task of earthquake prediction.
	{S}upport {V}ector {M}achines trained on a signal window have been
	used to predict the next sample. {W}e observe a significant increase
	at this short-term unpredictability of the {ETF} signal at about
	two weeks time period before the major earthquakes that took place
	in regions near the recording devices. {T}he unpredictability increase
	can be attributed to a quick time variation of the dynamics that
	produce the {ETF} signal due to the earthquake generation process.
	{T}hus, this increase can be taken into advantage for signaling for
	an increased possibility of a large earthquake within the next few
	days in the neighboring region of the recording station.},
  keywords = {Air Pollutants, Aircraft, Algorithms, Artificial Intelligence, Automated,
	Base Composition, Comparative Study, Computational Biology, Computer
	Simulation, Computer-Assisted, Computing Methodologies, Cytosine,
	Data Interpretation, Databases, Enhancer Elements (Genetics), Environmental
	Monitoring, Ethanol, Exons, Fourier Transform Infrared, Genetic,
	Guanine, Humans, Image Interpretation, Natural Disasters, Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Online Systems, P.H.S., Pattern
	Recognition, Photography, Probability, Pyrimidines, RNA Precursors,
	RNA Splice Sites, RNA Splicing, Radiation, Reproducibility of Results,
	Research Support, Sensitivity and Specificity, Signal Processing,
	Spectroscopy, Statistical, Subtraction Technique, Thermodynamics,
	Time Factors, U.S. Gov't, Untranslated Regions, Video Recording,
	Walking, 14652873},
  pii = {S0129065703001674}
}

@article{Ioannidis2009Repeatability,
  author = {John P A Ioannidis and David B Allison and Catherine A Ball and Issa
	Coulibaly and Xiangqin Cui and Aed{\'i}n C Culhane and Mario Falchi and
	Cesare Furlanello and Laurence Game and Giuseppe Jurman and Jon Mangion
	and Tapan Mehta and Michael Nitzberg and Grier P Page and Enrico
	Petretto and Vera van Noort},
  title = {Repeatability of published microarray gene expression analyses.},
  journal = {Nat Genet},
  year = {2009},
  volume = {41},
  pages = {149--155},
  number = {2},
  month = feb,
  abstract = {Given the complexity of microarray-based gene expression studies,
	guidelines encourage transparent design and public data availability.
	Several journals require public data deposition and several public
	databases exist. However, not all data are publicly available, and
	even when available, it is unknown whether the published results
	are reproducible by independent scientists. Here we evaluated the
	replication of data analyses in 18 articles on microarray-based gene
	expression profiling published in Nature Genetics in 2005-2006. One
	table or figure from each article was independently evaluated by
	two teams of analysts. We reproduced two analyses in principle and
	six partially or with some discrepancies; ten could not be reproduced.
	The main reason for failure to reproduce was data unavailability,
	and discrepancies were mostly due to incomplete data annotation or
	specification of data processing and analysis. Repeatability of published
	microarray studies is apparently limited. More strict publication
	rules enforcing public data availability and explicit description
	of data processing and analysis should be considered.},
  doi = {10.1038/ng.295},
  institution = {Clinical and Molecular Epidemiology Unit, Department of Hygiene and
	Epidemiology, University of Ioannina School of Medicine, Ioannina
	45110, Greece. jioannid@cc.uoi.gr},
  keywords = {Animals; Data Interpretation, Statistical; Databases, Genetic; Gene
	Expression Profiling, standards; Genome-Wide Association Study, standards;
	Humans; Oligonucleotide Array Sequence Analysis, standards; Peer
	Review, Research; Publications, standards; Reproducibility of Results},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {ng.295},
  pmid = {19174838},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/ng.295}
}

@article{Johnson2007Adjusting,
  author = {W. Evan Johnson and Cheng Li and Ariel Rabinovic},
  title = {Adjusting batch effects in microarray expression data using empirical
	{Bayes} methods.},
  journal = {Biostatistics},
  year = {2007},
  volume = {8},
  pages = {118--127},
  number = {1},
  month = jan,
  abstract = {Non-biological experimental variation or "batch effects" are commonly
	observed across multiple batches of microarray experiments, often
	rendering the task of combining data from these batches difficult.
	The ability to combine microarray data sets is advantageous to researchers
	to increase statistical power to detect biological phenomena from
	studies where logistical considerations restrict sample size or in
	studies that require the sequential hybridization of arrays. In general,
	it is inappropriate to combine data sets without adjusting for batch
	effects. Methods have been proposed to filter batch effects from
	data, but these are often complicated and require large batch sizes
	( > 25) to implement. Because the majority of microarray studies
	are conducted using much smaller sample sizes, existing methods are
	not sufficient. We propose parametric and non-parametric empirical
	Bayes frameworks for adjusting data for batch effects that is robust
	to outliers in small sample sizes and performs comparable to existing
	methods for large samples. We illustrate our methods using two example
	data sets and show that our methods are justifiable, easy to apply,
	and useful in practice. Software for our method is freely available
	at: http://biosun1.harvard.edu/complab/batch/.},
  doi = {10.1093/biostatistics/kxj037},
  institution = {Department of Biostatistics and Computational Biology, Dana-Farber
	Cancer Institute, Boston, MA, USA.},
  keywords = {Bayes Theorem; Data Interpretation, Statistical; Gene Expression Profiling,
	methods; Humans; Oligonucleotide Array Sequence Analysis, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {kxj037},
  pmid = {16632515},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1093/biostatistics/kxj037}
}

@article{Juditsky2000Functional,
  author = {Juditsky, A. and Nemirovski, A.},
  title = {Functional {A}ggregation for {N}onparametric {E}stimation},
  journal = {Ann. {S}tat.},
  year = {2000},
  volume = {28},
  pages = {681--712},
  number = {3},
  month = jun,
  pdf = {../local/judi00.pdf},
  file = {judi00.pdf:local/judi00.pdf:PDF},
  subject = {stat},
  url = {ftp://ftp.irisa.fr/techreports/1996/PI-993.ps.gz}
}

@article{Kahraman2007Shape,
  author = {A. Kahraman and R. J. Morris and R. A. Laskowski and J. M. Thornton},
  title = {Shape variation in protein binding pockets and their ligands.},
  journal = {J. Mol. Biol.},
  year = {2007},
  volume = {368},
  pages = {283--301},
  number = {1},
  month = apr,
  abstract = {A common assumption about the shape of protein binding pockets is
	that they are related to the shape of the small ligand molecules
	that can bind there. But to what extent is that assumption true?
	Here we use a recently developed shape matching method to compare
	the shapes of protein binding pockets to the shapes of their ligands.
	We find that pockets binding the same ligand show greater variation
	in their shapes than can be accounted for by the conformational variability
	of the ligand. This suggests that geometrical complementarity in
	general is not sufficient to drive molecular recognition. Nevertheless,
	we show when considering only shape and size that a significant proportion
	of the recognition power of a binding pocket for its ligand resides
	in its shape. Additionally, we observe a "buffer zone" or a region
	of free space between the ligand and protein, which results in binding
	pockets being on average three times larger than the ligand that
	they bind.},
  doi = {10.1016/j.jmb.2007.01.086},
  keywords = {Binding Sites; Computer Simulation; Ligands; Models, Molecular; Models,
	Statistical; Protein Binding; Protein Conformation; Protein Folding},
  owner = {laurent},
  pii = {S0022-2836(07)00164-7},
  pmid = {17337005},
  timestamp = {2008.07.08},
  url = {http://dx.doi.org/10.1016/j.jmb.2007.01.086}
}

@article{Kapp2006Discovery,
  author = {Amy V Kapp and Stefanie S Jeffrey and Anita Langer{\o}d and Anne-Lise
	B{\o}rresen-Dale and Wonshik Han and Dong-Young Noh and Ida R K Bukholm
	and Monica Nicolau and Patrick O Brown and Robert Tibshirani},
  title = {Discovery and validation of breast cancer subtypes.},
  journal = {BMC Genomics},
  year = {2006},
  volume = {7},
  pages = {231},
  abstract = {Previous studies demonstrated breast cancer tumor tissue samples could
	be classified into different subtypes based upon DNA microarray profiles.
	The most recent study presented evidence for the existence of five
	different subtypes: normal breast-like, basal, luminal A, luminal
	B, and ERBB2+. Based upon the analysis of 599 microarrays (five separate
	cDNA microarray datasets) using a novel approach, we present evidence
	in support of the most consistently identifiable subtypes of breast
	cancer tumor tissue microarrays being: ESR1+/ERBB2-, ESR1-/ERBB2-,
	and ERBB2+ (collectively called the ESR1/ERBB2 subtypes). We validate
	all three subtypes statistically and show the subtype to which a
	sample belongs is a significant predictor of overall survival and
	distant-metastasis free probability. As a consequence of the statistical
	validation procedure we have a set of centroids which can be applied
	to any microarray (indexed by UniGene Cluster ID) to classify it
	to one of the ESR1/ERBB2 subtypes. Moreover, the method used to define
	the ESR1/ERBB2 subtypes is not specific to the disease. The method
	can be used to identify subtypes in any disease for which there are
	at least two independent microarray datasets of disease samples.},
  doi = {10.1186/1471-2164-7-231},
  institution = {Department of Statistics, Stanford University, Stanford, CA, USA.
	AKapp@stanford.edu},
  keywords = {Algorithms; Breast Neoplasms, classification/genetics/pathology; Female;
	Gene Expression Profiling, methods/statistics /&/ numerical data;
	Humans; Multivariate Analysis; Oligonucleotide Array Sequence Analysis,
	methods/statistics /&/ numerical data; Proportional Hazards Models;
	Risk Factors; Survival Analysis},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2164-7-231},
  pmid = {16965636},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1186/1471-2164-7-231}
}

@article{Larsen2005integrative,
  author = {Mette Voldby Larsen and Claus Lundegaard and Kasper Lamberth and
	S{\o}ren Buus and S{\o}ren Brunak and Ole Lund and Morten Nielsen},
  title = {An integrative approach to {CTL} epitope prediction: a combined algorithm
	integrating {MHC} class {I} binding, {TAP} transport efficiency,
	and proteasomal cleavage predictions.},
  journal = {Eur. J. Immunol.},
  year = {2005},
  volume = {35},
  pages = {2295--2303},
  number = {8},
  month = aug,
  abstract = {Reverse immunogenetic approaches attempt to optimize the selection
	of candidate epitopes, and thus minimize the experimental effort
	needed to identify new epitopes. When predicting cytotoxic T cell
	epitopes, the main focus has been on the highly specific MHC class
	I binding event. Methods have also been developed for predicting
	the antigen-processing steps preceding MHC class I binding, including
	proteasomal cleavage and transporter associated with antigen processing
	(TAP) transport efficiency. Here, we use a dataset obtained from
	the SYFPEITHI database to show that a method integrating predictions
	of MHC class I binding affinity, TAP transport efficiency, and C-terminal
	proteasomal cleavage outperforms any of the individual methods. Using
	an independent evaluation dataset of HIV epitopes from the Los Alamos
	database, the validity of the integrated method is confirmed. The
	performance of the integrated method is found to be significantly
	higher than that of the two publicly available prediction methods
	BIMAS and SYFPEITHI. To identify 85\% of the epitopes in the HIV
	dataset, 9\% and 10\% of all possible nonamers in the HIV proteins
	must be tested when using the BIMAS and SYFPEITHI methods, respectively,
	for the selection of candidate epitopes. This number is reduced to
	7\% when using the integrated method. In practical terms, this means
	that the experimental effort needed to identify an epitope in a hypothetical
	protein with 85\% probability is reduced by 20-30\% when using the
	integrated method. The method is available at http://www.cbs.dtu.dk/services/NetCTL.
	Supplementary material is available at http://www.cbs.dtu.dk/suppl/immunology/CTL.php.},
  doi = {10.1002/eji.200425811},
  keywords = {Algorithms; Data Interpretation, Statistical; Epitopes, T-Lymphocyte;
	Histocompatibility Antigens Class I; Humans; Hydrolysis; Predictive
	Value of Tests; Proteasome Endopeptidase Complex; Protein Binding;
	Research Support, N.I.H., Extramural; Research Support, Non-U.S.
	Gov't; Research Support, U.S. Gov't, P.H.S.; T-Lymphocytes, Cytotoxic},
  owner = {jacob},
  pmid = {15997466},
  timestamp = {2006.08.30},
  url = {http://dx.doi.org/10.1002/eji.200425811}
}

@book{Lauritzen1996Graphical,
  title = {Graphical {M}odels},
  publisher = {Oxford University Press},
  year = {1996},
  author = {Lauritzen, Steffen L.},
  subject = {stat}
}

@article{Li2003Simple,
  author = {Jinyan Li and Huiqing Liu and James R Downing and Allen Eng-Juh Yeoh
	and Limsoon Wong},
  title = {Simple rules underlying gene expression profiles of more than six
	subtypes of acute lymphoblastic leukemia ({ALL}) patients.},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {71--78},
  number = {1},
  month = jan,
  abstract = {M{OTIVATIONS} {AND} {RESULTS}: {F}or classifying gene expression profiles
	or other types of medical data, simple rules are preferable to non-linear
	distance or kernel functions. {T}his is because rules may help us
	understand more about the application in addition to performing an
	accurate classification. {I}n this paper, we discover novel rules
	that describe the gene expression profiles of more than six subtypes
	of acute lymphoblastic leukemia ({ALL}) patients. {W}e also introduce
	a new classifier, named {PCL}, to make effective use of the rules.
	{PCL} is accurate and can handle multiple parallel classifications.
	{W}e evaluate this method by classifying 327 heterogeneous {ALL}
	samples. {O}ur test error rate is competitive to that of support
	vector machines, and it is 71\% better than {C}4.5, 50\% better than
	{N}aive {B}ayes, and 43\% better than k-nearest neighbour. {E}xperimental
	results on another independent data sets are also presented to show
	the strength of our method. {AVAILABILITY}: {U}nder http://sdmc.lit.org.sg/{GED}atasets/,
	click on {S}upplementary {I}nformation.},
  keywords = {Acute, Algorithms, Automated, Base Pair Mismatch, Base Pairing, Base
	Sequence, Biological, Biosensing Techniques, Cluster Analysis, Comparative
	Study, Computer-Assisted, DNA, Gene Expression Profiling, Gene Expression
	Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans,
	Leukemia, Lymphocytic, Markov Chains, Messenger, Models, Molecular
	Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm,
	Neoplastic, Neural Networks (Computer), Non-U.S. Gov't, Nucleic Acid
	Conformation, Oligonucleotide Array Sequence Analysis, Pattern Recognition,
	Quality Control, RNA, Research Support, Signal Processing, Statistical,
	Stomach Neoplasms, Tumor Markers, 12499295}
}

@article{Li2004Fusing,
  author = {Shutao Li and James Tin-Yau Kwok and Ivor Wai-Hung Tsang and Yaonan
	Wang},
  title = {Fusing images with different focuses using support vector machines.},
  journal = {I{EEE} {T}rans {N}eural {N}etw},
  year = {2004},
  volume = {15},
  pages = {1555--1561},
  number = {6},
  month = nov,
  abstract = {Many vision-related processing tasks, such as edge detection, image
	segmentation and stereo matching, can be performed more easily when
	all objects in the scene are in good focus. {H}owever, in practice,
	this may not be always feasible as optical lenses, especially those
	with long focal lengths, only have a limited depth of field. {O}ne
	common approach to recover an everywhere-in-focus image is to use
	wavelet-based image fusion. {F}irst, several source images with different
	focuses of the same scene are taken and processed with the discrete
	wavelet transform ({DWT}). {A}mong these wavelet decompositions,
	the wavelet coefficient with the largest magnitude is selected at
	each pixel location. {F}inally, the fused image can be recovered
	by performing the inverse {DWT}. {I}n this paper, we improve this
	fusion procedure by applying the discrete wavelet frame transform
	({DWFT}) and the support vector machines ({SVM}). {U}nlike {DWT},
	{DWFT} yields a translation-invariant signal representation. {U}sing
	features extracted from the {DWFT} coefficients, a {SVM} is trained
	to select the source image that has the best focus at each pixel
	location, and the corresponding {DWFT} coefficients are then incorporated
	into the composite wavelet representation. {E}xperimental results
	show that the proposed method outperforms the traditional approach
	both visually and quantitatively.},
  keywords = {Algorithms, Amino Acid, Amino Acids, Artificial Intelligence, Ascomycota,
	Automated, Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms,
	Comparative Study, Computer Simulation, Computer-Assisted, Computing
	Methodologies, Crystallography, DNA, DNA Primers, Databases, Diagnostic
	Imaging, Enzymes, Fixation, Gene Expression Profiling, Genetic, Hordeum,
	Host-Parasite Relations, Humans, Image Enhancement, Image Interpretation,
	Informatics, Information Storage and Retrieval, Kinetics, Magnetic
	Resonance Spectroscopy, Models, Nanotechnology, Neural Networks (Computer),
	Non-P.H.S., Non-U.S. Gov't, Ocular, Oligonucleotide Array Sequence
	Analysis, P.H.S., Pattern Recognition, Plant, Plants, Predictive
	Value of Tests, Protein, Protein Conformation, Research Support,
	Sample Size, Selection (Genetics), Sequence Alignment, Sequence Analysis,
	Sequence Homology, Signal Processing, Skin, Software, Statistical,
	Subtraction Technique, Theoretical, Thermodynamics, U.S. Gov't, Viral
	Proteins, X-Ray, 15565781}
}

@unpublished{Lugosi1998On,
  author  = {Lugosi, G.},
  title   = {On concentration-of-measure inequalities},
  year    = {1998},
  note    = {Seminar notes},
  url     = {http://www.econ.upf.es/~lugosi/concmeas.ps},
  pdf     = {../local/lugo98.pdf},
  file    = {lugo98.pdf:local/lugo98.pdf:PDF},
  subject = {stat}
}

@article{Lugosi1999Adaptive,
  author  = {Lugosi, G. and Nobel, A.},
  title   = {Adaptive {M}odel {S}election {U}sing {E}mpirical {C}omplexities},
  journal = {Ann. {S}tat.},
  year    = {1999},
  month   = dec,
  volume  = {27},
  number  = {6},
  pages   = {1830--1864},
  url     = {http://www.econ.upf.es/~lugosi/amsec.ps},
  pdf     = {../local/lugo99.pdf},
  file    = {lugo99.pdf:local/lugo99.pdf:PDF},
  subject = {stat}
}

@article{Luo2004gene-silencing,
  author = {Luo, K. Q. and Chang, D. C.},
  title = {The gene-silencing efficiency of si{RNA} is strongly dependent on
	the local structure of m{RNA} at the targeted region.},
  journal = {Biochem. {B}iophys. {R}es. {C}ommun.},
  year = {2004},
  volume = {318},
  pages = {303--310},
  number = {1},
  month = may,
  abstract = {The gene-silencing effect of short interfering {RNA} (si{RNA}) is
	known to vary strongly with the targeted position of the m{RNA}.
	{A} number of hypotheses have been suggested to explain this phenomenon.
	{W}e would like to test if this positional effect is mainly due to
	the secondary structure of the m{RNA} at the target site. {W}e proposed
	that this structural factor can be characterized by a single parameter
	called "the hydrogen bond ({H}-b) index," which represents the average
	number of hydrogen bonds formed between nucleotides in the target
	region and the rest of the m{RNA}. {T}his index can be determined
	using a computational approach. {W}e tested the correlation between
	the {H}-b index and the gene-silencing effects on three genes ({B}cl-2,
	h{TF}, and cyclin {B}1) using a variety of si{RNA}s. {W}e found that
	the gene-silencing effect is inversely dependent on the {H}-b index,
	indicating that the local m{RNA} structure at the targeted site is
	the main cause of the positional effect. {B}ased on this finding,
	we suggest that the {H}-b index can be a useful guideline for future
	si{RNA} design.},
  doi = {10.1016/j.bbrc.2004.04.027},
  keywords = {Animals, Apoptosis, Base Composition, Base Pairing, Base Sequence,
	Binding Sites, Cell Cycle, Cell Proliferation, Comparative Study,
	Cultured, Cyclin B, Cyclin D1, DNA-Binding Proteins, Down-Regulation,
	Extramural, Fluorescence, Gene Silencing, Gene Targeting, Genetic
	Vectors, Green Fluorescent Proteins, Hela Cells, Humans, Hydrogen
	Bonding, Luminescent Proteins, Male, Messenger, Mice, Microscopy,
	Models, Molecular, Molecular Sequence Data, N.I.H., Non-U.S. Gov't,
	Nucleic Acid Conformation, Nude, P.H.S., Prostatic Neoplasms, Proto-Oncogene
	Proteins c-bcl-2, Proto-Oncogene Proteins c-myc, RNA, Regression
	Analysis, Research Support, STAT3 Transcription Factor, Small Interfering,
	Thromboplastin, Trans-Activators, Tumor Cells, U.S. Gov't, 15110788},
  pii = {S0006291X04007284},
  url = {http://dx.doi.org/10.1016/j.bbrc.2004.04.027}
}

@article{Madeira2004Biclustering,
  author      = {Madeira, S. C. and Oliveira, A. L.},
  title       = {Biclustering algorithms for biological data analysis: a survey.},
  journal     = {IEEE/ACM Trans Comput Biol Bioinform},
  year        = {2004},
  volume      = {1},
  number      = {1},
  pages       = {24--45},
  doi         = {10.1109/TCBB.2004.2},
  url         = {http://dx.doi.org/10.1109/TCBB.2004.2},
  pmid        = {17048406},
  institution = {University of Beira Interior, Rua Marquês D'Avila e Bolama, Covilhã,
	Portugal. smadeira@di.ubi.pt},
  language    = {eng},
  medline-pst = {ppublish},
  abstract    = {A large number of clustering approaches have been proposed for the
	analysis of gene expression data obtained from microarray experiments.
	However, the results from the application of standard clustering
	methods to genes are limited. This limitation is imposed by the existence
	of a number of experimental conditions where the activity of genes
	is uncorrelated. A similar limitation exists when clustering of conditions
	is performed. For this reason, a number of algorithms that perform
	simultaneous clustering on the row and column dimensions of the data
	matrix has been proposed. The goal is to find submatrices, that is,
	subgroups of genes and subgroups of conditions, where the genes exhibit
	highly correlated activities for every condition. In this paper,
	we refer to this class of algorithms as biclustering. Biclustering
	is also referred in the literature as coclustering and direct clustering,
	among others names, and has also been used in fields such as information
	retrieval and data mining. In this comprehensive survey, we analyze
	a large number of existing approaches to biclustering, and classify
	them in accordance with the type of biclusters they can find, the
	patterns of biclusters that are discovered, the methods used to perform
	the search, the approaches used to evaluate the solution, and the
	target applications.},
  keywords    = {Algorithms; Cluster Analysis; Computational Biology, methods; Gene
	Expression Profiling, statistics /&/ numerical data; Gene Expression,
	genetics; Humans; Models, Statistical; Oligonucleotide Array Sequence
	Analysis, methods; Saccharomyces cerevisiae, genetics},
  owner       = {jp},
  timestamp   = {2012.02.27}
}

@article{Markowetz2010How,
  author      = {Florian Markowetz},
  title       = {How to understand the cell by breaking it: network analysis of gene
	perturbation screens.},
  journal     = {PLoS Comput Biol},
  year        = {2010},
  volume      = {6},
  number      = {2},
  pages       = {e1000655},
  doi         = {10.1371/journal.pcbi.1000655},
  url         = {http://dx.doi.org/10.1371/journal.pcbi.1000655},
  pmid        = {20195495},
  institution = {Cancer Research UK Cambridge Research Institute, Cambridge, United
	Kingdom.},
  keywords    = {Animals; Cell Physiological Processes; Cluster Analysis; Gene Regulatory
	Networks; Genomics; Humans; Models, Genetic; Models, Statistical;
	Phenotype; Signal Transduction; Systems Biology},
  owner       = {phupe},
  timestamp   = {2010.08.30}
}

@article{Marsland2002self-organising,
  author = {Stephen Marsland and Jonathan Shapiro and Ulrich Nehmzow},
  title = {A self-organising network that grows when required.},
  journal = {Neural {N}etw},
  year = {2002},
  volume = {15},
  pages = {1041--1058},
  number = {8--9},
  abstract = {The ability to grow extra nodes is a potentially useful facility for
	a self-organising neural network. {A} network that can add nodes
	into its map space can approximate the input space more accurately,
	and often more parsimoniously, than a network with predefined structure
	and size, such as the {S}elf-{O}rganising {M}ap. {I}n addition, a
	growing network can deal with dynamic input distributions. {M}ost
	of the growing networks that have been proposed in the literature
	add new nodes to support the node that has accumulated the highest
	error during previous iterations or to support topological structures.
	{T}his usually means that new nodes are added only when the number
	of iterations is an integer multiple of some pre-defined constant,
	{A}. {T}his paper suggests a way in which the learning algorithm
	can add nodes whenever the network in its current state does not
	sufficiently match the input. {I}n this way the network grows very
	quickly when new data is presented, but stops growing once the network
	has matched the data. {T}his is particularly important when we consider
	dynamic data sets, where the distribution of inputs can change to
	a new regime after some time. {W}e also demonstrate the preservation
	of neighbourhood relations in the data by the network. {T}he new
	network is compared to an existing growing network, the {G}rowing
	{N}eural {G}as ({GNG}), on a artificial dataset, showing how the
	network deals with a change in input distribution after some time.
	{F}inally, the new network is applied to several novelty detection
	tasks and is compared with both the {GNG} and an unsupervised form
	of the {R}educed {C}oulomb {E}nergy network on a robotic inspection
	task and with a {S}upport {V}ector {M}achine on two benchmark novelty
	detection tasks.},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric
	Acid Cycle, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases,
	Decision Making, Diagnosis, Differential, Drug, Drug Design, Electrostatics,
	Eukaryotic Cells, Factual, Feasibility Studies, Female, Gene Expression,
	Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic,
	Genetic Heterogeneity, Genetic Markers, Hemolysins, Humans, Internet,
	Ion Exchange, Leukemia, Ligands, Likelihood Functions, Logistic Models,
	Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics,
	Messenger, Models, Molecular, Molecular Probe Techniques, Molecular
	Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural
	Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't,
	Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S.,
	Pattern Recognition, Probability, Probability Learning, Protein Binding,
	Protein Conformation, Proteins, Quality Control, Quantum Theory,
	RNA, RNA Splicing, Receptors, Reference Values, Regression Analysis,
	Reproducibility of Results, Research Support, Robotics, Saccharomyces
	cerevisiae Proteins, Sensitivity and Specificity, Sequence Analysis,
	Signal Processing, Software, Statistical, Stomach Neoplasms, Structural,
	Structure-Activity Relationship, Thermodynamics, Transcription, Tumor
	Markers, U.S. Gov't, 12416693}
}

@article{Martoglio2002decomposition,
  author = {Ann-Marie Martoglio and James W Miskin and Stephen K Smith and David
	J C MacKay},
  title = {A decomposition model to track gene expression signatures: preview
	on observer-independent classification of ovarian cancer.},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  pages = {1617--1624},
  number = {12},
  month = dec,
  abstract = {M{OTIVATION}: {A} number of algorithms and analytical models have
	been employed to reduce the multidimensional complexity of {DNA}
	array data and attempt to extract some meaningful interpretation
	of the results. {T}hese include clustering, principal components
	analysis, self-organizing maps, and support vector machine analysis.
	{E}ach method assumes an implicit model for the data, many of which
	separate genes into distinct clusters defined by similar expression
	profiles in the samples tested. {A} point of concern is that many
	genes may be involved in a number of distinct behaviours, and should
	therefore be modelled to fit into as many separate clusters as detected
	in the multidimensional gene expression space. {T}he analysis of
	gene expression data using a decomposition model that is independent
	of the observer involved would be highly beneficial to improve standard
	and reproducible classification of clinical and research samples.
	{RESULTS}: {W}e present a variational independent component analysis
	({ICA}) method for reducing high dimensional {DNA} array data to
	a smaller set of latent variables, each associated with a gene signature.
	{W}e present the results of applying the method to data from an ovarian
	cancer study, revealing a number of tissue type-specific and tissue
	type-independent gene signatures present in varying amounts among
	the samples surveyed. {T}he observer independent results of such
	molecular analysis of biological samples could help identify patients
	who would benefit from different treatment strategies. {W}e further
	explore the application of the model to similar high-throughput studies.},
  keywords = {Acute, Algorithms, Automated, Base Pair Mismatch, Base Pairing, Base
	Sequence, Biological, Biosensing Techniques, Cluster Analysis, Comparative
	Study, Computer-Assisted, Cystadenoma, DNA, Female, Gene Expression,
	Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic,
	Genetic Markers, Hemolysins, Humans, Leukemia, Lymphocytic, Markov
	Chains, Messenger, Models, Molecular Probe Techniques, Molecular
	Sequence Data, Nanotechnology, Neoplasm, Neoplastic, Neural Networks
	(Computer), Non-U.S. Gov't, Nucleic Acid Conformation, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, Pattern
	Recognition, Quality Control, RNA, Reference Values, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Signal
	Processing, Statistical, Stomach Neoplasms, Transcription, Tumor
	Markers, 12490446}
}

@article{Mateos2002Systematic,
  author = {Alvaro Mateos and Joaqu{\'\i}n Dopazo and Ronald Jansen and Yuhai Tu
	and Mark Gerstein and Gustavo Stolovitzky},
  title = {Systematic learning of gene functional classes from {DNA} array expression
	data by using multilayer perceptrons.},
  journal = {Genome {R}es.},
  year = {2002},
  volume = {12},
  pages = {1703--1715},
  number = {11},
  month = nov,
  abstract = {Recent advances in microarray technology have opened new ways for
	functional annotation of previously uncharacterised genes on a genomic
	scale. {T}his has been demonstrated by unsupervised clustering of
	co-expressed genes and, more importantly, by supervised learning
	algorithms. {U}sing prior knowledge, these algorithms can assign
	functional annotations based on more complex expression signatures
	found in existing functional classes. {P}reviously, support vector
	machines ({SVM}s) and other machine-learning methods have been applied
	to a limited number of functional classes for this purpose. {H}ere
	we present, for the first time, the comprehensive application of
	supervised neural networks ({SNN}s) for functional annotation. {O}ur
	study is novel in that we report systematic results for ~100 classes
	in the {M}unich {I}nformation {C}enter for {P}rotein {S}equences
	({MIPS}) functional catalog. {W}e found that only ~10\% of these
	are learnable (based on the rate of false negatives). {A} closer
	analysis reveals that false positives (and negatives) in a machine-learning
	context are not necessarily "false" in a biological sense. {W}e show
	that the high degree of interconnections among functional classes
	confounds the signatures that ought to be learned for a unique class.
	{W}e term this the "{B}orges effect" and introduce two new numerical
	indices for its quantification. {O}ur analysis indicates that classification
	systems with a lower {B}orges effect are better suitable for machine
	learning. {F}urthermore, we introduce a learning procedure for combining
	false positives with the original class. {W}e show that in a few
	iterations this process converges to a gene set that is learnable
	with considerably low rates of false positives and negatives and
	contains genes that are biologically related to the original class,
	allowing for a coarse reconstruction of the interactions between
	associated biological pathways. {W}e exemplify this methodology using
	the well-studied tricarboxylic acid cycle.},
  doi = {10.1101/gr.192502},
  pdf = {../local/Mateos2002Systematic.pdf},
  file = {Mateos2002Systematic.pdf:local/Mateos2002Systematic.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric
	Acid Cycle, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases,
	Decision Making, Diagnosis, Differential, Drug, Drug Design, Electrostatics,
	Eukaryotic Cells, Factual, Feasibility Studies, Female, Gene Expression,
	Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic,
	Genetic Heterogeneity, Genetic Markers, Hemolysins, Humans, Internet,
	Ion Exchange, Leukemia, Ligands, Likelihood Functions, Logistic Models,
	Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics,
	Messenger, Models, Molecular, Molecular Probe Techniques, Molecular
	Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural
	Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't,
	Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S.,
	Pattern Recognition, Probability, Protein Binding, Protein Conformation,
	Proteins, Quality Control, Quantum Theory, RNA, RNA Splicing, Receptors,
	Reference Values, Regression Analysis, Reproducibility of Results,
	Research Support, Saccharomyces cerevisiae Proteins, Sensitivity
	and Specificity, Sequence Analysis, Signal Processing, Software,
	Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship,
	Thermodynamics, Transcription, Tumor Markers, U.S. Gov't, 12421757},
  url = {http://dx.doi.org/10.1101/gr.192502}
}

@article{Mathews1999Expandeda,
  author = {D. H. Mathews and J. Sabina and M. Zuker and D. H. Turner},
  title = {{E}xpanded sequence dependence of thermodynamic parameters improves
	prediction of {RNA} secondary structure.},
  journal = {J. Mol. Biol.},
  year = {1999},
  volume = {288},
  pages = {911--940},
  number = {5},
  month = may,
  abstract = {An improved dynamic programming algorithm is reported for RNA secondary
	structure prediction by free energy minimization. Thermodynamic parameters
	for the stabilities of secondary structure motifs are revised to
	include expanded sequence dependence as revealed by recent experiments.
	Additional algorithmic improvements include reduced search time and
	storage for multibranch loop free energies and improved imposition
	of folding constraints. An extended database of 151,503 nt in 955
	structures determined by comparative sequence analysis was assembled
	to allow optimization of parameters not based on experiments and
	to test the accuracy of the algorithm. On average, the predicted
	lowest free energy structure contains 73 \% of known base-pairs when
	domains of fewer than 700 nt are folded; this compares with 64 \%
	accuracy for previous versions of the algorithm and parameters. For
	a given sequence, a set of 750 generated structures contains one
	structure that, on average, has 86 \% of known base-pairs. Experimental
	constraints, derived from enzymatic and flavin mononucleotide cleavage,
	improve the accuracy of structure predictions.},
  doi = {10.1006/jmbi.1999.2700},
  keywords = {16S, 23S, 5S, Affinity, Algorithms, Aluminum Silicates, Amino Acid,
	Amino Acid Sequence, Amyloidosis, Archaeal, Bacillus, Bacterial,
	Bacterial Proteins, Bacteriophage T4, Base Sequence, Chloroplast,
	Chromatography, Circular Dichroism, Comparative Study, Computational
	Biology, Databases, Electrophoresis, Entropy, Enzyme Stability, Escherichia
	coli, Factual, Fibroblast Growth Factor 2, Flavin Mononucleotide,
	Fluorescence, Genetic, Guanidine, Humans, Huntington Disease, Kinetics,
	Light, Models, Molecular Sequence Data, Non-P.H.S., Non-U.S. Gov't,
	Nucleic Acid Conformation, P.H.S., Peptides, Phylogeny, Polyacrylamide
	Gel, Predictive Value of Tests, Protein Binding, Protein Denaturation,
	Protein Folding, Protein Structure, RNA, Radiation, Recombinant Proteins,
	Research Support, Ribosomal, Scattering, Secondary, Sequence Homology,
	Solutions, Spectrometry, Statistical, Temperature, Thermodynamics,
	Time Factors, Trinucleotide Repeat Expansion, U.S. Gov't, alpha-Amylase,
	10329189},
  owner = {vert},
  pii = {S0022-2836(99)92700-6},
  pmid = {10329189},
  timestamp = {2006.04.27},
  url = {http://dx.doi.org/10.1006/jmbi.1999.2700}
}

@article{Mayr2003Cross-reactive,
  author = {Torsten Mayr and Christian Igel and Gregor Liebsch and Ingo Klimant
	and Otto S Wolfbeis},
  title = {Cross-reactive metal ion sensor array in a micro titer plate format.},
  journal = {Anal {C}hem},
  year = {2003},
  volume = {75},
  pages = {4389--4396},
  number = {17},
  month = sep,
  abstract = {A cross-reactive array in a micro titer plate ({MTP}) format is described
	that is based on a versatile and highly flexible scheme. {I}t makes
	use of rather unspecific metal ions probes having almost identical
	fluorescence spectra, thus enabling (a) interrogation at identical
	analytical wavelengths, and (b) imaging of the probes contained in
	the wells of the {MTP} using a {CCD} camera and an array of blue-light-emitting
	diodes as a light source. {T}he unselective response of the indicators
	in the presence of mixtures of five divalent cations generates a
	characteristic pattern that was analyzed by chemometric tools. {T}he
	fluorescence intensity of the indicators was transferred into a time-dependent
	parameter applying a scheme called dual lifetime referencing. {I}n
	this method, the fluorescence decay profile of the indicator is referenced
	against the phosphorescence of an inert reference dye added to the
	system. {T}he intrinsically referenced measurements also were performed
	using blue {LED}s as light sources and a {CCD} camera without intensifiers
	as the detector. {T}he best performance was observed if each well
	was excited by a single {LED}. {T}he assembly allows the detection
	of dye concentrations in the nanomoles-per-liter range without amplification
	and the acquisition of 96 wells simultaneously. {T}he pictures obtained
	form the basis for evaluation by pattern recognition algorithms.
	{S}upport vector machines are capable of predicting the presence
	of significant concentrations of metal ions with high accuracy.},
  keywords = {Agrochemicals, Air Pollutants, Aircraft, Algorithms, Artificial Intelligence,
	Automated, Base Composition, Base Sequence, Bayes Theorem, Carbonic
	Anhydrase Inhibitors, Cluster Analysis, Colonic Neoplasms, Comparative
	Study, Computational Biology, Computer Simulation, Computer Systems,
	Computer-Assisted, Computing Methodologies, Confidence Intervals,
	Cytosine, DNA, Data Interpretation, Databases, Diagnosis, Drug Design,
	Enhancer Elements (Genetics), Environmental Monitoring, Enzyme Inhibitors,
	Ethanol, Exons, Forecasting, Fourier Transform Infrared, Gene Expression
	Profiling, Gene Expression Regulation, Genetic, Genetic Screening,
	Glucuronosyltransferase, Guanine, Humans, Image Interpretation, Isoenzymes,
	Least-Squares Analysis, Leukemia, Linear Models, Lymphoma, Models,
	Molecular, Molecular Conformation, Molecular Sequence Data, Natural
	Disasters, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Oligonucleotide Array Sequence
	Analysis, Online Systems, P.H.S., Pattern Recognition, Pharmaceutical
	Preparations, Phenotype, Photography, Probability, Pyrimidines, Quantitative
	Structure-Activity Relationship, RNA Precursors, RNA Splice Sites,
	RNA Splicing, Radiation, Reproducibility of Results, Research Support,
	Sensitivity and Specificity, Sequence Alignment, Sequence Analysis,
	Signal Processing, Software, Spectroscopy, Statistical, Subtraction
	Technique, Terminology, Thermodynamics, Time Factors, U.S. Gov't,
	Untranslated Regions, Video Recording, Walking, 14632041}
}

@article{Micchelli2005On,
  author = {Charles A Micchelli and Massimiliano Pontil},
  title = {On learning vector-valued functions.},
  journal = {Neural {C}omput},
  year = {2005},
  volume = {17},
  pages = {177--204},
  number = {1},
  month = jan,
  abstract = {In this letter, we provide a study of learning in a {H}ilbert space
	of vector-valued functions. {W}e motivate the need for extending learning
	theory of scalar-valued functions by practical considerations and
	establish some basic results for learning vector-valued functions
	that should prove useful in applications. {S}pecifically, we allow
	an output space {Y} to be a {H}ilbert space, and we consider a reproducing
	kernel {H}ilbert space of functions whose values lie in {Y}. {I}n
	this setting, we derive the form of the minimal norm interpolant
	to a finite set of data and apply it to study some regularization
	functionals that are important in learning theory. {W}e consider
	specific examples of such functionals corresponding to multiple-output
	regularization networks and support vector machines, for both regression
	and classification. {F}inally, we provide classes of operator-valued
	kernels of the dot product and translation-invariant type.},
  doi = {10.1162/0899766052530802},
  keywords = {Algorithms, Amino Acid, Amino Acids, Artificial Intelligence, Ascomycota,
	Automated, Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms,
	Comparative Study, Computer Simulation, Computer-Assisted, Computing
	Methodologies, Crystallography, DNA, DNA Primers, Databases, Decision
	Support Techniques, Diagnostic Imaging, Enzymes, Feedback, Fixation,
	Gene Expression Profiling, Genetic, Hordeum, Host-Parasite Relations,
	Humans, Image Enhancement, Image Interpretation, Informatics, Information
	Storage and Retrieval, Kinetics, Logistic Models, Magnetic Resonance
	Spectroscopy, Mathematical Computing, Models, Nanotechnology, Neural
	Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics,
	Ocular, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern
	Recognition, Plant, Plants, Predictive Value of Tests, Protein, Protein
	Conformation, Regression Analysis, Research Support, Sample Size,
	Selection (Genetics), Sequence Alignment, Sequence Analysis, Sequence
	Homology, Signal Processing, Skin, Software, Statistical, Subtraction
	Technique, Theoretical, Thermodynamics, U.S. Gov't, Viral Proteins,
	X-Ray, 15563752},
  url = {http://dx.doi.org/10.1162/0899766052530802}
}

@article{Miwakeichi2001comparison,
  author = {F. Miwakeichi and R. Ramirez-Padron and P. A. Valdes-Sosa and T.
	Ozaki},
  title = {A comparison of non-linear non-parametric models for epilepsy data.},
  journal = {Comput. {B}iol. {M}ed.},
  year = {2001},
  volume = {31},
  pages = {41--57},
  number = {1},
  month = jan,
  abstract = {E{EG} spike and wave ({SW}) activity has been described through a
	non-parametric stochastic model estimated by the {N}adaraya-{W}atson
	({NW}) method. {I}n this paper the performance of the {NW}, the local
	linear polynomial regression and support vector machines ({SVM})
	methods were compared. {T}he noise-free realizations obtained by
	the {NW} and {SVM} methods reproduced {SW} better than as reported
	in previous works. {T}he tuning parameters had to be estimated manually.
	{A}dding dynamical noise, only the {NW} method was capable of generating
	{SW} similar to training data. {T}he standard deviation of the dynamical
	noise was estimated by means of the correlation dimension.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial
	Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding
	Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation,
	Chemistry, Child, Chromosome Aberrations, Classification, Codon,
	Colonic Neoplasms, Comparative Study, Computational Biology, Computer
	Simulation, Computer-Assisted, DNA, Data Interpretation, Databases,
	Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis,
	Discrimination Learning, Electric Conductivity, Electroencephalography,
	Electrophysiology, Epilepsy, Escherichia coli Proteins, Factual,
	Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling,
	Gene Expression Regulation, Genes, Genetic, Genetic Markers, Genetic
	Predisposition to Disease, Genomics, Hemolysins, Humans, Indians,
	Information Storage and Retrieval, Initiator, Ion Channels, Kinetics,
	Leukemia, Likelihood Functions, Linear Models, Lipid Bilayers, Logistic
	Models, Lymphocytic, MEDLINE, Male, Markov Chains, Melanoma, Models,
	Molecular, Myeloid, Neoplasm, Neoplasms, Neoplastic, Neural Networks
	(Computer), Neurological, Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear
	Dynamics, Normal Distribution, North American, Nucleic Acid Conformation,
	Oligonucleotide Array Sequence Analysis, Organ Specificity, Organelles,
	Ovarian Neoplasms, Ovary, P.H.S., Pattern Recognition, Physical,
	Pigmented, Predictive Value of Tests, Promoter Regions (Genetics),
	Protein Biosynthesis, Protein Folding, Protein Structure, Proteins,
	Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment,
	Sequence Analysis, Sex Characteristics, Skin Diseases, Skin Neoplasms,
	Skin Pigmentation, Software, Sound Spectrography, Statistical, Stochastic
	Processes, Stomach Diseases, T-Lymphocytes, Thermodynamics, Transcription,
	Transcription Factors, Tumor Markers, Type 2, U.S. Gov't, Vertebrates,
	11058693},
  pii = {S0010482500000214}
}

@article{Nabieva2005Whole-proteome,
  author = {Elena Nabieva and Kam Jim and Amit Agarwal and Bernard Chazelle and
	Mona Singh},
  title = {Whole-proteome prediction of protein function via graph-theoretic
	analysis of interaction maps},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21 Suppl 1},
  pages = {i302--i310},
  month = jun,
  abstract = {MOTIVATION: Determining protein function is one of the most important
	problems in the post-genomic era. For the typical proteome, there
	are no functional annotations for one-third or more of its proteins.
	Recent high-throughput experiments have determined proteome-scale
	protein physical interaction maps for several organisms. These physical
	interactions are complemented by an abundance of data about other
	types of functional relationships between proteins, including genetic
	interactions, knowledge about co-expression and shared evolutionary
	history. Taken together, these pairwise linkages can be used to build
	whole-proteome protein interaction maps. RESULTS: We develop a network-flow
	based algorithm, FunctionalFlow, that exploits the underlying structure
	of protein interaction maps in order to predict protein function.
	In cross-validation testing on the yeast proteome, we show that FunctionalFlow
	has improved performance over previous methods in predicting the
	function of proteins with few (or no) annotated protein neighbors.
	By comparing several methods that use protein interaction maps to
	predict protein function, we demonstrate that FunctionalFlow performs
	well because it takes advantage of both network topology and some
	measure of locality. Finally, we show that performance can be improved
	substantially as we consider multiple data sources and use them to
	create weighted interaction networks. AVAILABILITY: http://compbio.cs.princeton.edu/function},
  doi = {10.1093/bioinformatics/bti1054},
  institution = {Computer Science Department, Princeton University Princeton, NJ 08544,
	USA.},
  keywords = {Algorithms; Computational Biology, methods; Evolution, Molecular;
	Fungal Proteins, chemistry; Genomics; Models, Statistical; Models,
	Theoretical; Protein Interaction Mapping, methods; Proteins, chemistry;
	Proteomics, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {21/suppl_1/i302},
  pmid = {15961472},
  timestamp = {2010.04.03},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti1054}
}

@article{Neuvial2006Spatial,
  author = {Pierre Neuvial and Philippe Hupé and Isabel Brito and Stéphane Liva
	and Elodie Manié and Caroline Brennetot and François Radvanyi and
	Alain Aurias and Emmanuel Barillot},
  title = {Spatial normalization of array-{CGH} data},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {264},
  abstract = {BACKGROUND: Array-based comparative genomic hybridization (array-CGH)
	is a recently developed technique for analyzing changes in DNA copy
	number. As in all microarray analyses, normalization is required
	to correct for experimental artifacts while preserving the true biological
	signal. We investigated various sources of systematic variation in
	array-CGH data and identified two distinct types of spatial effect
	of no biological relevance as the predominant experimental artifacts:
	continuous spatial gradients and local spatial bias. Local spatial
	bias affects a large proportion of arrays, and has not previously
	been considered in array-CGH experiments. RESULTS: We show that existing
	normalization techniques do not correct these spatial effects properly.
	We therefore developed an automatic method for the spatial normalization
	of array-CGH data. This method makes it possible to delineate and
	to eliminate and/or correct areas affected by spatial bias. It is
	based on the combination of a spatial segmentation algorithm called
	NEM (Neighborhood Expectation Maximization) and spatial trend estimation.
	We defined quality criteria for array-CGH data, demonstrating significant
	improvements in data quality with our method for three data sets
	coming from two different platforms (198, 175 and 26 BAC-arrays).
	CONCLUSION: We have designed an automatic algorithm for the spatial
	normalization of BAC CGH-array data, preventing the misinterpretation
	of experimental artifacts as biologically relevant outliers in the
	genomic profile. This algorithm is implemented in the R package MANOR
	(Micro-Array NORmalization), which is described at http://bioinfo.curie.fr/projects/manor
	and available from the Bioconductor site http://www.bioconductor.org.
	It can also be tested on the CAPweb bioinformatics platform at http://bioinfo.curie.fr/CAPweb.},
  doi = {10.1186/1471-2105-7-264},
  institution = {Institut Curie, Service de Bioinformatique, 26, rue d'Ulm, Paris,
	75248 cedex 05, France. pierre.neuvial@curie.fr},
  keywords = {Algorithms; Artifacts; Base Sequence; Chromosome Mapping, methods;
	Computer Simulation; Data Interpretation, Statistical; Gene Dosage;
	In Situ Hybridization, methods; Models, Genetic; Models, Statistical;
	Molecular Sequence Data; Oligonucleotide Array Sequence Analysis,
	methods; Sequence Analysis, DNA, methods},
  language = {eng},
  medline-pst = {epublish},
  owner = {philippe},
  pii = {1471-2105-7-264},
  pmid = {16716215},
  timestamp = {2010.08.04},
  url = {http://dx.doi.org/10.1186/1471-2105-7-264}
}

@article{Opper2000Gaussian,
  author = {M. Opper and O. Winther},
  title = {Gaussian processes for classification: mean-field algorithms},
  journal = {Neural Comput},
  year = {2000},
  volume = {12},
  pages = {2655--2684},
  number = {11},
  month = nov,
  abstract = {We derive a mean-field algorithm for binary classification with gaussian
	processes that is based on the {TAP} approach originally proposed
	in statistical physics of disordered systems. {T}he theory also yields
	an approximate leave-one-out estimator for the generalization error,
	which is computed with no extra computational cost. {W}e show that
	from the {TAP} approach, it is possible to derive both a simpler
	"naive" mean-field theory and support vector machines ({SVM}s) as
	limiting cases. {F}or both mean-field algorithms and support vector
	machines, simulation results for three small benchmark data sets
	are presented. {T}hey show that one may get state-of-the-art performance
	by using the leave-one-out estimator for model selection and the
	built-in leave-one-out estimators are extremely precise when compared
	to the exact leave-one-out estimate. {T}he second result is taken
	as strong support for the internal consistency of the mean-field
	approach.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial
	Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding
	Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation,
	Chemistry, Child, Chromosome Aberrations, Classification, Colonic
	Neoplasms, Comparative Study, Computational Biology, Computer Simulation,
	Computer-Assisted, DNA, Data Interpretation, Databases, Decision
	Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis, Discrimination
	Learning, Electric Conductivity, Electrophysiology, Escherichia coli
	Proteins, Factual, Feedback, Female, Fungal, Gastric Emptying, Gene
	Expression Profiling, Gene Expression Regulation, Genes, Genetic,
	Genetic Markers, Genetic Predisposition to Disease, Hemolysins, Humans,
	Indians, Ion Channels, Kinetics, Leukemia, Likelihood Functions,
	Lipid Bilayers, Logistic Models, Lymphocytic, Male, Markov Chains,
	Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplasms, Neoplastic,
	Neural Networks (Computer), Neurological, Nevus, Non-P.H.S., Non-U.S.
	Gov't, Nonlinear Dynamics, Normal Distribution, North American, Nucleic
	Acid Conformation, Oligonucleotide Array Sequence Analysis, Organ
	Specificity, Organelles, Ovarian Neoplasms, Ovary, P.H.S., Pattern
	Recognition, Physical, Pigmented, Predictive Value of Tests, Promoter
	Regions (Genetics), Protein Folding, Protein Structure, Proteins,
	Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment,
	Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation,
	Software, Sound Spectrography, Statistical, Stomach Diseases, T-Lymphocytes,
	Thermodynamics, Transcription, Transcription Factors, Tumor Markers,
	Type 2, U.S. Gov't, 11110131}
}

@article{Pang2005Face,
  author = {Shaoning Pang and Daijin Kim and Sung Yang Bang},
  title = {Face membership authentication using {SVM} classification tree generated
	by membership-based {LLE} data partition},
  journal = {{IEEE} Trans Neural Netw},
  year = {2005},
  volume = {16},
  pages = {436--446},
  number = {2},
  month = mar,
  abstract = {This paper presents a new membership authentication method by face
	classification using a support vector machine ({SVM}) classification
	tree, in which the size of membership group and the members in the
	membership group can be changed dynamically. {U}nlike our previous
	{SVM} ensemble-based method, which performed only one face classification
	in the whole feature space, the proposed method employed a divide
	and conquer strategy that first performs a recursive data partition
	by membership-based locally linear embedding ({LLE}) data clustering,
	then does the {SVM} classification in each partitioned feature subset.
	{O}ur experimental results show that the proposed {SVM} tree not
	only keeps the good properties that the {SVM} ensemble method has,
	such as a good authentication accuracy and the robustness to the
	change of members, but also has a considerable improvement on the
	stability under the change of membership group size.},
  keywords = {80 and over, Aged, Algorithms, Area Under Curve, Cross-Sectional Studies,
	Decision Trees, Diagnostic Imaging, Diagnostic Techniques, Face,
	Glaucoma, Humans, Lasers, Least-Squares Analysis, Middle Aged, Nerve
	Fibers, Non-U.S. Gov't, Ophthalmological, Optic Nerve Diseases, P.H.S.,
	Photic Stimulation, ROC Curve, Research Support, Retinal Ganglion
	Cells, Sensitivity and Specificity, Statistics, U.S. Gov't, 15787150}
}

@article{Parkhomenko2009Sparse,
  author = {Parkhomenko, E. and Tritchler, D. and Beyene, J.},
  title = {Sparse canonical correlation analysis with application to genomic
	data integration},
  journal = {Stat Appl Genet Mol Biol},
  year = {2009},
  volume = {8},
  pages = {Article 1},
  number = {1},
  month = jan,
  abstract = {Large scale genomic studies with multiple phenotypic or genotypic
	measures may require the identification of complex multivariate relationships.
	In multivariate analysis a common way to inspect the relationship
	between two sets of variables based on their correlation is canonical
	correlation analysis, which determines linear combinations of all
	variables of each type with maximal correlation between the two linear
	combinations. However, in high dimensional data analysis, when the
	number of variables under consideration exceeds tens of thousands,
	linear combinations of the entire sets of features may lack biological
	plausibility and interpretability. In addition, insufficient sample
	size may lead to computational problems, inaccurate estimates of
	parameters and non-generalizable results. These problems may be solved
	by selecting sparse subsets of variables, i.e. obtaining sparse loadings
	in the linear combinations of variables of each type. In this paper
	we present Sparse Canonical Correlation Analysis (SCCA) which examines
	the relationships between two types of variables and provides sparse
	solutions that include only small subsets of variables of each type
	by maximizing the correlation between the subsets of variables of
	different types while performing variable selection. We also present
	an extension of SCCA--adaptive SCCA. We evaluate their properties
	using simulated data and illustrate practical use by applying both
	methods to the study of natural variation in human gene expression.},
  doi = {10.2202/1544-6115.1406},
  institution = {Hospital for Sick Children Research Institute. elena@utstat.toronto.edu},
  keywords = {Algorithms; Genomics, statistics /&/ numerical data; Humans; Models,
	Statistical; Sample Size},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {19222376},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.2202/1544-6115.1406}
}

@article{Pavlidis2004Support,
  author = {Paul Pavlidis and Ilan Wapinski and William Stafford Noble},
  title = {Support vector machine classification on the web},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {586--587},
  number = {4},
  month = mar,
  abstract = {The support vector machine ({SVM}) learning algorithm has been widely
	applied in bioinformatics. {W}e have developed a simple web interface
	to our implementation of the {SVM} algorithm, called {G}ist. {T}his
	interface allows novice or occasional users to apply a sophisticated
	machine learning algorithm easily to their data. {M}ore advanced
	users can download the software and source code for local installation.
	{T}he availability of these tools will permit more widespread application
	of this powerful learning algorithm in bioinformatics.},
  doi = {10.1093/bioinformatics/btg461},
  pdf = {../local/Pavlidis2004Support.pdf},
  file = {Pavlidis2004Support.pdf:local/Pavlidis2004Support.pdf:PDF},
  keywords = {Adaptation, Algorithms, Ambergris, Amino Acid Sequence, Animals, Artifacts,
	Artificial Intelligence, Automated, Cadmium, Candida, Candida albicans,
	Capillary, Clinical, Cluster Analysis, Combinatorial Chemistry Techniques,
	Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted,
	Computing Methodologies, Databases, Decision Support Systems, Electrophoresis,
	Enzymes, Europe, Eye Enucleation, Humans, Image Interpretation, Image
	Processing, Information Storage and Retrieval, Internet, Magnetic
	Resonance Imaging, Magnetic Resonance Spectroscopy, Markov Chains,
	Melanoma, Models, Molecular, Molecular Conformation, Molecular Sequence
	Data, Molecular Structure, Neural Networks (Computer), Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Odors, P.H.S., Pattern Recognition,
	Perfume, Physiological, Predictive Value of Tests, Prognosis, Prospective
	Studies, Protein, Protein Structure, Proteins, Proteomics, Quantitative
	Structure-Activity Relationship, Rats, Reproducibility of Results,
	Research Support, Saccharomyces cerevisiae, Saccharomyces cerevisiae
	Proteins, Secondary, Sensitivity and Specificity, Signal Processing,
	Single-Blind Method, Soft Tissue Neoplasms, Software, Statistical,
	U.S. Gov't, Uveal Neoplasms, Visual, 14990457},
  pii = {btg461},
  url = {http://dx.doi.org/10.1093/bioinformatics/btg461}
}

@article{Peters2005Generating,
  author = {Bjoern Peters and Alessandro Sette},
  title = {Generating quantitative models describing the sequence specificity
	of biological processes with the stabilized matrix method},
  journal = {BMC Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {132},
  abstract = {BACKGROUND: Many processes in molecular biology involve the recognition
	of short sequences of nucleic-or amino acids, such as the binding
	of immunogenic peptides to major histocompatibility complex (MHC)
	molecules. From experimental data, a model of the sequence specificity
	of these processes can be constructed, such as a sequence motif,
	a scoring matrix or an artificial neural network. The purpose of
	these models is two-fold. First, they can provide a summary of experimental
	results, allowing for a deeper understanding of the mechanisms involved
	in sequence recognition. Second, such models can be used to predict
	the experimental outcome for yet untested sequences. In the past
	we reported the development of a method to generate such models called
	the Stabilized Matrix Method (SMM). This method has been successfully
	applied to predicting peptide binding to MHC molecules, peptide transport
	by the transporter associated with antigen presentation (TAP) and
	proteasomal cleavage of protein sequences. RESULTS: Herein we report
	the implementation of the SMM algorithm as a publicly available software
	package. Specific features determining the type of problems the method
	is most appropriate for are discussed. Advantageous features of the
	package are: (1) the output generated is easy to interpret, (2) input
	and output are both quantitative, (3) specific computational strategies
	to handle experimental noise are built in, (4) the algorithm is designed
	to effectively handle bounded experimental data, (5) experimental
	data from randomized peptide libraries and conventional peptides
	can easily be combined, and (6) it is possible to incorporate pair
	interactions between positions of a sequence. CONCLUSION: Making
	the SMM method publicly available enables bioinformaticians and experimental
	biologists to easily access it, to compare its performance to other
	prediction methods, and to extend it to other applications.},
  doi = {10.1186/1471-2105-6-132},
  keywords = {Algorithms; Amino Acid Sequence; Biology; Computational Biology; Computer
	Simulation; Data Interpretation, Statistical; Databases, Protein;
	Models, Biological; Models, Statistical; Neural Networks (Computer);
	Peptide Library; Peptides; Programming Languages; Protein Binding;
	Sensitivity and Specificity; Software},
  owner = {laurent},
  pii = {1471-2105-6-132},
  pmid = {15927070},
  timestamp = {2007.07.12},
  url = {http://dx.doi.org/10.1186/1471-2105-6-132}
}

@article{Poggio1998Sparse,
  author = {Poggio and Girosi},
  title = {A Sparse Representation for Function Approximation},
  journal = {Neural Comput},
  year = {1998},
  volume = {10},
  pages = {1445--1454},
  number = {6},
  month = jul,
  abstract = {We derive a new general representation for a function as a linear
	combination of local correlation kernels at optimal sparse locations
	(and scales) and characterize its relation to principal component
	analysis, regularization, sparsity principles, and support vector
	machines.},
  keywords = {Algorithms, Automated, Biometry, Computers, DNA, Databases, Factual,
	Fungal, Fungal Proteins, GTP-Binding Proteins, Gene Expression, Genes,
	Learning, Markov Chains, Models, Neural Networks (Computer), Neurological,
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Hybridization, Open Reading
	Frames, P.H.S., Pattern Recognition, Protein, Protein Structure,
	Proteins, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical,
	Tertiary, U.S. Gov't, 9698352}
}

@article{Pontil1998Properties,
  author = {M. Pontil and A. Verri},
  title = {Properties of support vector machines},
  journal = {Neural Comput},
  year = {1998},
  volume = {10},
  pages = {955--974},
  number = {4},
  month = may,
  abstract = {Support vector machines ({SVM}s) perform pattern recognition between
	two point classes by finding a decision surface determined by certain
	points of the training set, termed support vectors ({SV}). {T}his
	surface, which in some feature space of possibly infinite dimension
	can be regarded as a hyperplane, is obtained from the solution of
	a problem of quadratic programming that depends on a regularization
	parameter. {I}n this article, we study some mathematical properties
	of support vectors and show that the decision surface can be written
	as the sum of two orthogonal terms, the first depending on only the
	margin vectors (which are {SV}s lying on the margin), the second
	proportional to the regularization parameter. {F}or almost all values
	of the parameter, this enables us to predict how the decision surface
	varies for small parameter changes. {I}n the special but important
	case of feature space of finite dimension m, we also show that m
	+ 1 {SV}s are usually sufficient to determine the decision surface
	fully. {F}or relatively small m, this latter result leads to a consistent
	reduction of the {SV} number.},
  keywords = {Algorithms, Artificial Intelligence, Automated, Biometry, Computers,
	DNA, Databases, Factual, Fungal, Fungal Proteins, GTP-Binding Proteins,
	Gene Expression, Genes, Learning, Linear Models, Markov Chains, Mathematics,
	Models, Neural Networks (Computer), Neurological, Non-P.H.S., Non-U.S.
	Gov't, Nonlinear Dynamics, Nucleic Acid Hybridization, Open Reading
	Frames, P.H.S., Pattern Recognition, Protein, Protein Structure,
	Proteins, Reproducibility of Results, Research Support, Saccharomyces
	cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical,
	Tertiary, U.S. Gov't, 9573414}
}

@article{Prill2005PlosBiol,
  author = {Robert J Prill and Pablo A Iglesias and Andre Levchenko},
  title = {Dynamic properties of network motifs contribute to biological network
	organization},
  journal = {PLoS Biol},
  year = {2005},
  volume = {3},
  pages = {e343},
  number = {11},
  month = nov,
  abstract = {Biological networks, such as those describing gene regulation, signal
	transduction, and neural synapses, are representations of large-scale
	dynamic systems. Discovery of organizing principles of biological
	networks can be enhanced by embracing the notion that there is a
	deep interplay between network structure and system dynamics. Recently,
	many structural characteristics of these non-random networks have
	been identified, but dynamical implications of the features have
	not been explored comprehensively. We demonstrate by exhaustive computational
	analysis that a dynamical property--stability or robustness to small
	perturbations--is highly correlated with the relative abundance of
	small subnetworks (network motifs) in several previously determined
	biological networks. We propose that robust dynamical stability is
	an influential property that can determine the non-random structure
	of biological networks.},
  doi = {10.1371/journal.pbio.0030343},
  institution = {Department of Biomedical Engineering, Johns Hopkins University, Baltimore,
	Maryland, USA.},
  keywords = {Animals; Caenorhabditis elegans, physiology; Computational Biology,
	methods; Computer Simulation; Drosophila melanogaster, physiology;
	Escherichia coli, physiology; Models, Biological; Nerve Net; Saccharomyces
	cerevisiae, physiology; Signal Transduction; Statistics as Topic;
	Systems Theory; Transcription, Genetic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Andrei Zinovyev},
  pii = {05-PLBI-RA-0233R2},
  pmid = {16187794},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1371/journal.pbio.0030343}
}

@article{Perez-Cruz2005Convergence,
  author = {P{\'e}rez-Cruz, Fernando and Bouso{\~n}o-Calz{\'o}n, Carlos and
	Art{\'e}s-Rodr{\'\i}guez, Antonio},
  title = {Convergence of the {IRWLS} Procedure to the Support Vector
	Machine Solution},
  journal = {Neural Comput},
  year = {2005},
  volume = {17},
  pages = {7--18},
  number = {1},
  month = jan,
  abstract = {An iterative reweighted least squares ({IRWLS}) procedure recently
	proposed is shown to converge to the support vector machine solution.
	{T}he convergence to a stationary point is ensured by modifying the
	original {IRWLS} procedure.},
  keywords = {80 and over, Aged, Algorithms, Amino Acids, Animals, Area Under Curve,
	Automated, Brain Chemistry, Brain Neoplasms, Comparative Study, Computer-Assisted,
	Cross-Sectional Studies, Decision Trees, Diagnosis, Diagnostic Imaging,
	Diagnostic Techniques, Discriminant Analysis, Evolution, Face, Genetic,
	Glaucoma, Humans, Lasers, Least-Squares Analysis, Magnetic Resonance
	Imaging, Magnetic Resonance Spectroscopy, Middle Aged, Models, Molecular,
	Nerve Fibers, Non-U.S. Gov't, Numerical Analysis, Ophthalmological,
	Optic Nerve Diseases, P.H.S., Pattern Recognition, Photic Stimulation,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	U.S. Gov't, beta-Lactamases, 15779160}
}

@article{Qin2004[Automated,
  author = {Dong-mei Qin and Zhan-yi Hu and Yong-heng Zhao},
  title = {Automated classification of celestial spectra based on support vector
	machines},
  journal = {Guang Pu Xue Yu Guang Pu Fen Xi},
  year = {2004},
  volume = {24},
  pages = {507--511},
  number = {4},
  month = apr,
  abstract = {The main objective of an automatic recognition system of celestial
	objects via their spectra is to classify celestial spectra and estimate
	physical parameters automatically. {T}his paper proposes a new automatic
	classification method based on support vector machines to separate
	non-active objects from active objects via their spectra. {W}ith
	low {SNR} and unknown red-shift value, it is difficult to extract
	true spectral lines, and as a result, active objects can not be determined
	by finding strong spectral lines and the spectral classification
	between non-active and active objects becomes difficult. {T}he proposed
	method in this paper combines the principal component analysis with
	support vector machines, and can automatically recognize the spectra
	of active objects with unknown red-shift values from non-active objects.
	{I}t finds its applicability in the automatic processing of voluminous
	observed data from large sky surveys in astronomy.},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under
	Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain
	Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic
	Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic,
	Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis,
	Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male,
	Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical
	Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence,
	P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15766170}
}

@article{Quackenbush2002Microarray,
  author = {John Quackenbush},
  title = {Microarray data normalization and transformation},
  journal = {Nat Genet},
  year = {2002},
  volume = {32 Suppl},
  pages = {496--501},
  month = dec,
  doi = {10.1038/ng1032},
  keywords = {Animals; Data Interpretation, Statistical; Forecasting; Gene Expression
	Profiling, methods; Humans; Oligonucleotide Array Sequence Analysis,
	methods; Research Design},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {ng1032},
  pmid = {12454644},
  timestamp = {2010.08.04},
  url = {http://dx.doi.org/10.1038/ng1032}
}

@article{Rhodes2007Oncomine,
  author = {Rhodes, Daniel R. and Kalyana-Sundaram, Shanker and Mahavisno, Vasudeva
	and Varambally, Radhika and Yu, Jianjun and Briggs, Benjamin B. and
	Barrette, Terrence R. and Anstet, Matthew J. and Kincead-Beal, Colleen
	and Kulkarni, Prakash and Varambally, Sooryanaryana and Ghosh, Debashis
	and Chinnaiyan, Arul M.},
  title = {Oncomine 3.0: genes, pathways, and networks in a collection of 18,000
	cancer gene expression profiles},
  journal = {Neoplasia},
  year = {2007},
  volume = {9},
  pages = {166--180},
  number = {2},
  month = feb,
  abstract = {DNA microarrays have been widely applied to cancer transcriptome analysis;
	however, the majority of such data are not easily accessible or comparable.
	Furthermore, several important analytic approaches have been applied
	to microarray analysis; however, their application is often limited.
	To overcome these limitations, we have developed Oncomine, a bioinformatics
	initiative aimed at collecting, standardizing, analyzing, and delivering
	cancer transcriptome data to the biomedical research community. Our
	analysis has identified the genes, pathways, and networks deregulated
	across 18,000 cancer gene expression microarrays, spanning the majority
	of cancer types and subtypes. Here, we provide an update on the initiative,
	describe the database and analysis modules, and highlight several
	notable observations. Results from this comprehensive analysis are
	available at http://www.oncomine.org.},
  institution = {Department of Pathology, University of Michigan Medical School, Ann
	Arbor, MI 48109-0940, USA.},
  keywords = {Antineoplastic Agents, pharmacology; Automatic Data Processing; Chromosome
	Mapping; Chromosomes, Human, genetics; Computational Biology, organization
	/&/ administration; Data Collection; Data Display; Data Interpretation,
	Statistical; Databases, Genetic; Drug Design; Gene Expression Profiling,
	statistics /&/ numerical data; Gene Expression Regulation, Neoplastic;
	Genes, Neoplasm; Humans; Internet; Models, Biological; Neoplasm Proteins,
	biosynthesis/chemistry/genetics; Neoplasms, classification/genetics/metabolism;
	Oligonucleotide Array Sequence Analysis; Subtraction Technique; Transcription,
	Genetic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {17356713},
  timestamp = {2012.03.10}
}

@article{Rice2005Reconstructing,
  author = {Rice, J. J. and Tu, Y. and Stolovitzky, G.},
  title = {Reconstructing biological networks using conditional correlation
	analysis},
  journal = {Bioinformatics},
  year = {2005},
  volume = {21},
  pages = {765--773},
  number = {6},
  month = mar,
  abstract = {MOTIVATION: One of the present challenges in biological research is
	the organization of the data originating from high-throughput technologies.
	One way in which this information can be organized is in the form
	of networks of influences, physical or statistical, between cellular
	components. We propose an experimental method for probing biological
	networks, analyzing the resulting data and reconstructing the network
	architecture. METHODS: We use networks of known topology consisting
	of nodes (genes), directed edges (gene-gene interactions) and a dynamics
	for the genes' mRNA concentrations in terms of the gene-gene interactions.
	We proposed a network reconstruction algorithm based on the conditional
	correlation of the mRNA equilibrium concentration between two genes
	given that one of them was knocked down. Using simulated gene expression
	data on networks of known connectivity, we investigated how the reconstruction
	error is affected by noise, network topology, size, sparseness and
	dynamic parameters. RESULTS: Errors arise from correlation between
	nodes connected through intermediate nodes (false positives) and
	when the correlation between two directly connected nodes is obscured
	by noise, non-linearity or multiple inputs to the target node (false
	negatives). Two critical components of the method are as follows:
	(1) the choice of an optimal correlation threshold for predicting
	connections and (2) the reduction of errors arising from indirect
	connections (for which a novel algorithm is proposed). With these
	improvements, we can reconstruct networks with the topology of the
	transcriptional regulatory network in Escherichia coli with a reasonably
	low error rate.},
  doi = {10.1093/bioinformatics/bti064},
  institution = {Computational Biology Center, IBM T.J. Watson Research Center, PO
	Box 218, Yorktown Heights, NY 10598, USA.},
  keywords = {Algorithms; Computer Simulation; Gene Expression Profiling; Gene Expression
	Regulation; Models, Biological; Models, Statistical; Oligonucleotide
	Array Sequence Analysis; Protein Interaction Mapping; Signal Transduction;
	Statistics as Topic; Transcription Factors},
  owner = {fantine},
  pii = {bti064},
  pmid = {15486043},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/bioinformatics/bti064}
}

@article{Risau-Gusman2000Generalization,
  author = {Risau-Gusman, S. and Gordon, M. B.},
  title = {Generalization properties of finite-size polynomial support vector
	machines},
  journal = {Phys Rev E Stat Phys Plasmas Fluids Relat Interdiscip Topics},
  year = {2000},
  volume = {62},
  number = {5 Pt B},
  pages = {7092--7099},
  month = nov,
  abstract = {The learning properties of finite-size polynomial support vector machines
	are analyzed in the case of realizable classification tasks. The
	normalization of the high-order features acts as a squeezing factor,
	introducing a strong anisotropy in the patterns distribution in feature
	space. As a function of the training set size, the corresponding
	generalization error presents a crossover, more or less abrupt depending
	on the distribution's anisotropy and on the task to be learned, between
	a fast-decreasing and a slowly decreasing regime. This behavior
	corresponds to the stepwise decrease found by Dietrich et al. [Phys.
	Rev. Lett. 82, 2975 (1999)] in the thermodynamic limit. The
	theoretical results are in excellent agreement with the numerical
	simulations.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial
	Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding
	Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation,
	Chemistry, Child, Chromosome Aberrations, Classification, Codon,
	Colonic Neoplasms, Comparative Study, Computational Biology, Computer
	Simulation, Computer-Assisted, DNA, Data Interpretation, Databases,
	Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis,
	Discrimination Learning, Electric Conductivity, Electrophysiology,
	Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric
	Emptying, Gene Expression Profiling, Gene Expression Regulation,
	Genes, Genetic, Genetic Markers, Genetic Predisposition to Disease,
	Genomics, Hemolysins, Humans, Indians, Initiator, Ion Channels, Kinetics,
	Leukemia, Likelihood Functions, Lipid Bilayers, Logistic Models,
	Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular, Myeloid,
	Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological,
	Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution,
	North American, Nucleic Acid Conformation, Oligonucleotide Array
	Sequence Analysis, Organ Specificity, Organelles, Ovarian Neoplasms,
	Ovary, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive
	Value of Tests, Promoter Regions (Genetics), Protein Biosynthesis,
	Protein Folding, Protein Structure, Proteins, Proteome, RNA, Reproducibility
	of Results, Research Support, Saccharomyces cerevisiae, Secondary,
	Sensitivity and Specificity, Sequence Alignment, Sequence Analysis,
	Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation,
	Software, Sound Spectrography, Statistical, Stomach Diseases, T-Lymphocytes,
	Thermodynamics, Transcription, Transcription Factors, Tumor Markers,
	Type 2, U.S. Gov't, Vertebrates, 0011102066},
  pmid = {11102066}
}

@article{Roth2004Bayesian,
  author = {Roth, Volker and Lange, Tilman},
  title = {{Bayesian} class discovery in microarray datasets},
  journal = {IEEE Trans Biomed Eng},
  year = {2004},
  volume = {51},
  number = {5},
  pages = {707--718},
  month = may,
  abstract = {A novel approach to class discovery in gene expression datasets is
	presented. In the context of clinical diagnosis, the central goal
	of class discovery algorithms is to simultaneously find putative
	(sub-)types of diseases and to identify informative subsets of genes
	with disease-type specific expression profile. Contrary to many other
	approaches in the literature, the method presented implements a wrapper
	strategy for feature selection, in the sense that the features are
	directly selected by optimizing the discriminative power of the used
	partitioning algorithm. The usual combinatorial problems associated
	with wrapper approaches are overcome by a Bayesian inference mechanism.
	On the technical side, we present an efficient optimization algorithm
	with guaranteed local convergence property. The only free parameter
	of the optimization method is selected by a resampling-based stability
	analysis. Experiments with Leukemia and Lymphoma datasets demonstrate
	that our method is able to correctly infer partitions and corresponding
	subsets of genes which both are relevant in a biological sense. Moreover,
	the frequently observed problem of ambiguities caused by different
	but equally high-scoring partitions is successfully overcome by the
	model selection method proposed.},
  keywords = {Algorithms, Automated, Bayes Theorem, Cluster Analysis, Comparative
	Study, DNA, Databases, Gene Expression Profiling, Genetic, Genetic
	Screening, Humans, Leukemia, Models, Non-U.S. Gov't, Nucleic Acid,
	Oligonucleotide Array Sequence Analysis, Pattern Recognition, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Sequence
	Alignment, Sequence Analysis, Statistical, 15132496},
  pmid = {15132496},
  timestamp = {2006.07.27}
}

@article{Sassi2005automated,
  author = {Sassi, Alexander P. and Andel, Frank and Bitter, Hans-Marcus L. and
	Brown, Michael P. S. and Chapman, Robert G. and Espiritu, Jeraldine and
	Greenquist, Alfred C. and Guyon, Isabelle and Horchi-Alegre, Mariana and
	Stults, Kathy L. and Wainright, Ann and Heller, Jonathan C. and
	Stults, John T.},
  title = {An automated, sheathless capillary electrophoresis-mass spectrometry
	platform for discovery of biomarkers in human serum},
  journal = {Electrophoresis},
  year = {2005},
  volume = {26},
  number = {7-8},
  pages = {1500--1512},
  month = apr,
  abstract = {A capillary electrophoresis-mass spectrometry (CE-MS) method has
	been developed to perform routine, automated analysis of low-molecular-weight
	peptides in human serum. The method incorporates transient isotachophoresis
	for in-line preconcentration and a sheathless electrospray interface.
	To evaluate the performance of the method and demonstrate the utility
	of the approach, an experiment was designed in which peptides were
	added to sera from individuals at each of two different concentrations,
	artificially creating two groups of samples. The CE-MS data
	from the serum samples were divided into separate training and test
	sets. A pattern-recognition/feature-selection algorithm based on
	support vector machines was used to select the mass-to-charge (m/z)
	values from the training set data that distinguished the two groups
	of samples from each other. The added peptides were identified
	correctly as the distinguishing features, and pattern recognition
	based on these peptides was used to assign each sample in the independent
	test set to its respective group. A twofold difference in peptide
	concentration could be detected with statistical significance (p-value
	$<$ 0.0001). The accuracy of the assignment was 95\%, demonstrating
	the utility of this technique for the discovery of patterns of biomarkers
	in serum.},
  doi = {10.1002/elps.200410127},
  pdf = {../local/Sassi2005automated.pdf},
  file = {Sassi2005automated.pdf:local/Sassi2005automated.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under
	Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain
	Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic
	Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic,
	Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis,
	Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male,
	Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical
	Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence,
	P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15765480},
  pmid = {15765480},
  url = {http://dx.doi.org/10.1002/elps.200410127}
}

@article{Schneider1998Artificial,
  author = {Schneider, G. and Wrede, P.},
  title = {Artificial neural networks for computer-based molecular design},
  journal = {Prog Biophys Mol Biol},
  year = {1998},
  volume = {70},
  number = {3},
  pages = {175--222},
  abstract = {The theory of artificial neural networks is briefly reviewed focusing
	on supervised and unsupervised techniques which have great impact
	on current chemical applications. An introduction to molecular descriptors
	and representation schemes is given. In addition, worked examples
	of recent advances in this field are highlighted and pioneering publications
	are discussed. Applications of several types of artificial neural
	networks to compound classification, modelling of structure-activity
	relationships, biological target identification, and feature extraction
	from biopolymers are presented and compared to other techniques.
	Advantages and limitations of neural networks for computer-aided
	molecular design and sequence analysis are discussed.},
  keywords = {Algorithms, Amino Acid Sequence, Amino Acids, Animals, Artificial
	Intelligence, Automated, Bacterial, Bacterial Proteins, Bicuculline,
	Binding Sites, Biological, Biological Availability, Blood Proteins,
	Blood-Brain Barrier, Cation Transport Proteins, Cats, Cell Membrane
	Permeability, Chemical, Chemistry, Cluster Analysis, Combinatorial
	Chemistry Techniques, Comparative Study, Computational Biology, Computer
	Simulation, Computer Systems, Computer-Aided Design, Computer-Assisted,
	Computing Methodologies, DNA-Binding Proteins, Databases, Dogs, Drug
	Design, Electric Stimulation, Electromyography, Enzyme Inhibitors,
	Ether-A-Go-Go Potassium Channels, Excitatory Amino Acid Antagonists,
	Factual, False Positive Reactions, Forecasting, Forelimb, GABA Antagonists,
	Gene Expression Profiling, Genome, Glutamic Acid, Humans, Hydrogen
	Bonding, Image Enhancement, Image Interpretation, Image Processing,
	Information Storage and Retrieval, Iontophoresis, Kynurenic Acid,
	Least-Squares Analysis, Linear Models, Liver, Markov Chains, Metabolic
	Clearance Rate, Metalloendopeptidases, Microelectrodes, Models, Molecular,
	Molecular Conformation, Molecular Sequence Data, Molecular Structure,
	Motor Cortex, Movement, Multivariate Analysis, Nerve Net, Neural
	Networks (Computer), Neuropeptides, Non-U.S. Gov't, Nonlinear Dynamics,
	Pattern Recognition, Pharmaceutical, Pharmaceutical Preparations,
	Pharmacokinetics, Phylogeny, Potassium Channels, Predictive Value
	of Tests, Protein Interaction Mapping, Protein Sorting Signals, Protein
	Structure, Proteins, Rats, Reproducibility of Results, Research Support,
	Sensitivity and Specificity, Sequence Alignment, Sequence Analysis,
	Shoulder, Signal Processing, Software, Statistical, Stereotaxic Techniques,
	Structure-Activity Relationship, Terminology, Tertiary, Trans-Activators,
	Voltage-Gated, Zinc, 9830312},
  owner = {mahe},
  pii = {S0079610798000261},
  pmid = {9830312},
  timestamp = {2006.09.06}
}

@article{Seeger2004Gaussian,
  author = {Seeger, Matthias},
  title = {{Gaussian} processes for machine learning},
  journal = {Int J Neural Syst},
  year = {2004},
  volume = {14},
  number = {2},
  pages = {69--106},
  month = apr,
  abstract = {Gaussian processes (GPs) are natural generalisations of multivariate
	Gaussian random variables to infinite (countably or continuous)
	index sets. GPs have been applied in a large number of fields to
	a diverse range of ends, and very many deep theoretical analyses
	of various properties are available. This paper gives an introduction
	to Gaussian processes on a fairly elementary level with special
	emphasis on characteristics relevant in machine learning. It draws
	explicit connections to branches such as spline smoothing models
	and support vector machines in which similar ideas have been investigated.
	Gaussian process models are routinely used to solve hard machine
	learning problems. They are attractive because of their flexible
	non-parametric nature and computational simplicity. Treated within
	a Bayesian framework, very powerful statistical methods can be
	implemented which offer valid estimates of uncertainties in our predictions
	and generic model selection procedures cast as nonlinear optimization
	problems. Their main drawback of heavy computational scaling has
	recently been alleviated by the introduction of generic sparse approximations.
	The mathematical literature on GPs is large and often uses deep
	concepts which are not required to fully understand most machine
	learning applications. In this tutorial paper, we aim to present
	characteristics of GPs relevant to machine learning and to show
	up precise connections to other ``kernel machines'' popular in the
	community. Our focus is on a simple presentation, but references
	to more detailed sources are provided.},
  keywords = {Algorithms, Amino Acids, Antibodies, Artificial Intelligence, Astrocytoma,
	Automated, Bayes Theorem, Biological, Biopsy, Brain, Brain Mapping,
	Brain Neoplasms, Calibration, Comparative Study, Computational Biology,
	Computer-Assisted, Computing Methodologies, Cysteine, Cystine, Dysplastic
	Nevus Syndrome, Electrodes, Electroencephalography, Entropy, Eosine
	Yellowish-(YS), Evoked Potentials, Female, Gene Expression Profiling,
	Hematoxylin, Horseradish Peroxidase, Humans, Image Interpretation,
	Image Processing, Imagery (Psychotherapy), Imagination, Laterality,
	Linear Models, Male, Melanoma, Models, Monoclonal, Movement, Neoplasms,
	Neural Networks (Computer), Neuropeptides, Non-P.H.S., Non-U.S. Gov't,
	Nonparametric, Normal Distribution, P.H.S., Pattern Recognition,
	Perception, Principal Component Analysis, Protein, Protein Array
	Analysis, Protein Interaction Mapping, Proteins, Regression Analysis,
	Research Support, Sensitivity and Specificity, Sequence Alignment,
	Sequence Ana, Sequence Analysis, Skin Neoplasms, Software, Statistical,
	Statistics, Tumor Markers, U.S. Gov't, User-Computer Interface, World
	Health Organization, lysis, 15112367},
  pii = {S0129065704001899},
  pmid = {15112367}
}

@article{Sheinerman2005High,
  author = {Sheinerman, Felix B. and Giraud, Elie and Laoui, Abdelazize},
  title = {High affinity targets of protein kinase inhibitors have similar residues
	at the positions energetically important for binding},
  journal = {J. Mol. Biol.},
  year = {2005},
  volume = {352},
  number = {5},
  pages = {1134--1156},
  month = oct,
  abstract = {Inhibition of protein kinase activity is a focus of intense drug discovery
	efforts in several therapeutic areas. Major challenges facing the
	field include understanding of the factors determining the selectivity
	of kinase inhibitors and the development of compounds with the desired
	selectivity profile. Here, we report the analysis of sequence variability
	among high and low affinity targets of eight different small molecule
	kinase inhibitors (BIRB796, Tarceva, NU6102, Gleevec, SB203580, balanol,
	H89, PP1). It is observed that all high affinity targets of each
	inhibitor are found among a relatively small number of kinases, which
	have similar residues at the specific positions important for binding.
	The findings are highly statistically significant, and allow one
	to exclude the majority of kinases in a genome from a list of likely
	targets for an inhibitor. The findings have implications for the
	design of novel inhibitors with a desired selectivity profile (e.g.
	targeted at multiple kinases), the discovery of new targets for kinase
	inhibitor drugs, comparative analysis of different in vivo models,
	and the design of ``a-la-carte'' chemical libraries tailored for individual
	kinases.},
  doi = {10.1016/j.jmb.2005.07.074},
  keywords = {Amino Acid Sequence; Amino Acids; Binding Sites; Electrostatics; Humans;
	Ligands; Molecular Sequence Data; Piperazines; Protein Binding; Protein
	Kinase Inhibitors; Protein Kinases; Pyrazoles; Pyrimidines; Sequence
	Alignment; Thermodynamics},
  owner = {laurent},
  pii = {S0022-2836(05)00900-9},
  pmid = {16139843},
  timestamp = {2007.01.03},
  url = {http://dx.doi.org/10.1016/j.jmb.2005.07.074}
}

@article{Shen2005[Detection,
  author = {Shen, Li and Yang, Jie and Zhou, Yue},
  title = {Detection of {PVCs} with support vector machine},
  journal = {Sheng Wu Yi Xue Gong Cheng Xue Za Zhi},
  year = {2005},
  volume = {22},
  number = {1},
  pages = {78--81},
  month = feb,
  abstract = {The classification of heart beats is the foundation for automated arrhythmia
	monitoring devices. Support vector machines (SVMs) have meant
	a great advance in solving classification or pattern recognition.
	This study describes SVM for the identification of premature
	ventricular contractions (PVCs) in surface ECGs. Features for
	the classification task are extracted by analyzing the heart rate,
	morphology and wavelet energy of the heart beats from a single lead.
	The performance of different SVMs is evaluated on the MIT-BIH
	arrhythmia database following the association for the advancement
	of medical instrumentation (AAMI) recommendations.},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under
	Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain
	Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional
	Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic
	Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic,
	Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis,
	Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male,
	Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical
	Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence,
	P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies,
	Protein, ROC Curve, Regression Analysis, Research Support, Retinal
	Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics,
	Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15762121},
  pmid = {15762121}
}

@article{Sherry2001dbSNP,
  author = {Sherry, S. T. and Ward, M. H. and Kholodov, M. and Baker, J. and
	Phan, L. and Smigielski, E. M. and Sirotkin, K.},
  title = {{dbSNP}: the {NCBI} database of genetic variation},
  journal = {Nucleic Acids Res},
  year = {2001},
  volume = {29},
  number = {1},
  pages = {308--311},
  month = jan,
  abstract = {In response to a need for a general catalog of genome variation to
	address the large-scale sampling designs required by association
	studies, gene mapping and evolutionary biology, the National Center
	for Biotechnology Information (NCBI) has established the dbSNP database
	[S.T.Sherry, M.Ward and K. Sirotkin (1999) Genome Res., 9, 677-679].
	Submissions to dbSNP will be integrated with other sources of information
	at NCBI such as GenBank, PubMed, LocusLink and the Human Genome Project
	data. The complete contents of dbSNP are available to the public
	at website: http://www.ncbi.nlm.nih.gov/SNP. The complete contents
	of dbSNP can also be downloaded in multiple formats via anonymous
	FTP at ftp://ncbi.nlm.nih.gov/snp/.},
  institution = {National Center for Biotechnology Information, National Library of
	Medicine, National Institutes of Health, Bethesda, MD, 20894, USA.
	sherry@ncbi.nlm.nih.gov},
  keywords = {Animals; Biotechnology; Databases, Factual; Genetic Variation; Humans;
	Information Services; Internet; National Institutes of Health (U.S.);
	National Library of Medicine (U.S.); Polymorphism, Single Nucleotide,
	genetics; United States},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pmid = {11125122},
  timestamp = {2010.08.01}
}

@article{Song2002Prediction,
  author = {Song, Minghu and Breneman, Curt M. and Bi, Jinbo and Sukumar, N. and
	Bennett, Kristin P. and Cramer, Steven and Tugcu, Nihal},
  title = {Prediction of protein retention times in anion-exchange chromatography
	systems using support vector regression},
  journal = {J Chem Inf Comput Sci},
  year = {2002},
  volume = {42},
  number = {6},
  pages = {1347--1357},
  abstract = {Quantitative Structure-Retention Relationship (QSRR) models
	are developed for the prediction of protein retention times in anion-exchange
	chromatography systems. Topological, subdivided surface area, and
	TAE (Transferable Atom Equivalent) electron-density-based
	descriptors are computed directly for a set of proteins using molecular
	connectivity patterns and crystal structure geometries. A novel
	algorithm based on Support Vector Machine (SVM) regression
	has been employed to obtain predictive QSRR models using a two-step
	computational strategy. In the first step, a sparse linear SVM
	was utilized as a feature selection procedure to remove irrelevant
	or redundant information. Subsequently, the selected features were
	used to produce an ensemble of nonlinear SVM regression models
	that were combined using bootstrap aggregation (bagging) techniques,
	where various combinations of training and validation data sets were
	selected from the pool of available data. A visualization scheme
	(star plots) was used to display the relative importance of each
	selected descriptor in the final set of ``bagged'' models. Once these
	predictive models have been validated, they can be used as an automated
	prediction tool for virtual high-throughput screening (VHTS).},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence,
	Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological,
	Biosensing Techniques, Carcinoma, Chemical, Chromatography, Classification,
	Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted,
	Cystadenoma, DNA, Decision Making, Diagnosis, Differential, Drug,
	Drug Design, Electrostatics, Eukaryotic Cells, Feasibility Studies,
	Female, Gene Expression, Gene Expression Profiling, Gene Expression
	Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans,
	Internet, Ion Exchange, Leukemia, Ligands, Likelihood Functions,
	Logistic Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains,
	Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques,
	Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic,
	Neural Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S.
	Gov't, Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer
	Variation, Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms,
	P.H.S., Pattern Recognition, Probability, Protein Binding, Protein
	Conformation, Proteins, Quality Control, Quantum Theory, RNA, RNA
	Splicing, Receptors, Reference Values, Regression Analysis, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Sequence
	Analysis, Signal Processing, Software, Statistical, Stomach Neoplasms,
	Thermodynamics, Transcription, Tumor Markers, U.S. Gov't, 12444731},
  pii = {ci025580t},
  pmid = {12444731}
}

@article{Song2004Comparison,
  author = {Song, Xiaowei and Mitnitski, Arnold and Cox, Jafna and Rockwood, Kenneth},
  title = {Comparison of machine learning techniques with classical statistical
	models in predicting health outcomes},
  journal = {Medinfo},
  year = {2004},
  volume = {11},
  number = {Pt 1},
  pages = {736--740},
  abstract = {Several machine learning techniques (multilayer and single layer perceptron,
	logistic regression, least square linear separation and support vector
	machines) are applied to calculate the risk of death from two biomedical
	data sets, one from patient care records, and another from a population
	survey. Each dataset contained multiple sources of information:
	history of related symptoms and other illnesses, physical examination
	findings, laboratory tests, medications (patient records dataset),
	health attitudes, and disabilities in activities of daily living
	(survey dataset). Each technique showed very good mortality prediction
	in the acute patients data sample (AUC up to 0.89) and fair prediction
	accuracy for six year mortality (AUC from 0.70 to 0.76) in individuals
	from epidemiological database surveys. The results suggest that
	the nature of data is of primary importance rather than the learning
	technique. However, the consistently superior performance of the
	artificial neural network (multi-layer perceptron) indicates that
	nonlinear relationships (which cannot be discerned by linear separation
	techniques) can provide additional improvement in correctly predicting
	health outcomes.},
  keywords = {Aged, Air, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts,
	Artificial Intelligence, Atrial, Automated, Canada, Carotid Stenosis,
	Cerebrovascular Accident, Cerebrovascular Circulation, Comparative
	Study, Computer-Assisted, Cysteine, Decision Trees, Dementia, Diagnosis,
	Disulfides, Doppler, Embolism, Expert Systems, Extramural, Factor
	Analysis, Female, Gene Expression, Gene Expression Profiling, Health
	Status, Heart Septal Defects, Humans, Intracranial Embolism, Male,
	Models, Molecular, Myocardial Infarction, N.I.H., Neoplasms, Neural
	Networks (Computer), Non-U.S. Gov't, Oligonucleotide Array Sequence
	Analysis, Oxidation-Reduction, P.H.S., Pattern Recognition, Prognosis,
	Protein Binding, Protein Folding, Proteins, ROC Curve, Research Support,
	Sensitivity and Specificity, Software, Statistical, Transcranial,
	Treatment Outcome, U.S. Gov't, Ultrasonography, 15360910},
  pii = {D040004933},
  pmid = {15360910}
}

@article{Sturn2002Genesis:,
  author = {Sturn, Alexander and Quackenbush, John and Trajanoski, Zlatko},
  title = {{Genesis}: cluster analysis of microarray data},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  number = {1},
  pages = {207--208},
  month = jan,
  abstract = {A versatile, platform independent and easy to use Java suite for
	large-scale gene expression analysis was developed. Genesis integrates
	various tools for microarray data analysis such as filters, normalization
	and visualization tools, distance measures as well as common clustering
	algorithms including hierarchical clustering, self-organizing maps,
	k-means, principal component analysis, and support vector machines.
	The results of the clustering are transparent across all implemented
	methods and enable the analysis of the outcome of different algorithms
	and parameters. Additionally, mapping of gene expression data onto
	chromosomal sequences was implemented to enhance promoter analysis
	and investigation of transcriptional control mechanisms.},
  keywords = {Algorithms, Artificial Intelligence, Cluster Analysis, Comparative
	Study, Computational Biology, Databases, Gene Expression Profiling,
	Genetic, Models, Molecular Structure, Neural Networks (Computer),
	Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, Principal
	Component Analysis, Programming Languages, Promoter Regions (Genetics),
	Protein, Proteins, Research Support, Software, Statistical, Transcription,
	11836235},
  pmid = {11836235}
}

@article{Suykens2001Optimal,
  author = {Suykens, J. A. and Vandewalle, J. and De Moor, B.},
  title = {Optimal control by least squares support vector machines},
  journal = {Neural Netw},
  year = {2001},
  volume = {14},
  number = {1},
  pages = {23--35},
  month = jan,
  abstract = {Support vector machines have been very successful in pattern recognition
	and function estimation problems. In this paper we introduce the
	use of least squares support vector machines (LS-SVM's) for the
	optimal control of nonlinear systems. Linear and neural full static
	state feedback controllers are considered. The problem is formulated
	in such a way that it incorporates the N-stage optimal control
	problem as well as a least squares support vector machine approach
	for mapping the state space into the action space. The solution
	is characterized by a set of nonlinear equations. An alternative
	formulation as a constrained nonlinear optimization problem in less
	unknowns is given, together with a method for imposing local stability
	in the LS-SVM control scheme. The results are discussed for
	support vector machines with radial basis function kernel. Advantages
	of LS-SVM control are that no number of hidden units has to be
	determined for the controller and that no centers have to be specified
	for the Gaussian kernels when applying Mercer's condition. The
	curse of dimensionality is avoided in comparison with defining a
	regular grid for the centers in classical radial basis function networks.
	This is at the expense of taking the trajectory of state variables
	as additional unknowns in the optimization problem, while classical
	neural network approaches typically lead to parametric optimization
	problems. In the SVM methodology the number of unknowns equals
	the number of training data, while in the primal space the number
	of unknowns can be infinite dimensional. The method is illustrated
	both on stabilization and tracking problems including examples on
	swinging up an inverted pendulum with local stabilization at the
	endpoint and a tracking problem for a ball and beam system.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins,
	Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites,
	Biological, Bone Marrow Cells, Cell Compartmentation, Chemistry,
	Child, Chromosome Aberrations, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, DNA, Data Interpretation,
	Databases, Decision Trees, Diagnosis, Discriminant Analysis, Electric
	Conductivity, Electrophysiology, Escherichia coli Proteins, Factual,
	Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling,
	Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins,
	Humans, Ion Channels, Kinetics, Leukemia, Lipid Bilayers, Logistic
	Models, Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular,
	Myeloid, Neoplasm, Neoplastic, Neural Networks (Computer), Nevus,
	Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution,
	Nucleic Acid Conformation, Organ Specificity, Organelles, P.H.S.,
	Pattern Recognition, Physical, Pigmented, Predictive Value of Tests,
	Promoter Regions (Genetics), Protein Folding, Protein Structure,
	Proteins, Proteome, RNA, Reproducibility of Results, Research Support,
	Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity,
	Sequence Alignment, Sex Characteristics, Skin Diseases, Skin Neoplasms,
	Skin Pigmentation, Software, Statistical, Stomach Diseases, T-Lymphocytes,
	Thermodynamics, Transcription, Transcription Factors, Tumor Markers,
	U.S. Gov't, 11213211},
  pii = {S0893608000000770},
  pmid = {11213211}
}

@article{Talagrand1996Majorizing,
  author = {Talagrand, M.},
  title = {Majorizing measures: {The} generic chaining},
  journal = {Ann. Probab.},
  year = {1996},
  volume = {24},
  pages = {1049--1103},
  subject = {stat},
  url = {http://www.math.ohio-state.edu/~talagran/preprints/majmeas.dvi},
  pdf = {../local/tala96b.pdf},
  file = {tala96b.pdf:local/tala96b.pdf:PDF}
}

@article{Talagrand1996New,
  author = {Talagrand, M.},
  title = {New concentration inequalities for product spaces},
  journal = {Inventiones Math.},
  year = {1996},
  volume = {126},
  pages = {505--563},
  pdf = {../local/tala96.pdf},
  file = {tala96.pdf:local/tala96.pdf:PDF},
  subject = {stat},
  url = {http://www.math.ohio-state.edu/~talagran/preprints/newcon.dvi}
}

@article{Talagrand1996Newa,
  author = {Talagrand, M.},
  title = {A {New} {Look} at {Independence}},
  journal = {Ann. Probab.},
  year = {1996},
  volume = {24},
  pages = {1--34},
  subject = {stat},
  url = {http://www.math.ohio-state.edu/~talagran/preprints/newlook.dvi},
  pdf = {../local/tala96c.pdf},
  file = {tala96c.pdf:local/tala96c.pdf:PDF}
}

@article{Talagrand1995Concentration,
  author = {Talagrand, M.},
  title = {Concentration of measure and isoperimetric inequalities in product
	spaces},
  journal = {Publ. {M}ath. {I}.{H}.{E}.{S}.},
  year = {1995},
  volume = {81},
  pages = {73--205},
  pdf = {../local/tala95.pdf},
  file = {tala95.pdf:local/tala95.pdf:PDF},
  subject = {stat},
  url = {http://www.math.ohio-state.edu/~talagran/preprints/ihes.dvi}
}

@article{Tzeng2004Predicting,
  author = {Huey-Ming Tzeng and Jer-Guang Hsieh and Yih-Lon Lin},
  title = {Predicting nurses' intention to quit with a support vector machine:
	a new approach to set up an early warning mechanism in human resource
	management.},
  journal = {Comput {I}nform {N}urs},
  year = {2004},
  volume = {22},
  pages = {232--242},
  number = {4},
  abstract = {This project developed a {S}upport {V}ector {M}achine for predicting
	nurses' intention to quit, using working motivation, job satisfaction,
	and stress levels as predictors. {T}his study was conducted in three
	hospitals located in southern {T}aiwan. {T}he target population was
	all nurses (389 valid cases). {F}or cross-validation, we randomly
	split cases into four groups of approximately equal sizes, and performed
	four training runs. {A}fter the training, the average percentage
	of misclassification on the training data was 0.86, while that on
	the testing data was 10.8, resulting in predictions with 89.2\% accuracy.
	{T}his {S}upport {V}ector {M}achine can predict nurses' intention
	to quit, without asking these nurses whether they have an intention
	to quit.},
  keywords = {Adolescent, Adult, Algorithms, Amino Acid Sequence, Amino Acids, Anatomic,
	Attitude of Health Personnel, Bacterial Proteins, Bias (Epidemiology),
	Brain, Brain Mapping, Burnout, Comparative Study, Computer Simulation,
	Computer-Assisted, Data Interpretation, Diffusion Magnetic Resonance
	Imaging, Facial Asymmetry, Facial Expression, Facial Paralysis, Female,
	Gene Expression Profiling, Gram-Negative Bacteria, Gram-Positive
	Bacteria, Hospital, Humans, Image Interpretation, Intention, Job
	Satisfaction, Logistic Models, Magnetoencephalography, Male, Middle
	Aged, Models, Motion, Neural Networks (Computer), Neural Pathways,
	Non-U.S. Gov't, Nonlinear Dynamics, Nursing Administration Research,
	Nursing Staff, Personnel Management, Personnel Turnover, Photography,
	Predictive Value of Tests, Professional, Protein, Proteins, Proteome,
	Psychological, Questionnaires, Regression Analysis, Reproducibility
	of Results, Research Support, Retina, Risk Factors, Sequence Alignment,
	Sequence Analysis, Severity of Illness Index, Software, Statistical,
	Subcellular Fractions, Taiwan, Theoretical, Workplace},
  pii = {00024665-200407000-00012},
  pmid = {15494654}
}

@article{Vanunu2010Associating,
  author = {Vanunu, O. and Magger, O. and Ruppin, E. and Shlomi, T. and Sharan,
	R.},
  title = {Associating genes and protein complexes with disease via network
	propagation.},
  journal = {PLoS Comput. Biol.},
  year = {2010},
  volume = {6},
  pages = {e1000641},
  number = {1},
  month = jan,
  abstract = {A fundamental challenge in human health is the identification of disease-causing
	genes. Recently, several studies have tackled this challenge via
	a network-based approach, motivated by the observation that genes
	causing the same or similar diseases tend to lie close to one another
	in a network of protein-protein or functional interactions. However,
	most of these approaches use only local network information in the
	inference process and are restricted to inferring single gene associations.
	Here, we provide a global, network-based method for prioritizing
	disease genes and inferring protein complex associations, which we
	call PRINCE. The method is based on formulating constraints on the
	prioritization function that relate to its smoothness over the network
	and usage of prior information. We exploit this function to predict
	not only genes but also protein complex associations with a disease
	of interest. We test our method on gene-disease association data,
	evaluating both the prioritization achieved and the protein complexes
	inferred. We show that our method outperforms extant approaches in
	both tasks. Using data on 1,369 diseases from the OMIM knowledgebase,
	our method is able (in a cross validation setting) to rank the true
	causal gene first for 34\% of the diseases, and infer 139 disease-related
	complexes that are highly coherent in terms of the function, expression
	and conservation of their member proteins. Importantly, we apply
	our method to study three multi-factorial diseases for which some
	causal genes have been found already: prostate cancer, alzheimer
	and type 2 diabetes mellitus. PRINCE's predictions for these diseases
	highly match the known literature, suggesting several novel causal
	genes and protein complexes for further investigation.},
  doi = {10.1371/journal.pcbi.1000641},
  institution = {School of Computer Science, Tel-Aviv University, Tel-Aviv, Israel.},
  keywords = {Algorithms; Alzheimer Disease; Databases, Genetic; Diabetes Mellitus;
	Disease; Genes; Humans; Male; Multiprotein Complexes; Prostatic Neoplasms;
	Protein Interaction Mapping; Proteins; Reproducibility of Results},
  owner = {mordelet},
  pmid = {20090828},
  timestamp = {2010.09.27},
  url = {http://dx.doi.org/10.1371/journal.pcbi.1000641}
}

@article{Vercoutere2001Rapid,
  author = {W. Vercoutere and S. Winters-Hilt and H. Olsen and D. Deamer and
	D. Haussler and M. Akeson},
  title = {Rapid discrimination among individual {DNA} hairpin molecules at
	single-nucleotide resolution using an ion channel.},
  journal = {Nat. Biotechnol.},
  year = {2001},
  volume = {19},
  pages = {248--252},
  number = {3},
  month = mar,
  abstract = {{RNA} and {DNA} strands produce ionic current signatures when driven
	through an alpha-hemolysin channel by an applied voltage. {H}ere
	we combine this nanopore detector with a support vector machine ({SVM})
	to analyze {DNA} hairpin molecules on the millisecond time scale.
	{M}easurable properties include duplex stem length, base pair mismatches,
	and loop length. {T}his nanopore instrument can discriminate between
	individual {DNA} hairpins that differ by one base pair or by one
	nucleotide.},
  doi = {10.1038/85696},
  pdf = {../local/Vercoutere2001Rapid.pdf},
  file = {Vercoutere2001Rapid.pdf:local/Vercoutere2001Rapid.pdf:PDF},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins,
	Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites,
	Biological, Bone Marrow Cells, Cell Compartmentation, Chemistry,
	Child, Chromosome Aberrations, Comparative Study, Computational Biology,
	Computer Simulation, Computer-Assisted, DNA, Data Interpretation,
	Databases, Decision Trees, Diagnosis, Discriminant Analysis, Electric
	Conductivity, Electrophysiology, Escherichia coli Proteins, Factual,
	Female, Fungal, Gastric Emptying, Gene Expression Profiling, Gene
	Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins,
	Humans, Ion Channels, Kinetics, Leukemia, Lipid Bilayers, Logistic
	Models, Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular,
	Myeloid, Neoplasm, Neoplastic, Neural Networks (Computer), Nevus,
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, Organ Specificity,
	Organelles, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive
	Value of Tests, Promoter Regions (Genetics), Protein Folding, Protein
	Structure, Proteins, Proteome, RNA, Reproducibility of Results, Research
	Support, Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity,
	Sequence Alignment, Sex Characteristics, Skin Diseases, Skin Neoplasms,
	Skin Pigmentation, Software, Statistical, Stomach Diseases, T-Lymphocytes,
	Thermodynamics, Transcription, Transcription Factors, Tumor Markers,
	U.S. Gov't},
  pii = {85696},
  pmid = {11231558},
  url = {http://dx.doi.org/10.1038/85696}
}

@article{Wahba2002Soft,
  author = {Grace Wahba},
  title = {Soft and hard classification by reproducing kernel {H}ilbert space
	methods.},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2002},
  volume = {99},
  pages = {16524--16530},
  number = {26},
  month = dec,
  abstract = {Reproducing kernel {H}ilbert space ({RKHS}) methods provide a unified
	context for solving a wide variety of statistical modelling and function
	estimation problems. {W}e consider two such problems: {W}e are given
	a training set [yi, ti, i = 1, \ldots, n], where yi is the response
	for the ith subject, and ti is a vector of attributes for this subject.
	{T}he value of y(i) is a label that indicates which category it came
	from. {F}or the first problem, we wish to build a model from the
	training set that assigns to each t in an attribute domain of interest
	an estimate of the probability pj(t) that a (future) subject with
	attribute vector t is in category j. {T}he second problem is in some
	sense less ambitious; it is to build a model that assigns to each
	t a label, which classifies a future subject with that t into one
	of the categories or possibly "none of the above." {T}he approach
	to the first of these two problems discussed here is a special case
	of what is known as penalized likelihood estimation. {T}he approach
	to the second problem is known as the support vector machine. {W}e
	also note some alternate but closely related approaches to the second
	problem. {T}hese approaches are all obtained as solutions to optimization
	problems in {RKHS}. {M}any other problems, in particular the solution
	of ill-posed inverse problems, can be obtained as solutions to optimization
	problems in {RKHS} and are mentioned in passing. {W}e caution the
	reader that although a large literature exists in all of these topics,
	in this inaugural article we are selectively highlighting work of
	the author, former students, and other collaborators.},
  doi = {10.1073/pnas.242574899},
  pdf = {../local/Wahba2002Soft.pdf},
  file = {Wahba2002Soft.pdf:local/Wahba2002Soft.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Automated, Base Pair Mismatch, Base Pairing,
	Base Sequence, Biological, Biosensing Techniques, Classification,
	Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted,
	Cystadenoma, DNA, Drug, Drug Design, Eukaryotic Cells, Female, Gene
	Expression, Gene Expression Profiling, Gene Expression Regulation,
	Genes, Genetic, Genetic Markers, Hemolysins, Humans, Leukemia, Ligands,
	Likelihood Functions, Lymphocytic, Markov Chains, Mathematics, Messenger,
	Models, Molecular, Molecular Probe Techniques, Molecular Sequence
	Data, Nanotechnology, Neoplasm, Neoplastic, Neural Networks (Computer),
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, Observer Variation,
	Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S.,
	Pattern Recognition, Probability, Protein Binding, Proteins, Quality
	Control, RNA, RNA Splicing, Receptors, Reference Values, Reproducibility
	of Results, Research Support, Sensitivity and Specificity, Sequence
	Analysis, Signal Processing, Statistical, Stomach Neoplasms, Thermodynamics,
	Transcription, Tumor Markers, U.S. Gov't},
  pii = {242574899},
  pmid = {12477931},
  url = {http://dx.doi.org/10.1073/pnas.242574899}
}

@article{Weber2002Building,
  author = {Griffin Weber and Staal Vinterbo and Lucila Ohno-Machado},
  title = {Building an asynchronous web-based tool for machine learning classification.},
  journal = {Proc {AMIA} {S}ymp},
  year = {2002},
  pages = {869--873},
  abstract = {Various unsupervised and supervised learning methods including support
	vector machines, classification trees, linear discriminant analysis
	and nearest neighbor classifiers have been used to classify high-throughput
	gene expression data. {S}impler and more widely accepted statistical
	tools have not yet been used for this purpose, hence proper comparisons
	between classification methods have not been conducted. {W}e developed
	free software that implements logistic regression with stepwise variable
	selection as a quick and simple method for initial exploration of
	important genetic markers in disease classification. {T}o implement
	the algorithm and allow our collaborators in remote locations to
	evaluate and compare its results against those of other methods,
	we developed a user-friendly asynchronous web-based application with
	a minimal amount of programming using free, downloadable software
	tools. {W}ith this program, we show that classification using logistic
	regression can perform as well as other more sophisticated algorithms,
	and it has the advantages of being easy to interpret and reproduce.
	{B}y making the tool freely and easily available, we hope to promote
	the comparison of classification methods. {I}n addition, we believe
	our web application can be used as a model for other bioinformatics
	laboratories that need to develop web-based analysis tools in a short
	amount of time and on a limited budget.},
  keywords = {Acute, Algorithms, Animals, Artificial Intelligence, Automated, Base
	Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing
	Techniques, Classification, Cluster Analysis, Comparative Study,
	Computational Biology, Computer-Assisted, Cystadenoma, DNA, Drug,
	Drug Design, Eukaryotic Cells, Female, Gene Expression, Gene Expression
	Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers,
	Hemolysins, Humans, Internet, Leukemia, Ligands, Likelihood Functions,
	Logistic Models, Lymphocytic, Markov Chains, Mathematics, Messenger,
	Models, Molecular, Molecular Probe Techniques, Molecular Sequence
	Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural Networks
	(Computer), Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation,
	Observer Variation, Oligonucleotide Array Sequence Analysis, Ovarian
	Neoplasms, P.H.S., Pattern Recognition, Probability, Protein Binding,
	Proteins, Quality Control, RNA, RNA Splicing, Receptors, Reference
	Values, Reproducibility of Results, Research Support, Sensitivity
	and Specificity, Sequence Analysis, Signal Processing, Software,
	Statistical, Stomach Neoplasms, Thermodynamics, Transcription, Tumor
	Markers, U.S. Gov't},
  pii = {D020001919},
  pmid = {12463949}
}

@article{Wilbur2000Boosting,
  author = {W. J. Wilbur},
  title = {Boosting naive {B}ayesian learning on a large subset of {MEDLINE}.},
  journal = {Proc {AMIA} {S}ymp},
  year = {2000},
  pages = {918--922},
  abstract = {We are concerned with the rating of new documents that appear in a
	large database ({MEDLINE}) and are candidates for inclusion in a
	small specialty database ({REBASE}). {T}he requirement is to rank
	the new documents as nearly in order of decreasing potential to be
	added to the smaller database as possible, so as to improve the coverage
	of the smaller database without increasing the effort of those who
	manage this specialty database. {T}o perform this ranking task we
	have considered several machine learning approaches based on the
	na{\"\i}ve {B}ayesian algorithm. {W}e find that adaptive boosting outperforms
	na{\"\i}ve {B}ayes, but that a new form of boosting which we term staged
	{B}ayesian retrieval outperforms adaptive boosting. {S}taged {B}ayesian
	retrieval involves two stages of {B}ayesian retrieval and we further
	find that if the second stage is replaced by a support vector machine
	we again obtain a significant improvement over the strictly {B}ayesian
	approach.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence,
	Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial
	Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding
	Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation,
	Chemistry, Child, Chromosome Aberrations, Classification, Codon,
	Colonic Neoplasms, Comparative Study, Computational Biology, Computer
	Simulation, Computer-Assisted, DNA, Data Interpretation, Databases,
	Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis,
	Discrimination Learning, Electric Conductivity, Electrophysiology,
	Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric
	Emptying, Gene Expression Profiling, Gene Expression Regulation,
	Genes, Genetic, Genetic Markers, Genetic Predisposition to Disease,
	Genomics, Hemolysins, Humans, Indians, Information Storage and Retrieval,
	Initiator, Ion Channels, Kinetics, Leukemia, Likelihood Functions,
	Lipid Bilayers, Logistic Models, Lymphocytic, MEDLINE, Male, Markov
	Chains, Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplasms,
	Neoplastic, Neural Networks (Computer), Neurological, Nevus, Non-P.H.S.,
	Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution, North American,
	Nucleic Acid Conformation, Oligonucleotide Array Sequence Analysis,
	Organ Specificity, Organelles, Ovarian Neoplasms, Ovary, P.H.S.,
	Pattern Recognition, Physical, Pigmented, Predictive Value of Tests,
	Promoter Regions (Genetics), Protein Biosynthesis, Protein Folding,
	Protein Structure, Proteins, Proteome, RNA, Reproducibility of Results,
	Research Support, Saccharomyces cerevisiae, Secondary, Sensitivity
	and Specificity, Sequence Alignment, Sequence Analysis, Sex Characteristics,
	Skin Diseases, Skin Neoplasms, Skin Pigmentation, Software, Sound
	Spectrography, Statistical, Stomach Diseases, T-Lymphocytes, Thermodynamics,
	Transcription, Transcription Factors, Tumor Markers, Type 2, U.S.
	Gov't, Vertebrates},
  pii = {D200250},
  pmid = {11080018}
}

@article{Xie2009Unified,
  author = {Xie, Lei and Xie, Li and Bourne, Philip E.},
  title = {A unified statistical model to support local sequence order independent
	similarity searching for ligand-binding sites and its application
	to genome-based drug discovery.},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  pages = {i305--i312},
  number = {12},
  month = jun,
  abstract = {Functional relationships between proteins that do not share global
	structure similarity can be established by detecting their ligand-binding-site
	similarity. For a large-scale comparison, it is critical to accurately
	and efficiently assess the statistical significance of this similarity.
	Here, we report an efficient statistical model that supports local
	sequence order independent ligand-binding-site similarity searching.
	Most existing statistical models only take into account the matching
	vertices between two sites that are defined by a fixed number of
	points. In reality, the boundary of the binding site is not known
	or is dependent on the bound ligand making these approaches limited.
	To address these shortcomings and to perform binding-site mapping
	on a genome-wide scale, we developed a sequence-order independent
	profile-profile alignment (SOIPPA) algorithm that is able to detect
	local similarity between unknown binding sites a priori. The SOIPPA
	scoring integrates geometric, evolutionary and physical information
	into a unified framework. However, this imposes a significant challenge
	in assessing the statistical significance of the similarity because
	the conventional probability model that is based on fixed-point matching
	cannot be applied. Here we find that scores for binding-site matching
	by SOIPPA follow an extreme value distribution (EVD). Benchmark studies
	show that the EVD model performs at least two-orders faster and is
	more accurate than the non-parametric statistical method in the previous
	SOIPPA version. Efficient statistical analysis makes it possible
	to apply SOIPPA to genome-based drug discovery. Consequently, we
	have applied the approach to the structural genome of Mycobacterium
	tuberculosis to construct a protein-ligand interaction network. The
	network reveals highly connected proteins, which represent suitable
	targets for promiscuous drugs.},
  doi = {10.1093/bioinformatics/btp220},
  institution = {San Diego Supercomputer Center, University of California, San Diego,
	La Jolla, CA 92093, USA. lxie@sdsc.edu},
  keywords = {Binding Sites; Computational Biology, methods; Drug Discovery, methods;
	Genome; Ligands; Models, Statistical; Mycobacterium tuberculosis,
	genetics/metabolism; Proteins, chemistry},
  language = {eng},
  medline-pst = {ppublish},
  owner = {bricehoffmann},
  pii = {btp220},
  pmid = {19478004},
  timestamp = {2009.07.27},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp220}
}

@article{Yan2007Determining,
  author = {Yan, Mingjin and Ye, Keying},
  title = {Determining the number of clusters using the weighted gap statistic.},
  journal = {Biometrics},
  year = {2007},
  volume = {63},
  pages = {1031--1037},
  number = {4},
  month = dec,
  abstract = {Estimating the number of clusters in a data set is a crucial step
	in cluster analysis. In this article, motivated by the gap method
	(Tibshirani, Walther, and Hastie, 2001, Journal of the Royal Statistical
	Society B63, 411-423), we propose the weighted gap and the difference
	of difference-weighted (DD-weighted) gap methods for estimating the
	number of clusters in data using the weighted within-clusters sum
	of errors: a measure of the within-clusters homogeneity. In addition,
	we propose a "multilayer" clustering approach, which is shown to
	be more accurate than the original gap method, particularly in detecting
	the nested cluster structure of the data. The methods are applicable
	when the input data contain continuous measurements and can be used
	with any clustering method. Simulation studies and real data are
	investigated and compared among these proposed methods as well as
	with the original gap method.},
  doi = {10.1111/j.1541-0420.2007.00784.x},
  institution = {Medtronic Sofamor Danek, 1800 Pyramid Place, Memphis, Tennessee 38132,
	USA. mingjin.yan@medtronic.com},
  keywords = {Algorithms; Biometry, methods; Cluster Analysis; Computer Simulation;
	Data Interpretation, Statistical; Models, Biological; Models, Statistical;
	Pattern Recognition, Automated, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {BIOM784},
  pmid = {17425640},
  timestamp = {2011.12.29},
  url = {http://dx.doi.org/10.1111/j.1541-0420.2007.00784.x}
}

@article{Yu2004Advances,
  author = {Yu, J. and Smith, V. A. and Wang, P. P. and Hartemink, A. J. and Jarvis,
	E. D.},
  title = {Advances to {Bayesian} network inference for generating causal networks
	from observational biological data.},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  pages = {3594--3603},
  number = {18},
  month = dec,
  abstract = {MOTIVATION: Network inference algorithms are powerful computational
	tools for identifying putative causal interactions among variables
	from observational data. Bayesian network inference algorithms hold
	particular promise in that they can capture linear, non-linear, combinatorial,
	stochastic and other types of relationships among variables across
	multiple levels of biological organization. However, challenges remain
	when applying these algorithms to limited quantities of experimental
	data collected from biological systems. Here, we use a simulation
	approach to make advances in our dynamic Bayesian network (DBN) inference
	algorithm, especially in the context of limited quantities of biological
	data. RESULTS: We test a range of scoring metrics and search heuristics
	to find an effective algorithm configuration for evaluating our methodological
	advances. We also identify sampling intervals and levels of data
	discretization that allow the best recovery of the simulated networks.
	We develop a novel influence score for DBNs that attempts to estimate
	both the sign (activation or repression) and relative magnitude of
	interactions among variables. When faced with limited quantities
	of observational data, combining our influence score with moderate
	data interpolation reduces a significant portion of false positive
	interactions in the recovered networks. Together, our advances allow
	DBN inference algorithms to be more effective in recovering biological
	networks from experimentally collected data. AVAILABILITY: Source
	code and simulated data are available upon request. SUPPLEMENTARY
	INFORMATION: http://www.jarvislab.net/Bioinformatics/BNAdvances/},
  doi = {10.1093/bioinformatics/bth448},
  keywords = {Algorithms; Bayes Theorem; Computer Simulation; Gene Expression Profiling;
	Gene Expression Regulation; Models, Genetic; Models, Statistical;
	Oligonucleotide Array Sequence Analysis; Signal Transduction; Software},
  owner = {fantine},
  pii = {bth448},
  pmid = {15284094},
  timestamp = {2010.10.21},
  url = {http://dx.doi.org/10.1093/bioinformatics/bth448}
}

@article{Zhu2003Introduction,
  author = {Lingyun Zhu and Baoming Wu and Changxiu Cao},
  title = {Introduction to medical data mining},
  journal = {Sheng {W}u {Y}i {X}ue {G}ong {C}heng {X}ue {Z}a {Z}hi},
  year = {2003},
  volume = {20},
  pages = {559--562},
  number = {3},
  month = sep,
  abstract = {Modern medicine generates a great deal of information stored in the
	medical database. {E}xtracting useful knowledge and providing scientific
	decision-making for the diagnosis and treatment of disease from the
	database increasingly becomes necessary. {D}ata mining in medicine
	can deal with this problem. {I}t can also improve the management
	level of hospital information and promote the development of telemedicine
	and community medicine. {B}ecause the medical information is characteristic
	of redundancy, multi-attribution, incompletion and closely related
	with time, medical data mining differs from other one. {I}n this
	paper we have discussed the key techniques of medical data mining
	involving pretreatment of medical data, fusion of different pattern
	and resource, fast and robust mining algorithms and reliability of
	mining results. {T}he methods and applications of medical data mining
	based on computation intelligence such as artificial neural network,
	fuzzy system, evolutionary algorithms, rough set, and support vector
	machine have been introduced. {T}he features and problems in data
	mining are summarized in the last section.},
  keywords = {Algorithms, Anion Exchange Resins, Automatic Data Processing, Chemical,
	Chromatography, Computational Biology, Computer-Assisted, Data Interpretation,
	Databases, Decision Making, Decision Trees, English Abstract, Factual,
	Fuzzy Logic, Humans, Indicators and Reagents, Information Storage
	and Retrieval, Ion Exchange, Models, Neural Networks (Computer),
	Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, P.H.S., Proteins,
	Quantitative Structure-Activity Relationship, RNA, ROC Curve, Research
	Support, Sequence Analysis, Statistical, Transfer, U.S. Gov't},
  pmid = {14565039}
}

@article{Zhu1998FRAME:,
  author = {Zhu, S. C. and Wu, Y. N. and Mumford, D.},
  title = {Filters, Random Fields and Maximum Entropy ({FRAME}): Towards a
	Unified Theory for Texture Modeling},
  journal = {Int. J. Comput. Vision},
  year = {1998},
  volume = {27},
  pages = {107--126},
  number = {2},
  pdf = {../local/zhu98.pdf},
  file = {zhu98.pdf:local/zhu98.pdf:PDF},
  subject = {stat},
  url = {http://www.cis.ohio-state.edu/~szhu/frame_ijcv.ps.gz}
}

@article{Zhu1997Minimax,
  author = {Zhu, S. C. and Wu, Y. N. and Mumford, D.},
  title = {Minimax {E}ntropy {P}rinciple and {I}ts {A}pplication to {T}exture
	{M}odeling},
  journal = {Neural {C}omput.},
  year = {1997},
  volume = {9},
  pages = {1627--1660},
  number = {8},
  pdf = {../local/zhu97.pdf},
  file = {zhu97.pdf:local/zhu97.pdf:PDF},
  subject = {stat},
  url = {http://www.cis.ohio-state.edu/~szhu/frame_neuro.ps.gz}
}

@comment{{jabref-meta: selector_author:}}

@comment{{jabref-meta: selector_journal:Adv. Drug Deliv. Rev.;Am. J. Hu
m. Genet.;Am. J. Pathol.;Ann. Appl. Stat.;Ann. Math. Statist.;Ann. N. 
Y. Acad. Sci.;Ann. Probab.;Ann. Stat.;Artif. Intell. Med.;Bernoulli;Bi
ochim. Biophys. Acta;Bioinformatics;Biometrika;BMC Bioinformatics;Br. 
J. Pharmacol.;Breast Cancer Res.;Cell;Cell. Signal.;Chem. Res. Toxicol
.;Clin. Cancer Res.;Combinator. Probab. Comput.;Comm. Pure Appl. Math.
;Comput. Chem.;Comput. Comm. Rev.;Comput. Stat. Data An.;Curr. Genom.;
Curr. Opin. Chem. Biol.;Curr. Opin. Drug Discov. Devel.;Data Min. Know
l. Discov.;Electron. J. Statist.;Eur. J. Hum. Genet.;FEBS Lett.;Found.
 Comput. Math.;Genome Biol.;IEEE T. Neural Networ.;IEEE T. Pattern. An
al.;IEEE T. Signal. Proces.;IEEE Trans. Inform. Theory;IEEE Trans. Kno
wl. Data Eng.;IEEE/ACM Trans. Comput. Biol. Bioinf.;Int. J. Comput. Vi
sion;Int. J. Data Min. Bioinform.;Int. J. Qantum Chem.;J Biol Syst;J. 
ACM;J. Am. Soc. Inf. Sci. Technol.;J. Am. Stat. Assoc.;J. Bioinform. C
omput. Biol.;J. Biol. Chem.;J. Biomed. Inform.;J. Cell. Biochem.;J. Ch
em. Inf. Comput. Sci.;J. Chem. Inf. Model.;J. Clin. Oncol.;J. Comput. 
Biol.;J. Comput. Graph. Stat.;J. Eur. Math. Soc.;J. Intell. Inform. Sy
st.;J. Mach. Learn. Res.;J. Med. Chem.;J. Mol. BIol.;J. R. Stat. Soc. 
Ser. B;Journal of Statistical Planning and Inference;Mach. Learn.;Math
. Program.;Meth. Enzymol.;Mol. Biol. Cell;Mol. Biol. Evol.;Mol. Cell. 
Biol.;Mol. Syst. Biol.;N. Engl. J. Med.;Nat. Biotechnol.;Nat. Genet.;N
at. Med.;Nat. Methods;Nat. Rev. Cancer;Nat. Rev. Drug Discov.;Nat. Rev
. Genet.;Nature;Neural Comput.;Neural Network.;Neurocomputing;Nucleic 
Acids Res.;Pattern Anal. Appl.;Pattern Recognit.;Phys. Rev. E;Phys. Re
v. Lett.;PLoS Biology;PLoS Comput. Biol.;Probab. Theory Relat. Fields;
Proc. IEEE;Proc. Natl. Acad. Sci. USA;Protein Eng.;Protein Eng. Des. S
el.;Protein Sci.;Protein. Struct. Funct. Genet.;Random Struct. Algorit
hm.;Rev. Mod. Phys.;Science;Stat. Probab. Lett.;Statistica Sinica;Theo
r. Comput. Sci.;Trans. Am. Math. Soc.;Trends Genet.;}}

@comment{{jabref-meta: selector_keywords:biogm;biosvm;breastcancer;cgh;
chemogenomics;chemoinformatics;csbcbook;csbcbook-ch1;csbcbook-ch2;csbc
book-ch3;csbcbook-ch4;csbcbook-ch5;csbcbook-ch6;csbcbook-ch7;csbcbook-
ch8;csbcbook-ch9;csbcbook-mustread;dimred;featureselection;glycans;her
g;hic;highcontentscreening;image;immunoinformatics;kernel-theory;kerne
lbook;lasso;microarray;ngs;nlp;plasmodium;proteomics;PUlearning;rnaseq
;segmentation;sirna;}}

@comment{{jabref-meta: selector_booktitle:Adv. Neural. Inform. Process 
Syst.;}}

This file was generated by bibtex2html 1.97.