@comment{{This file has been generated by bib2bib 1.97}}
@comment{{Command line: bib2bib ../bibli.bib -c 'subject:"stat" or keywords:"stat"' -ob tmp.bib}}
@article{Tayrac2009Simultaneous,
  author      = {{de Tayrac}, M. and L{\^e}, S. and Aubry, M. and Mosser, J. and Husson, F.},
  title       = {Simultaneous analysis of distinct {Omics} data sets with integration of biological knowledge: {Multiple Factor Analysis} approach.},
  journal     = {BMC Genomics},
  year        = {2009},
  volume      = {10},
  pages       = {32},
  abstract    = {Genomic analysis will greatly benefit from considering in a global way various sources of molecular data with the related biological knowledge. It is thus of great importance to provide useful integrative approaches dedicated to ease the interpretation of microarray data. Here, we introduce a data-mining approach, Multiple Factor Analysis (MFA), to combine multiple data sets and to add formalized knowledge. MFA is used to jointly analyse the structure emerging from genomic and transcriptomic data sets. The common structures are underlined and graphical outputs are provided such that biological meaning becomes easily retrievable. Gene Ontology terms are used to build gene modules that are superimposed on the experimentally interpreted plots. Functional interpretations are then supported by a step-by-step sequence of graphical representations. When applied to genomic and transcriptomic data and associated Gene Ontology annotations, our method prioritize the biological processes linked to the experimental settings. Furthermore, it reduces the time and effort to analyze large amounts of 'Omics' data.},
  doi         = {10.1186/1471-2164-10-32},
  institution = {CNRS UMR 6061, Universit{\'e} de Rennes 1, IFR 140, Facult{\'e} de M{\'e}decine, CS 34317, 35043 Rennes, France. marie.de-tayrac@univ-rennes1.fr},
  keywords    = {Animals; Comparative Genomic Hybridization; Factor Analysis, Statistical; Gene Expression Profiling, methods; Genomics, methods; Glioma, genetics; Humans; Mice; Models, Biological; Oligonucleotide Array Sequence Analysis, methods},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {jp},
  pii         = {1471-2164-10-32},
  pmid        = {19154582},
  timestamp   = {2012.02.29},
  url         = {http://dx.doi.org/10.1186/1471-2164-10-32}
}
@article{Amari2001Information,
  author  = {Amari, S.-I.},
  title   = {Information geometry on hierarchy of probability distributions},
  journal = {{IEEE} Trans. Inform. Theory},
  year    = {2001},
  volume  = {47},
  pages   = {1701--1711},
  number  = {5},
  month   = jul,
  pdf     = {../local/amar01.pdf},
  file    = {amar01.pdf:local/amar01.pdf:PDF},
  subject = {stat},
  url     = {http://www.islab.brain.riken.go.jp/~amari/pub/IGHI.ps.gz}
}
@article{Bagga2005Quantitative,
  author   = {Bagga, Harmohina and Greenfield, David S. and Feuer, William J.},
  title    = {Quantitative assessment of atypical birefringence images using scanning laser polarimetry with variable corneal compensation.},
  journal  = {Am J Ophthalmol},
  year     = {2005},
  volume   = {139},
  pages    = {437--446},
  number   = {3},
  month    = mar,
  abstract = {P{URPOSE}: {T}o define the clinical characteristics of atypical birefringence images and to describe a quantitative method for their identification. {DESIGN}: {P}rospective, comparative, clinical observational study. {METHODS}: {N}ormal and glaucomatous eyes underwent complete examination, standard automated perimetry, scanning laser polarimetry with variable corneal compensation ({GD}x-{VCC}), and optical coherence tomography ({OCT}) of the macula, peripapillary retinal nerve fiber layer ({RNFL}), and optic disk. {E}yes were classified into two groups: normal birefringence pattern ({NBP}) and atypical birefringence pattern ({ABP}). {C}linical, functional, and structural characteristics were assessed separately. {A} multiple logistic regression model was used to predict eyes with {ABP} on the basis of a quantitative scan score generated by a support vector machine ({SVM}) with {GD}x-{VCC}. {RESULTS}: {S}ixty-five eyes of 65 patients were enrolled. {ABP} images were observed in 5 of 20 (25\%) normal eyes and 23 of 45 (51\%) glaucomatous eyes. {C}ompared with eyes with {NBP}, glaucomatous eyes with {ABP} demonstrated significantly lower {SVM} scores ({P} < .0001, < 0.0001, 0.008, 0.03, and 0.03, respectively) and greater temporal, mean, inferior, and nasal {RNFL} thickness using {GD}x-{VCC}; and a weaker correlation with {OCT} generated {RNFL} thickness ({R}(2) = .75 vs .27). {ABP} images were significantly correlated with older age ({R}(2) = .16, {P} = .001). {T}he {SVM} score was the only significant ({P} < .0001) predictor of {ABP} images and provided high discriminating power between eyes with {NBP} and {ABP} (area under the receiver operator characteristic curve = 0.98). {CONCLUSIONS}: {ABP} images exist in a subset of normal and glaucomatous eyes, are associated with older patient age, and produce an artifactual increase in {RNFL} thickness using {GD}x-{VCC}. {T}he {SVM} score is highly predictive of {ABP} images.},
  doi      = {10.1016/j.ajo.2004.10.019},
  pdf      = {../local/Bagga2005Quantitative.pdf},
  file     = {Bagga2005Quantitative.pdf:local/Bagga2005Quantitative.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic, Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis, Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male, Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence, P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies, Protein, ROC Curve, Regression Analysis, Research Support, Retinal Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics, Tomography, U.S. Gov't, Visual Fields, beta-Lactamases},
  pii      = {S0002-9394(04)01265-6},
  pmid     = {15767051},
  url      = {http://dx.doi.org/10.1016/j.ajo.2004.10.019}
}
@article{Bagirov2003New,
  author   = {Bagirov, A. M. and Ferguson, B. and Ivkovic, S. and Saunders, G. and Yearwood, J.},
  title    = {New algorithms for multi-class cancer diagnosis using tumor gene expression signatures.},
  journal  = {Bioinformatics},
  year     = {2003},
  volume   = {19},
  pages    = {1800--1807},
  number   = {14},
  month    = sep,
  abstract = {M{OTIVATION}: {T}he increasing use of {DNA} microarray-based tumor gene expression profiles for cancer diagnosis requires mathematical methods with high accuracy for solving clustering, feature selection and classification problems of gene expression data. {RESULTS}: {N}ew algorithms are developed for solving clustering, feature selection and classification problems of gene expression data. {T}he clustering algorithm is based on optimization techniques and allows the calculation of clusters step-by-step. {T}his approach allows us to find as many clusters as a data set contains with respect to some tolerance. {F}eature selection is crucial for a gene expression database. {O}ur feature selection algorithm is based on calculating overlaps of different genes. {T}he database used, contains over 16 000 genes and this number is considerably reduced by feature selection. {W}e propose a classification algorithm where each tissue sample is considered as the center of a cluster which is a ball. {T}he results of numerical experiments confirm that the classification algorithm in combination with the feature selection algorithm perform slightly better than the published results for multi-class classifiers based on support vector machines for this data set. {AVAILABILITY}: {A}vailable on request from the authors.},
  pdf      = {../local/Bagirov2003New.pdf},
  file     = {Bagirov2003New.pdf:local/Bagirov2003New.pdf:PDF},
  keywords = {Algorithms, Amino Acid Sequence, Anion Exchange Resins, Antigen-Antibody Complex, Artificial Intelligence, Automated, Automatic Data Processing, Biological, Blood Cells, Chemical, Chromatography, Cluster Analysis, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, DNA, Data Interpretation, Databases, Decision Making, Decision Trees, Diffusion Magnetic Resonance Imaging, English Abstract, Epitopes, Expert Systems, Factual, Fuzzy Logic, Gene Expression Profiling, Gene Expression Regulation, Gene Targeting, Genetic, Genome, Histocompatibility Antigens Class I, Humans, Image Interpretation, Image Processing, In Vitro, Indicators and Reagents, Information Storage and Retrieval, Ion Exchange, Least-Squares Analysis, Liver Cirrhosis, Magnetic Resonance Imaging, Male, Models, Molecular Sequence Data, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Nucleic Acid Conformation, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern Recognition, Prostatic Neoplasms, Protein, Protein Binding, Protein Interaction Mapping, Proteins, Proteome, Quantitative Structure-Activity Relationship, RNA, ROC Curve, Reproducibility of Results, Research Support, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Severity of Illness Index, Statistical, Structure-Activity Relationship, Subtraction Technique, T-Lymphocyte, Transcription Factors, Transfer, Treatment Outcome, Tumor Markers, U.S. Gov't, User-Computer Interface},
  pmid     = {14512351},
  url      = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/19/14/1800}
}
@article{Bernardo2005Chemogenomica,
  author      = {di Bernardo, D. and Thompson, M. J. and Gardner, T. S. and Chobot, S. E. and Eastwood, E. L. and Wojtovich, A. P. and Elliott, S. J. and Schaus, S. E. and Collins, J. J.},
  title       = {Chemogenomic profiling on a genome-wide scale using reverse-engineered gene networks.},
  journal     = {Nat Biotechnol},
  year        = {2005},
  volume      = {23},
  pages       = {377--383},
  number      = {3},
  month       = mar,
  abstract    = {A major challenge in drug discovery is to distinguish the molecular targets of a bioactive compound from the hundreds to thousands of additional gene products that respond indirectly to changes in the activity of the targets. Here, we present an integrated computational-experimental approach for computing the likelihood that gene products and associated pathways are targets of a compound. This is achieved by filtering the mRNA expression profile of compound-exposed cells using a reverse-engineered model of the cell's gene regulatory network. We apply the method to a set of 515 whole-genome yeast expression profiles resulting from a variety of treatments (compounds, knockouts and induced expression), and correctly enrich for the known targets and associated pathways in the majority of compounds examined. We demonstrate our approach with PTSB, a growth inhibitory compound with a previously unknown mode of action, by predicting and validating thioredoxin and thioredoxin reductase as its target.},
  doi         = {10.1038/nbt1075},
  institution = {Telethon Institute for Genetics and Medicine, Naples, Italy.},
  keywords    = {Algorithms; Artificial Intelligence; Computer Simulation; Drug Delivery Systems; Drug Design; Gene Expression Profiling; Gene Expression Regulation; Models, Biological; Models, Statistical; Protein Engineering; Protein Interaction Mapping; Saccharomyces cerevisiae; Saccharomyces cerevisiae Proteins; Signal Transduction; Thioredoxin-Disulfide Reductase; Thioredoxins},
  owner       = {fantine},
  pii         = {nbt1075},
  pmid        = {15765094},
  timestamp   = {2010.10.21},
  url         = {http://dx.doi.org/10.1038/nbt1075}
}
@article{Bernstein1977Protein,
  author    = {Bernstein, F. C. and Koetzle, T. F. and Williams, G. J. and Meyer, E. F. and Brice, M. D. and Rodgers, J. R. and Kennard, O. and Shimanouchi, T. and Tasumi, M.},
  title     = {The {Protein Data Bank}: a computer-based archival file for macromolecular structures.},
  journal   = {J. Mol. Biol.},
  year      = {1977},
  volume    = {112},
  pages     = {535--542},
  number    = {3},
  month     = may,
  keywords  = {Computers; Great Britain; Information Systems; Japan; Protein Conformation; Proteins; United States},
  owner     = {bricehoffmann},
  pmid      = {875032},
  timestamp = {2009.02.13}
}
@article{Bhavani2006Substructure-based,
  author    = {Bhavani, S. and Nagargadde, A. and Thawani, A. and Sridhar, V. and Chandra, N.},
  title     = {Substructure-based support vector machine classifiers for prediction of adverse effects in diverse classes of drugs.},
  journal   = {J. Chem. Inform. Model.},
  year      = {2006},
  volume    = {46},
  pages     = {2478--2486},
  number    = {6},
  abstract  = {Unforeseen adverse effects exhibited by drugs contribute heavily to late-phase failure and even withdrawal of marketed drugs. Torsade de pointes (TdP) is one such important adverse effect, which causes cardiac arrhythmia and, in some cases, sudden death, making it crucial for potential drugs to be screened for torsadogenicity. The need to tap the power of computational approaches for the prediction of adverse effects such as TdP is increasingly becoming evident. The availability of screening data including those in organized databases greatly facilitates exploration of newer computational approaches. In this paper, we report the development of a prediction method based on a support machine vector algorithm. The method uses a combination of descriptors, encoding both the type of toxicophore as well as the position of the toxicophore in the drug molecule, thus considering both the pharmacophore and the three-dimensional shape information of the molecule. For delineating toxicophores, a novel pattern-recognition method that utilizes substructures within a molecule has been developed. The results obtained using the hybrid approach have been compared with those available in the literature for the same data set. An improvement in prediction accuracy is clearly seen, with the accuracy reaching up to 97\% in predicting compounds that can cause TdP and 90\% for predicting compounds that do not cause TdP. The generic nature of the method has been demonstrated with four data sets available for carcinogenicity, where prediction accuracies were significantly higher, with a best receiver operating characteristics (ROC) value of 0.81 as against a best ROC value of 0.7 reported in the literature for the same data set. Thus, the method holds promise for wide applicability in toxicity prediction.},
  doi       = {10.1021/ci060128l},
  keywords  = {Algorithms; Carcinogens; Chemistry, Pharmaceutical; Computational Biology; Drug Evaluation, Preclinical; Drug Industry; Humans; Models, Chemical; Models, Statistical; Neural Networks (Computer); Pattern Recognition, Automated; ROC Curve; Sequence Analysis, Protein; Software; Torsades de Pointes},
  owner     = {laurent},
  pmid      = {17125188},
  timestamp = {2007.09.18},
  url       = {http://dx.doi.org/10.1021/ci060128l}
}
@article{Boucheron2000sharp,
  author  = {Boucheron, S. and Lugosi, G. and Massart, P.},
  title   = {A sharp concentration inequality with applications},
  journal = {Random Structures and Algorithms},
  year    = {2000},
  volume  = {16},
  pages   = {277--292},
  pdf     = {../local/bouc00.pdf},
  file    = {bouc00.pdf:local/bouc00.pdf:PDF},
  subject = {stat},
  url     = {http://www.econ.upf.es/~lugosi/concentration.ps}
}
@article{Bowd2002Comparing,
  author   = {Bowd, Christopher and Chan, Kwokleung and Zangwill, Linda M. and Goldbaum, Michael H. and Lee, Te-Won and Sejnowski, Terrence J. and Weinreb, Robert N.},
  title    = {Comparing neural networks and linear discriminant functions for glaucoma detection using confocal scanning laser ophthalmoscopy of the optic disc.},
  journal  = {Invest Ophthalmol Vis Sci},
  year     = {2002},
  volume   = {43},
  pages    = {3444--3454},
  number   = {11},
  month    = nov,
  abstract = {P{URPOSE}: {T}o determine whether neural network techniques can improve differentiation between glaucomatous and nonglaucomatous eyes, using the optic disc topography parameters of the {H}eidelberg {R}etina {T}omograph ({HRT}; {H}eidelberg {E}ngineering, {H}eidelberg, {G}ermany). {METHODS}: {W}ith the {HRT}, one eye was imaged from each of 108 patients with glaucoma (defined as having repeatable visual field defects with standard automated perimetry) and 189 subjects without glaucoma (no visual field defects with healthy-appearing optic disc and retinal nerve fiber layer on clinical examination) and the optic nerve topography was defined by 17 global and 66 regional {HRT} parameters. {W}ith all the {HRT} parameters used as input, receiver operating characteristic ({ROC}) curves were generated for the classification of eyes, by three neural network techniques: linear and {G}aussian support vector machines ({SVM} linear and {SVM} {G}aussian, respectively) and a multilayer perceptron ({MLP}), as well as four previously proposed linear discriminant functions ({LDF}s) and one {LDF} developed on the current data with all {HRT} parameters used as input. {RESULTS}: {T}he areas under the {ROC} curves for {SVM} linear and {SVM} {G}aussian were 0.938 and 0.945, respectively; for {MLP}, 0.941; for the current {LDF}, 0.906; and for the best previously proposed {LDF}, 0.890. {W}ith the use of forward selection and backward elimination optimization techniques, the areas under the {ROC} curves for {SVM} {G}aussian and the current {LDF} were increased to approximately 0.96. {CONCLUSIONS}: {T}rained neural networks, with global and regional {HRT} parameters used as input, improve on previously proposed {HRT} parameter-based {LDF}s for discriminating between glaucomatous and nonglaucomatous eyes. {T}he performance of both neural networks and {LDF}s can be improved with optimization of the features in the input. {N}eural network analyses show promise for increasing diagnostic accuracy of tests for glaucoma.},
  pdf      = {../local/Bowd2002Comparing.pdf},
  file     = {Bowd2002Comparing.pdf:local/Bowd2002Comparing.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric Acid Cycle, Classification, Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases, Decision Making, Diagnosis, Differential, Discriminant Analysis, Drug, Drug Design, Electrostatics, Eukaryotic Cells, Factual, Feasibility Studies, Female, Gene Expression, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Heterogeneity, Genetic Markers, Glaucoma, Hemolysins, Humans, Internet, Intraocular Pressure, Ion Exchange, Lasers, Leukemia, Ligands, Likelihood Functions, Logistic Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't, Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation, Oligonucleotide Array Sequence Analysis, Open-Angle, Ophthalmoscopy, Optic Disk, Ovarian Neoplasms, P.H.S., Pattern Recognition, Probability, Probability Learning, Protein Binding, Protein Conformation, Proteins, Quality Control, Quantum Theory, RNA, RNA Splicing, ROC Curve, Receptors, Reference Values, Regression Analysis, Reproducibility of Results, Research Support, Robotics, Saccharomyces cerevisiae Proteins, Sensitivity and Specificity, Sequence Analysis, Signal Processing, Software, Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship, Thermodynamics, Transcription, Tumor Markers, U.S. Gov't},
  pmid     = {12407155},
  url      = {http://www.iovs.org/cgi/content/abstract/43/11/3444}
}
@article{Bowd2004Confocal,
  author   = {Bowd, Christopher and Zangwill, Linda M. and Medeiros, Felipe A. and Hao, Jiucang and Chan, Kwokleung and Lee, Te-Won and Sejnowski, Terrence J. and Goldbaum, Michael H. and Sample, Pamela A. and Crowston, Jonathan G. and Weinreb, Robert N.},
  title    = {Confocal scanning laser ophthalmoscopy classifiers and stereophotograph evaluation for prediction of visual field abnormalities in glaucoma-suspect eyes.},
  journal  = {Invest Ophthalmol Vis Sci},
  year     = {2004},
  volume   = {45},
  pages    = {2255--2262},
  number   = {7},
  month    = jul,
  abstract = {P{URPOSE}: {T}o determine whether {H}eidelberg {R}etina {T}omograph ({HRT}; {H}eidelberg {E}ngineering, {D}ossenheim, {G}ermany) classification techniques and investigational support vector machine ({SVM}) analyses can detect optic disc abnormalities in glaucoma-suspect eyes before the development of visual field abnormalities. {METHODS}: {G}laucoma-suspect eyes (n = 226) were classified as converts or nonconverts based on the development of repeatable (either two or three consecutive) standard automated perimetry ({SAP})-detected abnormalities over the course of the study (mean follow-up, approximately 4.5 years). {H}azard ratios for development of {SAP} abnormalities were calculated based on baseline classification results, follow-up time, and end point status (convert, nonconvert). {C}lassification techniques applied were {HRT} classification ({HRTC}), {M}oorfields {R}egression {A}nalysis, forward-selection optimized {SVM} ({SVM} fwd) and backward elimination-optimized {SVM} ({SVM} back) analysis of {HRT} data, and stereophotograph assessment. {RESULTS}: {U}nivariate analyses indicated that all classification techniques were predictors of the development of two repeatable abnormal {SAP} results, with hazards ratios (95\% confidence interval [{CI}]) ranging from 1.32 (1.00-1.75) for {HRTC} to 2.0 (1.48-2.76) for stereophotograph assessment (all {P} < or = 0.05). {O}nly {SVM} ({SVM} fwd and {SVM} back) analysis of {HRT} data and stereophotograph assessment were univariate predictors of the development of three repeatable abnormal {SAP} results, with hazard ratios (95\% {CI}) ranging from 1.73 (1.16-2.82) for {SVM} fwd to 1.82 (1.19-3.12) for {SVM} back (both {P} < 0.007). {M}ultivariate analyses including each classification technique individually in a model with age, baseline {SAP} pattern standard deviation [{PSD}], and baseline {IOP} indicated that all classification techniques except {HRTC} ({P} = 0.06) were predictors of the development of two repeatable abnormal {SAP} results with hazards ratios ranging from 1.30 (0.99, 1.73) for {HRTC} to 1.90 (1.37, 2.69) for stereophotograph assessment. {O}nly {SVM} ({SVM} fwd and {SVM} back) analysis of {HRT} data and stereophotograph assessment were significant predictors of the development of three repeatable abnormal {SAP} results in multivariate analyses; hazard ratios of 1.57 (1.03, 2.59) and 1.70 (1.18, 2.51), respectively. {SAP} {PSD} was a significant predictor of two repeatable abnormal {SAP} results in multivariate models with all classification techniques, with hazard ratios ranging from 3.31 (1.39, 7.89) to 4.70 (2.02, 10.93) per 1-d{B} increase. {CONCLUSIONS}: {HRT} classifications techniques and stereophotograph assessment can detect optic disc topography abnormalities in glaucoma-suspect eyes before the development of {SAP} abnormalities. {T}hese data support strongly the importance of optic disc examination for early glaucoma diagnosis.},
  doi      = {10.1167/iovs.03-1087},
  pdf      = {../local/Bowd2004Confocal.pdf},
  file     = {Bowd2004Confocal.pdf:local/Bowd2004Confocal.pdf:PDF},
  keywords = {80 and over, Adolescent, Adult, Aged, Algorithms, Artificial Intelligence, Auditory, Benchmarking, Binding Sites, Brain Stem, Breast Diseases, Chemical, Child, Chromosomes, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, Data Interpretation, Databases, Diagnosis, Diagnostic Errors, Differential, Drug Resistance, Electroencephalography, Epilepsy, Evoked Potentials, Female, Forecasting, Gene Expression, Gene Expression Profiling, Genetic, Genotype, Glaucoma, Greece, HIV Protease Inhibitors, HIV-1, Human, Humans, Infant, Information Management, Information Storage and Retrieval, Intraocular Pressure, Kinetics, Language Development Disorders, Lasers, Least-Squares Analysis, Linear Models, Male, Microbial Sensitivity Tests, Middle Aged, Models, Molecular, Monitoring, Nephroblastoma, Non-U.S. Gov't, Nonlinear Dynamics, Ocular Hypertension, Oligonucleotide Array Sequence Analysis, Ophthalmoscopy, Optic Disk, Optic Nerve Diseases, P.H.S., Pair 1, Perimetry, Periodicals, Phosphorylation, Phosphotransferases, Photography, Physiologic, Point Mutation, Preschool, Prognosis, Protein, Proteins, Pyrimidinones, Reaction Time, Recurrence, Reproducibility of Results, Research Support, Reverse Transcriptase Inhibitors, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Signal Processing, Software, Sound Localization, Statistical, Stochastic Processes, Structure-Activity Relationship, Theoretical, Time Factors, U.S. Gov't, Viral, Vision Disorders, Visual Fields},
  pmid     = {15223803},
  url      = {http://dx.doi.org/10.1167/iovs.03-1087}
}
@article{Bui2006Structural,
  author    = {Bui, H.-H. and Schiewe, A. J. and von Grafenstein, H. and Haworth, I. S.},
  title     = {Structural prediction of peptides binding to {MHC} class {I} molecules.},
  journal   = {Proteins},
  year      = {2006},
  volume    = {63},
  pages     = {43--52},
  number    = {1},
  month     = apr,
  abstract  = {Peptide binding to class I major histocompatibility complex (MHCI) molecules is a key step in the immune response and the structural details of this interaction are of importance in the design of peptide vaccines. Algorithms based on primary sequence have had success in predicting potential antigenic peptides for MHCI, but such algorithms have limited accuracy and provide no structural information. Here, we present an algorithm, PePSSI (peptide-MHC prediction of structure through solvated interfaces), for the prediction of peptide structure when bound to the MHCI molecule, HLA-A2. The algorithm combines sampling of peptide backbone conformations and flexible movement of MHC side chains and is unique among other prediction algorithms in its incorporation of explicit water molecules at the peptide-MHC interface. In an initial test of the algorithm, PePSSI was used to predict the conformation of eight peptides bound to HLA-A2, for which X-ray data are available. Comparison of the predicted and X-ray conformations of these peptides gave RMSD values between 1.301 and 2.475 A. Binding conformations of 266 peptides with known binding affinities for HLA-A2 were then predicted using PePSSI. Structural analyses of these peptide-HLA-A2 conformations showed that peptide binding affinity is positively correlated with the number of peptide-MHC contacts and negatively correlated with the number of interfacial water molecules. These results are consistent with the relatively hydrophobic binding nature of the HLA-A2 peptide binding interface. In summary, PePSSI is capable of rapid and accurate prediction of peptide-MHC binding conformations, which may in turn allow estimation of MHCI-peptide binding affinity.},
  doi       = {10.1002/prot.20870},
  keywords  = {Algorithms, Amino Acid Sequence, Antigens, Artificial Intelligence, Automated, Binding Sites, Chemical, Computational Biology, Computer Simulation, Crystallography, Electrostatics, Genes, Genetic, HLA Antigens, Histocompatibility Antigens Class I, Humans, Hydrogen Bonding, Ligands, MHC Class I, Major Histocompatibility Complex, Models, Molecular, Molecular Conformation, Molecular Sequence Data, Pattern Recognition, Peptides, Protein, Protein Binding, Protein Conformation, Proteomics, Quantitative Structure-Activity Relationship, Sequence Alignment, Sequence Analysis, Software, Structural Homology, Structure-Activity Relationship, Thermodynamics, Water, X-Ray, X-Rays},
  pmid      = {16447245},
  timestamp = {2007.01.25},
  url       = {http://dx.doi.org/10.1002/prot.20870}
}
@inproceedings{Catoni2002Data,
  author    = {Catoni, O.},
  title     = {Data Compression and Adaptive Histograms},
  booktitle = {Foundations of Computational Mathematics, Proceedings of {Smalefest} 2000},
  year      = {2002},
  editor    = {Cucker, Felipe and Rojas, J. Maurice},
  publisher = {World Scientific},
  pdf       = {../local/cato02.pdf},
  file      = {cato02.pdf:local/cato02.pdf:PDF},
  subject   = {stat},
  url       = {http://www.proba.jussieu.fr/users/catoni/gibbsHist_doc/}
}
@unpublished{CatoniGibbs,
  author  = {Catoni, O.},
  title   = {Gibbs estimators},
  note    = {Revised version},
  pdf     = {../local/cato02.ps},
  file    = {cato02.ps:local/cato02.ps:PostScript},
  subject = {stat},
  url     = {http://www.proba.jussieu.fr/users/catoni/homepage/gibbs5.dvi}
}
@article{Chan2003Detection,
  author   = {Chan, Ian and Wells, William and Mulkern, Robert V. and Haker, Steven and Zhang, Jianqing and Zou, Kelly H. and Maier, Stephan E. and Tempany, Clare M. C.},
  title    = {Detection of prostate cancer by integration of line-scan diffusion, {T2}-mapping and {T2}-weighted magnetic resonance imaging; a multichannel statistical classifier.},
  journal  = {Med Phys},
  year     = {2003},
  volume   = {30},
  pages    = {2390--2398},
  number   = {9},
  month    = sep,
  abstract = {A multichannel statistical classifier for detecting prostate cancer was developed and validated by combining information from three different magnetic resonance ({MR}) methodologies: {T}2-weighted, {T}2-mapping, and line scan diffusion imaging ({LSDI}). {F}rom these {MR} sequences, four different sets of image intensities were obtained: {T}2-weighted ({T}2{W}) from {T}2-weighted imaging, {A}pparent {D}iffusion {C}oefficient ({ADC}) from {LSDI}, and proton density ({PD}) and {T}2 ({T}2 {M}ap) from {T}2-mapping imaging. {M}anually segmented tumor labels from a radiologist, which were validated by biopsy results, served as tumor "ground truth." {T}extural features were extracted from the images using co-occurrence matrix ({CM}) and discrete cosine transform ({DCT}). {A}natomical location of voxels was described by a cylindrical coordinate system. {A} statistical jack-knife approach was used to evaluate our classifiers. {S}ingle-channel maximum likelihood ({ML}) classifiers were based on 1 of the 4 basic image intensities. {O}ur multichannel classifiers: support vector machine ({SVM}) and {F}isher linear discriminant ({FLD}), utilized five different sets of derived features. {E}ach classifier generated a summary statistical map that indicated tumor likelihood in the peripheral zone ({PZ}) of the prostate gland. {T}o assess classifier accuracy, the average areas under the receiver operator characteristic ({ROC}) curves over all subjects were compared. {O}ur best {FLD} classifier achieved an average {ROC} area of 0.839(+/-0.064), and our best {SVM} classifier achieved an average {ROC} area of 0.761(+/-0.043). {T}he {T}2{W} {ML} classifier, our best single-channel classifier, only achieved an average {ROC} area of 0.599(+/-0.146). {C}ompared to the best single-channel {ML} classifier, our best multichannel {FLD} and {SVM} classifiers have statistically superior {ROC} performance ({P}=0.0003 and 0.0017, respectively) from pairwise two-sided t-test. {B}y integrating the information from multiple images and capturing the textural and anatomical features in tumor areas, summary statistical maps can potentially aid in image-guided prostate biopsy and assist in guiding and controlling delivery of localized therapy under image guidance.},
  pdf      = {../local/Chan2003Detection.pdf},
  file     = {Chan2003Detection.pdf:local/Chan2003Detection.pdf:PDF},
  keywords = {Algorithms, Anion Exchange Resins, Antigen-Antibody Complex, Artificial Intelligence, Automated, Automatic Data Processing, Biological, Blood Cells, Chemical, Chromatography, Cluster Analysis, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, Data Interpretation, Databases, Decision Making, Decision Trees, Diffusion Magnetic Resonance Imaging, English Abstract, Epitopes, Expert Systems, Factual, Fuzzy Logic, Gene Expression Profiling, Gene Expression Regulation, Gene Targeting, Genome, Histocompatibility Antigens Class I, Humans, Image Interpretation, Image Processing, In Vitro, Indicators and Reagents, Information Storage and Retrieval, Ion Exchange, Least-Squares Analysis, Liver Cirrhosis, Magnetic Resonance Imaging, Male, Models, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Nucleic Acid Conformation, P.H.S., Pattern Recognition, Prostatic Neoplasms, Protein, Protein Binding, Protein Interaction Mapping, Proteins, Proteome, Quantitative Structure-Activity Relationship, RNA, ROC Curve, Reproducibility of Results, Research Support, Sensitivity and Specificity, Sequence Analysis, Severity of Illness Index, Statistical, Structure-Activity Relationship, Subtraction Technique, T-Lymphocyte, Transcription Factors, Transfer, Treatment Outcome, U.S. Gov't, User-Computer Interface},
  pmid     = {14528961}
}
@article{Chan2002Comparison,
  author   = {Kwokleung Chan and Te-Won Lee and Pamela A Sample and Michael H Goldbaum and Robert N Weinreb and Terrence J Sejnowski},
  title    = {Comparison of machine learning and traditional classifiers in glaucoma diagnosis.},
  journal  = {{IEEE} Trans Biomed Eng},
  year     = {2002},
  volume   = {49},
  number   = {9},
  pages    = {963--974},
  month    = sep,
  abstract = {Glaucoma is a progressive optic neuropathy with characteristic structural changes in the optic nerve head reflected in the visual field. {T}he visual-field sensitivity test is commonly used in a clinical setting to evaluate glaucoma. {S}tandard automated perimetry ({SAP}) is a common computerized visual-field test whose output is amenable to machine learning. {W}e compared the performance of a number of machine learning algorithms with {STATPAC} indexes mean deviation, pattern standard deviation, and corrected pattern standard deviation. {T}he machine learning algorithms studied included multilayer perceptron ({MLP}), support vector machine ({SVM}), and linear ({LDA}) and quadratic discriminant analysis ({QDA}), {P}arzen window, mixture of {G}aussian ({MOG}), and mixture of generalized {G}aussian ({MGG}). {MLP} and {SVM} are classifiers that work directly on the decision boundary and fall under the discriminative paradigm. {G}enerative classifiers, which first model the data probability density and then perform classification via {B}ayes' rule, usually give deeper insight into the structure of the data space. {W}e have applied {MOG}, {MGG}, {LDA}, {QDA}, and {P}arzen window to the classification of glaucoma from {SAP}. {P}erformance of the various classifiers was compared by the areas under their receiver operating characteristic curves and by sensitivities (true-positive rates) at chosen specificities (true-negative rates). {T}he machine-learning-type classifiers showed improved performance over the best indexes from {STATPAC}. {F}orward-selection and backward-elimination methodology further improved the classification rate and also has the potential to reduce testing time by diminishing the number of visual-field location measurements.},
  doi      = {10.1109/TBME.2002.802012},
  pdf      = {../local/Chan2002Comparison.pdf},
  file     = {Chan2002Comparison.pdf:local/Chan2002Comparison.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric Acid Cycle, Classification, Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases, Decision Making, Diagnosis, Differential, Discriminant Analysis, Drug, Drug Design, Electrostatics, Epitopes, Eukaryotic Cells, Factual, False Negative Reactions, False Positive Reactions, Feasibility Studies, Female, Gene Expression, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Heterogeneity, Genetic Markers, Glaucoma, HLA Antigens, Hemolysins, Histocompatibility Antigens Class I, Humans, Internet, Intraocular Pressure, Ion Exchange, Lasers, Leukemia, Ligands, Likelihood Functions, Logistic Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological, Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't, Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation, Oligonucleotide Array Sequence Analysis, Open-Angle, Ophthalmoscopy, Optic Disk, Optic Nerve Diseases, Ovarian Neoplasms, P.H.S., Pattern Recognition, Peptides, Perimetry, Predictive Value of Tests, Probability, Probability Learning, Protein, Protein Binding, Protein Conformation, Proteins, Quality Control, Quantum Theory, RNA, RNA Splicing, ROC Curve, Receptors, Reference Values, Regression Analysis, Reproducibility of Results, Research Support, Robotics, Saccharomyces cerevisiae Proteins, Sensitivity and Specificity, Sequence Analysis, Signal Processing, Software, Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship, T-Lymphocyte, Thermodynamics, Transcription, Tumor Markers, U.S. Gov't, 12214886},
  url      = {http://dx.doi.org/10.1109/TBME.2002.802012}
}
@article{Chen2011Removing,
  author      = {Chao Chen and Kay Grennan and Judith Badner and Dandan Zhang and Elliot Gershon and Li Jin and Chunyu Liu},
  title       = {Removing batch effects in analysis of expression microarray data: an evaluation of six batch adjustment methods.},
  journal     = {PLoS One},
  year        = {2011},
  volume      = {6},
  number      = {2},
  pages       = {e17238},
  abstract    = {The expression microarray is a frequently used approach to study gene expression on a genome-wide scale. However, the data produced by the thousands of microarray studies published annually are confounded by "batch effects," the systematic error introduced when samples are processed in multiple batches. Although batch effects can be reduced by careful experimental design, they cannot be eliminated unless the whole study is done in a single batch. A number of programs are now available to adjust microarray data for batch effects prior to analysis. We systematically evaluated six of these programs using multiple measures of precision, accuracy and overall performance. ComBat, an Empirical Bayes method, outperformed the other five programs by most metrics. We also showed that it is essential to standardize expression data at the probe level when testing for correlation of expression profiles, due to a sizeable probe effect in microarray data that can inflate the correlation among replicates and unrelated samples.},
  doi         = {10.1371/journal.pone.0017238},
  institution = {National Ministry of Education Key Laboratory of Contemporary Anthropology, Fudan University, Shanghai, People's Republic of China.},
  keywords    = {Bayes Theorem; Case-Control Studies; Data Interpretation, Statistical; Gene Expression Profiling, standards/statistics /&/ numerical data; Humans; Microarray Analysis, standards/statistics /&/ numerical data; ROC Curve; Reference Standards; Research Design; Sample Size; Selection Bias; Validation Studies as Topic},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {jp},
  pmid        = {21386892},
  timestamp   = {2012.02.29},
  url         = {http://dx.doi.org/10.1371/journal.pone.0017238}
}
@article{Churchill2002Fundamentals,
  author      = {Churchill, G. A.},
  title       = {Fundamentals of experimental design for {cDNA} microarrays},
  journal     = {Nat. Genet.},
  year        = {2002},
  volume      = {32 Suppl},
  pages       = {490--495},
  month       = dec,
  abstract    = {Microarray technology is now widely available and is being applied to address increasingly complex scientific questions. Consequently, there is a greater demand for statistical assessment of the conclusions drawn from microarray experiments. This review discusses fundamental issues of how to design an experiment to ensure that the resulting data are amenable to statistical analysis. The discussion focuses on two-color spotted cDNA microarrays, but many of the same issues apply to single-color gene-expression assays as well.},
  doi         = {10.1038/ng1031},
  institution = {The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA. garyc@jax.org},
  keywords    = {Animals; DNA, Complementary, analysis; Gene Expression; Gene Expression Profiling, methods; Mice; Models, Biological; Oligonucleotide Array Sequence Analysis, methods; Reference Standards; Reproducibility of Results; Research Design; Statistics as Topic},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {phupe},
  pii         = {ng1031},
  pmid        = {12454643},
  timestamp   = {2011.04.08},
  url         = {http://dx.doi.org/10.1038/ng1031}
}
@article{Cohen2004application,
  author   = {Gilles Cohen and M{\'e}lanie Hilario and Hugo Sax and St{\'e}phane Hugonnet and Christian Pellegrini and Antoine Geissbuhler},
  title    = {An application of one-class support vector machine to nosocomial infection detection.},
  journal  = {Medinfo},
  year     = {2004},
  volume   = {11},
  number   = {Pt 1},
  pages    = {716--720},
  abstract = {Nosocomial infections ({NI}s)---those acquired in health care settings---are among the major causes of increased mortality among hospitalized patients. {T}hey are a significant burden for patients and health authorities alike; it is thus important to monitor and detect them through an effective surveillance system. {T}his paper describes a retrospective analysis of a prevalence survey of {NI}s done in the {G}eneva {U}niversity {H}ospital. {O}ur goal is to identify patients with one or more {NI}s on the basis of clinical and other data collected during the survey. {I}n this two-class classification task, the main difficulty lies in the significant imbalance between positive or infected (11\%) and negative (89\%) cases. {T}o cope with class imbalance, we investigate one-class {SVM}s which can be trained to distinguish two classes on the basis of examples from a single class (in this case, only "normal" or non infected patients). {T}he infected ones are then identified as "abnormal" cases or outliers that deviate significantly from the normal profile. {E}xperimental results are encouraging: whereas standard 2-class {SVM}s scored a baseline sensitivity of 50.6\% on this problem, the one-class approach increased sensitivity to as much as 92.6\%. {T}hese results are comparable to those obtained by the authors in a previous study on asymmetrical soft margin {SVM}s; they suggest that one-class {SVM}s can provide an effective and efficient way of overcoming data imbalance in classification problems.},
  keywords = {Aged, Air, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts, Artificial Intelligence, Atrial, Automated, Canada, Carotid Stenosis, Cerebrovascular Accident, Cerebrovascular Circulation, Comparative Study, Computer-Assisted, Cross Infection, Cysteine, Data Collection, Decision Trees, Dementia, Diagnosis, Disulfides, Doppler, Embolism, Expert Systems, Extramural, Factor Analysis, Female, Gene Expression, Gene Expression Profiling, Health Status, Heart Septal Defects, Hospitals, Humans, Infection Control, Intracranial Embolism, Male, Models, Molecular, Myocardial Infarction, N.I.H., Neoplasms, Neural Networks (Computer), Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, Oxidation-Reduction, P.H.S., Pattern Recognition, Population Surveillance, Prevalence, Prognosis, Protein Binding, Protein Folding, Proteins, ROC Curve, Research Support, Retrospective Studies, Sensitivity and Specificity, Software, Statistical, Switzerland, Transcranial, Treatment Outcome, U.S. Gov't, Ultrasonography, University, 15360906},
  pii      = {D040004219}
}
@article{Consortium2006MicroArray,
  author      = {{MAQC Consortium} and Leming Shi and Laura H Reid and Wendell D Jones and Richard Shippy and Janet A Warrington and Shawn C Baker and Patrick J Collins and Francoise de Longueville and Ernest S Kawasaki and Kathleen Y Lee and Yuling Luo and Yongming Andrew Sun and James C Willey and Robert A Setterquist and Gavin M Fischer and Weida Tong and Yvonne P Dragan and David J Dix and Felix W Frueh and Frederico M Goodsaid and Damir Herman and Roderick V Jensen and Charles D Johnson and Edward K Lobenhofer and Raj K Puri and Uwe Scherf and Jean Thierry-Mieg and Charles Wang and Mike Wilson and Paul K Wolber and Lu Zhang and Shashi Amur and Wenjun Bao and Catalin C Barbacioru and Anne Bergstrom Lucas and Vincent Bertholet and Cecilie Boysen and Bud Bromley and Donna Brown and Alan Brunner and Roger Canales and Xiaoxi Megan Cao and Thomas A Cebula and James J Chen and Jing Cheng and Tzu-Ming Chu and Eugene Chudin and John Corson and J. Christopher Corton and Lisa J Croner and Christopher Davies and Timothy S Davison and Glenda Delenstarr and Xutao Deng and David Dorris and Aron C Eklund and Xiao-hui Fan and Hong Fang and Stephanie Fulmer-Smentek and James C Fuscoe and Kathryn Gallagher and Weigong Ge and Lei Guo and Xu Guo and Janet Hager and Paul K Haje and Jing Han and Tao Han and Heather C Harbottle and Stephen C Harris and Eli Hatchwell and Craig A Hauser and Susan Hester and Huixiao Hong and Patrick Hurban and Scott A Jackson and Hanlee Ji and Charles R Knight and Winston P Kuo and J. Eugene LeClerc and Shawn Levy and Quan-Zhen Li and Chunmei Liu and Ying Liu and Michael J Lombardi and Yunqing Ma and Scott R Magnuson and Botoul Maqsodi and Tim McDaniel and Nan Mei and Ola Myklebost and Baitang Ning and Natalia Novoradovskaya and Michael S Orr and Terry W Osborn and Adam Papallo and Tucker A Patterson and Roger G Perkins and Elizabeth H Peters and Ron Peterson and Kenneth L Philips and P. Scott Pine and Lajos Pusztai and Feng Qian and Hongzu Ren and Mitch Rosen and Barry A Rosenzweig and Raymond R Samaha and Mark Schena and Gary P Schroth and Svetlana Shchegrova and Dave D Smith and Frank Staedtler and Zhenqiang Su and Hongmei Sun and Zoltan Szallasi and Zivana Tezak and Danielle Thierry-Mieg and Karol L Thompson and Irina Tikhonova and Yaron Turpaz and Beena Vallanat and Christophe Van and Stephen J Walker and Sue Jane Wang and Yonghong Wang and Russ Wolfinger and Alex Wong and Jie Wu and Chunlin Xiao and Qian Xie and Jun Xu and Wen Yang and Liang Zhang and Sheng Zhong and Yaping Zong and William Slikker},
  title       = {The {MicroArray} {Quality} {Control} ({MAQC}) project shows inter- and intraplatform reproducibility of gene expression measurements},
  journal     = {Nat. Biotechnol.},
  year        = {2006},
  volume      = {24},
  number      = {9},
  pages       = {1151--1161},
  month       = sep,
  abstract    = {Over the last decade, the introduction of microarray technology has had a profound impact on gene expression research. The publication of studies with dissimilar or altogether contradictory results, obtained using different microarray platforms to analyze identical RNA samples, has raised concerns about the reliability of this technology. The MicroArray Quality Control (MAQC) project was initiated to address these concerns, as well as other performance and data analysis issues. Expression data on four titration pools from two distinct reference RNA samples were generated at multiple test sites using a variety of microarray-based and alternative technology platforms. Here we describe the experimental design and probe mapping efforts behind the MAQC project. We show intraplatform consistency across test sites as well as a high level of interplatform concordance in terms of genes identified as differentially expressed. This study provides a resource that represents an important first step toward establishing a framework for the use of microarrays in clinical and regulatory settings.},
  doi         = {10.1038/nbt1239},
  institution = {National Center for Toxicological Research, US Food and Drug Administration, Jefferson, Arkansas 72079, USA.},
  keywords    = {Equipment Design; Equipment Failure Analysis; Gene Expression Profiling, instrumentation/methods; Oligonucleotide Array Sequence Analysis, instrumentation; Quality Assurance, Health Care, methods; Quality Control; Reproducibility of Results; Sensitivity and Specificity; United States},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {phupe},
  pii         = {nbt1239},
  pmid        = {16964229},
  timestamp   = {2011.04.08},
  url         = {http://dx.doi.org/10.1038/nbt1239}
}
@article{Diekman2003Hybrid,
  author   = {Casey Diekman and Wei He and Nagabhushana Prabhu and Harvey Cramer},
  title    = {Hybrid methods for automated diagnosis of breast tumors.},
  journal  = {Anal Quant Cytol Histol},
  year     = {2003},
  volume   = {25},
  number   = {4},
  pages    = {183--190},
  month    = aug,
  abstract = {O{BJECTIVE}: {T}o design and analyze a new family of hybrid methods for the diagnosis of breast tumors using fine needle aspirates. {STUDY} {DESIGN}: {W}e present a radically new approach to the design of diagnosis systems. {I}n the new approach, a nonlinear classifier with high sensitivity but low specificity is hybridized with a linear classifier having low sensitivity but high specificity. {D}ata from the {W}isconsin {B}reast {C}ancer {D}atabase are used to evaluate, computationally, the performance of the hybrid classifiers. {RESULTS}: {T}he diagnosis scheme obtained by hybridizing the nonlinear classifier ellipsoidal multisurface method ({EMSM}) with the linear classifier proximal support vector machine ({PSVM}) was found to have a mean sensitivity of 97.36\% and a mean specificity of 95.14\% and was found to yield a 2.44\% improvement in the reliability of positive diagnosis over that of {EMSM} at the expense of 0.4\% degradation in the reliability of negative diagnosis, again compared to {EMSM}. {A}t the 95\% confidence level we can trust the hybrid method to be 96.19-98.53\% correct in its malignant diagnosis of new tumors and 93.57-96.71\% correct in its benign diagnosis. {CONCLUSION}: {H}ybrid diagnosis schemes represent a significant paradigm shift and provide a promising new technique to improve the specificity of nonlinear classifiers without seriously affecting the high sensitivity of nonlinear classifiers.},
  keywords = {Algorithms, Amino Acid Sequence, Amino Acids, Anion Exchange Resins, Antigen-Antibody Complex, Artificial Intelligence, Automated, Automatic Data Processing, Benchmarking, Biological, Biological Markers, Biopsy, Blood Cells, Blood Proteins, Breast Neoplasms, Cell Line, Cellular Structures, Chemical, Chromatography, Chromosome Aberrations, Cluster Analysis, Colonic Neoplasms, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, Computing Methodologies, DNA, Data Interpretation, Databases, Decision Making, Decision Trees, Diagnosis, Diffusion Magnetic Resonance Imaging, Disease, English Abstract, Epitopes, Expert Systems, Factual, Female, Fine-Needle, Fusion, Fuzzy Logic, Gene Expression Profiling, Gene Expression Regulation, Gene Targeting, Genetic, Genome, Histocompatibility Antigens Class I, Humans, Hydrogen Bonding, Hydrophobicity, Image Interpretation, Image Processing, In Vitro, Indicators and Reagents, Information Storage and Retrieval, Ion Exchange, Least-Squares Analysis, Leiomyosarcoma, Liver Cirrhosis, Lung Neoplasms, Magnetic Resonance Imaging, Male, Mass, Mathematical Computing, Matrix-Assisted Laser Desorption-Ionization, Models, Molecular, Molecular Sequence Data, Neoplasm Proteins, Neoplasms, Neoplastic, Nephroblastoma, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonl, Nucleic Acid Conformation, Nucleic Acid Hybridization, Oligonucleotide Array Sequence Analysis, Oncogene Proteins, Ovarian Neoplasms, P.H.S., Pattern Recognition, Predictive Value of Tests, Pro, Prostatic Neoplasms, Protein, Protein Binding, Protein Interaction Mapping, Protein Structure, Proteins, Quantitative Structure-Activity Relationship, RNA, ROC Curve, Reproducibility of Results, Research Support, Rhabdomyosarcoma, Secondary, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Severity of Illness Index, Software, Solubility, Spectrometry, Statistical, Structure-Activity Relationship, Subcellular Fractions, Subtraction Technique, T-Lymphocyte, Tissue Distribution, Transcription Factors, Transfer, Treatment Outcome, Tumor, Tumor Markers, U.S. Gov't, User-Computer Interface, inear Dynamics, teome, 12961824}
}
@article{DiMasi2003price,
  author    = {J. A. DiMasi and R. W. Hansen and H. G. Grabowski},
  title     = {The price of innovation: new estimates of drug development costs.},
  journal   = {J Health Econ},
  year      = {2003},
  volume    = {22},
  number    = {2},
  pages     = {151--185},
  month     = mar,
  abstract  = {The research and development costs of 68 randomly selected new drugs were obtained from a survey of 10 pharmaceutical firms. These data were used to estimate the average pre-tax cost of new drug development. The costs of compounds abandoned during testing were linked to the costs of compounds that obtained marketing approval. The estimated average out-of-pocket cost per new drug is 403 million US dollars (2000 dollars). Capitalizing out-of-pocket costs to the point of marketing approval at a real discount rate of 11\% yields a total pre-approval cost estimate of 802 million US dollars (2000 dollars). When compared to the results of an earlier study with a similar methodology, total capitalized costs were shown to have increased at an annual rate of 7.4\% above general price inflation.},
  keywords  = {Capital Expenditures, Costs and Cost Analysis, Data Collection, Drug Approval, Drug Evaluation, Drug Industry, Drugs, Economic, Humans, Inflation, Investigational, Organizational Innovation, Preclinical, Research Support, United States, 16087260},
  owner     = {mahe},
  pii       = {S0167629602001261},
  pmid      = {16087260},
  timestamp = {2006.08.12}
}
@article{Ding2005Minimum,
  author   = {Chris Ding and Hanchuan Peng},
  title    = {Minimum redundancy feature selection from microarray gene expression data.},
  journal  = {J Bioinform Comput Biol},
  year     = {2005},
  volume   = {3},
  number   = {2},
  pages    = {185--205},
  month    = apr,
  abstract = {How to selecting a small subset out of the thousands of genes in microarray data is important for accurate classification of phenotypes. {W}idely used methods typically rank genes according to their differential expressions among phenotypes and pick the top-ranked genes. {W}e observe that feature sets so obtained have certain redundancy and study methods to minimize it. {W}e propose a minimum redundancy - maximum relevance ({MRMR}) feature selection framework. {G}enes selected via {MRMR} provide a more balanced coverage of the space and capture broader characteristics of phenotypes. {T}hey lead to significantly improved class predictions in extensive experiments on 6 gene expression data sets: {NCI}, {L}ymphoma, {L}ung, {C}hild {L}eukemia, {L}eukemia, and {C}olon. {I}mprovements are observed consistently among 4 classification methods: {N}aive {B}ayes, {L}inear discriminant analysis, {L}ogistic regression, and {S}upport vector machines. {SUPPLIMENTARY}: {T}he top 60 {MRMR} genes for each of the datasets are listed in http://crd.lbl.gov/~cding/{MRMR}/. {M}ore information related to {MRMR} methods can be found at http://www.hpeng.net/.},
  keywords = {Adult, Aged, Aging, Algorithms, Animals, Apoptosis, Artificial Intelligence, Automated, Biological, Bone Marrow, Breast Neoplasms, Classification, Cluster Analysis, Comparative Study, Computer Simulation, Computer-Assisted, Diagnosis, Dose-Response Relationship, Drug, Female, Foot, Gait, Gene Expression Profiling, Gene Expression Regulation, Gene Silencing, Genetic Vectors, Humans, Image Interpretation, Information Storage and Retrieval, Kidney, Liver, Logistic Models, Male, Messenger, Models, Myocardium, Neoplasms, Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, Pattern Recognition, Pharmaceutical Preparations, Polymerase Chain Reaction, Principal Component Analysis, Proteins, RNA, Rats, Reproducibility of Results, Research Support, Sensitivity and Specificity, Small Interfering, Sprague-Dawley, Statistical, Subcellular Fractions, Unknown Primary, 15852500},
  pii      = {S0219720005001004}
}
@article{Driel2006text-mining,
  author      = {van Driel, M. A. and Bruggeman, J. and Vriend, G. and Brunner, H. G. and Leunissen, J. A. M.},
  title       = {A text-mining analysis of the human phenome.},
  journal     = {Eur. J. Hum. Genet.},
  year        = {2006},
  volume      = {14},
  number      = {5},
  pages       = {535--542},
  month       = may,
  abstract    = {A number of large-scale efforts are underway to define the relationships between genes and proteins in various species. But, few attempts have been made to systematically classify all such relationships at the phenotype level. Also, it is unknown whether such a phenotype map would carry biologically meaningful information. We have used text mining to classify over 5000 human phenotypes contained in the Online Mendelian Inheritance in Man database. We find that similarity between phenotypes reflects biological modules of interacting functionally related genes. These similarities are positively correlated with a number of measures of gene function, including relatedness at the level of protein sequence, protein motifs, functional annotation, and direct protein-protein interaction. Phenotype grouping reflects the modular nature of human disease genetics. Thus, phenotype mapping may be used to predict candidate genes for diseases as well as functional relations between genes and proteins. Such predictions will further improve if a unified system of phenotype descriptors is developed. The phenotype similarity data are accessible through a web interface at http://www.cmbi.ru.nl/MimMiner/.},
  doi         = {10.1038/sj.ejhg.5201585},
  institution = {Centre for Molecular and Biomolecular Informatics, Radboud University Nijmegen, Toernooiveld 1, 6525ED Nijmegen, the Netherlands.},
  keywords    = {Chromosome Mapping; Databases, Genetic; Genetic Predisposition to Disease; Genetic Vectors; Genome, Human; Genotype; Humans; Models, Genetic; Models, Statistical; Multigene Family; Phenotype},
  owner       = {mordelet},
  pii         = {5201585},
  pmid        = {16493445},
  timestamp   = {2010.09.27},
  url         = {http://dx.doi.org/10.1038/sj.ejhg.5201585}
}
@article{Ehlers2005NBS1,
  author   = {Justis P Ehlers and J. William Harbour},
  title    = {{NBS1} expression as a prognostic marker in uveal melanoma.},
  journal  = {Clin. Cancer Res.},
  year     = {2005},
  volume   = {11},
  number   = {5},
  pages    = {1849--1853},
  month    = mar,
  abstract = {P{URPOSE}: {U}p to half of uveal melanoma patients die of metastatic disease. {T}reatment of the primary eye tumor does not improve survival in high-risk patients due to occult micrometastatic disease, which is present at the time of eye tumor diagnosis but is not detected and treated until months to years later. {H}ere, we use microarray gene expression data to identify a new prognostic marker. {EXPERIMENTAL} {DESIGN}: {M}icroarray gene expression profiles were analyzed in 25 primary uveal melanomas. {T}umors were ranked by support vector machine ({SVM}) and by cytologic severity. {N}bs1 protein expression was assessed by quantitative immunohistochemistry in 49 primary uveal melanomas. {S}urvival was assessed using {K}aplan-{M}eier life-table analysis. {RESULTS}: {E}xpression of the {N}ijmegen breakage syndrome ({NBS}1) gene correlated strongly with {SVM} and cytologic tumor rankings ({P} < 0.0001). {F}urther, immunohistochemistry expression of the {N}bs1 protein correlated strongly with both {SVM} and cytologic rankings ({P} < 0.0001). {T}he 6-year actuarial survival was 100\% in patients with low immunohistochemistry expression of {N}bs1 and 22\% in those with high {N}bs1 expression ({P} = 0.01). {CONCLUSIONS}: {NBS}1 is a strong predictor of uveal melanoma survival and potentially could be used as a clinical marker for guiding clinical management.},
  doi      = {10.1158/1078-0432.CCR-04-2054},
  pdf      = {../local/Ehlers2005NBS1.pdf},
  file     = {Ehlers2005NBS1.pdf:local/Ehlers2005NBS1.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acid Sequence, Amino Acids, Analysis of Variance, Animals, Area Under Curve, Artifacts, Automated, Bacteriophage T4, Base Sequence, Biological, Birefringence, Brain Chemistry, Brain Neoplasms, Cell Cycle Proteins, Comparative Study, Computational Biology, Computer-Assisted, Cornea, Cross-Sectional Studies, Databases, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic Techniques, Discriminant Analysis, Evolution, Extramural, Face, Female, Gene Expression Profiling, Genetic, Glaucoma, Humans, Immunohistochemistry, Intraocular Pressure, Lasers, Least-Squares Analysis, Likelihood Functions, Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male, Markov Chains, Melanoma, Middle Aged, Models, Molecular, Mutation, N.I.H., Nerve Fibers, Non-P.H.S., Non-U.S. Gov't, Nuclear Proteins, Nucleic Acid, Nucleic Acid Conformation, Numerical Analysis, Oligonucleotide Array Sequence Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence, P.H.S., Pattern Recognition, Photic Stimulation, Polymorphism, Prognosis, Prospective Studies, Protein, Protein Structure, Proteins, RNA, ROC Curve, Regression Analysis, Reproducibility of Results, Research Support, Retinal Ganglion Cells, Secondary, Sensitivity and Specificity, Sequence Analysis, Single Nucleotide, Single-Stranded Conformational, Software, Statistics, Survival Analysis, Tertiary, Tomography, Tumor Markers, U.S. Gov't, Untranslated, Uveal Neoplasms, Visual Fields, beta-Lactamases, 15756009},
  pii      = {11/5/1849},
  url      = {http://clincancerres.aacrjournals.org/cgi/content/abstract/11/5/1849}
}
@article{Eroes2004Comparison,
  author    = {D. Er{\"o}s and G. K{\'e}ri and I. K{\"o}vesdi and C. Sz{\'a}ntai-Kis and G. M{\'e}sz{\'a}ros and L. Orfi},
  title     = {Comparison of predictive ability of water solubility {QSPR} models generated by {MLR}, {PLS} and {ANN} methods.},
  journal   = {Mini Rev Med Chem},
  year      = {2004},
  volume    = {4},
  number    = {2},
  pages     = {167--177},
  month     = feb,
  abstract  = {ADME/Tox computational screening is one of the most hot topics of modern drug research. About one half of the potential drug candidates fail because of poor ADME/Tox properties. Since the experimental determination of water solubility is time-consuming also, reliable computational predictions are needed for the pre-selection of acceptable "drug-like" compounds from diverse combinatorial libraries. Recently many successful attempts were made for predicting water solubility of compounds. A comprehensive review of previously developed water solubility calculation methods is presented here, followed by the description of the solubility prediction method designed and used in our laboratory. We have selected carefully 1381 compounds from scientific publications in a unified database and used this dataset in the calculations. The externally validated models were based on calculated descriptors only. The aim of model optimization was to improve repeated evaluations statistics of the predictions and effective descriptor scoring functions were used to facilitate quick generation of multiple linear regression analysis (MLR), partial least squares method (PLS) and artificial neural network (ANN) models with optimal predicting ability. Standard error of prediction of the best model generated with ANN (with 39-7-1 network structure) was 0.72 in logS units while the cross validated squared correlation coefficient (Q(2)) was better than 0.85. These values give a good chance for successful pre-selection of screening compounds from virtual libraries, based on the predicted water solubility.},
  keywords  = {Chemical, Chemistry, Comparative Study, Cytochrome P-450 Enzyme System, Estradiol, Least-Squares Analysis, Ligands, Linear Models, Models, Molecular, Naphthalenes, Neural Networks (Computer), Non-U.S. Gov't, Physical, Quantitative Structure-Activity Relationship, Reproducibility of Results, Research Support, Solubility, Spectrum Analysis, Statistical, Water, 14965289},
  owner     = {mahe},
  pmid      = {14965289},
  timestamp = {2006.09.07}
}
@article{Faugeras2004Variational,
  author = {Olivier Faugeras and Geoffray Adde and Guillaume Charpiat and Christophe Chefd'hotel and Maureen Clerc and Thomas Deneux and Rachid Deriche and Gerardo Hermosillo and Renaud Keriven and Pierre Kornprobst and Jan Kybic and Christophe Lenglet and Lucero Lopez-Perez and Th{\'e}o Papadopoulo and Jean-Philippe Pons and Florent Segonne and Bertrand Thirion and David Tschumperl{\'e} and Thierry Vi{\'e}ville and Nicolas Wotawa},
  title = {Variational, geometric, and statistical methods for modeling brain anatomy and function.},
  journal = {Neuroimage},
  year = {2004},
  volume = {23 Suppl 1},
  pages = {S46--S55},
  abstract = {We survey the recent activities of the {O}dyss{\'e}e {L}aboratory in the area of the application of mathematics to the design of models for studying brain anatomy and function. {W}e start with the problem of reconstructing sources in {MEG} and {EEG}, and discuss the variational approach we have developed for solving these inverse problems. {T}his motivates the need for geometric models of the head. {W}e present a method for automatically and accurately extracting surface meshes of several tissues of the head from anatomical magnetic resonance ({MR}) images. {A}natomical connectivity can be extracted from diffusion tensor magnetic resonance images but, in the current state of the technology, it must be preceded by a robust estimation and regularization stage. {W}e discuss our work based on variational principles and show how the results can be used to track fibers in the white matter ({WM}) as geodesics in some {R}iemannian space. {W}e then go to the statistical modeling of functional magnetic resonance imaging (f{MRI}) signals from the viewpoint of their decomposition in a pseudo-deterministic and stochastic part that we then use to perform clustering of voxels in a way that is inspired by the theory of support vector machines and in a way that is grounded in information theory. {M}ultimodal image matching is discussed next in the framework of image statistics and partial differential equations ({PDE}s) with an eye on registering f{MRI} to the anatomy. {T}he paper ends with a discussion of a new theory of random shapes that may prove useful in building anatomical and functional atlases.},
  doi = {10.1016/j.neuroimage.2004.07.015},
  pdf = {../local/Faugeras2004Variational.pdf},
  file = {Faugeras2004Variational.pdf:local/Faugeras2004Variational.pdf:PDF},
  keywords = {Adolescent, Adult, Algorithms, Anatomic, Bacterial Proteins, Brain, Brain Mapping, Comparative Study, Computer Simulation, Computer-Assisted, Diffusion Magnetic Resonance Imaging, Facial Asymmetry, Facial Expression, Facial Paralysis, Female, Gene Expression Profiling, Gram-Negative Bacteria, Gram-Positive Bacteria, Humans, Image Interpretation, Magnetoencephalography, Male, Middle Aged, Models, Motion, Neural Pathways, Non-U.S. Gov't, Photography, Protein, Proteome, Research Support, Retina, Sequence Alignment, Sequence Analysis, Severity of Illness Index, Software, Statistical, Subcellular Fractions, 15501100},
  pii = {S1053-8119(04)00380-5},
  url = {http://dx.doi.org/10.1016/j.neuroimage.2004.07.015}
}
@article{Garrett2003Comparison,
  author = {D. Garrett and D. A Peterson and C. Anderson and M. Thaut},
  title = {Comparison of linear, nonlinear, and feature selection methods for {EEG} signal classification.},
  journal = {I{EEE} {T}rans {N}eural {S}yst {R}ehabil {E}ng},
  year = {2003},
  volume = {11},
  pages = {141--144},
  number = {2},
  month = {Jun},
  abstract = {The reliable operation of brain-computer interfaces ({BCI}s) based on spontaneous electroencephalogram ({EEG}) signals requires accurate classification of multichannel {EEG}. {T}he design of {EEG} representations and classifiers for {BCI} are open research questions whose difficulty stems from the need to extract complex spatial and temporal patterns from noisy multidimensional time series obtained from {EEG} measurements. {T}he high-dimensional and noisy nature of {EEG} may limit the advantage of nonlinear classification methods over linear ones. {T}his paper reports the results of a linear (linear discriminant analysis) and two nonlinear classifiers (neural networks and support vector machines) applied to the classification of spontaneous {EEG} during five mental tasks, showing that nonlinear classifiers produce only slightly better classification results. {A}n approach to feature selection based on genetic algorithms is also presented with preliminary results of application to {EEG} during finger movement.},
  keywords = {80 and over, Adnexal Diseases, Adult, Aged, Algorithms, Artificial Intelligence, Automated, Bayes Theorem, Biological, Brain, Brain Mapping, Breast Neoplasms, Case-Control Studies, Chromatography, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, DNA, Diagnosis, Differential, Discriminant Analysis, Electroencephalography, Evoked Potentials, Feasibility Studies, Female, Fingers, Gene Expression Profiling, Gene Expression Regulation, Genetic, Genetic Markers, Genetic Predisposition to Disease, Genetic Screening, Habituation (Psychophysiology), High Pressure Liquid, Humans, Linear Models, Logistic Models, Male, Middle Aged, Migraine, Models, Movement, Neural Networks (Computer), Neurological, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Nucleosides, Ovarian Neoplasms, Pattern Recognition, Photic Stimulation, Predictive Value of Tests, ROC Curve, Reproducibility of Results, Research Support, Sensitivity and Specificity, Signal Processing, Software, Statistical, Thinking, Tumor Markers, U.S. Gov't, User-Computer Interface, Visual, 12899257}
}
@article{Ge2003Reducing,
  author = {Xijin Ge and Shuichi Tsutsumi and Hiroyuki Aburatani and Shuichi Iwata},
  title = {Reducing false positives in molecular pattern recognition.},
  journal = {Genome {I}nform {S}er {W}orkshop {G}enome {I}nform},
  year = {2003},
  volume = {14},
  pages = {34--43},
  abstract = {In the search for new cancer subtypes by gene expression profiling, it is essential to avoid misclassifying samples of unknown subtypes as known ones. {I}n this paper, we evaluated the false positive error rates of several classification algorithms through a 'null test' by presenting classifiers a large collection of independent samples that do not belong to any of the tumor types in the training dataset. {T}he benchmark dataset is available at www2.genome.rcast.u-tokyo.ac.jp/pm/. {W}e found that k-nearest neighbor ({KNN}) and support vector machine ({SVM}) have very high false positive error rates when fewer genes (<100) are used in prediction. {T}he error rate can be partially reduced by including more genes. {O}n the other hand, prototype matching ({PM}) method has a much lower false positive error rate. {S}uch robustness can be achieved without loss of sensitivity by introducing suitable measures of prediction confidence. {W}e also proposed a cluster-and-select technique to select genes for classification. {T}he nonparametric {K}ruskal-{W}allis {H} test is employed to select genes differentially expressed in multiple tumor types. {T}o reduce the redundancy, we then divided these genes into clusters with similar expression patterns and selected a given number of genes from each cluster. {T}he reliability of the new algorithm is tested on three public datasets.},
  keywords = {Amino Acid Sequence, Amino Acids, Animals, Automated, Base Sequence, Bayes Theorem, Biological, Carbohydrate Conformation, Carbohydrate Sequence, Cattle, Computational Biology, Computer Simulation, Crystallography, DNA, Databases, Factual, False Positive Reactions, Gene Expression Profiling, Genes, Genetic, Genetic Techniques, Genome, Histocompatibility Antigens Class I, Human, Humans, Introns, Least-Squares Analysis, MHC Class I, Major Histocompatibility Complex, Markov Chains, Messenger, Mice, Models, Monosaccharides, Neoplasms, Non-U.S. Gov't, Nonparametric, Pattern Recognition, Peptides, Phylogeny, Plants, Poly A, Polysaccharides, Predictive Value of Tests, Protein, Protein Structure, Proteins, RNA, Rats, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Secondary, Sequence Alignment, Software, Species Specificity, Statistics, Theoretical, X-Ray, 15706518}
}
@article{Girosi1998Equivalence,
  author = {Girosi, Federico},
  title = {An {E}quivalence {B}etween {S}parse {A}pproximation and {S}upport {V}ector {M}achines.},
  journal = {Neural {C}omput},
  year = {1998},
  volume = {10},
  pages = {1455--1480},
  number = {6},
  month = {Jul},
  abstract = {This article shows a relationship between two different approximation techniques: the support vector machines ({SVM}), proposed by {V}. {V}apnik (1995) and a sparse approximation scheme that resembles the basis pursuit denoising algorithm ({C}hen, 1995; {C}hen, {D}onoho, and {S}aunders, 1995). {SVM} is a technique that can be derived from the structural risk minimization principle ({V}apnik, 1982) and can be used to estimate the parameters of several different approximation schemes, including radial basis functions, algebraic and trigonometric polynomials, {B}-splines, and some forms of multilayer perceptrons. {B}asis pursuit denoising is a sparse approximation technique in which a function is reconstructed by using a small number of basis functions chosen from a large set (the dictionary). {W}e show that if the data are noiseless, the modified version of basis pursuit denoising proposed in this article is equivalent to {SVM} in the following sense: if applied to the same data set, the two techniques give the same solution, which is obtained by solving the same quadratic programming problem. {I}n the appendix, we present a derivation of the {SVM} technique in one framework of regularization theory, rather than statistical learning theory, establishing a connection between {SVM}, sparse approximation, and regularization theory.},
  keywords = {Algorithms, Automated, Biometry, Computers, DNA, Databases, Factual, Fungal, Fungal Proteins, GTP-Binding Proteins, Gene Expression, Genes, Learning, Markov Chains, Models, Neural Networks (Computer), Neurological, Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Hybridization, Open Reading Frames, P.H.S., Pattern Recognition, Protein, Protein Structure, Proteins, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical, Tertiary, U.S. Gov't, 9698353}
}
@article{Glotsos2004Automated,
  author = {Dimitris Glotsos and Panagiota Spyridonos and Dionisis Cavouras and Panagiota Ravazoula and Petroula-Arampantoni Dadioti and George Nikiforidis},
  title = {Automated segmentation of routinely hematoxylin-eosin-stained microscopic images by combining support vector machine clustering and active contour models.},
  journal = {Anal {Q}uant {C}ytol {H}istol},
  year = {2004},
  volume = {26},
  pages = {331--340},
  number = {6},
  month = {Dec},
  abstract = {O{BJECTIVE}: {T}o develop a method for the automated segmentation of images of routinely hematoxylin-eosin ({H}-{E})-stained microscopic sections to guarantee correct results in computer-assisted microscopy. {STUDY} {DESIGN}: {C}linical material was composed 50 {H}-{E}-stained biopsies of astrocytomas and 50 {H}-{E}-stained biopsies of urinary bladder cancer. {T}he basic idea was to use a support vector machine clustering ({SVMC}) algorithm to provide gross segmentation of regions holding nuclei and subsequently to refine nuclear boundary detection with active contours. {T}he initialization coordinates of the active contour model were defined using a {SVMC} pixel-based classification algorithm that discriminated nuclear regions from the surrounding tissue. {S}tarting from the boundaries of these regions, the snake fired and propagated until converging to nuclear boundaries. {RESULTS}: {T}he method was validated for 2 different types of {H}-{E}-stained images. {R}esults were evaluated by 2 histopathologists. {O}n average, 94\% of nuclei were correctly delineated. {CONCLUSION}: {T}he proposed algorithm could be of value in computer-based systems for automated interpretation of microscopic images.},
  keywords = {Adenosinetriphosphatase, Adolescent, Adult, Algorithms, Amino Acid Sequence, Amino Acids, Animals, Astrocytoma, Automated, Automation, Base Sequence, Bayes Theorem, Biological, Biopsy, Bladder Neoplasms, Breast Neoplasms, Carbohydrate Conformation, Carbohydrate Sequence, Cattle, Cell Cycle Proteins, Cell Nucleus, Computational Biology, Computer Simulation, Computer-Assisted, Crystallography, DNA, Databases, Diagnosis, Differential, Eosine Yellowish-(YS), Exoribonucleases, Factual, False Negative Reactions, False Positive Reactions, Female, Gene Expression, Gene Expression Profiling, Genes, Genetic, Genetic Techniques, Genetic Vectors, Genome, Hematoxylin, Histocompatibility Antigens Class I, Human, Humans, Image Interpretation, Image Processing, Introns, Least-Squares Analysis, MHC Class I, Major Histocompatibility Complex, Markov Chains, Messenger, Mice, Middle Aged, Models, Molecular Structure, Monosaccharides, Multigene Family, Mutation, Neoplasms, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonparametric, Nucleotidyltransferases, Observer Variation, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern Recognition, Peptides, Phenotype, Phylogeny, Plants, Poly A, Polysaccharides, Predictive Value of Tests, Protein, Protein Biosynthesis, Protein Kinase Inhibitors, Protein Structure, Proteins, RNA, RNA Helicases, RNA Splicing, Rats, Reproducibility of Results, Research Support, Retrospective Studies, Saccharomyces cerevisiae, Saccharomyces cerevisiae Proteins, Secondary, Sensitivity and Specificity, Sequence Alignment, Software, Species Specificity, Staining and Labeling, Statistics, Theoretical, Transcription, U.S. Gov't, Ultrasonography, X-Ray, 15678615}
}
@article{Golland2005Detection,
  author = {Polina Golland and W. Eric L Grimson and Martha E Shenton and Ron Kikinis},
  title = {Detection and analysis of statistical differences in anatomical shape.},
  journal = {Med {I}mage {A}nal},
  year = {2005},
  volume = {9},
  pages = {69--86},
  number = {1},
  month = {Feb},
  abstract = {We present a computational framework for image-based analysis and interpretation of statistical differences in anatomical shape between populations. {A}pplications of such analysis include understanding developmental and anatomical aspects of disorders when comparing patients versus normal controls, studying morphological changes caused by aging, or even differences in normal anatomy, for example, differences between genders. {O}nce a quantitative description of organ shape is extracted from input images, the problem of identifying differences between the two groups can be reduced to one of the classical questions in machine learning of constructing a classifier function for assigning new examples to one of the two groups while making as few misclassifications as possible. {T}he resulting classifier must be interpreted in terms of shape differences between the two groups back in the image domain. {W}e demonstrate a novel approach to such interpretation that allows us to argue about the identified shape differences in anatomically meaningful terms of organ deformation. {G}iven a classifier function in the feature space, we derive a deformation that corresponds to the differences between the two classes while ignoring shape variability within each class. {B}ased on this approach, we present a system for statistical shape analysis using distance transforms for shape representation and the support vector machines learning algorithm for the optimal classifier estimation and demonstrate it on artificially generated data sets, as well as real medical studies.},
  doi = {10.1016/j.media.2004.07.003},
  keywords = {Algorithms, Amino Acid, Artificial Intelligence, Ascomycota, Automated, Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms, Comparative Study, Computer-Assisted, Crystallography, DNA, DNA Primers, Databases, Diagnostic Imaging, Gene Expression Profiling, Hordeum, Host-Parasite Relations, Humans, Image Interpretation, Informatics, Kinetics, Magnetic Resonance Spectroscopy, Models, Nanotechnology, Non-P.H.S., Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern Recognition, Plant, Plants, Predictive Value of Tests, Protein, Research Support, Selection (Genetics), Sequence Alignment, Sequence Analysis, Sequence Homology, Skin, Software, Statistical, Theoretical, Thermodynamics, U.S. Gov't, Viral Proteins, X-Ray, 15581813},
  pii = {S1361-8415(04)00059-3},
  url = {http://dx.doi.org/10.1016/j.media.2004.07.003}
}
@article{Ifantis2003nonlinear,
  author = {A. Ifantis and S. Papadimitriou},
  title = {The nonlinear predictability of the electrotelluric field variations data analyzed with support vector machines as an earthquake precursor.},
  journal = {Int {J} {N}eural {S}yst},
  year = {2003},
  volume = {13},
  pages = {315--332},
  number = {5},
  month = {Oct},
  abstract = {This work investigates the nonlinear predictability of the {E}lectro {T}elluric {F}ield ({ETF}) variations data in order to develop new intelligent tools for the difficult task of earthquake prediction. {S}upport {V}ector {M}achines trained on a signal window have been used to predict the next sample. {W}e observe a significant increase at this short-term unpredictability of the {ETF} signal at about two weeks time period before the major earthquakes that took place in regions near the recording devices. {T}he unpredictability increase can be attributed to a quick time variation of the dynamics that produce the {ETF} signal due to the earthquake generation process. {T}hus, this increase can be taken into advantage for signaling for an increased possibility of a large earthquake within the next few days in the neighboring region of the recording station.},
  keywords = {Air Pollutants, Aircraft, Algorithms, Artificial Intelligence, Automated, Base Composition, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, Computing Methodologies, Cytosine, Data Interpretation, Databases, Enhancer Elements (Genetics), Environmental Monitoring, Ethanol, Exons, Fourier Transform Infrared, Genetic, Guanine, Humans, Image Interpretation, Natural Disasters, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Online Systems, P.H.S., Pattern Recognition, Photography, Probability, Pyrimidines, RNA Precursors, RNA Splice Sites, RNA Splicing, Radiation, Reproducibility of Results, Research Support, Sensitivity and Specificity, Signal Processing, Spectroscopy, Statistical, Subtraction Technique, Thermodynamics, Time Factors, U.S. Gov't, Untranslated Regions, Video Recording, Walking, 14652873},
  pii = {S0129065703001674}
}
@article{Ioannidis2009Repeatability,
  author = {John P A Ioannidis and David B Allison and Catherine A Ball and Issa Coulibaly and Xiangqin Cui and Aed{\'\i}n C Culhane and Mario Falchi and Cesare Furlanello and Laurence Game and Giuseppe Jurman and Jon Mangion and Tapan Mehta and Michael Nitzberg and Grier P Page and Enrico Petretto and Vera van Noort},
  title = {Repeatability of published microarray gene expression analyses.},
  journal = {Nat Genet},
  year = {2009},
  volume = {41},
  pages = {149--155},
  number = {2},
  month = {Feb},
  abstract = {Given the complexity of microarray-based gene expression studies, guidelines encourage transparent design and public data availability. Several journals require public data deposition and several public databases exist. However, not all data are publicly available, and even when available, it is unknown whether the published results are reproducible by independent scientists. Here we evaluated the replication of data analyses in 18 articles on microarray-based gene expression profiling published in Nature Genetics in 2005-2006. One table or figure from each article was independently evaluated by two teams of analysts. We reproduced two analyses in principle and six partially or with some discrepancies; ten could not be reproduced. The main reason for failure to reproduce was data unavailability, and discrepancies were mostly due to incomplete data annotation or specification of data processing and analysis. Repeatability of published microarray studies is apparently limited. More strict publication rules enforcing public data availability and explicit description of data processing and analysis should be considered.},
  doi = {10.1038/ng.295},
  institution = {Clinical and Molecular Epidemiology Unit, Department of Hygiene and Epidemiology, University of Ioannina School of Medicine, Ioannina 45110, Greece. jioannid@cc.uoi.gr},
  keywords = {Animals; Data Interpretation, Statistical; Databases, Genetic; Gene Expression Profiling, standards; Genome-Wide Association Study, standards; Humans; Oligonucleotide Array Sequence Analysis, standards; Peer Review, Research; Publications, standards; Reproducibility of Results},
  language = {eng},
  medline-pst = {ppublish},
  owner = {phupe},
  pii = {ng.295},
  pmid = {19174838},
  timestamp = {2011.04.08},
  url = {http://dx.doi.org/10.1038/ng.295}
}
@article{Johnson2007Adjusting,
  author = {W. Evan Johnson and Cheng Li and Ariel Rabinovic},
  title = {Adjusting batch effects in microarray expression data using empirical Bayes methods.},
  journal = {Biostatistics},
  year = {2007},
  volume = {8},
  pages = {118--127},
  number = {1},
  month = {Jan},
  abstract = {Non-biological experimental variation or "batch effects" are commonly observed across multiple batches of microarray experiments, often rendering the task of combining data from these batches difficult. The ability to combine microarray data sets is advantageous to researchers to increase statistical power to detect biological phenomena from studies where logistical considerations restrict sample size or in studies that require the sequential hybridization of arrays. In general, it is inappropriate to combine data sets without adjusting for batch effects. Methods have been proposed to filter batch effects from data, but these are often complicated and require large batch sizes ( > 25) to implement. Because the majority of microarray studies are conducted using much smaller sample sizes, existing methods are not sufficient. We propose parametric and non-parametric empirical Bayes frameworks for adjusting data for batch effects that is robust to outliers in small sample sizes and performs comparable to existing methods for large samples. We illustrate our methods using two example data sets and show that our methods are justifiable, easy to apply, and useful in practice. Software for our method is freely available at: http://biosun1.harvard.edu/complab/batch/.},
  doi = {10.1093/biostatistics/kxj037},
  institution = {Department of Biostatistics and Computational Biology, Dana-Farber Cancer Institute, Boston, MA, USA.},
  keywords = {Bayes Theorem; Data Interpretation, Statistical; Gene Expression Profiling, methods; Humans; Oligonucleotide Array Sequence Analysis, methods},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {kxj037},
  pmid = {16632515},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1093/biostatistics/kxj037}
}
@article{Juditsky2000Functional,
  author = {Juditsky, A. and Nemirovski, A.},
  title = {Functional {A}ggregation for {N}onparametric {E}stimation},
  journal = {Ann. {S}tat.},
  year = {2000},
  volume = {28},
  pages = {681--712},
  number = {3},
  month = jun,
  pdf = {../local/judi00.pdf},
  file = {judi00.pdf:local/judi00.pdf:PDF},
  subject = {stat},
  url = {ftp://ftp.irisa.fr/techreports/1996/PI-993.ps.gz}
}
@article{Kahraman2007Shape,
  author = {A. Kahraman and R. J. Morris and R. A. Laskowski and J. M. Thornton},
  title = {Shape variation in protein binding pockets and their ligands.},
  journal = {J. Mol. Biol.},
  year = {2007},
  volume = {368},
  pages = {283--301},
  number = {1},
  month = {Apr},
  abstract = {A common assumption about the shape of protein binding pockets is that they are related to the shape of the small ligand molecules that can bind there. But to what extent is that assumption true? Here we use a recently developed shape matching method to compare the shapes of protein binding pockets to the shapes of their ligands. We find that pockets binding the same ligand show greater variation in their shapes than can be accounted for by the conformational variability of the ligand. This suggests that geometrical complementarity in general is not sufficient to drive molecular recognition. Nevertheless, we show when considering only shape and size that a significant proportion of the recognition power of a binding pocket for its ligand resides in its shape. Additionally, we observe a "buffer zone" or a region of free space between the ligand and protein, which results in binding pockets being on average three times larger than the ligand that they bind.},
  doi = {10.1016/j.jmb.2007.01.086},
  keywords = {Binding Sites; Computer Simulation; Ligands; Models, Molecular; Models, Statistical; Protein Binding; Protein Conformation; Protein Folding},
  owner = {laurent},
  pii = {S0022-2836(07)00164-7},
  pmid = {17337005},
  timestamp = {2008.07.08},
  url = {http://dx.doi.org/10.1016/j.jmb.2007.01.086}
}
@article{Kapp2006Discovery,
  author = {Amy V Kapp and Stefanie S Jeffrey and Anita Langer{\o}d and Anne-Lise B{\o}rresen-Dale and Wonshik Han and Dong-Young Noh and Ida R K Bukholm and Monica Nicolau and Patrick O Brown and Robert Tibshirani},
  title = {Discovery and validation of breast cancer subtypes.},
  journal = {BMC Genomics},
  year = {2006},
  volume = {7},
  pages = {231},
  abstract = {Previous studies demonstrated breast cancer tumor tissue samples could be classified into different subtypes based upon DNA microarray profiles. The most recent study presented evidence for the existence of five different subtypes: normal breast-like, basal, luminal A, luminal B, and ERBB2+. Based upon the analysis of 599 microarrays (five separate cDNA microarray datasets) using a novel approach, we present evidence in support of the most consistently identifiable subtypes of breast cancer tumor tissue microarrays being: ESR1+/ERBB2-, ESR1-/ERBB2-, and ERBB2+ (collectively called the ESR1/ERBB2 subtypes). We validate all three subtypes statistically and show the subtype to which a sample belongs is a significant predictor of overall survival and distant-metastasis free probability. As a consequence of the statistical validation procedure we have a set of centroids which can be applied to any microarray (indexed by UniGene Cluster ID) to classify it to one of the ESR1/ERBB2 subtypes. Moreover, the method used to define the ESR1/ERBB2 subtypes is not specific to the disease. The method can be used to identify subtypes in any disease for which there are at least two independent microarray datasets of disease samples.},
  doi = {10.1186/1471-2164-7-231},
  institution = {Department of Statistics, Stanford University, Stanford, CA, USA. AKapp@stanford.edu},
  keywords = {Algorithms; Breast Neoplasms, classification/genetics/pathology; Female; Gene Expression Profiling, methods/statistics /&/ numerical data; Humans; Multivariate Analysis; Oligonucleotide Array Sequence Analysis, methods/statistics /&/ numerical data; Proportional Hazards Models; Risk Factors; Survival Analysis},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2164-7-231},
  pmid = {16965636},
  timestamp = {2012.02.29},
  url = {http://dx.doi.org/10.1186/1471-2164-7-231}
}
@article{Larsen2005integrative,
  author = {Mette Voldby Larsen and Claus Lundegaard and Kasper Lamberth and S{\o}ren Buus and S{\o}ren Brunak and Ole Lund and Morten Nielsen},
  title = {An integrative approach to {CTL} epitope prediction: a combined algorithm integrating {MHC} class {I} binding, {TAP} transport efficiency, and proteasomal cleavage predictions.},
  journal = {Eur. J. Immunol.},
  year = {2005},
  volume = {35},
  pages = {2295--2303},
  number = {8},
  month = {Aug},
  abstract = {Reverse immunogenetic approaches attempt to optimize the selection of candidate epitopes, and thus minimize the experimental effort needed to identify new epitopes. When predicting cytotoxic T cell epitopes, the main focus has been on the highly specific MHC class I binding event. Methods have also been developed for predicting the antigen-processing steps preceding MHC class I binding, including proteasomal cleavage and transporter associated with antigen processing (TAP) transport efficiency. Here, we use a dataset obtained from the SYFPEITHI database to show that a method integrating predictions of MHC class I binding affinity, TAP transport efficiency, and C-terminal proteasomal cleavage outperforms any of the individual methods. Using an independent evaluation dataset of HIV epitopes from the Los Alamos database, the validity of the integrated method is confirmed. The performance of the integrated method is found to be significantly higher than that of the two publicly available prediction methods BIMAS and SYFPEITHI. To identify 85\% of the epitopes in the HIV dataset, 9\% and 10\% of all possible nonamers in the HIV proteins must be tested when using the BIMAS and SYFPEITHI methods, respectively, for the selection of candidate epitopes. This number is reduced to 7\% when using the integrated method. In practical terms, this means that the experimental effort needed to identify an epitope in a hypothetical protein with 85\% probability is reduced by 20-30\% when using the integrated method. The method is available at http://www.cbs.dtu.dk/services/NetCTL. Supplementary material is available at http://www.cbs.dtu.dk/suppl/immunology/CTL.php.},
  doi = {10.1002/eji.200425811},
  keywords = {Algorithms; Data Interpretation, Statistical; Epitopes, T-Lymphocyte; Histocompatibility Antigens Class I; Humans; Hydrolysis; Predictive Value of Tests; Proteasome Endopeptidase Complex; Protein Binding; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.; T-Lymphocytes, Cytotoxic},
  owner = {jacob},
  pmid = {15997466},
  timestamp = {2006.08.30},
  url = {http://dx.doi.org/10.1002/eji.200425811}
}
@book{Lauritzen1996Graphical,
  author = {S. Lauritzen},
  title = {Graphical {M}odels},
  publisher = {Oxford},
  year = {1996},
  subject = {stat}
}
@article{Li2003Simple,
  author = {Li, Jinyan and Liu, Huiqing and Downing, James R and Yeoh, Allen Eng-Juh and Wong, Limsoon},
  title = {Simple rules underlying gene expression profiles of more than six subtypes of acute lymphoblastic leukemia ({ALL}) patients.},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  number = {1},
  pages = {71--78},
  month = jan,
  abstract = {M{OTIVATIONS} {AND} {RESULTS}: {F}or classifying gene expression profiles or other types of medical data, simple rules are preferable to non-linear distance or kernel functions. {T}his is because rules may help us understand more about the application in addition to performing an accurate classification. {I}n this paper, we discover novel rules that describe the gene expression profiles of more than six subtypes of acute lymphoblastic leukemia ({ALL}) patients. {W}e also introduce a new classifier, named {PCL}, to make effective use of the rules. {PCL} is accurate and can handle multiple parallel classifications. {W}e evaluate this method by classifying 327 heterogeneous {ALL} samples. {O}ur test error rate is competitive to that of support vector machines, and it is 71\% better than {C}4.5, 50\% better than {N}aive {B}ayes, and 43\% better than k-nearest neighbour. {E}xperimental results on another independent data sets are also presented to show the strength of our method. {AVAILABILITY}: {U}nder http://sdmc.lit.org.sg/{GED}atasets/, click on {S}upplementary {I}nformation.},
  keywords = {Acute, Algorithms, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Cluster Analysis, Comparative Study, Computer-Assisted, DNA, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans, Leukemia, Lymphocytic, Markov Chains, Messenger, Models, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplastic, Neural Networks (Computer), Non-U.S. Gov't, Nucleic Acid Conformation, Oligonucleotide Array Sequence Analysis, Pattern Recognition, Quality Control, RNA, Research Support, Signal Processing, Statistical, Stomach Neoplasms, Tumor Markers},
  pmid = {12499295}
}
@article{Li2004Fusing,
  author = {Li, Shutao and Kwok, James Tin-Yau and Tsang, Ivor Wai-Hung and Wang, Yaonan},
  title = {Fusing images with different focuses using support vector machines.},
  journal = {{IEEE} Trans Neural Netw},
  year = {2004},
  volume = {15},
  number = {6},
  pages = {1555--1561},
  month = nov,
  abstract = {Many vision-related processing tasks, such as edge detection, image segmentation and stereo matching, can be performed more easily when all objects in the scene are in good focus. {H}owever, in practice, this may not be always feasible as optical lenses, especially those with long focal lengths, only have a limited depth of field. {O}ne common approach to recover an everywhere-in-focus image is to use wavelet-based image fusion. {F}irst, several source images with different focuses of the same scene are taken and processed with the discrete wavelet transform ({DWT}). {A}mong these wavelet decompositions, the wavelet coefficient with the largest magnitude is selected at each pixel location. {F}inally, the fused image can be recovered by performing the inverse {DWT}. {I}n this paper, we improve this fusion procedure by applying the discrete wavelet frame transform ({DWFT}) and the support vector machines ({SVM}). {U}nlike {DWT}, {DWFT} yields a translation-invariant signal representation. {U}sing features extracted from the {DWFT} coefficients, a {SVM} is trained to select the source image that has the best focus at each pixel location, and the corresponding {DWFT} coefficients are then incorporated into the composite wavelet representation. {E}xperimental results show that the proposed method outperforms the traditional approach both visually and quantitatively.},
  keywords = {Algorithms, Amino Acid, Amino Acids, Artificial Intelligence, Ascomycota, Automated, Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms, Comparative Study, Computer Simulation, Computer-Assisted, Computing Methodologies, Crystallography, DNA, DNA Primers, Databases, Diagnostic Imaging, Enzymes, Fixation, Gene Expression Profiling, Genetic, Hordeum, Host-Parasite Relations, Humans, Image Enhancement, Image Interpretation, Informatics, Information Storage and Retrieval, Kinetics, Magnetic Resonance Spectroscopy, Models, Nanotechnology, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Ocular, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern Recognition, Plant, Plants, Predictive Value of Tests, Protein, Protein Conformation, Research Support, Sample Size, Selection (Genetics), Sequence Alignment, Sequence Analysis, Sequence Homology, Signal Processing, Skin, Software, Statistical, Subtraction Technique, Theoretical, Thermodynamics, U.S. Gov't, Viral Proteins, X-Ray},
  pmid = {15565781}
}
@unpublished{Lugosi1998On,
  author = {Lugosi, G.},
  title = {On concentration-of-measure inequalities},
  year = {1998},
  note = {Seminar notes},
  url = {http://www.econ.upf.es/~lugosi/concmeas.ps},
  pdf = {../local/lugo98.pdf},
  file = {lugo98.pdf:local/lugo98.pdf:PDF},
  subject = {stat}
}
@article{Lugosi1999Adaptive,
  author = {Lugosi, G. and Nobel, A.},
  title = {Adaptive Model Selection Using Empirical Complexities},
  journal = {Ann. Stat.},
  year = {1999},
  volume = {27},
  number = {6},
  pages = {1830--1864},
  month = dec,
  pdf = {../local/lugo99.pdf},
  file = {lugo99.pdf:local/lugo99.pdf:PDF},
  subject = {stat},
  url = {http://www.econ.upf.es/~lugosi/amsec.ps}
}
@article{Luo2004gene-silencing,
  author = {Luo, K. Q. and Chang, D. C.},
  title = {The gene-silencing efficiency of {siRNA} is strongly dependent on the local structure of {mRNA} at the targeted region.},
  journal = {Biochem. Biophys. Res. Commun.},
  year = {2004},
  volume = {318},
  number = {1},
  pages = {303--310},
  month = may,
  abstract = {The gene-silencing effect of short interfering {RNA} (si{RNA}) is known to vary strongly with the targeted position of the m{RNA}. {A} number of hypotheses have been suggested to explain this phenomenon. {W}e would like to test if this positional effect is mainly due to the secondary structure of the m{RNA} at the target site. {W}e proposed that this structural factor can be characterized by a single parameter called "the hydrogen bond ({H}-b) index," which represents the average number of hydrogen bonds formed between nucleotides in the target region and the rest of the m{RNA}. {T}his index can be determined using a computational approach. {W}e tested the correlation between the {H}-b index and the gene-silencing effects on three genes ({B}cl-2, h{TF}, and cyclin {B}1) using a variety of si{RNA}s. {W}e found that the gene-silencing effect is inversely dependent on the {H}-b index, indicating that the local m{RNA} structure at the targeted site is the main cause of the positional effect. {B}ased on this finding, we suggest that the {H}-b index can be a useful guideline for future si{RNA} design.},
  doi = {10.1016/j.bbrc.2004.04.027},
  keywords = {Animals, Apoptosis, Base Composition, Base Pairing, Base Sequence, Binding Sites, Cell Cycle, Cell Proliferation, Comparative Study, Cultured, Cyclin B, Cyclin D1, DNA-Binding Proteins, Down-Regulation, Extramural, Fluorescence, Gene Silencing, Gene Targeting, Genetic Vectors, Green Fluorescent Proteins, Hela Cells, Humans, Hydrogen Bonding, Luminescent Proteins, Male, Messenger, Mice, Microscopy, Models, Molecular, Molecular Sequence Data, N.I.H., Non-U.S. Gov't, Nucleic Acid Conformation, Nude, P.H.S., Prostatic Neoplasms, Proto-Oncogene Proteins c-bcl-2, Proto-Oncogene Proteins c-myc, RNA, Regression Analysis, Research Support, STAT3 Transcription Factor, Small Interfering, Thromboplastin, Trans-Activators, Tumor Cells, U.S. Gov't},
  pii = {S0006291X04007284},
  pmid = {15110788},
  url = {http://dx.doi.org/10.1016/j.bbrc.2004.04.027}
}
@article{Madeira2004Biclustering,
  author = {Madeira, S. C. and Oliveira, A. L.},
  title = {Biclustering algorithms for biological data analysis: a survey.},
  journal = {IEEE/ACM Trans Comput Biol Bioinform},
  year = {2004},
  volume = {1},
  number = {1},
  pages = {24--45},
  doi = {10.1109/TCBB.2004.2},
  url = {http://dx.doi.org/10.1109/TCBB.2004.2},
  pmid = {17048406},
  abstract = {A large number of clustering approaches have been proposed for the analysis of gene expression data obtained from microarray experiments. However, the results from the application of standard clustering methods to genes are limited. This limitation is imposed by the existence of a number of experimental conditions where the activity of genes is uncorrelated. A similar limitation exists when clustering of conditions is performed. For this reason, a number of algorithms that perform simultaneous clustering on the row and column dimensions of the data matrix has been proposed. The goal is to find submatrices, that is, subgroups of genes and subgroups of conditions, where the genes exhibit highly correlated activities for every condition. In this paper, we refer to this class of algorithms as biclustering. Biclustering is also referred in the literature as coclustering and direct clustering, among others names, and has also been used in fields such as information retrieval and data mining. In this comprehensive survey, we analyze a large number of existing approaches to biclustering, and classify them in accordance with the type of biclusters they can find, the patterns of biclusters that are discovered, the methods used to perform the search, the approaches used to evaluate the solution, and the target applications.},
  institution = {University of Beira Interior, Rua Marquês D'Avila e Bolama, Covilhã, Portugal. smadeira@di.ubi.pt},
  keywords = {Algorithms; Cluster Analysis; Computational Biology, methods; Gene Expression Profiling, statistics /&/ numerical data; Gene Expression, genetics; Humans; Models, Statistical; Oligonucleotide Array Sequence Analysis, methods; Saccharomyces cerevisiae, genetics},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  timestamp = {2012.02.27}
}
@article{Markowetz2010How,
  author = {Markowetz, Florian},
  title = {How to understand the cell by breaking it: network analysis of gene perturbation screens.},
  journal = {PLoS Comput Biol},
  year = {2010},
  volume = {6},
  number = {2},
  pages = {e1000655},
  doi = {10.1371/journal.pcbi.1000655},
  url = {http://dx.doi.org/10.1371/journal.pcbi.1000655},
  pmid = {20195495},
  institution = {Cancer Research UK Cambridge Research Institute, Cambridge, United Kingdom.},
  keywords = {Animals; Cell Physiological Processes; Cluster Analysis; Gene Regulatory Networks; Genomics; Humans; Models, Genetic; Models, Statistical; Phenotype; Signal Transduction; Systems Biology},
  owner = {phupe},
  timestamp = {2010.08.30}
}
@article{Marsland2002self-organising,
  author = {Marsland, Stephen and Shapiro, Jonathan and Nehmzow, Ulrich},
  title = {A self-organising network that grows when required.},
  journal = {Neural Netw},
  year = {2002},
  volume = {15},
  number = {8-9},
  pages = {1041--1058},
  abstract = {The ability to grow extra nodes is a potentially useful facility for a self-organising neural network. {A} network that can add nodes into its map space can approximate the input space more accurately, and often more parsimoniously, than a network with predefined structure and size, such as the {S}elf-{O}rganising {M}ap. {I}n addition, a growing network can deal with dynamic input distributions. {M}ost of the growing networks that have been proposed in the literature add new nodes to support the node that has accumulated the highest error during previous iterations or to support topological structures. {T}his usually means that new nodes are added only when the number of iterations is an integer multiple of some pre-defined constant, {A}. {T}his paper suggests a way in which the learning algorithm can add nodes whenever the network in its current state does not sufficiently match the input. {I}n this way the network grows very quickly when new data is presented, but stops growing once the network has matched the data. {T}his is particularly important when we consider dynamic data sets, where the distribution of inputs can change to a new regime after some time. {W}e also demonstrate the preservation of neighbourhood relations in the data by the network. {T}he new network is compared to an existing growing network, the {G}rowing {N}eural {G}as ({GNG}), on a artificial dataset, showing how the network deals with a change in input distribution after some time. {F}inally, the new network is applied to several novelty detection tasks and is compared with both the {GNG} and an unsupervised form of the {R}educed {C}oulomb {E}nergy network on a robotic inspection task and with a {S}upport {V}ector {M}achine on two benchmark novelty detection tasks.},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric Acid Cycle, Classification, Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases, Decision Making, Diagnosis, Differential, Drug, Drug Design, Electrostatics, Eukaryotic Cells, Factual, Feasibility Studies, Female, Gene Expression, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Heterogeneity, Genetic Markers, Hemolysins, Humans, Internet, Ion Exchange, Leukemia, Ligands, Likelihood Functions, Logistic Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't, Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation, Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S., Pattern Recognition, Probability, Probability Learning, Protein Binding, Protein Conformation, Proteins, Quality Control, Quantum Theory, RNA, RNA Splicing, Receptors, Reference Values, Regression Analysis, Reproducibility of Results, Research Support, Robotics, Saccharomyces cerevisiae Proteins, Sensitivity and Specificity, Sequence Analysis, Signal Processing, Software, Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship, Thermodynamics, Transcription, Tumor Markers, U.S. Gov't},
  pmid = {12416693}
}
@article{Martoglio2002decomposition,
  author = {Martoglio, Ann-Marie and Miskin, James W and Smith, Stephen K and MacKay, David J C},
  title = {A decomposition model to track gene expression signatures: preview on observer-independent classification of ovarian cancer.},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  number = {12},
  pages = {1617--1624},
  month = dec,
  abstract = {M{OTIVATION}: {A} number of algorithms and analytical models have been employed to reduce the multidimensional complexity of {DNA} array data and attempt to extract some meaningful interpretation of the results. {T}hese include clustering, principal components analysis, self-organizing maps, and support vector machine analysis. {E}ach method assumes an implicit model for the data, many of which separate genes into distinct clusters defined by similar expression profiles in the samples tested. {A} point of concern is that many genes may be involved in a number of distinct behaviours, and should therefore be modelled to fit into as many separate clusters as detected in the multidimensional gene expression space. {T}he analysis of gene expression data using a decomposition model that is independent of the observer involved would be highly beneficial to improve standard and reproducible classification of clinical and research samples. {RESULTS}: {W}e present a variational independent component analysis ({ICA}) method for reducing high dimensional {DNA} array data to a smaller set of latent variables, each associated with a gene signature. {W}e present the results of applying the method to data from an ovarian cancer study, revealing a number of tissue type-specific and tissue type-independent gene signatures present in varying amounts among the samples surveyed. {T}he observer independent results of such molecular analysis of biological samples could help identify patients who would benefit from different treatment strategies. {W}e further explore the application of the model to similar high-throughput studies.},
  keywords = {Acute, Algorithms, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Cluster Analysis, Comparative Study, Computer-Assisted, Cystadenoma, DNA, Female, Gene Expression, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans, Leukemia, Lymphocytic, Markov Chains, Messenger, Models, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplastic, Neural Networks (Computer), Non-U.S. Gov't, Nucleic Acid Conformation, Observer Variation, Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, Pattern Recognition, Quality Control, RNA, Reference Values, Reproducibility of Results, Research Support, Sensitivity and Specificity, Signal Processing, Statistical, Stomach Neoplasms, Transcription, Tumor Markers},
  pmid = {12490446}
}
@article{Mateos2002Systematic,
  author = {Mateos, Alvaro and Dopazo, Joaqu{\'\i}n and Jansen, Ronald and Tu, Yuhai and Gerstein, Mark and Stolovitzky, Gustavo},
  title = {Systematic learning of gene functional classes from {DNA} array expression data by using multilayer perceptrons.},
  journal = {Genome Res.},
  year = {2002},
  volume = {12},
  number = {11},
  pages = {1703--1715},
  month = nov,
  abstract = {Recent advances in microarray technology have opened new ways for functional annotation of previously uncharacterised genes on a genomic scale. {T}his has been demonstrated by unsupervised clustering of co-expressed genes and, more importantly, by supervised learning algorithms. {U}sing prior knowledge, these algorithms can assign functional annotations based on more complex expression signatures found in existing functional classes. {P}reviously, support vector machines ({SVM}s) and other machine-learning methods have been applied to a limited number of functional classes for this purpose. {H}ere we present, for the first time, the comprehensive application of supervised neural networks ({SNN}s) for functional annotation. {O}ur study is novel in that we report systematic results for ~100 classes in the {M}unich {I}nformation {C}enter for {P}rotein {S}equences ({MIPS}) functional catalog. {W}e found that only ~10\% of these are learnable (based on the rate of false negatives). {A} closer analysis reveals that false positives (and negatives) in a machine-learning context are not necessarily "false" in a biological sense. {W}e show that the high degree of interconnections among functional classes confounds the signatures that ought to be learned for a unique class. {W}e term this the "{B}orges effect" and introduce two new numerical indices for its quantification. {O}ur analysis indicates that classification systems with a lower {B}orges effect are better suitable for machine learning. {F}urthermore, we introduce a learning procedure for combining false positives with the original class. {W}e show that in a few iterations this process converges to a gene set that is learnable with considerably low rates of false positives and negatives and contains genes that are biologically related to the original class, allowing for a coarse reconstruction of the interactions between associated biological pathways. {W}e exemplify this methodology using the well-studied tricarboxylic acid cycle.},
  doi = {10.1101/gr.192502},
  pdf = {../local/Mateos2002Systematic.pdf},
  file = {Mateos2002Systematic.pdf:local/Mateos2002Systematic.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Carcinoma, Chemical, Chromatography, Citric Acid Cycle, Classification, Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted, Cystadenoma, DNA, Databases, Decision Making, Diagnosis, Differential, Drug, Drug Design, Electrostatics, Eukaryotic Cells, Factual, Feasibility Studies, Female, Gene Expression, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Heterogeneity, Genetic Markers, Hemolysins, Humans, Internet, Ion Exchange, Leukemia, Ligands, Likelihood Functions, Logistic Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't, Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation, Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S., Pattern Recognition, Probability, Protein Binding, Protein Conformation, Proteins, Quality Control, Quantum Theory, RNA, RNA Splicing, Receptors, Reference Values, Regression Analysis, Reproducibility of Results, Research Support, Saccharomyces cerevisiae Proteins, Sensitivity and Specificity, Sequence Analysis, Signal Processing, Software, Statistical, Stomach Neoplasms, Structural, Structure-Activity Relationship, Thermodynamics, Transcription, Tumor Markers, U.S. Gov't},
  pmid = {12421757},
  url = {http://dx.doi.org/10.1101/gr.192502}
}
@article{Mathews1999Expandeda,
  author = {Mathews, D. H. and Sabina, J. and Zuker, M. and Turner, D. H.},
  title = {Expanded sequence dependence of thermodynamic parameters improves prediction of {RNA} secondary structure.},
  journal = {J. Mol. Biol.},
  year = {1999},
  volume = {288},
  number = {5},
  pages = {911--940},
  month = may,
  abstract = {An improved dynamic programming algorithm is reported for RNA secondary structure prediction by free energy minimization. Thermodynamic parameters for the stabilities of secondary structure motifs are revised to include expanded sequence dependence as revealed by recent experiments. Additional algorithmic improvements include reduced search time and storage for multibranch loop free energies and improved imposition of folding constraints. An extended database of 151,503 nt in 955 structures determined by comparative sequence analysis was assembled to allow optimization of parameters not based on experiments and to test the accuracy of the algorithm. On average, the predicted lowest free energy structure contains 73 \% of known base-pairs when domains of fewer than 700 nt are folded; this compares with 64 \% accuracy for previous versions of the algorithm and parameters. For a given sequence, a set of 750 generated structures contains one structure that, on average, has 86 \% of known base-pairs. Experimental constraints, derived from enzymatic and flavin mononucleotide cleavage, improve the accuracy of structure predictions.},
  doi = {10.1006/jmbi.1999.2700},
  keywords = {16S, 23S, 5S, Affinity, Algorithms, Aluminum Silicates, Amino Acid, Amino Acid Sequence, Amyloidosis, Archaeal, Bacillus, Bacterial, Bacterial Proteins, Bacteriophage T4, Base Sequence, Chloroplast, Chromatography, Circular Dichroism, Comparative Study, Computational Biology, Databases, Electrophoresis, Entropy, Enzyme Stability, Escherichia coli, Factual, Fibroblast Growth Factor 2, Flavin Mononucleotide, Fluorescence, Genetic, Guanidine, Humans, Huntington Disease, Kinetics, Light, Models, Molecular Sequence Data, Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, P.H.S., Peptides, Phylogeny, Polyacrylamide Gel, Predictive Value of Tests, Protein Binding, Protein Denaturation, Protein Folding, Protein Structure, RNA, Radiation, Recombinant Proteins, Research Support, Ribosomal, Scattering, Secondary, Sequence Homology, Solutions, Spectrometry, Statistical, Temperature, Thermodynamics, Time Factors, Trinucleotide Repeat Expansion, U.S. Gov't, alpha-Amylase},
  owner = {vert},
  pii = {S0022-2836(99)92700-6},
  pmid = {10329189},
  timestamp = {2006.04.27},
  url = {http://dx.doi.org/10.1006/jmbi.1999.2700}
}
@article{Mayr2003Cross-reactive,
  author = {Mayr, Torsten and Igel, Christian and Liebsch, Gregor and Klimant, Ingo and Wolfbeis, Otto S},
  title = {Cross-reactive metal ion sensor array in a micro titer plate format.},
  journal = {Anal Chem},
  year = {2003},
  volume = {75},
  number = {17},
  pages = {4389--4396},
  month = sep,
  abstract = {A cross-reactive array in a micro titer plate ({MTP}) format is described that is based on a versatile and highly flexible scheme. {I}t makes use of rather unspecific metal ions probes having almost identical fluorescence spectra, thus enabling (a) interrogation at identical analytical wavelengths, and (b) imaging of the probes contained in the wells of the {MTP} using a {CCD} camera and an array of blue-light-emitting diodes as a light source. {T}he unselective response of the indicators in the presence of mixtures of five divalent cations generates a characteristic pattern that was analyzed by chemometric tools. {T}he fluorescence intensity of the indicators was transferred into a time-dependent parameter applying a scheme called dual lifetime referencing. {I}n this method, the fluorescence decay profile of the indicator is referenced against the phosphorescence of an inert reference dye added to the system. {T}he intrinsically referenced measurements also were performed using blue {LED}s as light sources and a {CCD} camera without intensifiers as the detector. {T}he best performance was observed if each well was excited by a single {LED}. {T}he assembly allows the detection of dye concentrations in the nanomoles-per-liter range without amplification and the acquisition of 96 wells simultaneously. {T}he pictures obtained form the basis for evaluation by pattern recognition algorithms. {S}upport vector machines are capable of predicting the presence of significant concentrations of metal ions with high accuracy.},
  keywords = {Agrochemicals, Air Pollutants, Aircraft, Algorithms, Artificial Intelligence, Automated, Base Composition, Base Sequence, Bayes Theorem, Carbonic Anhydrase Inhibitors, Cluster Analysis, Colonic Neoplasms, Comparative Study, Computational Biology, Computer Simulation, Computer Systems, Computer-Assisted, Computing Methodologies, Confidence Intervals, Cytosine, DNA, Data Interpretation, Databases, Diagnosis, Drug Design, Enhancer Elements (Genetics), Environmental Monitoring, Enzyme Inhibitors, Ethanol, Exons, Forecasting, Fourier Transform Infrared, Gene Expression Profiling, Gene Expression Regulation, Genetic, Genetic Screening, Glucuronosyltransferase, Guanine, Humans, Image Interpretation, Isoenzymes, Least-Squares Analysis, Leukemia, Linear Models, Lymphoma, Models, Molecular, Molecular Conformation, Molecular Sequence Data, Natural Disasters, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Oligonucleotide Array Sequence Analysis, Online Systems, P.H.S., Pattern Recognition, Pharmaceutical Preparations, Phenotype, Photography, Probability, Pyrimidines, Quantitative Structure-Activity Relationship, RNA Precursors, RNA Splice Sites, RNA Splicing, Radiation, Reproducibility of Results, Research Support, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Signal Processing, Software, Spectroscopy, Statistical, Subtraction Technique, Terminology, Thermodynamics, Time Factors, U.S. Gov't, Untranslated Regions, Video Recording, Walking},
  pmid = {14632041}
}
@article{Micchelli2005On,
  author = {Micchelli, Charles A and Pontil, Massimiliano},
  title = {On learning vector-valued functions.},
  journal = {Neural Comput},
  year = {2005},
  volume = {17},
  number = {1},
  pages = {177--204},
  month = jan,
  abstract = {In this letter, we provide a study of learning in a {H}ilbert space of vector-valued functions. {W}e motivate the need for extending learning theory of scalar-valued functions by practical considerations and establish some basic results for learning vector-valued functions that should prove useful in applications. {S}pecifically, we allow an output space {Y} to be a {H}ilbert space, and we consider a reproducing kernel {H}ilbert space of functions whose values lie in {Y}. {I}n this setting, we derive the form of the minimal norm interpolant to a finite set of data and apply it to study some regularization functionals that are important in learning theory. {W}e consider specific examples of such functionals corresponding to multiple-output regularization networks and support vector machines, for both regression and classification. {F}inally, we provide classes of operator-valued kernels of the dot product and translation-invariant type.},
  doi = {10.1162/0899766052530802},
  keywords = {Algorithms, Amino Acid, Amino Acids, Artificial Intelligence, Ascomycota, Automated, Base Sequence, Chromosome Mapping, Codon, Colonic Neoplasms, Comparative Study, Computer Simulation, Computer-Assisted, Computing Methodologies, Crystallography, DNA, DNA Primers, Databases, Decision Support Techniques, Diagnostic Imaging, Enzymes, Feedback, Fixation, Gene Expression Profiling, Genetic, Hordeum, Host-Parasite Relations, Humans, Image Enhancement, Image Interpretation, Informatics, Information Storage and Retrieval, Kinetics, Logistic Models, Magnetic Resonance Spectroscopy, Mathematical Computing, Models, Nanotechnology, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Ocular, Oligonucleotide Array Sequence Analysis, P.H.S., Pattern Recognition, Plant, Plants, Predictive Value of Tests, Protein, Protein Conformation, Regression Analysis, Research Support, Sample Size, Selection (Genetics), Sequence Alignment, Sequence Analysis, Sequence Homology, Signal Processing, Skin, Software, Statistical, Subtraction Technique, Theoretical, Thermodynamics, U.S. Gov't, Viral Proteins, X-Ray},
  pmid = {15563752},
  url = {http://dx.doi.org/10.1162/0899766052530802}
}
@article{Miwakeichi2001comparison,
  author   = {F. Miwakeichi and R. Ramirez-Padron and P. A. Valdes-Sosa and T. Ozaki},
  title    = {A comparison of non-linear non-parametric models for epilepsy data.},
  journal  = {Comput. {B}iol. {M}ed.},
  year     = {2001},
  volume   = {31},
  pages    = {41--57},
  number   = {1},
  month    = jan,
  abstract = {E{EG} spike and wave ({SW}) activity has been described through a non-parametric stochastic model estimated by the {N}adaraya-{W}atson ({NW}) method. {I}n this paper the performance of the {NW}, the local linear polynomial regression and support vector machines ({SVM}) methods were compared. {T}he noise-free realizations obtained by the {NW} and {SVM} methods reproduced {SW} better than as reported in previous works. {T}he tuning parameters had to be estimated manually. {A}dding dynamical noise, only the {NW} method was capable of generating {SW} similar to training data. {T}he standard deviation of the dynamical noise was estimated by means of the correlation dimension.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence, Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation, Chemistry, Child, Chromosome Aberrations, Classification, Codon, Colonic Neoplasms, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, DNA, Data Interpretation, Databases, Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis, Discrimination Learning, Electric Conductivity, Electroencephalography, Electrophysiology, Epilepsy, Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Genetic Predisposition to Disease, Genomics, Hemolysins, Humans, Indians, Information Storage and Retrieval, Initiator, Ion Channels, Kinetics, Leukemia, Likelihood Functions, Linear Models, Lipid Bilayers, Logistic Models, Lymphocytic, MEDLINE, Male, Markov Chains, Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological, Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution, North American, Nucleic Acid Conformation, Oligonucleotide Array Sequence Analysis, Organ Specificity, Organelles, Ovarian Neoplasms, Ovary, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive Value of Tests, Promoter Regions (Genetics), Protein Biosynthesis, Protein Folding, Protein Structure, Proteins, Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation, Software, Sound Spectrography, Statistical, Stochastic Processes, Stomach Diseases, T-Lymphocytes, Thermodynamics, Transcription, Transcription Factors, Tumor Markers, Type 2, U.S. Gov't, Vertebrates, 11058693},
  pii      = {S0010482500000214}
}
@article{Nabieva2005Whole-proteome,
  author      = {Elena Nabieva and Kam Jim and Amit Agarwal and Bernard Chazelle and Mona Singh},
  title       = {Whole-proteome prediction of protein function via graph-theoretic analysis of interaction maps.},
  journal     = {Bioinformatics},
  year        = {2005},
  volume      = {21 Suppl 1},
  pages       = {i302--i310},
  month       = jun,
  abstract    = {MOTIVATION: Determining protein function is one of the most important problems in the post-genomic era. For the typical proteome, there are no functional annotations for one-third or more of its proteins. Recent high-throughput experiments have determined proteome-scale protein physical interaction maps for several organisms. These physical interactions are complemented by an abundance of data about other types of functional relationships between proteins, including genetic interactions, knowledge about co-expression and shared evolutionary history. Taken together, these pairwise linkages can be used to build whole-proteome protein interaction maps. RESULTS: We develop a network-flow based algorithm, FunctionalFlow, that exploits the underlying structure of protein interaction maps in order to predict protein function. In cross-validation testing on the yeast proteome, we show that FunctionalFlow has improved performance over previous methods in predicting the function of proteins with few (or no) annotated protein neighbors. By comparing several methods that use protein interaction maps to predict protein function, we demonstrate that FunctionalFlow performs well because it takes advantage of both network topology and some measure of locality. Finally, we show that performance can be improved substantially as we consider multiple data sources and use them to create weighted interaction networks. AVAILABILITY: http://compbio.cs.princeton.edu/function},
  doi         = {10.1093/bioinformatics/bti1054},
  institution = {Computer Science Department, Princeton University Princeton, NJ 08544, USA.},
  keywords    = {Algorithms; Computational Biology, methods; Evolution, Molecular; Fungal Proteins, chemistry; Genomics; Models, Statistical; Models, Theoretical; Protein Interaction Mapping, methods; Proteins, chemistry; Proteomics, methods},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {21/suppl_1/i302},
  pmid        = {15961472},
  timestamp   = {2010.04.03},
  url         = {http://dx.doi.org/10.1093/bioinformatics/bti1054}
}
@article{Neuvial2006Spatial,
  author      = {Neuvial, Pierre and Hupé, Philippe and Brito, Isabel and Liva, Stéphane and Manié, Elodie and Brennetot, Caroline and Radvanyi, François and Aurias, Alain and Barillot, Emmanuel},
  title       = {Spatial normalization of array-CGH data.},
  journal     = {BMC Bioinformatics},
  year        = {2006},
  volume      = {7},
  pages       = {264},
  abstract    = {BACKGROUND: Array-based comparative genomic hybridization (array-CGH) is a recently developed technique for analyzing changes in DNA copy number. As in all microarray analyses, normalization is required to correct for experimental artifacts while preserving the true biological signal. We investigated various sources of systematic variation in array-CGH data and identified two distinct types of spatial effect of no biological relevance as the predominant experimental artifacts: continuous spatial gradients and local spatial bias. Local spatial bias affects a large proportion of arrays, and has not previously been considered in array-CGH experiments. RESULTS: We show that existing normalization techniques do not correct these spatial effects properly. We therefore developed an automatic method for the spatial normalization of array-CGH data. This method makes it possible to delineate and to eliminate and/or correct areas affected by spatial bias. It is based on the combination of a spatial segmentation algorithm called NEM (Neighborhood Expectation Maximization) and spatial trend estimation. We defined quality criteria for array-CGH data, demonstrating significant improvements in data quality with our method for three data sets coming from two different platforms (198, 175 and 26 BAC-arrays). CONCLUSION: We have designed an automatic algorithm for the spatial normalization of BAC CGH-array data, preventing the misinterpretation of experimental artifacts as biologically relevant outliers in the genomic profile. This algorithm is implemented in the R package MANOR (Micro-Array NORmalization), which is described at http://bioinfo.curie.fr/projects/manor and available from the Bioconductor site http://www.bioconductor.org. It can also be tested on the CAPweb bioinformatics platform at http://bioinfo.curie.fr/CAPweb.},
  doi         = {10.1186/1471-2105-7-264},
  institution = {Institut Curie, Service de Bioinformatique, 26, rue d'Ulm, Paris, 75248 cedex 05, France. pierre.neuvial@curie.fr},
  keywords    = {Algorithms; Artifacts; Base Sequence; Chromosome Mapping, methods; Computer Simulation; Data Interpretation, Statistical; Gene Dosage; In Situ Hybridization, methods; Models, Genetic; Models, Statistical; Molecular Sequence Data; Oligonucleotide Array Sequence Analysis, methods; Sequence Analysis, DNA, methods},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {philippe},
  pii         = {1471-2105-7-264},
  pmid        = {16716215},
  timestamp   = {2010.08.04},
  url         = {http://dx.doi.org/10.1186/1471-2105-7-264}
}
@article{Opper2000Gaussian,
  author   = {M. Opper and O. Winther},
  title    = {Gaussian processes for classification: mean-field algorithms.},
  journal  = {Neural {C}omput},
  year     = {2000},
  volume   = {12},
  pages    = {2655--2684},
  number   = {11},
  month    = nov,
  abstract = {We derive a mean-field algorithm for binary classification with gaussian processes that is based on the {TAP} approach originally proposed in statistical physics of disordered systems. {T}he theory also yields an approximate leave-one-out estimator for the generalization error, which is computed with no extra computational cost. {W}e show that from the {TAP} approach, it is possible to derive both a simpler "naive" mean-field theory and support vector machines ({SVM}s) as limiting cases. {F}or both mean-field algorithms and support vector machines, simulation results for three small benchmark data sets are presented. {T}hey show that one may get state-of-the-art performance by using the leave-one-out estimator for model selection and the built-in leave-one-out estimators are extremely precise when compared to the exact leave-one-out estimate. {T}he second result is taken as strong support for the internal consistency of the mean-field approach.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence, Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation, Chemistry, Child, Chromosome Aberrations, Classification, Colonic Neoplasms, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, DNA, Data Interpretation, Databases, Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis, Discrimination Learning, Electric Conductivity, Electrophysiology, Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Genetic Predisposition to Disease, Hemolysins, Humans, Indians, Ion Channels, Kinetics, Leukemia, Likelihood Functions, Lipid Bilayers, Logistic Models, Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological, Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution, North American, Nucleic Acid Conformation, Oligonucleotide Array Sequence Analysis, Organ Specificity, Organelles, Ovarian Neoplasms, Ovary, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive Value of Tests, Promoter Regions (Genetics), Protein Folding, Protein Structure, Proteins, Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment, Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation, Software, Sound Spectrography, Statistical, Stomach Diseases, T-Lymphocytes, Thermodynamics, Transcription, Transcription Factors, Tumor Markers, Type 2, U.S. Gov't, 11110131}
}
@article{Pang2005Face,
  author   = {Shaoning Pang and Daijin Kim and Sung Yang Bang},
  title    = {Face membership authentication using {SVM} classification tree generated by membership-based {LLE} data partition.},
  journal  = {I{EEE} {T}rans {N}eural {N}etw},
  year     = {2005},
  volume   = {16},
  pages    = {436--446},
  number   = {2},
  month    = mar,
  abstract = {This paper presents a new membership authentication method by face classification using a support vector machine ({SVM}) classification tree, in which the size of membership group and the members in the membership group can be changed dynamically. {U}nlike our previous {SVM} ensemble-based method, which performed only one face classification in the whole feature space, the proposed method employed a divide and conquer strategy that first performs a recursive data partition by membership-based locally linear embedding ({LLE}) data clustering, then does the {SVM} classification in each partitioned feature subset. {O}ur experimental results show that the proposed {SVM} tree not only keeps the good properties that the {SVM} ensemble method has, such as a good authentication accuracy and the robustness to the change of members, but also has a considerable improvement on the stability under the change of membership group size.},
  keywords = {80 and over, Aged, Algorithms, Area Under Curve, Cross-Sectional Studies, Decision Trees, Diagnostic Imaging, Diagnostic Techniques, Face, Glaucoma, Humans, Lasers, Least-Squares Analysis, Middle Aged, Nerve Fibers, Non-U.S. Gov't, Ophthalmological, Optic Nerve Diseases, P.H.S., Photic Stimulation, ROC Curve, Research Support, Retinal Ganglion Cells, Sensitivity and Specificity, Statistics, U.S. Gov't, 15787150}
}
@article{Parkhomenko2009Sparse,
  author      = {Parkhomenko, E. and Tritchler, D. and Beyene, J.},
  title       = {Sparse canonical correlation analysis with application to genomic data integration.},
  journal     = {Stat Appl Genet Mol Biol},
  year        = {2009},
  volume      = {8},
  pages       = {Article 1},
  number      = {1},
  month       = jan,
  abstract    = {Large scale genomic studies with multiple phenotypic or genotypic measures may require the identification of complex multivariate relationships. In multivariate analysis a common way to inspect the relationship between two sets of variables based on their correlation is canonical correlation analysis, which determines linear combinations of all variables of each type with maximal correlation between the two linear combinations. However, in high dimensional data analysis, when the number of variables under consideration exceeds tens of thousands, linear combinations of the entire sets of features may lack biological plausibility and interpretability. In addition, insufficient sample size may lead to computational problems, inaccurate estimates of parameters and non-generalizable results. These problems may be solved by selecting sparse subsets of variables, i.e. obtaining sparse loadings in the linear combinations of variables of each type. In this paper we present Sparse Canonical Correlation Analysis (SCCA) which examines the relationships between two types of variables and provides sparse solutions that include only small subsets of variables of each type by maximizing the correlation between the subsets of variables of different types while performing variable selection. We also present an extension of SCCA--adaptive SCCA. We evaluate their properties using simulated data and illustrate practical use by applying both methods to the study of natural variation in human gene expression.},
  doi         = {10.2202/1544-6115.1406},
  institution = {Hospital for Sick Children Research Institute. elena@utstat.toronto.edu},
  keywords    = {Algorithms; Genomics, statistics /&/ numerical data; Humans; Models, Statistical; Sample Size},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pmid        = {19222376},
  timestamp   = {2012.02.29},
  url         = {http://dx.doi.org/10.2202/1544-6115.1406}
}
@article{Pavlidis2004Support,
  author   = {Paul Pavlidis and Ilan Wapinski and William Stafford Noble},
  title    = {Support vector machine classification on the web.},
  journal  = {Bioinformatics},
  year     = {2004},
  volume   = {20},
  pages    = {586--587},
  number   = {4},
  month    = mar,
  abstract = {The support vector machine ({SVM}) learning algorithm has been widely applied in bioinformatics. {W}e have developed a simple web interface to our implementation of the {SVM} algorithm, called {G}ist. {T}his interface allows novice or occasional users to apply a sophisticated machine learning algorithm easily to their data. {M}ore advanced users can download the software and source code for local installation. {T}he availability of these tools will permit more widespread application of this powerful learning algorithm in bioinformatics.},
  doi      = {10.1093/bioinformatics/btg461},
  pdf      = {../local/Pavlidis2004Support.pdf},
  file     = {Pavlidis2004Support.pdf:local/Pavlidis2004Support.pdf:PDF},
  keywords = {Adaptation, Algorithms, Ambergris, Amino Acid Sequence, Animals, Artifacts, Artificial Intelligence, Automated, Cadmium, Candida, Candida albicans, Capillary, Clinical, Cluster Analysis, Combinatorial Chemistry Techniques, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, Computing Methodologies, Databases, Decision Support Systems, Electrophoresis, Enzymes, Europe, Eye Enucleation, Humans, Image Interpretation, Image Processing, Information Storage and Retrieval, Internet, Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Markov Chains, Melanoma, Models, Molecular, Molecular Conformation, Molecular Sequence Data, Molecular Structure, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Odors, P.H.S., Pattern Recognition, Perfume, Physiological, Predictive Value of Tests, Prognosis, Prospective Studies, Protein, Protein Structure, Proteins, Proteomics, Quantitative Structure-Activity Relationship, Rats, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Saccharomyces cerevisiae Proteins, Secondary, Sensitivity and Specificity, Signal Processing, Single-Blind Method, Soft Tissue Neoplasms, Software, Statistical, U.S. Gov't, Uveal Neoplasms, Visual, 14990457},
  pii      = {btg461},
  url      = {http://dx.doi.org/10.1093/bioinformatics/btg461}
}
@article{Peters2005Generating,
  author    = {Bjoern Peters and Alessandro Sette},
  title     = {Generating quantitative models describing the sequence specificity of biological processes with the stabilized matrix method.},
  journal   = {BMC Bioinformatics},
  year      = {2005},
  volume    = {6},
  pages     = {132},
  abstract  = {BACKGROUND: Many processes in molecular biology involve the recognition of short sequences of nucleic-or amino acids, such as the binding of immunogenic peptides to major histocompatibility complex (MHC) molecules. From experimental data, a model of the sequence specificity of these processes can be constructed, such as a sequence motif, a scoring matrix or an artificial neural network. The purpose of these models is two-fold. First, they can provide a summary of experimental results, allowing for a deeper understanding of the mechanisms involved in sequence recognition. Second, such models can be used to predict the experimental outcome for yet untested sequences. In the past we reported the development of a method to generate such models called the Stabilized Matrix Method (SMM). This method has been successfully applied to predicting peptide binding to MHC molecules, peptide transport by the transporter associated with antigen presentation (TAP) and proteasomal cleavage of protein sequences. RESULTS: Herein we report the implementation of the SMM algorithm as a publicly available software package. Specific features determining the type of problems the method is most appropriate for are discussed. Advantageous features of the package are: (1) the output generated is easy to interpret, (2) input and output are both quantitative, (3) specific computational strategies to handle experimental noise are built in, (4) the algorithm is designed to effectively handle bounded experimental data, (5) experimental data from randomized peptide libraries and conventional peptides can easily be combined, and (6) it is possible to incorporate pair interactions between positions of a sequence. CONCLUSION: Making the SMM method publicly available enables bioinformaticians and experimental biologists to easily access it, to compare its performance to other prediction methods, and to extend it to other applications.},
  doi       = {10.1186/1471-2105-6-132},
  keywords  = {Algorithms; Amino Acid Sequence; Biology; Computational Biology; Computer Simulation; Data Interpretation, Statistical; Databases, Protein; Models, Biological; Models, Statistical; Neural Networks (Computer); Peptide Library; Peptides; Programming Languages; Protein Binding; Sensitivity and Specificity; Software},
  owner     = {laurent},
  pii       = {1471-2105-6-132},
  pmid      = {15927070},
  timestamp = {2007.07.12},
  url       = {http://dx.doi.org/10.1186/1471-2105-6-132}
}
@article{Poggio1998Sparse,
  author   = {Poggio and Girosi},
  title    = {A {S}parse {R}epresentation for {F}unction {A}pproximation.},
  journal  = {Neural {C}omput},
  year     = {1998},
  volume   = {10},
  pages    = {1445--1454},
  number   = {6},
  month    = jul,
  abstract = {We derive a new general representation for a function as a linear combination of local correlation kernels at optimal sparse locations (and scales) and characterize its relation to principal component analysis, regularization, sparsity principles, and support vector machines.},
  keywords = {Algorithms, Automated, Biometry, Computers, DNA, Databases, Factual, Fungal, Fungal Proteins, GTP-Binding Proteins, Gene Expression, Genes, Learning, Markov Chains, Models, Neural Networks (Computer), Neurological, Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Hybridization, Open Reading Frames, P.H.S., Pattern Recognition, Protein, Protein Structure, Proteins, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical, Tertiary, U.S. Gov't, 9698352}
}
@article{Pontil1998Properties,
  author   = {M. Pontil and A. Verri},
  title    = {Properties of support vector machines.},
  journal  = {Neural {C}omput},
  year     = {1998},
  volume   = {10},
  pages    = {955--974},
  number   = {4},
  month    = may,
  abstract = {Support vector machines ({SVM}s) perform pattern recognition between two point classes by finding a decision surface determined by certain points of the training set, termed support vectors ({SV}). {T}his surface, which in some feature space of possibly infinite dimension can be regarded as a hyperplane, is obtained from the solution of a problem of quadratic programming that depends on a regularization parameter. {I}n this article, we study some mathematical properties of support vectors and show that the decision surface can be written as the sum of two orthogonal terms, the first depending on only the margin vectors (which are {SV}s lying on the margin), the second proportional to the regularization parameter. {F}or almost all values of the parameter, this enables us to predict how the decision surface varies for small parameter changes. {I}n the special but important case of feature space of finite dimension m, we also show that m + 1 {SV}s are usually sufficient to determine the decision surface fully. {F}or relatively small m, this latter result leads to a consistent reduction of the {SV} number.},
  keywords = {Algorithms, Artificial Intelligence, Automated, Biometry, Computers, DNA, Databases, Factual, Fungal, Fungal Proteins, GTP-Binding Proteins, Gene Expression, Genes, Learning, Linear Models, Markov Chains, Mathematics, Models, Neural Networks (Computer), Neurological, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Nucleic Acid Hybridization, Open Reading Frames, P.H.S., Pattern Recognition, Protein, Protein Structure, Proteins, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Sequence Alignment, Sequence Analysis, Software, Statistical, Tertiary, U.S. Gov't, 9573414}
}
@article{Prill2005PlosBiol,
  author      = {Robert J Prill and Pablo A Iglesias and Andre Levchenko},
  title       = {Dynamic properties of network motifs contribute to biological network organization.},
  journal     = {PLoS Biol},
  year        = {2005},
  volume      = {3},
  pages       = {e343},
  number      = {11},
  month       = nov,
  abstract    = {Biological networks, such as those describing gene regulation, signal transduction, and neural synapses, are representations of large-scale dynamic systems. Discovery of organizing principles of biological networks can be enhanced by embracing the notion that there is a deep interplay between network structure and system dynamics. Recently, many structural characteristics of these non-random networks have been identified, but dynamical implications of the features have not been explored comprehensively. We demonstrate by exhaustive computational analysis that a dynamical property--stability or robustness to small perturbations--is highly correlated with the relative abundance of small subnetworks (network motifs) in several previously determined biological networks. We propose that robust dynamical stability is an influential property that can determine the non-random structure of biological networks.},
  doi         = {10.1371/journal.pbio.0030343},
  institution = {Department of Biomedical Engineering, Johns Hopkins University, Baltimore, Maryland, USA.},
  keywords    = {Animals; Caenorhabditis elegans, physiology; Computational Biology, methods; Computer Simulation; Drosophila melanogaster, physiology; Escherichia coli, physiology; Models, Biological; Nerve Net; Saccharomyces cerevisiae, physiology; Signal Transduction; Statistics as Topic; Systems Theory; Transcription, Genetic},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {Andrei Zinovyev},
  pii         = {05-PLBI-RA-0233R2},
  pmid        = {16187794},
  timestamp   = {2011.04.08},
  url         = {http://dx.doi.org/10.1371/journal.pbio.0030343}
}
@article{Perez-Cruz2005Convergence,
  author   = {Fernando Pérez-Cruz and Carlos Bousoño-Calzón and Antonio Artés-Rodríguez},
  title    = {Convergence of the {IRWLS} {P}rocedure to the {S}upport {V}ector {M}achine {S}olution.},
  journal  = {Neural {C}omput},
  year     = {2005},
  volume   = {17},
  pages    = {7--18},
  number   = {1},
  month    = jan,
  abstract = {An iterative reweighted least squares ({IRWLS}) procedure recently proposed is shown to converge to the support vector machine solution. {T}he convergence to a stationary point is ensured by modifying the original {IRWLS} procedure.},
  keywords = {80 and over, Aged, Algorithms, Amino Acids, Animals, Area Under Curve, Automated, Brain Chemistry, Brain Neoplasms, Comparative Study, Computer-Assisted, Cross-Sectional Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic Techniques, Discriminant Analysis, Evolution, Face, Genetic, Glaucoma, Humans, Lasers, Least-Squares Analysis, Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical Analysis, Ophthalmological, Optic Nerve Diseases, P.H.S., Pattern Recognition, Photic Stimulation, Protein, ROC Curve, Regression Analysis, Research Support, Retinal Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics, U.S. Gov't, beta-Lactamases, 15779160}
}
@comment{{NOTE(review): the key of the next entry contains '[' -- kept unchanged so existing citations still resolve; rename it file-wide if a tool rejects it.}}
@article{Qin2004[Automated,
  author   = {Dong-mei Qin and Zhan-yi Hu and Yong-heng Zhao},
  title    = {Automated classification of celestial spectra based on support vector machines},
  journal  = {Guang {P}u {X}ue {Y}u {G}uang {P}u {F}en {X}i},
  year     = {2004},
  volume   = {24},
  pages    = {507--511},
  number   = {4},
  month    = apr,
  abstract = {The main objective of an automatic recognition system of celestial objects via their spectra is to classify celestial spectra and estimate physical parameters automatically. {T}his paper proposes a new automatic classification method based on support vector machines to separate non-active objects from active objects via their spectra. {W}ith low {SNR} and unknown red-shift value, it is difficult to extract true spectral lines, and as a result, active objects can not be determined by finding strong spectral lines and the spectral classification between non-active and active objects becomes difficult. {T}he proposed method in this paper combines the principal component analysis with support vector machines, and can automatically recognize the spectra of active objects with unknown red-shift values from non-active objects. {I}t finds its applicability in the automatic processing of voluminous observed data from large sky surveys in astronomy.},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic, Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis, Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male, Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence, P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies, Protein, ROC Curve, Regression Analysis, Research Support, Retinal Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics, Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15766170}
}
@article{Quackenbush2002Microarray,
  author      = {John Quackenbush},
  title       = {Microarray data normalization and transformation.},
  journal     = {Nat Genet},
  year        = {2002},
  volume      = {32 Suppl},
  pages       = {496--501},
  month       = dec,
  doi         = {10.1038/ng1032},
  keywords    = {Animals; Data Interpretation, Statistical; Forecasting; Gene Expression Profiling, methods; Humans; Oligonucleotide Array Sequence Analysis, methods; Research Design},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  pii         = {ng1032},
  pmid        = {12454644},
  timestamp   = {2010.08.04},
  url         = {http://dx.doi.org/10.1038/ng1032}
}
@article{Rhodes2007Oncomine,
  author      = {Rhodes, Daniel R. and Kalyana-Sundaram, Shanker and Mahavisno, Vasudeva and Varambally, Radhika and Yu, Jianjun and Briggs, Benjamin B. and Barrette, Terrence R. and Anstet, Matthew J. and Kincead-Beal, Colleen and Kulkarni, Prakash and Varambally, Sooryanaryana and Ghosh, Debashis and Chinnaiyan, Arul M.},
  title       = {Oncomine 3.0: genes, pathways, and networks in a collection of 18,000 cancer gene expression profiles.},
  journal     = {Neoplasia},
  year        = {2007},
  volume      = {9},
  pages       = {166--180},
  number      = {2},
  month       = feb,
  abstract    = {DNA microarrays have been widely applied to cancer transcriptome analysis; however, the majority of such data are not easily accessible or comparable. Furthermore, several important analytic approaches have been applied to microarray analysis; however, their application is often limited. To overcome these limitations, we have developed Oncomine, a bioinformatics initiative aimed at collecting, standardizing, analyzing, and delivering cancer transcriptome data to the biomedical research community. Our analysis has identified the genes, pathways, and networks deregulated across 18,000 cancer gene expression microarrays, spanning the majority of cancer types and subtypes. Here, we provide an update on the initiative, describe the database and analysis modules, and highlight several notable observations. Results from this comprehensive analysis are available at http://www.oncomine.org.},
  institution = {Department of Pathology, University of Michigan Medical School, Ann Arbor, MI 48109-0940, USA.},
  keywords    = {Antineoplastic Agents, pharmacology; Automatic Data Processing; Chromosome Mapping; Chromosomes, Human, genetics; Computational Biology, organization /&/ administration; Data Collection; Data Display; Data Interpretation, Statistical; Databases, Genetic; Drug Design; Gene Expression Profiling, statistics /&/ numerical data; Gene Expression Regulation, Neoplastic; Genes, Neoplasm; Humans; Internet; Models, Biological; Neoplasm Proteins, biosynthesis/chemistry/genetics; Neoplasms, classification/genetics/metabolism; Oligonucleotide Array Sequence Analysis; Subtraction Technique; Transcription, Genetic},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pmid        = {17356713},
  timestamp   = {2012.03.10}
}
@article{Rice2005Reconstructing,
  author      = {Rice, J. J. and Tu, Y. and Stolovitzky, G.},
  title       = {Reconstructing biological networks using conditional correlation analysis.},
  journal     = {Bioinformatics},
  year        = {2005},
  volume      = {21},
  number      = {6},
  pages       = {765--773},
  month       = mar,
  abstract    = {MOTIVATION: One of the present challenges in biological research is the organization of the data originating from high-throughput technologies. One way in which this information can be organized is in the form of networks of influences, physical or statistical, between cellular components. We propose an experimental method for probing biological networks, analyzing the resulting data and reconstructing the network architecture. METHODS: We use networks of known topology consisting of nodes (genes), directed edges (gene-gene interactions) and a dynamics for the genes' mRNA concentrations in terms of the gene-gene interactions. We proposed a network reconstruction algorithm based on the conditional correlation of the mRNA equilibrium concentration between two genes given that one of them was knocked down. Using simulated gene expression data on networks of known connectivity, we investigated how the reconstruction error is affected by noise, network topology, size, sparseness and dynamic parameters. RESULTS: Errors arise from correlation between nodes connected through intermediate nodes (false positives) and when the correlation between two directly connected nodes is obscured by noise, non-linearity or multiple inputs to the target node (false negatives). Two critical components of the method are as follows: (1) the choice of an optimal correlation threshold for predicting connections and (2) the reduction of errors arising from indirect connections (for which a novel algorithm is proposed). With these improvements, we can reconstruct networks with the topology of the transcriptional regulatory network in Escherichia coli with a reasonably low error rate.},
  doi         = {10.1093/bioinformatics/bti064},
  institution = {Computational Biology Center, IBM T.J. Watson Research Center, PO Box 218, Yorktown Heights, NY 10598, USA.},
  keywords    = {Algorithms; Computer Simulation; Gene Expression Profiling; Gene Expression Regulation; Models, Biological; Models, Statistical; Oligonucleotide Array Sequence Analysis; Protein Interaction Mapping; Signal Transduction; Statistics as Topic; Transcription Factors},
  owner       = {fantine},
  pii         = {bti064},
  pmid        = {15486043},
  timestamp   = {2010.10.21},
  url         = {http://dx.doi.org/10.1093/bioinformatics/bti064}
}
@article{Risau-Gusman2000Generalization,
  author   = {Risau-Gusman, S. and Gordon, M. B.},
  title    = {Generalization properties of finite-size polynomial support vector machines},
  journal  = {Phys {R}ev {E} {S}tat {P}hys {P}lasmas {F}luids {R}elat {I}nterdiscip {T}opics},
  year     = {2000},
  volume   = {62},
  number   = {5 Pt B},
  pages    = {7092--7099},
  month    = nov,
  abstract = {The learning properties of finite-size polynomial support vector machines are analyzed in the case of realizable classification tasks. {T}he normalization of the high-order features acts as a squeezing factor, introducing a strong anisotropy in the patterns distribution in feature space. {A}s a function of the training set size, the corresponding generalization error presents a crossover, more or less abrupt depending on the distribution's anisotropy and on the task to be learned, between a fast-decreasing and a slowly decreasing regime. {T}his behavior corresponds to the stepwise decrease found by {D}ietrich et al. [{P}hys. {R}ev. {L}ett. 82, 2975 (1999)] in the thermodynamic limit. {T}he theoretical results are in excellent agreement with the numerical simulations.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence, Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation, Chemistry, Child, Chromosome Aberrations, Classification, Codon, Colonic Neoplasms, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, DNA, Data Interpretation, Databases, Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis, Discrimination Learning, Electric Conductivity, Electrophysiology, Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Genetic Predisposition to Disease, Genomics, Hemolysins, Humans, Indians, Initiator, Ion Channels, Kinetics, Leukemia, Likelihood Functions, Lipid Bilayers, Logistic Models, Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological, Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution, North American, Nucleic Acid Conformation, Oligonucleotide Array Sequence Analysis, Organ Specificity, Organelles, Ovarian Neoplasms, Ovary, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive Value of Tests, Promoter Regions (Genetics), Protein Biosynthesis, Protein Folding, Protein Structure, Proteins, Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation, Software, Sound Spectrography, Statistical, Stomach Diseases, T-Lymphocytes, Thermodynamics, Transcription, Transcription Factors, Tumor Markers, Type 2, U.S. Gov't, Vertebrates, 0011102066}
}
@article{Roth2004Bayesian,
  author    = {Roth, Volker and Lange, Tilman},
  title     = {{Bayesian} class discovery in microarray datasets.},
  journal   = {IEEE Trans Biomed Eng},
  year      = {2004},
  volume    = {51},
  number    = {5},
  pages     = {707--718},
  month     = may,
  abstract  = {A novel approach to class discovery in gene expression datasets is presented. In the context of clinical diagnosis, the central goal of class discovery algorithms is to simultaneously find putative (sub-)types of diseases and to identify informative subsets of genes with disease-type specific expression profile. Contrary to many other approaches in the literature, the method presented implements a wrapper strategy for feature selection, in the sense that the features are directly selected by optimizing the discriminative power of the used partitioning algorithm. The usual combinatorial problems associated with wrapper approaches are overcome by a Bayesian inference mechanism. On the technical side, we present an efficient optimization algorithm with guaranteed local convergence property. The only free parameter of the optimization method is selected by a resampling-based stability analysis. Experiments with Leukemia and Lymphoma datasets demonstrate that our method is able to correctly infer partitions and corresponding subsets of genes which both are relevant in a biological sense. Moreover, the frequently observed problem of ambiguities caused by different but equally high-scoring partitions is successfully overcome by the model selection method proposed.},
  keywords  = {Algorithms, Automated, Bayes Theorem, Cluster Analysis, Comparative Study, DNA, Databases, Gene Expression Profiling, Genetic, Genetic Screening, Humans, Leukemia, Models, Non-U.S. Gov't, Nucleic Acid, Oligonucleotide Array Sequence Analysis, Pattern Recognition, Reproducibility of Results, Research Support, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Statistical, 15132496},
  pmid      = {15132496},
  timestamp = {2006.07.27}
}
@article{Sassi2005automated,
  author   = {Sassi, Alexander P and Andel, Frank and Bitter, Hans-Marcus L and Brown, Michael P S and Chapman, Robert G and Espiritu, Jeraldine and Greenquist, Alfred C and Guyon, Isabelle and Horchi-Alegre, Mariana and Stults, Kathy L and Wainright, Ann and Heller, Jonathan C and Stults, John T},
  title    = {An automated, sheathless capillary electrophoresis-mass spectrometry platform for discovery of biomarkers in human serum.},
  journal  = {Electrophoresis},
  year     = {2005},
  volume   = {26},
  number   = {7-8},
  pages    = {1500--1512},
  month    = apr,
  abstract = {A capillary electrophoresis-mass spectrometry ({CE}-{MS}) method has been developed to perform routine, automated analysis of low-molecular-weight peptides in human serum. {T}he method incorporates transient isotachophoresis for in-line preconcentration and a sheathless electrospray interface. {T}o evaluate the performance of the method and demonstrate the utility of the approach, an experiment was designed in which peptides were added to sera from individuals at each of two different concentrations, artificially creating two groups of samples. {T}he {CE}-{MS} data from the serum samples were divided into separate training and test sets. {A} pattern-recognition/feature-selection algorithm based on support vector machines was used to select the mass-to-charge (m/z) values from the training set data that distinguished the two groups of samples from each other. {T}he added peptides were identified correctly as the distinguishing features, and pattern recognition based on these peptides was used to assign each sample in the independent test set to its respective group. {A} twofold difference in peptide concentration could be detected with statistical significance (p-value < 0.0001). {T}he accuracy of the assignment was 95\%, demonstrating the utility of this technique for the discovery of patterns of biomarkers in serum.},
  doi      = {10.1002/elps.200410127},
  pdf      = {../local/Sassi2005automated.pdf},
  file     = {Sassi2005automated.pdf:local/Sassi2005automated.pdf:PDF},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic, Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis, Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male, Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence, P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies, Protein, ROC Curve, Regression Analysis, Research Support, Retinal Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics, Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15765480},
  url      = {http://dx.doi.org/10.1002/elps.200410127}
}
@article{Schneider1998Artificial,
  author    = {Schneider, G. and Wrede, P.},
  title     = {{Artificial} neural networks for computer-based molecular design.},
  journal   = {Prog Biophys Mol Biol},
  year      = {1998},
  volume    = {70},
  number    = {3},
  pages     = {175--222},
  abstract  = {The theory of artificial neural networks is briefly reviewed focusing on supervised and unsupervised techniques which have great impact on current chemical applications. An introduction to molecular descriptors and representation schemes is given. In addition, worked examples of recent advances in this field are highlighted and pioneering publications are discussed. Applications of several types of artificial neural networks to compound classification, modelling of structure-activity relationships, biological target identification, and feature extraction from biopolymers are presented and compared to other techniques. Advantages and limitations of neural networks for computer-aided molecular design and sequence analysis are discussed.},
  keywords  = {Algorithms, Amino Acid Sequence, Amino Acids, Animals, Artificial Intelligence, Automated, Bacterial, Bacterial Proteins, Bicuculline, Binding Sites, Biological, Biological Availability, Blood Proteins, Blood-Brain Barrier, Cation Transport Proteins, Cats, Cell Membrane Permeability, Chemical, Chemistry, Cluster Analysis, Combinatorial Chemistry Techniques, Comparative Study, Computational Biology, Computer Simulation, Computer Systems, Computer-Aided Design, Computer-Assisted, Computing Methodologies, DNA-Binding Proteins, Databases, Dogs, Drug Design, Electric Stimulation, Electromyography, Enzyme Inhibitors, Ether-A-Go-Go Potassium Channels, Excitatory Amino Acid Antagonists, Factual, False Positive Reactions, Forecasting, Forelimb, GABA Antagonists, Gene Expression Profiling, Genome, Glutamic Acid, Humans, Hydrogen Bonding, Image Enhancement, Image Interpretation, Image Processing, Information Storage and Retrieval, Iontophoresis, Kynurenic Acid, Least-Squares Analysis, Linear Models, Liver, Markov Chains, Metabolic Clearance Rate, Metalloendopeptidases, Microelectrodes, Models, Molecular, Molecular Conformation, Molecular Sequence Data, Molecular Structure, Motor Cortex, Movement, Multivariate Analysis, Nerve Net, Neural Networks (Computer), Neuropeptides, Non-U.S. Gov't, Nonlinear Dynamics, Pattern Recognition, Pharmaceutical, Pharmaceutical Preparations, Pharmacokinetics, Phylogeny, Potassium Channels, Predictive Value of Tests, Protein Interaction Mapping, Protein Sorting Signals, Protein Structure, Proteins, Rats, Reproducibility of Results, Research Support, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Shoulder, Signal Processing, Software, Statistical, Stereotaxic Techniques, Structure-Activity Relationship, Terminology, Tertiary, Trans-Activators, Voltage-Gated, Zinc, 9830312},
  owner     = {mahe},
  pii       = {S0079610798000261},
  pmid      = {9830312},
  timestamp = {2006.09.06}
}
@article{Seeger2004Gaussian,
  author   = {Seeger, Matthias},
  title    = {Gaussian processes for machine learning.},
  journal  = {Int {J} {N}eural {S}yst},
  year     = {2004},
  volume   = {14},
  number   = {2},
  pages    = {69--106},
  month    = apr,
  abstract = {Gaussian processes ({GP}s) are natural generalisations of multivariate {G}aussian random variables to infinite (countably or continuous) index sets. {GP}s have been applied in a large number of fields to a diverse range of ends, and very many deep theoretical analyses of various properties are available. {T}his paper gives an introduction to {G}aussian processes on a fairly elementary level with special emphasis on characteristics relevant in machine learning. {I}t draws explicit connections to branches such as spline smoothing models and support vector machines in which similar ideas have been investigated. {G}aussian process models are routinely used to solve hard machine learning problems. {T}hey are attractive because of their flexible non-parametric nature and computational simplicity. {T}reated within a {B}ayesian framework, very powerful statistical methods can be implemented which offer valid estimates of uncertainties in our predictions and generic model selection procedures cast as nonlinear optimization problems. {T}heir main drawback of heavy computational scaling has recently been alleviated by the introduction of generic sparse approximations.13,78,31 {T}he mathematical literature on {GP}s is large and often uses deep concepts which are not required to fully understand most machine learning applications. {I}n this tutorial paper, we aim to present characteristics of {GP}s relevant to machine learning and to show up precise connections to other "kernel machines" popular in the community. {O}ur focus is on a simple presentation, but references to more detailed sources are provided.},
  keywords = {Algorithms, Amino Acids, Antibodies, Artificial Intelligence, Astrocytoma, Automated, Bayes Theorem, Biological, Biopsy, Brain, Brain Mapping, Brain Neoplasms, Calibration, Comparative Study, Computational Biology, Computer-Assisted, Computing Methodologies, Cysteine, Cystine, Dysplastic Nevus Syndrome, Electrodes, Electroencephalography, Entropy, Eosine Yellowish-(YS), Evoked Potentials, Female, Gene Expression Profiling, Hematoxylin, Horseradish Peroxidase, Humans, Image Interpretation, Image Processing, Imagery (Psychotherapy), Imagination, Laterality, Linear Models, Male, Melanoma, Models, Monoclonal, Movement, Neoplasms, Neural Networks (Computer), Neuropeptides, Non-P.H.S., Non-U.S. Gov't, Nonparametric, Normal Distribution, P.H.S., Pattern Recognition, Perception, Principal Component Analysis, Protein, Protein Array Analysis, Protein Interaction Mapping, Proteins, Regression Analysis, Research Support, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Skin Neoplasms, Software, Statistical, Statistics, Tumor Markers, U.S. Gov't, User-Computer Interface, World Health Organization, 15112367},
  pii      = {S0129065704001899}
}
@article{Sheinerman2005High,
  author    = {Sheinerman, Felix B and Giraud, Elie and Laoui, Abdelazize},
  title     = {High affinity targets of protein kinase inhibitors have similar residues at the positions energetically important for binding.},
  journal   = {J. Mol. Biol.},
  year      = {2005},
  volume    = {352},
  number    = {5},
  pages     = {1134--1156},
  month     = oct,
  abstract  = {Inhibition of protein kinase activity is a focus of intense drug discovery efforts in several therapeutic areas. Major challenges facing the field include understanding of the factors determining the selectivity of kinase inhibitors and the development of compounds with the desired selectivity profile. Here, we report the analysis of sequence variability among high and low affinity targets of eight different small molecule kinase inhibitors (BIRB796, Tarceva, NU6102, Gleevec, SB203580, balanol, H89, PP1). It is observed that all high affinity targets of each inhibitor are found among a relatively small number of kinases, which have similar residues at the specific positions important for binding. The findings are highly statistically significant, and allow one to exclude the majority of kinases in a genome from a list of likely targets for an inhibitor. The findings have implications for the design of novel inhibitors with a desired selectivity profile (e.g. targeted at multiple kinases), the discovery of new targets for kinase inhibitor drugs, comparative analysis of different in vivo models, and the design of "a-la-carte" chemical libraries tailored for individual kinases.},
  doi       = {10.1016/j.jmb.2005.07.074},
  keywords  = {Amino Acid Sequence; Amino Acids; Binding Sites; Electrostatics; Humans; Ligands; Molecular Sequence Data; Piperazines; Protein Binding; Protein Kinase Inhibitors; Protein Kinases; Pyrazoles; Pyrimidines; Sequence Alignment; Thermodynamics},
  owner     = {laurent},
  pii       = {S0022-2836(05)00900-9},
  pmid      = {16139843},
  timestamp = {2007.01.03},
  url       = {http://dx.doi.org/10.1016/j.jmb.2005.07.074}
}
@article{Shen2005[Detection,
  author   = {Shen, Li and Yang, Jie and Zhou, Yue},
  title    = {Detection of {PVC}s with support vector machine},
  journal  = {Sheng {W}u {Y}i {X}ue {G}ong {C}heng {X}ue {Z}a {Z}hi},
  year     = {2005},
  volume   = {22},
  number   = {1},
  pages    = {78--81},
  month    = feb,
  abstract = {The classifiction of heart beats is the foundation for automated arrhythmia monitoring devices. {S}upport vector machnies ({SVM}s) have meant a great advance in solving classification or pattern recognition. {T}his study describes {SVM} for the identification of premature ventricular contractions ({PVC}s) in surface {ECG}s. {F}eatures for the classification task are extracted by analyzing the heart rate, morphology and wavelet energy of the heart beats from a single lead. {T}he performance of different {SVM}s is evaluated on the {MIT}-{BIH} arrhythmia database following the association for the advancement of medical instrumentation ({AAMI}) recommendations.},
  keywords = {80 and over, Adult, Aged, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts, Automated, Birefringence, Brain Chemistry, Brain Neoplasms, Comparative Study, Computer-Assisted, Cornea, Cross-Sectional Studies, Decision Trees, Diagnosis, Diagnostic Imaging, Diagnostic Techniques, Discriminant Analysis, Evolution, Face, Female, Genetic, Glaucoma, Humans, Intraocular Pressure, Lasers, Least-Squares Analysis, Magnetic Resonance Imaging, Magnetic Resonance Spectroscopy, Male, Middle Aged, Models, Molecular, Nerve Fibers, Non-U.S. Gov't, Numerical Analysis, Ophthalmological, Optic Nerve Diseases, Optical Coherence, P.H.S., Pattern Recognition, Photic Stimulation, Prospective Studies, Protein, ROC Curve, Regression Analysis, Research Support, Retinal Ganglion Cells, Sensitivity and Specificity, Sequence Analysis, Statistics, Tomography, U.S. Gov't, Visual Fields, beta-Lactamases, 15762121}
}
@article{Sherry2001dbSNP,
  author      = {Sherry, S. T. and Ward, M. H. and Kholodov, M. and Baker, J. and Phan, L. and Smigielski, E. M. and Sirotkin, K.},
  title       = {{dbSNP}: the {NCBI} database of genetic variation.},
  journal     = {Nucleic Acids Res},
  year        = {2001},
  volume      = {29},
  number      = {1},
  pages       = {308--311},
  month       = jan,
  abstract    = {In response to a need for a general catalog of genome variation to address the large-scale sampling designs required by association studies, gene mapping and evolutionary biology, the National Center for Biotechnology Information (NCBI) has established the dbSNP database [S.T.Sherry, M.Ward and K. Sirotkin (1999) Genome Res., 9, 677-679]. Submissions to dbSNP will be integrated with other sources of information at NCBI such as GenBank, PubMed, LocusLink and the Human Genome Project data. The complete contents of dbSNP are available to the public at website: http://www.ncbi.nlm.nih.gov/SNP. The complete contents of dbSNP can also be downloaded in multiple formats via anonymous FTP at ftp://ncbi.nlm.nih.gov/snp/.},
  institution = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA. sherry@ncbi.nlm.nih.gov},
  keywords    = {Animals; Biotechnology; Databases, Factual; Genetic Variation; Humans; Information Services; Internet; National Institutes of Health (U.S.); National Library of Medicine (U.S.); Polymorphism, Single Nucleotide, genetics; United States},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  pmid        = {11125122},
  timestamp   = {2010.08.01}
}
@article{Song2002Prediction,
  author   = {Song, Minghu and Breneman, Curt M and Bi, Jinbo and Sukumar, N. and Bennett, Kristin P and Cramer, Steven and Tugcu, Nihal},
  title    = {Prediction of protein retention times in anion-exchange chromatography systems using support vector regression.},
  journal  = {J {C}hem {I}nf {C}omput {S}ci},
  year     = {2002},
  volume   = {42},
  number   = {6},
  pages    = {1347--1357},
  abstract = {Quantitative {S}tructure-{R}etention {R}elationship ({QSRR}) models are developed for the prediction of protein retention times in anion-exchange chromatography systems. {T}opological, subdivided surface area, and {TAE} ({T}ransferable {A}tom {E}quivalent) electron-density-based descriptors are computed directly for a set of proteins using molecular connectivity patterns and crystal structure geometries. {A} novel algorithm based on {S}upport {V}ector {M}achine ({SVM}) regression has been employed to obtain predictive {QSRR} models using a two-step computational strategy. {I}n the first step, a sparse linear {SVM} was utilized as a feature selection procedure to remove irrelevant or redundant information. {S}ubsequently, the selected features were used to produce an ensemble of nonlinear {SVM} regression models that were combined using bootstrap aggregation (bagging) techniques, where various combinations of training and validation data sets were selected from the pool of available data. {A} visualization scheme (star plots) was used to display the relative importance of each selected descriptor in the final set of "bagged" models. {O}nce these predictive models have been validated, they can be used as an automated prediction tool for virtual high-throughput screening ({VHTS}).},
  keywords = {Acute, Algorithms, Animals, Anion Exchange Resins, Artificial Intelligence, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Carcinoma, Chemical, Chromatography, Classification, Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted, Cystadenoma, DNA, Decision Making, Diagnosis, Differential, Drug, Drug Design, Electrostatics, Eukaryotic Cells, Feasibility Studies, Female, Gene Expression, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans, Internet, Ion Exchange, Leukemia, Ligands, Likelihood Functions, Logistic Models, Lung Neoplasms, Lymphocytic, Lymphoma, Markov Chains, Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S., Non-Small-Cell Lung, Non-U.S. Gov't, Nucleic Acid Conformation, Nucleic Acid Hybridization, Observer Variation, Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S., Pattern Recognition, Probability, Protein Binding, Protein Conformation, Proteins, Quality Control, Quantum Theory, RNA, RNA Splicing, Receptors, Reference Values, Regression Analysis, Reproducibility of Results, Research Support, Sensitivity and Specificity, Sequence Analysis, Signal Processing, Software, Statistical, Stomach Neoplasms, Thermodynamics, Transcription, Tumor Markers, U.S. Gov't, 12444731},
  pii      = {ci025580t}
}
@article{Song2004Comparison,
  author   = {Song, Xiaowei and Mitnitski, Arnold and Cox, Jafna and Rockwood, Kenneth},
  title    = {Comparison of machine learning techniques with classical statistical models in predicting health outcomes.},
  journal  = {Medinfo},
  year     = {2004},
  volume   = {11},
  number   = {Pt 1},
  pages    = {736--740},
  abstract = {Several machine learning techniques (multilayer and single layer perceptron, logistic regression, least square linear separation and support vector machines) are applied to calculate the risk of death from two biomedical data sets, one from patient care records, and another from a population survey. {E}ach dataset contained multiple sources of information: history of related symptoms and other illnesses, physical examination findings, laboratory tests, medications (patient records dataset), health attitudes, and disabilities in activities of daily living (survey dataset). {E}ach technique showed very good mortality prediction in the acute patients data sample ({AUC} up to 0.89) and fair prediction accuracy for six year mortality ({AUC} from 0.70 to 0.76) in individuals from epidemiological database surveys. {T}he results suggest that the nature of data is of primary importance rather than the learning technique. {H}owever, the consistently superior performance of the artificial neural network (multi-layer perceptron) indicates that nonlinear relationships (which cannot be discerned by linear separation techniques) can provide additional improvement in correctly predicting health outcomes.},
  keywords = {Aged, Air, Algorithms, Amino Acids, Animals, Area Under Curve, Artifacts, Artificial Intelligence, Atrial, Automated, Canada, Carotid Stenosis, Cerebrovascular Accident, Cerebrovascular Circulation, Comparative Study, Computer-Assisted, Cysteine, Decision Trees, Dementia, Diagnosis, Disulfides, Doppler, Embolism, Expert Systems, Extramural, Factor Analysis, Female, Gene Expression, Gene Expression Profiling, Health Status, Heart Septal Defects, Humans, Intracranial Embolism, Male, Models, Molecular, Myocardial Infarction, N.I.H., Neoplasms, Neural Networks (Computer), Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, Oxidation-Reduction, P.H.S., Pattern Recognition, Prognosis, Protein Binding, Protein Folding, Proteins, ROC Curve, Research Support, Sensitivity and Specificity, Software, Statistical, Transcranial, Treatment Outcome, U.S. Gov't, Ultrasonography, 15360910},
  pii      = {D040004933}
}
@article{Sturn2002Genesis:,
  author   = {Sturn, Alexander and Quackenbush, John and Trajanoski, Zlatko},
  title    = {Genesis: cluster analysis of microarray data.},
  journal  = {Bioinformatics},
  year     = {2002},
  volume   = {18},
  number   = {1},
  pages    = {207--208},
  month    = jan,
  abstract = {A versatile, platform independent and easy to use {J}ava suite for large-scale gene expression analysis was developed. {G}enesis integrates various tools for microarray data analysis such as filters, normalization and visualization tools, distance measures as well as common clustering algorithms including hierarchical clustering, self-organizing maps, k-means, principal component analysis, and support vector machines. {T}he results of the clustering are transparent across all implemented methods and enable the analysis of the outcome of different algorithms and parameters. {A}dditionally, mapping of gene expression data onto chromosomal sequences was implemented to enhance promoter analysis and investigation of transcriptional control mechanisms.},
  keywords = {Algorithms, Artificial Intelligence, Cluster Analysis, Comparative Study, Computational Biology, Databases, Gene Expression Profiling, Genetic, Models, Molecular Structure, Neural Networks (Computer), Non-U.S. Gov't, Oligonucleotide Array Sequence Analysis, Principal Component Analysis, Programming Languages, Promoter Regions (Genetics), Protein, Proteins, Research Support, Software, Statistical, Transcription, 11836235}
}
@article{Suykens2001Optimal,
  author   = {Suykens, J. A. and Vandewalle, J. and De Moor, B.},
  title    = {Optimal control by least squares support vector machines.},
  journal  = {Neural {N}etw},
  year     = {2001},
  volume   = {14},
  number   = {1},
  pages    = {23--35},
  month    = jan,
  abstract = {Support vector machines have been very successful in pattern recognition and function estimation problems. {I}n this paper we introduce the use of least squares support vector machines ({LS}-{SVM}'s) for the optimal control of nonlinear systems. {L}inear and neural full static state feedback controllers are considered. {T}he problem is formulated in such a way that it incorporates the {N}-stage optimal control problem as well as a least squares support vector machine approach for mapping the state space into the action space. {T}he solution is characterized by a set of nonlinear equations. {A}n alternative formulation as a constrained nonlinear optimization problem in less unknowns is given, together with a method for imposing local stability in the {LS}-{SVM} control scheme. {T}he results are discussed for support vector machines with radial basis function kernel. {A}dvantages of {LS}-{SVM} control are that no number of hidden units has to be determined for the controller and that no centers have to be specified for the {G}aussian kernels when applying {M}ercer's condition. {T}he curse of dimensionality is avoided in comparison with defining a regular grid for the centers in classical radial basis function networks. {T}his is at the expense of taking the trajectory of state variables as additional unknowns in the optimization problem, while classical neural network approaches typically lead to parametric optimization problems. {I}n the {SVM} methodology the number of unknowns equals the number of training data, while in the primal space the number of unknowns can be infinite dimensional. {T}he method is illustrated both on stabilization and tracking problems including examples on swinging up an inverted pendulum with local stabilization at the endpoint and a tracking problem for a ball and beam system.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites, Biological, Bone Marrow Cells, Cell Compartmentation, Chemistry, Child, Chromosome Aberrations, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, DNA, Data Interpretation, Databases, Decision Trees, Diagnosis, Discriminant Analysis, Electric Conductivity, Electrophysiology, Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans, Ion Channels, Kinetics, Leukemia, Lipid Bilayers, Logistic Models, Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplastic, Neural Networks (Computer), Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution, Nucleic Acid Conformation, Organ Specificity, Organelles, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive Value of Tests, Promoter Regions (Genetics), Protein Folding, Protein Structure, Proteins, Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment, Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation, Software, Statistical, Stomach Diseases, T-Lymphocytes, Thermodynamics, Transcription, Transcription Factors, Tumor Markers, U.S. Gov't, 11213211},
  pii      = {S0893608000000770}
}
@article{Talagrand1996Majorizing,
  author  = {Talagrand, M.},
  title   = {Majorizing measures: {The} generic chaining},
  journal = {Ann. Probab.},
  year    = {1996},
  volume  = {24},
  pages   = {1049--1103},
  pdf     = {../local/tala96b.pdf},
  file    = {tala96b.pdf:local/tala96b.pdf:PDF},
  subject = {stat},
  url     = {http://www.math.ohio-state.edu/~talagran/preprints/majmeas.dvi}
}
@article{Talagrand1996New,
  author  = {Talagrand, M.},
  title   = {New concentration inequalities for product spaces},
  journal = {Invent. Math.},
  year    = {1996},
  volume  = {126},
  pages   = {505--563},
  pdf     = {../local/tala96.pdf},
  file    = {tala96.pdf:local/tala96.pdf:PDF},
  subject = {stat},
  url     = {http://www.math.ohio-state.edu/~talagran/preprints/newcon.dvi}
}
@article{Talagrand1996Newa,
  author  = {Talagrand, M.},
  title   = {A New Look at Independence},
  journal = {Ann. Probab.},
  year    = {1996},
  volume  = {24},
  pages   = {1--34},
  pdf     = {../local/tala96c.pdf},
  file    = {tala96c.pdf:local/tala96c.pdf:PDF},
  subject = {stat},
  url     = {http://www.math.ohio-state.edu/~talagran/preprints/newlook.dvi}
}
@article{Talagrand1995Concentration,
  author  = {Talagrand, M.},
  title   = {Concentration of measure and isoperimetric inequalities in product spaces},
  journal = {Publ. Math. I.H.E.S.},
  year    = {1995},
  volume  = {81},
  pages   = {73--203},
  pdf     = {../local/tala95.pdf},
  file    = {tala95.pdf:local/tala95.pdf:PDF},
  subject = {stat},
  url     = {http://www.math.ohio-state.edu/~talagran/preprints/ihes.dvi}
}
@article{Tzeng2004Predicting,
  author   = {Tzeng, Huey-Ming and Hsieh, Jer-Guang and Lin, Yih-Lon},
  title    = {Predicting nurses' intention to quit with a support vector machine: a new approach to set up an early warning mechanism in human resource management},
  journal  = {Comput Inform Nurs},
  year     = {2004},
  volume   = {22},
  number   = {4},
  pages    = {232--242},
  abstract = {This project developed a {S}upport {V}ector {M}achine for predicting nurses' intention to quit, using working motivation, job satisfaction, and stress levels as predictors. {T}his study was conducted in three hospitals located in southern {T}aiwan. {T}he target population was all nurses (389 valid cases). {F}or cross-validation, we randomly split cases into four groups of approximately equal sizes, and performed four training runs. {A}fter the training, the average percentage of misclassification on the training data was 0.86, while that on the testing data was 10.8, resulting in predictions with 89.2\% accuracy. {T}his {S}upport {V}ector {M}achine can predict nurses' intention to quit, without asking these nurses whether they have an intention to quit.},
  keywords = {Adolescent, Adult, Algorithms, Amino Acid Sequence, Amino Acids, Anatomic, Attitude of Health Personnel, Bacterial Proteins, Bias (Epidemiology), Brain, Brain Mapping, Burnout, Comparative Study, Computer Simulation, Computer-Assisted, Data Interpretation, Diffusion Magnetic Resonance Imaging, Facial Asymmetry, Facial Expression, Facial Paralysis, Female, Gene Expression Profiling, Gram-Negative Bacteria, Gram-Positive Bacteria, Hospital, Humans, Image Interpretation, Intention, Job Satisfaction, Logistic Models, Magnetoencephalography, Male, Middle Aged, Models, Motion, Neural Networks (Computer), Neural Pathways, Non-U.S. Gov't, Nonlinear Dynamics, Nursing Administration Research, Nursing Staff, Personnel Management, Personnel Turnover, Photography, Predictive Value of Tests, Professional, Protein, Proteins, Proteome, Psychological, Questionnaires, Regression Analysis, Reproducibility of Results, Research Support, Retina, Risk Factors, Sequence Alignment, Sequence Analysis, Severity of Illness Index, Software, Statistical, Subcellular Fractions, Taiwan, Theoretical, Workplace},
  pmid     = {15494654},
  pii      = {00024665-200407000-00012}
}
@article{Vanunu2010Associating,
  author      = {Vanunu, O. and Magger, O. and Ruppin, E. and Shlomi, T. and Sharan, R.},
  title       = {Associating genes and protein complexes with disease via network propagation},
  journal     = {PLoS Comput. Biol.},
  year        = {2010},
  volume      = {6},
  number      = {1},
  pages       = {e1000641},
  month       = jan,
  abstract    = {A fundamental challenge in human health is the identification of disease-causing genes. Recently, several studies have tackled this challenge via a network-based approach, motivated by the observation that genes causing the same or similar diseases tend to lie close to one another in a network of protein-protein or functional interactions. However, most of these approaches use only local network information in the inference process and are restricted to inferring single gene associations. Here, we provide a global, network-based method for prioritizing disease genes and inferring protein complex associations, which we call PRINCE. The method is based on formulating constraints on the prioritization function that relate to its smoothness over the network and usage of prior information. We exploit this function to predict not only genes but also protein complex associations with a disease of interest. We test our method on gene-disease association data, evaluating both the prioritization achieved and the protein complexes inferred. We show that our method outperforms extant approaches in both tasks. Using data on 1,369 diseases from the OMIM knowledgebase, our method is able (in a cross validation setting) to rank the true causal gene first for 34\% of the diseases, and infer 139 disease-related complexes that are highly coherent in terms of the function, expression and conservation of their member proteins. Importantly, we apply our method to study three multi-factorial diseases for which some causal genes have been found already: prostate cancer, alzheimer and type 2 diabetes mellitus. PRINCE's predictions for these diseases highly match the known literature, suggesting several novel causal genes and protein complexes for further investigation.},
  doi         = {10.1371/journal.pcbi.1000641},
  institution = {School of Computer Science, Tel-Aviv University, Tel-Aviv, Israel.},
  keywords    = {Algorithms; Alzheimer Disease; Databases, Genetic; Diabetes Mellitus; Disease; Genes; Humans; Male; Multiprotein Complexes; Prostatic Neoplasms; Protein Interaction Mapping; Proteins; Reproducibility of Results},
  owner       = {mordelet},
  pmid        = {20090828},
  timestamp   = {2010.09.27},
  url         = {http://dx.doi.org/10.1371/journal.pcbi.1000641}
}
@article{Vercoutere2001Rapid,
  author   = {Vercoutere, W. and Winters-Hilt, S. and Olsen, H. and Deamer, D. and Haussler, D. and Akeson, M.},
  title    = {Rapid discrimination among individual {DNA} hairpin molecules at single-nucleotide resolution using an ion channel},
  journal  = {Nat Biotechnol},
  year     = {2001},
  volume   = {19},
  number   = {3},
  pages    = {248--252},
  month    = mar,
  abstract = {{RNA} and {DNA} strands produce ionic current signatures when driven through an alpha-hemolysin channel by an applied voltage. {H}ere we combine this nanopore detector with a support vector machine ({SVM}) to analyze {DNA} hairpin molecules on the millisecond time scale. {M}easurable properties include duplex stem length, base pair mismatches, and loop length. {T}his nanopore instrument can discriminate between individual {DNA} hairpins that differ by one base pair or by one nucleotide.},
  doi      = {10.1038/85696},
  pdf      = {../local/Vercoutere2001Rapid.pdf},
  file     = {Vercoutere2001Rapid.pdf:local/Vercoutere2001Rapid.pdf:PDF},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites, Biological, Bone Marrow Cells, Cell Compartmentation, Chemistry, Child, Chromosome Aberrations, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, DNA, Data Interpretation, Databases, Decision Trees, Diagnosis, Discriminant Analysis, Electric Conductivity, Electrophysiology, Escherichia coli Proteins, Factual, Female, Fungal, Gastric Emptying, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans, Ion Channels, Kinetics, Leukemia, Lipid Bilayers, Logistic Models, Lymphocytic, Male, Markov Chains, Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplastic, Neural Networks (Computer), Nevus, Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, Organ Specificity, Organelles, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive Value of Tests, Promoter Regions (Genetics), Protein Folding, Protein Structure, Proteins, Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment, Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation, Software, Statistical, Stomach Diseases, T-Lymphocytes, Thermodynamics, Transcription, Transcription Factors, Tumor Markers, U.S. Gov't},
  pmid     = {11231558},
  pii      = {85696},
  url      = {http://dx.doi.org/10.1038/85696}
}
@article{Wahba2002Soft,
  author   = {Wahba, Grace},
  title    = {Soft and hard classification by reproducing kernel {Hilbert} space methods},
  journal  = {Proc Natl Acad Sci U S A},
  year     = {2002},
  volume   = {99},
  number   = {26},
  pages    = {16524--16530},
  month    = dec,
  abstract = {Reproducing kernel {H}ilbert space ({RKHS}) methods provide a unified context for solving a wide variety of statistical modelling and function estimation problems. {W}e consider two such problems: {W}e are given a training set [yi, ti, i = 1, \ldots, n], where yi is the response for the ith subject, and ti is a vector of attributes for this subject. {T}he value of y(i) is a label that indicates which category it came from. {F}or the first problem, we wish to build a model from the training set that assigns to each t in an attribute domain of interest an estimate of the probability pj(t) that a (future) subject with attribute vector t is in category j. {T}he second problem is in some sense less ambitious; it is to build a model that assigns to each t a label, which classifies a future subject with that t into one of the categories or possibly "none of the above." {T}he approach to the first of these two problems discussed here is a special case of what is known as penalized likelihood estimation. {T}he approach to the second problem is known as the support vector machine. {W}e also note some alternate but closely related approaches to the second problem. {T}hese approaches are all obtained as solutions to optimization problems in {RKHS}. {M}any other problems, in particular the solution of ill-posed inverse problems, can be obtained as solutions to optimization problems in {RKHS} and are mentioned in passing. {W}e caution the reader that although a large literature exists in all of these topics, in this inaugural article we are selectively highlighting work of the author, former students, and other collaborators.},
  doi      = {10.1073/pnas.242574899},
  pdf      = {../local/Wahba2002Soft.pdf},
  file     = {Wahba2002Soft.pdf:local/Wahba2002Soft.pdf:PDF},
  keywords = {Acute, Algorithms, Animals, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Classification, Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted, Cystadenoma, DNA, Drug, Drug Design, Eukaryotic Cells, Female, Gene Expression, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans, Leukemia, Ligands, Likelihood Functions, Lymphocytic, Markov Chains, Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplastic, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, Observer Variation, Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S., Pattern Recognition, Probability, Protein Binding, Proteins, Quality Control, RNA, RNA Splicing, Receptors, Reference Values, Reproducibility of Results, Research Support, Sensitivity and Specificity, Sequence Analysis, Signal Processing, Statistical, Stomach Neoplasms, Thermodynamics, Transcription, Tumor Markers, U.S. Gov't},
  pmid     = {12477931},
  pii      = {242574899},
  url      = {http://dx.doi.org/10.1073/pnas.242574899}
}
@article{Weber2002Building,
  author   = {Weber, Griffin and Vinterbo, Staal and Ohno-Machado, Lucila},
  title    = {Building an asynchronous web-based tool for machine learning classification},
  journal  = {Proc {AMIA} Symp},
  year     = {2002},
  pages    = {869--873},
  abstract = {Various unsupervised and supervised learning methods including support vector machines, classification trees, linear discriminant analysis and nearest neighbor classifiers have been used to classify high-throughput gene expression data. {S}impler and more widely accepted statistical tools have not yet been used for this purpose, hence proper comparisons between classification methods have not been conducted. {W}e developed free software that implements logistic regression with stepwise variable selection as a quick and simple method for initial exploration of important genetic markers in disease classification. {T}o implement the algorithm and allow our collaborators in remote locations to evaluate and compare its results against those of other methods, we developed a user-friendly asynchronous web-based application with a minimal amount of programming using free, downloadable software tools. {W}ith this program, we show that classification using logistic regression can perform as well as other more sophisticated algorithms, and it has the advantages of being easy to interpret and reproduce. {B}y making the tool freely and easily available, we hope to promote the comparison of classification methods. {I}n addition, we believe our web application can be used as a model for other bioinformatics laboratories that need to develop web-based analysis tools in a short amount of time and on a limited budget.},
  keywords = {Acute, Algorithms, Animals, Artificial Intelligence, Automated, Base Pair Mismatch, Base Pairing, Base Sequence, Biological, Biosensing Techniques, Classification, Cluster Analysis, Comparative Study, Computational Biology, Computer-Assisted, Cystadenoma, DNA, Drug, Drug Design, Eukaryotic Cells, Female, Gene Expression, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Hemolysins, Humans, Internet, Leukemia, Ligands, Likelihood Functions, Logistic Models, Lymphocytic, Markov Chains, Mathematics, Messenger, Models, Molecular, Molecular Probe Techniques, Molecular Sequence Data, Nanotechnology, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, Observer Variation, Oligonucleotide Array Sequence Analysis, Ovarian Neoplasms, P.H.S., Pattern Recognition, Probability, Protein Binding, Proteins, Quality Control, RNA, RNA Splicing, Receptors, Reference Values, Reproducibility of Results, Research Support, Sensitivity and Specificity, Sequence Analysis, Signal Processing, Software, Statistical, Stomach Neoplasms, Thermodynamics, Transcription, Tumor Markers, U.S. Gov't},
  pmid     = {12463949},
  pii      = {D020001919}
}
@article{Wilbur2000Boosting,
  author   = {Wilbur, W. J.},
  title    = {Boosting naive {Bayesian} learning on a large subset of {MEDLINE}},
  journal  = {Proc {AMIA} Symp},
  year     = {2000},
  pages    = {918--922},
  abstract = {We are concerned with the rating of new documents that appear in a large database ({MEDLINE}) and are candidates for inclusion in a small specialty database ({REBASE}). {T}he requirement is to rank the new documents as nearly in order of decreasing potential to be added to the smaller database as possible, so as to improve the coverage of the smaller database without increasing the effort of those who manage this specialty database. {T}o perform this ranking task we have considered several machine learning approaches based on the na{\"\i}ve {B}ayesian algorithm. {W}e find that adaptive boosting outperforms na{\"\i}ve {B}ayes, but that a new form of boosting which we term staged {B}ayesian retrieval outperforms adaptive boosting. {S}taged {B}ayesian retrieval involves two stages of {B}ayesian retrieval and we further find that if the second stage is replaced by a support vector machine we again obtain a significant improvement over the strictly {B}ayesian approach.},
  keywords = {Acute, Acute Disease, Adenocarcinoma, Algorithms, Amino Acid Sequence, Animals, Artificial Intelligence, Automated, B-Lymphocytes, Bacterial Proteins, Base Pair Mismatch, Base Sequence, Bayes Theorem, Binding Sites, Biological, Bone Marrow Cells, Brachyura, Cell Compartmentation, Chemistry, Child, Chromosome Aberrations, Classification, Codon, Colonic Neoplasms, Comparative Study, Computational Biology, Computer Simulation, Computer-Assisted, DNA, Data Interpretation, Databases, Decision Trees, Diabetes Mellitus, Diagnosis, Discriminant Analysis, Discrimination Learning, Electric Conductivity, Electrophysiology, Escherichia coli Proteins, Factual, Feedback, Female, Fungal, Gastric Emptying, Gene Expression Profiling, Gene Expression Regulation, Genes, Genetic, Genetic Markers, Genetic Predisposition to Disease, Genomics, Hemolysins, Humans, Indians, Information Storage and Retrieval, Initiator, Ion Channels, Kinetics, Leukemia, Likelihood Functions, Lipid Bilayers, Logistic Models, Lymphocytic, MEDLINE, Male, Markov Chains, Melanoma, Models, Molecular, Myeloid, Neoplasm, Neoplasms, Neoplastic, Neural Networks (Computer), Neurological, Nevus, Non-P.H.S., Non-U.S. Gov't, Nonlinear Dynamics, Normal Distribution, North American, Nucleic Acid Conformation, Oligonucleotide Array Sequence Analysis, Organ Specificity, Organelles, Ovarian Neoplasms, Ovary, P.H.S., Pattern Recognition, Physical, Pigmented, Predictive Value of Tests, Promoter Regions (Genetics), Protein Biosynthesis, Protein Folding, Protein Structure, Proteins, Proteome, RNA, Reproducibility of Results, Research Support, Saccharomyces cerevisiae, Secondary, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, Sex Characteristics, Skin Diseases, Skin Neoplasms, Skin Pigmentation, Software, Sound Spectrography, Statistical, Stomach Diseases, T-Lymphocytes, Thermodynamics, Transcription, Transcription Factors, Tumor Markers, Type 2, U.S. Gov't, Vertebrates},
  pmid     = {11080018},
  pii      = {D200250}
}
@article{Xie2009Unified,
  author      = {Xie, Lei and Xie, Li and Bourne, Philip E.},
  title       = {A unified statistical model to support local sequence order independent similarity searching for ligand-binding sites and its application to genome-based drug discovery},
  journal     = {Bioinformatics},
  year        = {2009},
  volume      = {25},
  number      = {12},
  pages       = {i305--i312},
  month       = jun,
  abstract    = {Functional relationships between proteins that do not share global structure similarity can be established by detecting their ligand-binding-site similarity. For a large-scale comparison, it is critical to accurately and efficiently assess the statistical significance of this similarity. Here, we report an efficient statistical model that supports local sequence order independent ligand-binding-site similarity searching. Most existing statistical models only take into account the matching vertices between two sites that are defined by a fixed number of points. In reality, the boundary of the binding site is not known or is dependent on the bound ligand making these approaches limited. To address these shortcomings and to perform binding-site mapping on a genome-wide scale, we developed a sequence-order independent profile-profile alignment (SOIPPA) algorithm that is able to detect local similarity between unknown binding sites a priori. The SOIPPA scoring integrates geometric, evolutionary and physical information into a unified framework. However, this imposes a significant challenge in assessing the statistical significance of the similarity because the conventional probability model that is based on fixed-point matching cannot be applied. Here we find that scores for binding-site matching by SOIPPA follow an extreme value distribution (EVD). Benchmark studies show that the EVD model performs at least two-orders faster and is more accurate than the non-parametric statistical method in the previous SOIPPA version. Efficient statistical analysis makes it possible to apply SOIPPA to genome-based drug discovery. Consequently, we have applied the approach to the structural genome of Mycobacterium tuberculosis to construct a protein-ligand interaction network. The network reveals highly connected proteins, which represent suitable targets for promiscuous drugs.},
  doi         = {10.1093/bioinformatics/btp220},
  institution = {San Diego Supercomputer Center, University of California, San Diego, La Jolla, CA 92093, USA. lxie@sdsc.edu},
  keywords    = {Binding Sites; Computational Biology, methods; Drug Discovery, methods; Genome; Ligands; Models, Statistical; Mycobacterium tuberculosis, genetics/metabolism; Proteins, chemistry},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {bricehoffmann},
  pii         = {btp220},
  pmid        = {19478004},
  timestamp   = {2009.07.27},
  url         = {http://dx.doi.org/10.1093/bioinformatics/btp220}
}
@article{Yan2007Determining,
  author      = {Yan, Mingjin and Ye, Keying},
  title       = {Determining the number of clusters using the weighted gap statistic},
  journal     = {Biometrics},
  year        = {2007},
  volume      = {63},
  number      = {4},
  pages       = {1031--1037},
  month       = dec,
  abstract    = {Estimating the number of clusters in a data set is a crucial step in cluster analysis. In this article, motivated by the gap method (Tibshirani, Walther, and Hastie, 2001, Journal of the Royal Statistical Society B63, 411-423), we propose the weighted gap and the difference of difference-weighted (DD-weighted) gap methods for estimating the number of clusters in data using the weighted within-clusters sum of errors: a measure of the within-clusters homogeneity. In addition, we propose a "multilayer" clustering approach, which is shown to be more accurate than the original gap method, particularly in detecting the nested cluster structure of the data. The methods are applicable when the input data contain continuous measurements and can be used with any clustering method. Simulation studies and real data are investigated and compared among these proposed methods as well as with the original gap method.},
  doi         = {10.1111/j.1541-0420.2007.00784.x},
  institution = {Medtronic Sofamor Danek, 1800 Pyramid Place, Memphis, Tennessee 38132, USA. mingjin.yan@medtronic.com},
  keywords    = {Algorithms; Biometry, methods; Cluster Analysis; Computer Simulation; Data Interpretation, Statistical; Models, Biological; Models, Statistical; Pattern Recognition, Automated, methods},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {BIOM784},
  pmid        = {17425640},
  timestamp   = {2011.12.29},
  url         = {http://dx.doi.org/10.1111/j.1541-0420.2007.00784.x}
}
@article{Yu2004Advances,
  author    = {Yu, J. and Smith, V. A. and Wang, P. P. and Hartemink, A. J. and Jarvis, E. D.},
  title     = {Advances to {Bayesian} network inference for generating causal networks from observational biological data},
  journal   = {Bioinformatics},
  year      = {2004},
  volume    = {20},
  number    = {18},
  pages     = {3594--3603},
  month     = dec,
  abstract  = {MOTIVATION: Network inference algorithms are powerful computational tools for identifying putative causal interactions among variables from observational data. Bayesian network inference algorithms hold particular promise in that they can capture linear, non-linear, combinatorial, stochastic and other types of relationships among variables across multiple levels of biological organization. However, challenges remain when applying these algorithms to limited quantities of experimental data collected from biological systems. Here, we use a simulation approach to make advances in our dynamic Bayesian network (DBN) inference algorithm, especially in the context of limited quantities of biological data. RESULTS: We test a range of scoring metrics and search heuristics to find an effective algorithm configuration for evaluating our methodological advances. We also identify sampling intervals and levels of data discretization that allow the best recovery of the simulated networks. We develop a novel influence score for DBNs that attempts to estimate both the sign (activation or repression) and relative magnitude of interactions among variables. When faced with limited quantities of observational data, combining our influence score with moderate data interpolation reduces a significant portion of false positive interactions in the recovered networks. Together, our advances allow DBN inference algorithms to be more effective in recovering biological networks from experimentally collected data. AVAILABILITY: Source code and simulated data are available upon request. SUPPLEMENTARY INFORMATION: http://www.jarvislab.net/Bioinformatics/BNAdvances/},
  doi       = {10.1093/bioinformatics/bth448},
  keywords  = {Algorithms; Bayes Theorem; Computer Simulation; Gene Expression Profiling; Gene Expression Regulation; Models, Genetic; Models, Statistical; Oligonucleotide Array Sequence Analysis; Signal Transduction; Software},
  owner     = {fantine},
  pii       = {bth448},
  pmid      = {15284094},
  timestamp = {2010.10.21},
  url       = {http://dx.doi.org/10.1093/bioinformatics/bth448}
}
@article{Zhu2003Introduction,
  author   = {Zhu, Lingyun and Wu, Baoming and Cao, Changxiu},
  title    = {Introduction to medical data mining},
  journal  = {Sheng Wu Yi Xue Gong Cheng Xue Za Zhi},
  year     = {2003},
  volume   = {20},
  number   = {3},
  pages    = {559--562},
  month    = sep,
  abstract = {Modern medicine generates a great deal of information stored in the medical database. {E}xtracting useful knowledge and providing scientific decision-making for the diagnosis and treatment of disease from the database increasingly becomes necessary. {D}ata mining in medicine can deal with this problem. {I}t can also improve the management level of hospital information and promote the development of telemedicine and community medicine. {B}ecause the medical information is characteristic of redundancy, multi-attribution, incompletion and closely related with time, medical data mining differs from other one. {I}n this paper we have discussed the key techniques of medical data mining involving pretreatment of medical data, fusion of different pattern and resource, fast and robust mining algorithms and reliability of mining results. {T}he methods and applications of medical data mining based on computation intelligence such as artificial neural network, fuzzy system, evolutionary algorithms, rough set, and support vector machine have been introduced. {T}he features and problems in data mining are summarized in the last section.},
  keywords = {Algorithms, Anion Exchange Resins, Automatic Data Processing, Chemical, Chromatography, Computational Biology, Computer-Assisted, Data Interpretation, Databases, Decision Making, Decision Trees, English Abstract, Factual, Fuzzy Logic, Humans, Indicators and Reagents, Information Storage and Retrieval, Ion Exchange, Models, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Nucleic Acid Conformation, P.H.S., Proteins, Quantitative Structure-Activity Relationship, RNA, ROC Curve, Research Support, Sequence Analysis, Statistical, Transfer, U.S. Gov't},
  pmid     = {14565039}
}
@article{Zhu1998FRAME:,
  author  = {Zhu, S. C. and Wu, Y. and Mumford, D.},
  title   = {{FRAME}: {Filters}, {Random} Field {And} {Maximum} {Entropy} --- Towards a Unified Theory for Texture Modeling},
  journal = {Int. J. Comput. Vision},
  year    = {1998},
  volume  = {27},
  number  = {2},
  pages   = {1--20},
  pdf     = {../local/zhu98.pdf},
  file    = {zhu98.pdf:local/zhu98.pdf:PDF},
  subject = {stat},
  url     = {http://www.cis.ohio-state.edu/~szhu/frame_ijcv.ps.gz}
}
@article{Zhu1997Minimax,
  author  = {Zhu, S. C. and Wu, Z. N. and Mumford, D.},
  title   = {Minimax Entropy Principle and Its Application to Texture Modeling},
  journal = {Neural Comput.},
  year    = {1997},
  volume  = {9},
  number  = {8},
  pages   = {1627--1660},
  pdf     = {../local/zhu97.pdf},
  file    = {zhu97.pdf:local/zhu97.pdf:PDF},
  subject = {stat},
  url     = {http://www.cis.ohio-state.edu/~szhu/frame_neuro.ps.gz}
}
@comment{{jabref-meta: selector_author:}}
@comment{{jabref-meta: selector_journal:Adv. Drug Deliv. Rev.;Am. J. Hum. Genet.;Am. J. Pathol.;Ann. Appl. Stat.;Ann. Math. Statist.;Ann. N. Y. Acad. Sci.;Ann. Probab.;Ann. Stat.;Artif. Intell. Med.;Bernoulli;Biochim. Biophys. Acta;Bioinformatics;Biometrika;BMC Bioinformatics;Br. J. Pharmacol.;Breast Cancer Res.;Cell;Cell. Signal.;Chem. Res. Toxicol.;Clin. Cancer Res.;Combinator. Probab. Comput.;Comm. Pure Appl. Math.;Comput. Chem.;Comput. Comm. Rev.;Comput. Stat. Data An.;Curr. Genom.;Curr. Opin. Chem. Biol.;Curr. Opin. Drug Discov. Devel.;Data Min. Knowl. Discov.;Electron. J. Statist.;Eur. J. Hum. Genet.;FEBS Lett.;Found. Comput. Math.;Genome Biol.;IEEE T. Neural Networ.;IEEE T. Pattern. Anal.;IEEE T. Signal. Proces.;IEEE Trans. Inform. Theory;IEEE Trans. Knowl. Data Eng.;IEEE/ACM Trans. Comput. Biol. Bioinf.;Int. J. Comput. Vision;Int. J. Data Min. Bioinform.;Int. J. Qantum Chem.;J Biol Syst;J. ACM;J. Am. Soc. Inf. Sci. Technol.;J. Am. Stat. Assoc.;J. Bioinform. Comput. Biol.;J. Biol. Chem.;J. Biomed. Inform.;J. Cell. Biochem.;J. Chem. Inf. Comput. Sci.;J. Chem. Inf. Model.;J. Clin. Oncol.;J. Comput. Biol.;J. Comput. Graph. Stat.;J. Eur. Math. Soc.;J. Intell. Inform. Syst.;J. Mach. Learn. Res.;J. Med. Chem.;J. Mol. BIol.;J. R. Stat. Soc. Ser. B;Journal of Statistical Planning and Inference;Mach. Learn.;Math. Program.;Meth. Enzymol.;Mol. Biol. Cell;Mol. Biol. Evol.;Mol. Cell. Biol.;Mol. Syst. Biol.;N. Engl. J. Med.;Nat. Biotechnol.;Nat. Genet.;Nat. Med.;Nat. Methods;Nat. Rev. Cancer;Nat. Rev. Drug Discov.;Nat. Rev. Genet.;Nature;Neural Comput.;Neural Network.;Neurocomputing;Nucleic Acids Res.;Pattern Anal. Appl.;Pattern Recognit.;Phys. Rev. E;Phys. Rev. Lett.;PLoS Biology;PLoS Comput. Biol.;Probab. Theory Relat. Fields;Proc. IEEE;Proc. Natl. Acad. Sci. USA;Protein Eng.;Protein Eng. Des. Sel.;Protein Sci.;Protein. Struct. Funct. Genet.;Random Struct. Algorithm.;Rev. Mod. Phys.;Science;Stat. Probab. Lett.;Statistica Sinica;Theor. Comput. Sci.;Trans. Am. Math. Soc.;Trends Genet.;}}
@comment{{jabref-meta: selector_keywords:biogm;biosvm;breastcancer;cgh;chemogenomics;chemoinformatics;csbcbook;csbcbook-ch1;csbcbook-ch2;csbcbook-ch3;csbcbook-ch4;csbcbook-ch5;csbcbook-ch6;csbcbook-ch7;csbcbook-ch8;csbcbook-ch9;csbcbook-mustread;dimred;featureselection;glycans;herg;hic;highcontentscreening;image;immunoinformatics;kernel-theory;kernelbook;lasso;microarray;ngs;nlp;plasmodium;proteomics;PUlearning;rnaseq;segmentation;sirna;}}
@comment{{jabref-meta: selector_booktitle:Adv. Neural. Inform. Process Syst.;}}
This file was generated by bibtex2html 1.97.