@comment{{This file has been generated by bib2bib 1.97}}
@comment{{Command line: bib2bib ../bibli.bib -c 'subject:"microarray" or keywords:"microarray"' -ob tmp.bib}}
@inproceedings{Aliferis2002Machine,
  author    = {Aliferis, C. F. and Hardin, D. P. and Massion, P.},
  title     = {Machine {L}earning {M}odels {F}or {L}ung {C}ancer {C}lassification {U}sing {A}rray {C}omparative {G}enomic {H}ybridization},
  booktitle = {Proceedings of the 2002 {A}merican {M}edical {I}nformatics {A}ssociation ({AMIA}) {A}nnual {S}ymposium},
  year      = {2002},
  pages     = {7--11},
  abstract  = {Array {CGH} is a recently introduced technology that measures changes in the gene copy number of hundreds of genes in a single experiment. {T}he primary goal of this study was to develop machine learning models that classify non-small {L}ung {C}ancers according to histopathology types and to compare several machine learning methods in this learning task. {DNA} from tumors of 37 patients (21 squamous carcinomas, and 16 adenocarcinomas) were extracted and hybridized onto a 452 {BAC} clone array. {T}he following algorithms were used: {KNN}, {D}ecision {T}ree {I}nduction, {S}upport {V}ector {M}achines and {F}eed-{F}orward {N}eural {N}etworks. {P}erformance was measured via leave-one-out classification accuracy. {T}he best multi-gene model found had a leave-one-out accuracy of 89.2\%. {D}ecision {T}rees performed poorer than the other methods in this learning task and dataset. {W}e conclude that gene copy numbers as measured by array {CGH} are, collectively, an excellent indicator of histological subtype. {S}everal interesting research directions are discussed.},
  pdf       = {../local/Aliferis2002Machine.pdf},
  file      = {Aliferis2002Machine.pdf:local/Aliferis2002Machine.pdf:PDF},
  keywords  = {biosvm microarray, cgh},
  owner     = {jeanphilippevert},
}
@article{Bao2002Identifying,
  author   = {Bao, L. and Sun, Z.},
  title    = {Identifying genes related to drug anticancer mechanisms using support vector machine},
  journal  = {F{EBS} {L}ett.},
  volume   = {521},
  pages    = {109--114},
  year     = {2002},
  url      = {http://www.elsevier.com/febs/402/19/42/article.html},
  keywords = {biosvm microarray},
  subject  = {biokernel},
  pdf      = {../local/bao02.pdf},
  file     = {bao02.pdf:local/bao02.pdf:PDF},
  abstract = {In an effort to identify genes related to the cell line chemosensitivity and to evaluate the functional relationships between genes and anticancer drugs acting by the same mechanism, a supervised machine learning approach called support vector machine was used to label genes into any of the five predefined anticancer drug mechanistic categories. {A}mong dozens of unequivocally categorized genes, many were known to be causally related to the drug mechanisms. {F}or example, a few genes were found to be involved in the biological process triggered by the drugs (e.g. {DNA} polymerase epsilon was the direct target for the drugs from {DNA} antimetabolites category). {DNA} repair-related genes were found to be enriched for about eight-fold in the resulting gene set relative to the entire gene set. {S}ome uncharacterized transcripts might be of interest in future studies. {T}his method of correlating the drugs and genes provides a strategy for finding novel biologically significant relationships for molecular pharmacology.},
}
@article{Ben-Dor2000Tissue,
  author   = {Ben-Dor, A. and Bruhn, L. and Friedman, N. and Nachman, I. and Schummer, M. and Yakhini, Z.},
  title    = {Tissue Classification with Gene Expression Profiles},
  journal  = {J. Comput. Biol.},
  year     = {2000},
  volume   = {7},
  number   = {3-4},
  pages    = {559--583},
  abstract = {Constantly improving gene expression profiling technologies are expected to provide understanding and insight into cancer-related cellular processes. {G}ene expression data is also expected to significantly aid in the development of efficient cancer diagnosis and classification platforms. {I}n this work we examine three sets of gene expression data measured across sets of tumor(s) and normal clinical samples: {T}he first set consists of 2,000 genes, measured in 62 epithelial colon samples ({A}lon et al., 1999). {T}he second consists of approximately equal to 100,000 clones, measured in 32 ovarian samples (unpublished extension of data set described in {S}chummer et al. (1999)). {T}he third set consists of approximately equal to 7,100 genes, measured in 72 bone marrow and peripheral blood samples ({G}olub et al, 1999). {W}e examine the use of scoring methods, measuring separation of tissue type (e.g., tumors from normals) using individual gene expression levels. {T}hese are then coupled with high-dimensional classification methods to assess the classification power of complete expression profiles. {W}e present results of performing leave-one-out cross validation ({LOOCV}) experiments on the three data sets, employing nearest neighbor classifier, {SVM} ({C}ortes and {V}apnik, 1995), {A}da{B}oost ({F}reund and {S}chapire, 1997) and a novel clustering-based classification technique. {A}s tumor samples can differ from normal samples in their cell-type composition, we also perform {LOOCV} experiments using appropriately modified sets of genes, attempting to eliminate the resulting bias. {W}e demonstrate success rate of at least 90\% in tumor versus normal classification, using sets of selected genes, with, as well as without, cellular-contamination-related members. {T}hese results are insensitive to the exact selection mechanism, over a certain range.},
  pdf      = {../local/Ben-Dor2000Tissue.pdf},
  file     = {Ben-Dor2000Tissue.pdf:local/Ben-Dor2000Tissue.pdf:PDF},
  keywords = {biosvm microarray},
  owner    = {jeanphilippevert},
  url      = {http://www.liebertonline.com/doi/abs/10.1089/106652700750050943},
}
@article{Bozdech2004Antioxidant,
  author   = {Bozdech, Z. and Ginsburg, H.},
  title    = {Antioxidant defense in {P}lasmodium falciparum - data mining of the transcriptome},
  journal  = {Malaria {J}ournal},
  volume   = {3},
  number   = {1},
  pages    = {23},
  year     = {2004},
  doi      = {10.1186/1475-2875-3-23},
  url      = {http://www.malariajournal.com/content/3/1/23},
  keywords = {microarray plasmodium},
  owner    = {vert},
  pdf      = {../local/Bozdech2004Antioxidant.pdf},
  file     = {Bozdech2004Antioxidant.pdf:local/Bozdech2004Antioxidant.pdf:PDF},
  abstract = {The intraerythrocytic malaria parasite is under constant oxidative stress originating both from endogenous and exogenous processes. {T}he parasite is endowed with a complete network of enzymes and proteins that protect it from those threats, but also uses redox activities to regulate enzyme activities. {I}n the present analysis, the transcription of the genes coding for the antioxidant defense elements are viewed in the time-frame of the intraerythrocytic cycle. {T}ime-dependent transcription data were taken from the transcriptome of the human malaria parasite {P}lasmodium falciparum. {W}hereas for several processes the transcription of the many participating genes is coordinated, in the present case there are some outstanding deviations where gene products that utilize glutathione or thioredoxin are transcribed before the genes coding for elements that control the levels of those substrates are transcribed. {S}uch insights may hint to novel, non-classical pathways that necessitate further investigations.},
}
@article{Bozdech2003Transcriptome,
  author   = {Bozdech, Z. and Llinas, M. and Pulliam, B. L. and Wong, E. D. and Zhu, J. and DeRisi, J. L.},
  title    = {The {T}ranscriptome of the {I}ntraerythrocytic {D}evelopmental {C}ycle of {P}lasmodium falciparum},
  journal  = {P{L}o{S} {B}iology},
  year     = {2003},
  volume   = {1},
  number   = {1},
  pages    = {e5},
  abstract = {Plasmodium falciparum is the causative agent of the most burdensome form of human malaria, affecting 200-300 million individuals per year worldwide. {T}he recently sequenced genome of {P}. falciparum revealed over 5,400 genes, of which 60\% encode proteins of unknown function. {I}nsights into the biochemical function and regulation of these genes will provide the foundation for future drug and vaccine development efforts toward eradication of this disease. {B}y analyzing the complete asexual intraerythrocytic developmental cycle ({IDC}) transcriptome of the {HB}3 strain of {P}. falciparum, we demonstrate that at least 60\% of the genome is transcriptionally active during this stage. {O}ur data demonstrate that this parasite has evolved an extremely specialized mode of transcriptional regulation that produces a continuous cascade of gene expression, beginning with genes corresponding to general cellular processes, such as protein synthesis, and ending with {P}lasmodium-specific functionalities, such as genes involved in erythrocyte invasion. {T}he data reveal that genes contiguous along the chromosomes are rarely coregulated, while transcription from the plastid genome is highly coregulated and likely polycistronic. {C}omparative genomic hybridization between {HB}3 and the reference genome strain (3{D}7) was used to distinguish between genes not expressed during the {IDC} and genes not detected because of possible sequence variations. {G}enomic differences between these strains were found almost exclusively in the highly antigenic subtelomeric regions of chromosomes. {T}he simple cascade of gene regulation that directs the asexual development of {P}. falciparum is unprecedented in eukaryotic biology. {T}he transcriptome of the {IDC} resembles a "just-in-time" manufacturing process whereby induction of any given gene occurs once per cycle and only at a time when it is required. {T}hese data provide to our knowledge the first comprehensive view of the timing of transcription throughout the intraerythrocytic development of {P}. falciparum and provide a resource for the identification of new chemotherapeutic and vaccine candidates.},
  comment  = {(JP Vert) The paper that monitors the 48h cell cycle of P. falciparum},
  doi      = {10.1371/journal.pbio.0000005},
  pdf      = {../local/Bozdech2003Transcriptome.pdf},
  file     = {Bozdech2003Transcriptome.pdf:local/Bozdech2003Transcriptome.pdf:PDF},
  keywords = {microarray plasmodium},
  owner    = {vert},
  url      = {http://dx.doi.org/10.1371/journal.pbio.0000005},
}
@article{Bozdech2003Expression,
  author   = {Bozdech, Z. and Zhu, J. and Joachimiak, M. and Cohen, F. and Pulliam, B. and DeRisi, J.},
  title    = {Expression profiling of the schizont and trophozoite stages of {P}lasmodium falciparum with a long-oligonucleotide microarray},
  journal  = {Genome {B}iology},
  year     = {2003},
  volume   = {4},
  number   = {2},
  pages    = {R9},
  abstract = {B{ACKGROUND}: {T}he worldwide persistence of drug-resistant {P}lasmodium falciparum, the most lethal variety of human malaria, is a global health concern. {T}he {P}. falciparum sequencing project has brought new opportunities for identifying molecular targets for antimalarial drug and vaccine development. {RESULTS}: {W}e developed a software package, {A}rray{O}ligo{S}elector, to design an open reading frame ({ORF})-specific {DNA} microarray using the publicly available {P}. falciparum genome sequence. {E}ach gene was represented by one or more long 70 mer oligonucleotides selected on the basis of uniqueness within the genome, exclusion of low-complexity sequence, balanced base composition and proximity to the 3' end. {A} first-generation microarray representing approximately 6,000 {ORF}s of the {P}. falciparum genome was constructed. {A}rray performance was evaluated through the use of control oligonucleotide sets with increasing levels of introduced mutations, as well as traditional northern blotting. {U}sing this array, we extensively characterized the gene-expression profile of the intraerythrocytic trophozoite and schizont stages of {P}. falciparum. {T}he results revealed extensive transcriptional regulation of genes specialized for processes specific to these two stages. {CONCLUSIONS}: {DNA} microarrays based on long oligonucleotides are powerful tools for the functional annotation and exploration of the {P}. falciparum genome. {E}xpression profiling of trophozoites and schizonts revealed genes associated with stage-specific processes and may serve as the basis for future drug targets and vaccine development.},
  doi      = {10.1186/gb-2003-4-2-r9},
  pdf      = {../local/Bozdech2003Expression.pdf},
  file     = {Bozdech2003Expression.pdf:local/Bozdech2003Expression.pdf:PDF},
  keywords = {microarray plasmodium},
  owner    = {vert},
  url      = {http://genomebiology.com/2003/4/2/R9},
}
@article{Brown2000Knowledge-based,
  author   = {Brown, M. P. and Grundy, W. N. and Lin, D. and Cristianini, N. and Sugnet, C. W. and Furey, T. S. and Ares, M. and Haussler, D.},
  title    = {Knowledge-based analysis of microarray gene expression data by using support vector machines.},
  journal  = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year     = {2000},
  month    = jan,
  volume   = {97},
  number   = {1},
  pages    = {262--267},
  abstract = {We introduce a method of functionally classifying genes by using gene expression data from {DNA} microarray hybridization experiments. {T}he method is based on the theory of support vector machines ({SVM}s). {SVM}s are considered a supervised computer learning method because they exploit prior knowledge of gene function to identify unknown genes of similar function from expression data. {SVM}s avoid several problems associated with unsupervised clustering methods, such as hierarchical clustering and self-organizing maps. {SVM}s have many mathematical features that make them attractive for gene expression analysis, including their flexibility in choosing a similarity function, sparseness of solution when dealing with large data sets, the ability to handle large feature spaces, and the ability to identify outliers. {W}e test several {SVM}s that use different similarity metrics, as well as some other supervised learning methods, and find that the {SVM}s best identify sets of genes with a common function using expression data. {F}inally, we use {SVM}s to predict functional roles for uncharacterized yeast {ORF}s based on their expression data.},
  pdf      = {../local/Brown2000Knowledge-based.pdf},
  file     = {Brown2000Knowledge-based.pdf:local/Brown2000Knowledge-based.pdf:PDF},
  keywords = {biosvm microarray},
  url      = {http://www.pnas.org/cgi/content/abstract/97/1/262},
}
@comment{{Brown2000Exploring: year corrected to 1999 (Nat. Genet. volume 21 supplement, cf. ng0199supp in the URL); the citation key is kept unchanged so existing citations still resolve.}}
@article{Brown2000Exploring,
  author  = {Brown, P. O. and Botstein, D.},
  title   = {Exploring the new world of the genome with {DNA} microarrays},
  journal = {Nat. {G}enet.},
  year    = {1999},
  volume  = {21},
  pages   = {33--37},
  pdf     = {../local/brow00b.pdf},
  file    = {brow00b.pdf:local/brow00b.pdf:PDF},
  subject = {microarray},
  url     = {http://www.nature.com/ng/journal/v21/n1s/abs/ng0199supp_33.html},
}
@article{Burckin2005Exploring,
  author   = {Burckin, T. and Nagel, R. and Mandel-Gutfreund, Y. and Shiue, L. and Clark, T. A. and Chong, J.-L. and Chang, T.-H. and Squazzo, S. and Hartzog, G. and Ares, M.},
  title    = {Exploring functional relationships between components of the gene expression machinery.},
  journal  = {Nat. {S}truct. {M}ol. {B}iol.},
  year     = {2005},
  month    = feb,
  volume   = {12},
  number   = {2},
  pages    = {175--182},
  abstract = {Eukaryotic gene expression requires the coordinated activity of many macromolecular machines including transcription factors and {RNA} polymerase, the spliceosome, m{RNA} export factors, the nuclear pore, the ribosome and decay machineries. {Y}east carrying mutations in genes encoding components of these machineries were examined using microarrays to measure changes in both pre-m{RNA} and m{RNA} levels. {W}e used these measurements as a quantitative phenotype to ask how steps in the gene expression pathway are functionally connected. {A} multiclass support vector machine was trained to recognize the gene expression phenotypes caused by these mutations. {I}n several cases, unexpected phenotype assignments by the computer revealed functional roles for specific factors at multiple steps in the gene expression pathway. {T}he ability to resolve gene expression pathway phenotypes provides insight into how the major machineries of gene expression communicate with each other.},
  doi      = {10.1038/nsmb891},
  pdf      = {../local/Burckin2005Exploring.pdf},
  file     = {Burckin2005Exploring.pdf:local/Burckin2005Exploring.pdf:PDF},
  keywords = {biosvm microarray},
  pii      = {nsmb891},
  url      = {http://dx.doi.org/10.1038/nsmb891},
}
@article{Bussemaker2001Regulatory,
  author  = {Bussemaker, H. J. and Li, H. and Siggia, E. D.},
  title   = {Regulatory element detection using correlation with expression},
  journal = {Nat. {G}enet.},
  volume  = {27},
  pages   = {167--174},
  year    = {2001},
  url     = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/ng/journal/v27/n2/full/ng0201_167.html&filetype=pdf},
  subject = {microarray},
  pdf     = {../local/buss01.pdf},
  file    = {buss01.pdf:local/buss01.pdf:PDF},
}
@article{Chen2011Removing,
  author      = {Chen, Chao and Grennan, Kay and Badner, Judith and Zhang, Dandan and Gershon, Elliot and Jin, Li and Liu, Chunyu},
  title       = {Removing batch effects in analysis of expression microarray data: an evaluation of six batch adjustment methods.},
  journal     = {PLoS One},
  volume      = {6},
  number      = {2},
  pages       = {e17238},
  year        = {2011},
  doi         = {10.1371/journal.pone.0017238},
  url         = {http://dx.doi.org/10.1371/journal.pone.0017238},
  pmid        = {21386892},
  institution = {National Ministry of Education Key Laboratory of Contemporary Anthropology, Fudan University, Shanghai, People's Republic of China.},
  language    = {eng},
  medline-pst = {epublish},
  keywords    = {Bayes Theorem; Case-Control Studies; Data Interpretation, Statistical; Gene Expression Profiling, standards/statistics /&/ numerical data; Humans; Microarray Analysis, standards/statistics /&/ numerical data; ROC Curve; Reference Standards; Research Design; Sample Size; Selection Bias; Validation Studies as Topic},
  owner       = {jp},
  timestamp   = {2012.02.29},
  abstract    = {The expression microarray is a frequently used approach to study gene expression on a genome-wide scale. However, the data produced by the thousands of microarray studies published annually are confounded by "batch effects," the systematic error introduced when samples are processed in multiple batches. Although batch effects can be reduced by careful experimental design, they cannot be eliminated unless the whole study is done in a single batch. A number of programs are now available to adjust microarray data for batch effects prior to analysis. We systematically evaluated six of these programs using multiple measures of precision, accuracy and overall performance. ComBat, an Empirical Bayes method, outperformed the other five programs by most metrics. We also showed that it is essential to standardize expression data at the probe level when testing for correlation of expression profiles, due to a sizeable probe effect in microarray data that can inflate the correlation among replicates and unrelated samples.},
}
@comment{{Chiang2001Visualizing: supplement page numbers written as S49--S55, matching the S49 that appears in the entry's own URL.}}
@article{Chiang2001Visualizing,
  author  = {Chiang, D. Y. and Brown, P. O. and Eisen, M. B.},
  title   = {Visualizing associations between genome sequences and gene expression data using genome-mean expression profiles},
  journal = {Bioinformatics},
  year    = {2001},
  volume  = {17},
  pages   = {S49--S55},
  pdf     = {../local/chia01.pdf},
  file    = {chia01.pdf:local/chia01.pdf:PDF},
  subject = {microarray},
  url     = {http://bioinformatics.oupjournals.org/cgi/reprint/17/suppl_1/S49.pdf},
}
@article{Chu1998Transcriptional,
  author    = {Chu, S. and DeRisi, J. and Eisen, M. and Mulholland, J. and Botstein, D. and Brown, P. O. and Herskowitz, I.},
  title     = {The {T}ranscriptional {P}rogram of {S}porulation in {B}udding {Y}east},
  journal   = {Science},
  year      = {1998},
  volume    = {282},
  pages     = {699--705},
  pdf       = {../local/chu98.pdf},
  file      = {chu98.pdf:local/chu98.pdf:PDF},
  owner     = {phupe},
  subject   = {microarray},
  timestamp = {2009.10.15},
  url       = {http://www.sciencemag.org/cgi/reprint/282/5389/699.pdf},
}
@article{DeRisi1997Exploring,
  author  = {DeRisi, J. L. and Iyer, V. R. and Brown, P. O.},
  title   = {Exploring the metabolic and genetic control of gene expression on a genomic scale},
  journal = {Science},
  volume  = {278},
  number  = {5338},
  pages   = {680--686},
  year    = {1997},
  url     = {http://www.sciencemag.org/cgi/reprint/278/5338/680.pdf},
  subject = {microarray},
  pdf     = {../local/deri97.pdf},
  file    = {deri97.pdf:local/deri97.pdf:PDF},
}
@article{Dong2005Fast,
  author   = {Jian-xiong Dong and Adam Krzyzak and Ching Y Suen},
  title    = {Fast {SVM} training algorithm with decomposition on very large data sets.},
  journal  = {I{EEE} {T}rans {P}attern {A}nal {M}ach {I}ntell},
  year     = {2005},
  month    = apr,
  volume   = {27},
  number   = {4},
  pages    = {603--618},
  abstract = {Training a support vector machine on a data set of huge size with thousands of classes is a challenging problem. {T}his paper proposes an efficient algorithm to solve this problem. {T}he key idea is to introduce a parallel optimization step to quickly remove most of the nonsupport vectors, where block diagonal matrices are used to approximate the original kernel matrix so that the original problem can be split into hundreds of subproblems which can be solved more efficiently. {I}n addition, some effective strategies such as kernel caching and efficient computation of kernel matrix are integrated to speed up the training process. {O}ur analysis of the proposed algorithm shows that its time complexity grows linearly with the number of classes and size of the data set. {I}n the experiments, many appealing properties of the proposed algorithm have been investigated and the results show that the proposed algorithm has a much better scaling capability than {L}ibsvm, {SVM}light, and {SVMT}orch. {M}oreover, the good generalization performances on several large databases have also been achieved.},
  keywords = {Algorithms, Animals, Antibiotics, Antineoplastic, Artificial Intelligence, Automated, Automatic Data Processing, Butadienes, Chloroplasts, Comparative Study, Computer Simulation, Computer-Assisted, Database Management Systems, Databases, Diagnosis, Disinfectants, Dose-Response Relationship, Drug, Drug Toxicity, Electrodes, Electroencephalography, Ethylamines, Expert Systems, Factual, Feedback, Fungicides, Gene Expression Profiling, Genes, Genetic Markers, Humans, Image Enhancement, Image Interpretation, Implanted, Industrial, Information Storage and Retrieval, Kidney, Kidney Tubules, MEDLINE, Male, Mercuric Chloride, Microarray Analysis, Molecular Biology, Motor Cortex, Movement, Natural Language Processing, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Numerical Analysis, Pattern Recognition, Plant Proteins, Predictive Value of Tests, Proteins, Proteome, Proximal, Puromycin Aminonucleoside, Rats, Reproducibility of Results, Research Support, Sensitivity and Specificity, Signal Processing, Sprague-Dawley, Subcellular Fractions, Terminology, Therapy, Time Factors, Toxicogenetics, U.S. Gov't, User-Computer Interface, 15794164},
}
@article{Ein-Dor2005Outcome,
  author      = {Ein-Dor, L. and Kela, I. and Getz, G. and Givol, D. and Domany, E.},
  title       = {Outcome signature genes in breast cancer: is there a unique set?},
  journal     = {Bioinformatics},
  year        = {2005},
  month       = jan,
  volume      = {21},
  number      = {2},
  pages       = {171--178},
  abstract    = {MOTIVATION: Predicting the metastatic potential of primary malignant tissues has direct bearing on the choice of therapy. Several microarray studies yielded gene sets whose expression profiles successfully predicted survival. Nevertheless, the overlap between these gene sets is almost zero. Such small overlaps were observed also in other complex diseases, and the variables that could account for the differences had evoked a wide interest. One of the main open questions in this context is whether the disparity can be attributed only to trivial reasons such as different technologies, different patients and different types of analyses. RESULTS: To answer this question, we concentrated on a single breast cancer dataset, and analyzed it by a single method, the one which was used by van't Veer et al. to produce a set of outcome-predictive genes. We showed that, in fact, the resulting set of genes is not unique; it is strongly influenced by the subset of patients used for gene selection. Many equally predictive lists could have been produced from the same analysis. Three main properties of the data explain this sensitivity: (1) many genes are correlated with survival; (2) the differences between these correlations are small; (3) the correlations fluctuate strongly when measured over different subsets of patients. A possible biological explanation for these properties is discussed. CONTACT: eytan.domany@weizmann.ac.il SUPPLEMENTARY INFORMATION: http://www.weizmann.ac.il/physics/complex/compphys/downloads/liate/},
  doi         = {10.1093/bioinformatics/bth469},
  pdf         = {../local/Ein-Dor2005Outcome.pdf},
  file        = {Ein-Dor2005Outcome.pdf:Ein-Dor2005Outcome.pdf:PDF},
  institution = {Department of Physics of Complex Systems, Weizmann Institute of Science Rehovot 76100, Israel.},
  keywords    = {breastcancer, microarray, featureselection},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {bth469},
  pmid        = {15308542},
  timestamp   = {2010.10.12},
  url         = {http://dx.doi.org/10.1093/bioinformatics/bth469},
}
@article{Eisen1998Cluster,
  author  = {Eisen, M. B. and Spellman, P. T. and Brown, P. O. and Botstein, D.},
  title   = {Cluster analysis and display of genome-wide expression patterns},
  journal = {Proc. Natl. Acad. Sci. USA},
  year    = {1998},
  month   = dec,
  volume  = {95},
  pages   = {14863--14868},
  pdf     = {../local/Eisen1998Cluster.pdf},
  file    = {Eisen1998Cluster.pdf:Eisen1998Cluster.pdf:PDF},
  subject = {microarray},
  url     = {http://www.pnas.org/cgi/reprint/95/25/14863.pdf},
}
@comment{{Fan2006Concordance: pages completed to the full range 560--569 (only the first page was recorded before).}}
@article{Fan2006Concordance,
  author    = {Fan, C. and Oh, D. S. and Wessels, L. and Weigelt, B. and Nuyten, D. S. A. and Nobel, A. B. and van't Veer, L. J. and Perou, C. M.},
  title     = {Concordance among gene-expression-based predictors for breast cancer},
  journal   = {N. Engl. J. Med.},
  year      = {2006},
  volume    = {355},
  number    = {6},
  pages     = {560--569},
  doi       = {10.1056/NEJMoa052933},
  pdf       = {../local/Fan2006Concordance.pdf},
  file      = {Fan2006Concordance.pdf:Fan2006Concordance.pdf:PDF},
  keywords  = {breastcancer, microarray},
  owner     = {jp},
  publisher = {Mass Med Soc},
  timestamp = {2011.01.13},
  url       = {http://dx.doi.org/10.1056/NEJMoa052933},
}
@article{Ferea1999Systematic,
  author  = {Ferea, T. L. and Botstein, D. and Brown, P. O. and Rosenzweig, R. F.},
  title   = {Systematic changes in gene expression patterns following adaptive evolution in yeast},
  journal = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  volume  = {96},
  number  = {17},
  pages   = {9721--9726},
  year    = {1999},
  url     = {http://www.pnas.org/cgi/reprint/96/17/9721.pdf},
  subject = {microarray},
  pdf     = {../local/fere99.pdf},
  file    = {fere99.pdf:local/fere99.pdf:PDF},
}
@article{Friedman2000Using,
  author   = {Friedman, N. and Linial, M. and Nachman, I. and Pe'er, D.},
  title    = {Using {B}ayesian Networks to Analyze Expression Data},
  journal  = {J. Comput. Biol.},
  volume   = {7},
  number   = {3-4},
  pages    = {601--620},
  year     = {2000},
  doi      = {10.1089/106652700750050961},
  url      = {http://dx.doi.org/10.1089/106652700750050961},
  keywords = {biogm},
  subject  = {microarray},
  pdf      = {../local/Friedman2000Using.pdf},
  file     = {Friedman2000Using.pdf:local/Friedman2000Using.pdf:PDF},
  abstract = {D{NA} hybridization arrays simultaneously measure the expression level for thousands of genes. {T}hese measurements provide a "snapshot" of transcription levels within the cell. {A} major challenge in computational biology is to uncover, from such measurements, gene/protein interactions and key biological features of cellular systems. {I}n this paper, we propose a new framework for discovering interactions between genes based on multiple expression measurements. {T}his framework builds on the use of {B}ayesian networks for representing statistical dependencies. {A} {B}ayesian network is a graph-based model of joint multivariate probability distributions that captures properties of conditional independence between variables. {S}uch models are attractive for their ability to describe complex stochastic processes and because they provide a clear methodology for learning from (noisy) observations. {W}e start by showing how {B}ayesian networks can describe interactions between genes. {W}e then describe a method for recovering gene interactions from microarray data using tools for learning {B}ayesian networks. {F}inally, we demonstrate this method on the {S}. cerevisiae cell-cycle measurements of {S}pellman et al. (1998).},
}
@article{Garnis2006High,
  author      = {Garnis, C. and Lockwood, W. W. and Vucic, E. and Ge, Y. and Girard, L. and Minna, J. D. and Gazdar, A. F. and Lam, S. and MacAulay, C. and Lam, W. L.},
  title       = {High resolution analysis of non-small cell lung cancer cell lines by whole genome tiling path array {CGH}.},
  journal     = {Int. J. Cancer},
  volume      = {118},
  number      = {6},
  pages       = {1556--1564},
  year        = {2006},
  doi         = {10.1002/ijc.21491},
  url         = {http://dx.doi.org/10.1002/ijc.21491},
  pmid        = {16187286},
  institution = {British Columbia Cancer Research Centre, Vancouver, BC, Canada. cgarnis@bccrc.ca},
  language    = {eng},
  medline-pst = {ppublish},
  keywords    = {Carcinoma, Non-Small-Cell Lung, genetics/pathology; Cell Line, Tumor; Chromosomes, Artificial, Bacterial, genetics; Gene Amplification; Gene Dosage; Gene Expression Profiling; Genome, Human, genetics; Humans; Loss of Heterozygosity; Lung Neoplasms, genetics/pathology; Microarray Analysis, methods; Nucleic Acid Hybridization, methods},
  owner       = {jp},
  timestamp   = {2010.01.08},
  abstract    = {Chromosomal regions harboring tumor suppressors and oncogenes are often deleted or amplified. Array comparative genomic hybridization detects segmental DNA copy number alterations in tumor DNA relative to a normal control. The recent development of a bacterial artificial chromosome array, which spans the human genome in a tiling path manner with >32,000 clones, has facilitated whole genome profiling at an unprecedented resolution. Using this technology, we comprehensively describe and compare the genomes of 28 commonly used non-small cell lung carcinoma (NSCLC) cell models, derived from 18 adenocarcinomas (AC), 9 squamous cell carcinomas and 1 large cell carcinoma. Analysis at such resolution not only provided a detailed genomic alteration template for each of these model cell lines, but revealed novel regions of frequent duplication and deletion. Significantly, a detailed analysis of chromosome 7 identified 6 distinct regions of alterations across this chromosome, implicating the presence of multiple novel oncogene loci on this chromosome. As well, a comparison between the squamous and AC cells revealed alterations common to both subtypes, such as the loss of 3p and gain of 5p, in addition to multiple hotspots more frequently associated with only 1 subtype. Interestingly, chromosome 3q, which is known to be amplified in both subtypes, showed 2 distinct regions of alteration, 1 frequently altered in squamous and 1 more frequently altered in AC. In summary, our data demonstrate the unique information generated by high resolution analysis of NSCLC genomes and uncover the presence of genetic alterations prevalent in the different NSCLC subtypes.},
}
@article{Gasch2001Genomic,
  author  = {Gasch, A. P. and Huang, M. and Metzner, S. and Botstein, D. and Elledge, S. J. and Brown, P. O.},
  title   = {Genomic expression responses to {DNA}-damaging agents and the regulatory role of the yeast {ATR} homolog {M}ec1p},
  journal = {Mol. {B}iol. {C}ell},
  year    = {2001},
  volume  = {12},
  number  = {10},
  pages   = {2987--3003},
  pdf     = {../local/gasc01.pdf},
  file    = {gasc01.pdf:local/gasc01.pdf:PDF},
  subject = {microarray},
  url     = {http://www.molbiolcell.org/cgi/content/full/12/10/2987},
}
@article{Gasch2000Genomic,
  author  = {Gasch, A. P. and Spellman, P. T. and Kao, C. M. and Carmel-Harel, O. and Eisen, M. B. and Storz, G. and Botstein, D. and Brown, P. O.},
  title   = {Genomic {E}xpression {P}rograms in the {R}esponse of {Y}east {C}ells to {E}nvironmental {C}hanges},
  journal = {Mol. {B}iol. {C}ell},
  year    = {2000},
  month   = dec,
  volume  = {11},
  pages   = {4241--4257},
  pdf     = {../local/gasc00.pdf},
  file    = {gasc00.pdf:local/gasc00.pdf:PDF},
  subject = {microarray},
  url     = {http://www.molbiolcell.org/cgi/reprint/11/12/4241.pdf},
}
@article{Golub1999Molecular,
  author   = {Golub, T. R. and Slonim, D. K. and Tamayo, P. and Huard, C. and Gaasenbeek, M. and Mesirov, J. P. and Coller, H. and Loh, M. L. and Downing, J. R. and Caligiuri, M. A. and Bloomfield, C. D. and Lander, E. S.},
  title    = {Molecular classification of cancer: class discovery and class prediction by gene expression monitoring},
  journal  = {Science},
  year     = {1999},
  volume   = {286},
  number   = {5439},
  pages    = {531--537},
  abstract = {Although cancer classification has improved over the past 30 years, there has been no general approach for identifying new cancer classes (class discovery) or for assigning tumors to known classes (class prediction). Here, a generic approach to cancer classification based on gene expression monitoring by DNA microarrays is described and applied to human acute leukemias as a test case. A class discovery procedure automatically discovered the distinction between acute myeloid leukemia (AML) and acute lymphoblastic leukemia (ALL) without previous knowledge of these classes. An automatically derived class predictor was able to determine the class of new leukemia cases. The results demonstrate the feasibility of cancer classification based solely on gene expression monitoring and suggest a general strategy for discovering and predicting cancer classes for other types of cancer, independent of previous biological knowledge.},
  doi      = {10.1126/science.286.5439.531},
  pdf      = {../local/Golub1999Molecular.pdf},
  file     = {Golub1999Molecular.pdf:Golub1999Molecular.pdf:PDF},
  keywords = {csbcbook, csbcbook-ch3, csbcbook-ch4},
  subject  = {microarray},
  url      = {http://dx.doi.org/10.1126/science.286.5439.531}
}
@article{Gross2000Identification,
  author  = {C. Gross and M. Kelleher and V. R. Iyer and P. O. Brown and D. R. Winge},
  title   = {Identification of the copper regulon in {S}accharomyces cerevisiae by {DNA} microarrays},
  journal = {J. {B}iol. {C}hem.},
  year    = {2000},
  volume  = {275},
  number  = {41},
  pages   = {32310--32316},
  pdf     = {../local/gros00.pdf},
  file    = {gros00.pdf:local/gros00.pdf:PDF},
  subject = {microarray},
  url     = {http://www.jbc.org/cgi/content/full/275/41/32310}
}
@article{Haasdonk2005Feature,
  author   = {Bernard Haasdonk},
  title    = {Feature space interpretation of {SVM}s with indefinite kernels.},
  journal  = {I{EEE} {T}rans {P}attern {A}nal {M}ach {I}ntell},
  year     = {2005},
  volume   = {27},
  number   = {4},
  pages    = {482--492},
  month    = {Apr},
  abstract = {Kernel methods are becoming increasingly popular for various kinds of machine learning tasks, the most famous being the support vector machine ({SVM}) for classification. {T}he {SVM} is well understood when using conditionally positive definite (cpd) kernel functions. {H}owever, in practice, non-cpd kernels arise and demand application in {SVM}s. {T}he procedure of "plugging" these indefinite kernels in {SVM}s often yields good empirical classification results. {H}owever, they are hard to interpret due to missing geometrical and theoretical understanding. {I}n this paper, we provide a step toward the comprehension of {SVM} classifiers in these situations. {W}e give a geometric interpretation of {SVM}s with indefinite kernel functions. {W}e show that such {SVM}s are optimal hyperplane classifiers not by margin maximization, but by minimization of distances between convex hulls in pseudo-{E}uclidean spaces. {B}y this, we obtain a sound framework and motivation for indefinite {SVM}s. {T}his interpretation is the basis for further theoretical analysis, e.g., investigating uniqueness, and for the derivation of practical guidelines like characterizing the suitability of indefinite {SVM}s.},
  doi      = {10.1109/TPAMI.2005.78},
  pdf      = {../local/Haasdonk2005Feature.pdf},
  file     = {Haasdonk2005Feature.pdf:local/Haasdonk2005Feature.pdf:PDF},
  keywords = {Algorithms, Animals, Antibiotics, Antineoplastic, Artificial Intelligence, Automated, Automatic Data Processing, Butadienes, Chloroplasts, Cluster Analysis, Comparative Study, Computer Simulation, Computer-Assisted, Computing Methodologies, Database Management Systems, Databases, Diagnosis, Disinfectants, Dose-Response Relationship, Drug, Drug Toxicity, Electrodes, Electroencephalography, Ethylamines, Expert Systems, Factual, Feedback, Fungicides, Gene Expression Profiling, Genes, Genetic Markers, Humans, Image Enhancement, Image Interpretation, Implanted, Industrial, Information Storage and Retrieval, Kidney, Kidney Tubules, MEDLINE, Male, Mercuric Chloride, Microarray Analysis, Molecular Biology, Motor Cortex, Movement, Natural Language Processing, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Numerical Analysis, Pattern Recognition, Plant Proteins, Predictive Value of Tests, Proteins, Proteome, Proximal, Puromycin Aminonucleoside, Rats, Reproducibility of Results, Research Support, Sensitivity and Specificity, Signal Processing, Sprague-Dawley, Subcellular Fractions, Terminology, Therapy, Time Factors, Toxicogenetics, U.S. Gov't, User-Computer Interface, 15794155},
  url      = {http://dx.doi.org/10.1109/TPAMI.2005.78}
}
@article{Haferlach2005AML,
  author   = {Torsten Haferlach and Alexander Kohlmann and Susanne Schnittger and Martin Dugas and Wolfgang Hiddemann and Wolfgang Kern and Claudia Schoch},
  title    = {A{ML} {M}3 and {AML} {M}3 variant each have a distinct gene expression signature but also share patterns different from other genetically defined {AML} subtypes.},
  journal  = {Genes {C}hromosomes {C}ancer},
  year     = {2005},
  volume   = {43},
  number   = {2},
  pages    = {113--127},
  month    = {Jun},
  abstract = {Acute promyelocytic leukemia ({APL}) with t(15;17) appears in two phenotypes: {AML} {M}3, with abnormal promyelocytes showing heavy granulation and bundles of {A}uer rods, and {AML} {M}3 variant ({M}3v), with non- or hypogranular cytoplasm and a bilobed nucleus. {W}e investigated the global gene expression profiles of 35 {APL} patients (19 {AML} {M}3, 16 {AML} {M}3v) by using high-density {DNA}-oligonucleotide microarrays. {F}irst, an unsupervised approach clearly separated {APL} samples from other {AML}s characterized genetically as t(8;21) (n = 35), inv(16) (n = 35), or t(11q23)/{MLL} (n = 35) or as having a normal karyotype (n = 50). {S}econd, we found genes with functional relevance for blood coagulation that were differentially expressed between {APL} and other {AML}s. {F}urthermore, a supervised pairwise comparison between {M}3 and {M}3v revealed differential expression of genes that encode for biological functions and pathways such as granulation and maturation of hematologic cells, explaining morphologic and clinical differences. {D}iscrimination between {M}3 and {M}3v based on gene signatures showed a median classification accuracy of 90\% by use of 10-fold {CV} and support vector machines. {A}dditional molecular mutations such as {FLT}3-{LM}, which were significantly more frequent in {M}3v than in {M}3 ({P} < 0.0001), may partly contribute to the different phenotypes. {H}owever, linear regression analysis demonstrated that genes differentially expressed between {M}3 and {M}3v did not correlate with {FLT}3-{LM}.},
  doi      = {10.1002/gcc.20175},
  pdf      = {../local/Haferlach2005AML.pdf},
  file     = {Haferlach2005AML.pdf:local/Haferlach2005AML.pdf:PDF},
  keywords = {biosvm microarray},
  url      = {http://dx.doi.org/10.1002/gcc.20175}
}
@article{Haferlach2005global,
  author   = {Torsten Haferlach and Alexander Kohlmann and Susanne Schnittger and Martin Dugas and Wolfgang Hiddemann and Wolfgang Kern and Claudia Schoch},
  title    = {A global approach to the diagnosis of leukemia using gene expression profiling.},
  journal  = {Blood},
  year     = {2005},
  volume   = {106},
  number   = {4},
  pages    = {1189--1198},
  month    = {Aug},
  abstract = {Accurate diagnosis and classification of leukemias are the bases for the appropriate management of patients. {T}he diagnostic accuracy and efficiency of present methods may be improved by the use of microarrays for gene expression profiling. {W}e analyzed gene expression profiles in bone marrow and peripheral blood samples from 937 patients with all clinically relevant leukemia subtypes (n=892) and non-leukemic controls (n=45) by {U}133{A} and {B} {G}ene{C}hips ({A}ffymetrix). {F}or each subgroup differentially expressed genes were calculated. {C}lass prediction was performed using support vector machines. {P}rediction accuracies were estimated by 10-fold cross validation and assessed for robustness in a 100-fold resampling approach using randomly chosen test-sets consisting of 1/3 of the samples. {A}pplying the top 100 genes of each subgroup an overall prediction accuracy of 95.1\% was achieved which was confirmed by resampling (median, 93.8\%; 95\% confidence interval, 91.4\%-95.8\%). {I}n particular, {AML} with t(15;17), t(8;21), or inv(16), {CLL}, and {P}ro-{B}-{ALL} with t(11q23) were classified with 100\% sensitivity and 100\% specificity. {A}ccordingly, cluster analysis completely separated all of the 13 subgroups analyzed. {G}ene expression profiling can predict all clinically relevant subentities of leukemia with high accuracy.},
  doi      = {10.1182/blood-2004-12-4938},
  pdf      = {../local/Haferlach2005global.pdf},
  file     = {Haferlach2005global.pdf:local/Haferlach2005global.pdf:PDF},
  keywords = {biosvm microarray},
  pii      = {2004-12-4938},
  url      = {http://dx.doi.org/10.1182/blood-2004-12-4938}
}
@article{Hanisch2002Co-clustering,
  author  = {D. Hanisch and A. Zien and R. Zimmer and T. Lengauer},
  title   = {Co-clustering of biological networks and gene expression data},
  journal = {Bioinformatics},
  year    = {2002},
  volume  = {18},
  number  = {Suppl 1},
  pages   = {S145--S154},
  subject = {microarraybionet},
  url     = {http://cartan.gmd.de/~hanisch/paper/CoClustering.pdf}
}
@article{Ioannidis2005Microarrays,
  author    = {Ioannidis, J. P. A.},
  title     = {Microarrays and molecular research: noise discovery?},
  journal   = {Lancet},
  year      = {2005},
  volume    = {365},
  number    = {9458},
  pages     = {454},
  pdf       = {../local/Ioannidis2005Microarrays.pdf},
  file      = {Ioannidis2005Microarrays.pdf:Ioannidis2005Microarrays.pdf:PDF},
  keywords  = {microarray},
  owner     = {jp},
  timestamp = {2011.01.12}
}
@article{Ishkanian2004tiling,
  author      = {Ishkanian, A. S. and Malloff, C. A. and Watson, S. K. and DeLeeuw, R. J. and Chi, B. and Coe, B. P. and Snijders, A. and Albertson, D. G. and Pinkel, D. and Marra, M. A. and Ling, V. and MacAulay, C. and Lam, W. L.},
  title       = {A tiling resolution {DNA} microarray with complete coverage of the human genome},
  journal     = {Nat. Genet.},
  year        = {2004},
  volume      = {36},
  number      = {3},
  pages       = {299--303},
  month       = {Mar},
  abstract    = {We constructed a tiling resolution array consisting of 32,433 overlapping BAC clones covering the entire human genome. This increases our ability to identify genetic alterations and their boundaries throughout the genome in a single comparative genomic hybridization (CGH) experiment. At this tiling resolution, we identified minute DNA alterations not previously reported. These alterations include microamplifications and deletions containing oncogenes, tumor-suppressor genes and new genes that may be associated with multiple tumor types. Our findings show the need to move beyond conventional marker-based genome comparison approaches, that rely on inference of continuity between interval markers. Our submegabase resolution tiling set for array CGH (SMRT array) allows comprehensive assessment of genomic integrity and thereby the identification of new genes associated with disease.},
  doi         = {10.1038/ng1307},
  pdf         = {../local/Ishkanian2004tiling.pdf},
  file        = {Ishkanian2004tiling.pdf:Ishkanian2004tiling.pdf:PDF},
  institution = {British Columbia Cancer Research Centre, 601 West 10th Avenue, Vancouver, British Columbia V5Z 1L3, Canada.},
  keywords    = {csbcbook, microarray},
  owner       = {jp},
  pii         = {ng1307},
  pmid        = {14981516},
  timestamp   = {2009.10.08},
  url         = {http://dx.doi.org/10.1038/ng1307}
}
@article{Kuhn2001Global,
  author  = {K. M. Kuhn and J. L. DeRisi and P. O. Brown and P. Sarnow},
  title   = {Global and specific translational regulation in the genomic response of {S}accharomyces cerevisiae to a rapid transfer from a fermentable to a nonfermentable carbon source},
  journal = {Mol. {C}ell. {B}iol.},
  year    = {2001},
  volume  = {21},
  number  = {3},
  pages   = {916--927},
  pdf     = {../local/kuhn01.pdf},
  file    = {kuhn01.pdf:local/kuhn01.pdf:PDF},
  subject = {microarray},
  url     = {http://mcb.asm.org/cgi/content/full/21/3/916?view=full&pmid=11154278}
}
@article{LeRoch2003Discovery,
  author   = {Le Roch, K. G. and Zhou, Y. and Blair, P. L. and Grainger, M. and Moch, J. K. and Haynes, J. D. and De la Vega, P. and Holder, A. A. and Batalov, S. and Carucci, D. J. and Winzeler, E. A.},
  title    = {Discovery of Gene Function by Expression Profiling of the Malaria Parasite Life Cycle},
  journal  = {Science},
  year     = {2003},
  volume   = {301},
  number   = {5639},
  pages    = {1503--1508},
  abstract = {The completion of the genome sequence for {P}lasmodium falciparum, the species responsible for most malaria human deaths, has the potential to reveal hundreds of new drug targets and proteins involved in pathogenesis. {H}owever, only approximately 35\% of the genes code for proteins with an identifiable function. {T}he absence of routine genetic tools for studying {P}lasmodium parasites suggests that this number is unlikely to change quickly if conventional serial methods are used to characterize encoded proteins. {H}ere, we use a high-density oligonucleotide array to generate expression profiles of human and mosquito stages of the malaria parasite's life cycle. {G}enes with highly correlated levels and temporal patterns of expression were often involved in similar functions or cellular processes.},
  doi      = {10.1126/science.1087025},
  pdf      = {../local/LeRoch2003Discovery.pdf},
  file     = {LeRoch2003Discovery.pdf:LeRoch2003Discovery.pdf:PDF},
  keywords = {microarray plasmodium},
  owner    = {vert},
  url      = {http://www.sciencemag.org/cgi/content/full/301/5639/1503}
}
@article{Levy2007Diploid,
  author      = {Samuel Levy and Granger Sutton and Pauline C Ng and Lars Feuk and Aaron L Halpern and Brian P Walenz and Nelson Axelrod and Jiaqi Huang and Ewen F Kirkness and Gennady Denisov and Yuan Lin and Jeffrey R MacDonald and Andy Wing Chun Pang and Mary Shago and Timothy B Stockwell and Alexia Tsiamouri and Vineet Bafna and Vikas Bansal and Saul A Kravitz and Dana A Busam and Karen Y Beeson and Tina C McIntosh and Karin A Remington and Josep F Abril and John Gill and Jon Borman and Yu-Hui Rogers and Marvin E Frazier and Stephen W Scherer and Robert L Strausberg and J. Craig Venter},
  title       = {The diploid genome sequence of an individual human.},
  journal     = {PLoS Biol},
  year        = {2007},
  volume      = {5},
  number      = {10},
  pages       = {e254},
  month       = {Sep},
  abstract    = {Presented here is a genome sequence of an individual human. It was produced from approximately 32 million random DNA fragments, sequenced by Sanger dideoxy technology and assembled into 4,528 scaffolds, comprising 2,810 million bases (Mb) of contiguous sequence with approximately 7.5-fold coverage for any given region. We developed a modified version of the Celera assembler to facilitate the identification and comparison of alternate alleles within this individual diploid genome. Comparison of this genome and the National Center for Biotechnology Information human reference assembly revealed more than 4.1 million DNA variants, encompassing 12.3 Mb. These variants (of which 1,288,319 were novel) included 3,213,401 single nucleotide polymorphisms (SNPs), 53,823 block substitutions (2-206 bp), 292,102 heterozygous insertion/deletion events (indels)(1-571 bp), 559,473 homozygous indels (1-82,711 bp), 90 inversions, as well as numerous segmental duplications and copy number variation regions. Non-SNP DNA variation accounts for 22\% of all events identified in the donor, however they involve 74\% of all variant bases. This suggests an important role for non-SNP genetic alterations in defining the diploid genome structure. Moreover, 44\% of genes were heterozygous for one or more variants. Using a novel haplotype assembly strategy, we were able to span 1.5 Gb of genome sequence in segments >200 kb, providing further precision to the diploid nature of the genome. These data depict a definitive molecular portrait of a diploid human genome that provides a starting point for future genome comparisons and enables an era of individualized genomic information.},
  doi         = {10.1371/journal.pbio.0050254},
  institution = {J. Craig Venter Institute, Rockville, Maryland, USA. slevy@jcvi.org},
  keywords    = {Base Sequence; Chromosome Mapping, instrumentation/methods; Chromosomes, Human; Chromosomes, Human, Y, genetics; Diploidy; Gene Dosage; Genome, Human; Genotype; Haplotypes; Human Genome Project; Humans; INDEL Mutation; In Situ Hybridization, Fluorescence; Male; Microarray Analysis; Middle Aged; Molecular Sequence Data; Pedigree; Phenotype; Polymorphism, Single Nucleotide; Reproducibility of Results; Sequence Analysis, DNA, instrumentation/methods},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {philippe},
  pii         = {07-PLBI-RA-1258},
  pmid        = {17803354},
  timestamp   = {2010.07.28},
  url         = {http://dx.doi.org/10.1371/journal.pbio.0050254}
}
@article{Mavroforakis2005Significance,
  author   = {Michael Mavroforakis and Harris Georgiou and Nikos Dimitropoulos and Dionisis Cavouras and Sergios Theodoridis},
  title    = {Significance analysis of qualitative mammographic features, using linear classifiers, neural networks and support vector machines.},
  journal  = {Eur {J} {R}adiol},
  year     = {2005},
  volume   = {54},
  number   = {1},
  pages    = {80--89},
  month    = {Apr},
  abstract = {Advances in modern technologies and computers have enabled digital image processing to become a vital tool in conventional clinical practice, including mammography. {H}owever, the core problem of the clinical evaluation of mammographic tumors remains a highly demanding cognitive task. {I}n order for these automated diagnostic systems to perform in levels of sensitivity and specificity similar to that of human experts, it is essential that a robust framework on problem-specific design parameters is formulated. {T}his study is focused on identifying a robust set of clinical features that can be used as the base for designing the input of any computer-aided diagnosis system for automatic mammographic tumor evaluation. {A} thorough list of clinical features was constructed and the diagnostic value of each feature was verified against current clinical practices by an expert physician. {T}hese features were directly or indirectly related to the overall morphological properties of the mammographic tumor or the texture of the fine-scale tissue structures as they appear in the digitized image, while others contained external clinical data of outmost importance, like the patient's age. {T}he entire feature set was used as an annotation list for describing the clinical properties of mammographic tumor cases in a quantitative way, such that subsequent objective analyses were possible. {F}or the purposes of this study, a mammographic image database was created, with complete clinical evaluation descriptions and positive histological verification for each case. {A}ll tumors contained in the database were characterized according to the identified clinical features' set and the resulting dataset was used as input for discrimination and diagnostic value analysis for each one of these features. {S}pecifically, several standard methodologies of statistical significance analysis were employed to create feature rankings according to their discriminating power. {M}oreover, three different classification models, namely linear classifiers, neural networks and support vector machines, were employed to investigate the true efficiency of each one of them, as well as the overall complexity of the diagnostic task of mammographic tumor characterization. {B}oth the statistical and the classification results have proven the explicit correlation of all the selected features with the final diagnosis, qualifying them as an adequate input base for any type of similar automated diagnosis system. {T}he underlying complexity of the diagnostic task has justified the high value of sophisticated pattern recognition architectures.},
  doi      = {10.1016/j.ejrad.2004.12.015},
  pdf      = {../local/Mavroforakis2005Significance.pdf},
  file     = {Mavroforakis2005Significance.pdf:local/Mavroforakis2005Significance.pdf:PDF},
  keywords = {Algorithms, Animals, Antibiotics, Antineoplastic, Artificial Intelligence, Butadienes, Chloroplasts, Comparative Study, Computer Simulation, Computer-Assisted, Diagnosis, Disinfectants, Dose-Response Relationship, Drug, Drug Toxicity, Electrodes, Electroencephalography, Ethylamines, Expert Systems, Feedback, Fungicides, Gene Expression Profiling, Genes, Genetic Markers, Humans, Implanted, Industrial, Information Storage and Retrieval, Kidney, Kidney Tubules, MEDLINE, Male, Mercuric Chloride, Microarray Analysis, Molecular Biology, Motor Cortex, Movement, Natural Language Processing, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Plant Proteins, Predictive Value of Tests, Proteins, Proteome, Proximal, Puromycin Aminonucleoside, Rats, Reproducibility of Results, Research Support, Sprague-Dawley, Subcellular Fractions, Terminology, Therapy, Time Factors, Toxicogenetics, U.S. Gov't, User-Computer Interface, 15797296},
  pii      = {S0720-048X(05)00023-9},
  url      = {http://dx.doi.org/10.1016/j.ejrad.2004.12.015}
}
@article{Meireles2003Differentially,
  author   = {Meireles, S. I. and Carvalho, A. F. and Hirata, R. and Montagnini, A. L. and Martins, W. K. and Runza, F. B. and Stolf, B. S. and Termini, L. and Neto, C. E. and Silva, R. L. and Soares, F. A. and Neves, E. J. and Reis, L. F.},
  title    = {Differentially expressed genes in gastric tumors identified by c{DNA} array.},
  journal  = {Cancer {L}ett.},
  year     = {2003},
  volume   = {190},
  number   = {2},
  pages    = {199--211},
  month    = {Feb},
  abstract = {Using c{DNA} fragments from the {FAPESP}/l{ICR} {C}ancer {G}enome {P}roject, we constructed a c{DNA} array having 4512 elements and determined gene expression in six normal and six tumor gastric tissues. {U}sing t-statistics, we identified 80 c{DNA}s whose expression in normal and tumor samples differed more than 3.5 sample standard deviations. {U}sing {S}elf-{O}rganizing {M}ap, the expression profile of these c{DNA}s allowed perfect separation of malignant and non-malignant samples. {U}sing the supervised learning procedure {S}upport {V}ector {M}achine, we identified trios of c{DNA}s that could be used to classify samples as normal or tumor, based on single-array analysis. {F}inally, we identified genes with altered linear correlation when their expression in normal and tumor samples were compared. {F}urther investigation concerning the function of these genes could contribute to the understanding of gastric carcinogenesis and may prove useful in molecular diagnostics.},
  doi      = {10.1016/S0304-3835(02)00587-6},
  pdf      = {../local/Meireles2003Differentially.pdf},
  file     = {Meireles2003Differentially.pdf:local/Meireles2003Differentially.pdf:PDF},
  keywords = {biosvm microarray},
  owner    = {jeanphilippevert},
  url      = {http://dx.doi.org/10.1016/S0304-3835(02)00587-6}
}
@article{Michiels2005Prediction,
  author      = {Michiels, S. and Koscielny, S. and Hill, C.},
  title       = {Prediction of cancer outcome with microarrays: a multiple random validation strategy},
  journal     = {Lancet},
  year        = {2005},
  volume      = {365},
  number      = {9458},
  pages       = {488--492},
  abstract    = {BACKGROUND: General studies of microarray gene-expression profiling have been undertaken to predict cancer outcome. Knowledge of this gene-expression profile or molecular signature should improve treatment of patients by allowing treatment to be tailored to the severity of the disease. We reanalysed data from the seven largest published studies that have attempted to predict prognosis of cancer patients on the basis of DNA microarray analysis. METHODS: The standard strategy is to identify a molecular signature (ie, the subset of genes most differentially expressed in patients with different outcomes) in a training set of patients and to estimate the proportion of misclassifications with this signature on an independent validation set of patients. We expanded this strategy (based on unique training and validation sets) by using multiple random sets, to study the stability of the molecular signature and the proportion of misclassifications. FINDINGS: The list of genes identified as predictors of prognosis was highly unstable; molecular signatures strongly depended on the selection of patients in the training sets. For all but one study, the proportion misclassified decreased as the number of patients in the training set increased. Because of inadequate validation, our chosen studies published overoptimistic results compared with those from our own analyses. Five of the seven studies did not classify patients better than chance. INTERPRETATION: The prognostic value of published microarray results in cancer studies should be considered with caution. We advocate the use of validation by repeated random sampling.},
  doi         = {10.1016/S0140-6736(05)17866-0},
  institution = {Biostatistics and Epidemiology Unit, Institut Gustave Roussy, Villejuif, France.},
  keywords    = {featureselection, breastcancer, microarray},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {S0140-6736(05)17866-0},
  pmid        = {15705458},
  timestamp   = {2010.10.12},
  url         = {http://dx.doi.org/10.1016/S0140-6736(05)17866-0}
}
@techreport{Mukherjee1998Support,
  author      = {S. Mukherjee and P. Tamayo and J. P. Mesirov and D. Slonim and A. Verri and T. Poggio},
  title       = {Support vector machine classification of microarray data},
  institution = {C.B.C.L.},
  year        = {1998},
  number      = {182},
  note        = {A.I. Memo 1677},
  pdf         = {../local/Mukherjee1998Support.pdf},
  file        = {Mukherjee1998Support.pdf:local/Mukherjee1998Support.pdf:PDF},
  keywords    = {biosvm microarray},
  subject     = {biokernel},
  url         = {http://citeseer.nj.nec.com/437379.html}
}
@article{ODonnell2005Gene,
  author   = {Rebekah K O'Donnell and Michael Kupferman and S. Jack Wei and Sunil Singhal and Randal Weber and Bert O'Malley and Yi Cheng and Mary Putt and Michael Feldman and Barry Ziober and Ruth J Muschel},
  title    = {Gene expression signature predicts lymphatic metastasis in squamous cell carcinoma of the oral cavity.},
  journal  = {Oncogene},
  year     = {2005},
  volume   = {24},
  number   = {7},
  pages    = {1244--1251},
  month    = {Feb},
  abstract = {Metastasis via the lymphatics is a major risk factor in squamous cell carcinoma of the oral cavity ({OSCC}). {W}e sought to determine whether the presence of metastasis in the regional lymph node could be predicted by a gene expression signature of the primary tumor. {A} total of 18 {OSCC}s were characterized for gene expression by hybridizing {RNA} to {A}ffymetrix {U}133{A} gene chips. {G}enes with differential expression were identified using a permutation technique and verified by quantitative {RT}-{PCR} and immunohistochemistry. {A} predictive rule was built using a support vector machine, and the accuracy of the rule was evaluated using crossvalidation on the original data set and prediction of an independent set of four patients. {M}etastatic primary tumors could be differentiated from nonmetastatic primary tumors by a signature gene set of 116 genes. {T}his signature gene set correctly predicted the four independent patients as well as associating five lymph node metastases from the original patient set with the metastatic primary tumor group. {W}e concluded that lymph node metastasis could be predicted by gene expression profiles of primary oral cavity squamous cell carcinomas. {T}he presence of a gene expression signature for lymph node metastasis indicates that clinical testing to assess risk for lymph node metastasis should be possible.},
  doi      = {10.1038/sj.onc.1208285},
  pdf      = {../local/O'Donnell2005Gene.pdf},
  file     = {O'Donnell2005Gene.pdf:local/O'Donnell2005Gene.pdf:PDF},
  keywords = {biosvm microarray},
  pii      = {1208285},
  url      = {http://dx.doi.org/10.1038/sj.onc.1208285}
}
@article{Ogawa2000New,
  author  = {Nobuo Ogawa and Joseph DeRisi and Patrick O. Brown},
  title   = {New {C}omponents of a {S}ystem for {P}hosphate {A}ccumulation and {P}olyphosphate {M}etabolism in {S}accharomyces cerevisiae {R}evealed by {G}enomic {E}xpression {A}nalysis},
  journal = {Mol. {B}iol. {C}ell},
  year    = {2000},
  volume  = {11},
  number  = {12},
  pages   = {4309--4321},
  month   = {Dec},
  pdf     = {../local/ogaw00.pdf},
  file    = {ogaw00.pdf:local/ogaw00.pdf:PDF},
  subject = {microarray},
  url     = {http://www.molbiolcell.org/cgi/reprint/11/12/4309.pdf}
}
@article{Pavey2004Microarray,
  author   = {Pavey, S. and Johansson, P. and Packer, L. and Taylor, J. and Stark, M. and Pollock, P. M. and Walker, G. J. and Boyle, G. M. and Harper, U. and Cozzi, S. J. and Hansen, K. and Yudt, L. and Schmidt, C. and Hersey, P. and Ellem, K. A. and O'Rourke, M. G. and Parsons, P. G. and Meltzer, P. and Ringner, M. and Hayward, N. K.},
  title    = {Microarray expression profiling in melanoma reveals a {BRAF} mutation signature},
  journal  = {Oncogene},
  year     = {2004},
  volume   = {23},
  number   = {23},
  pages    = {4060--4067},
  month    = {May},
  abstract = {We have used microarray gene expression profiling and machine learning to predict the presence of {BRAF} mutations in a panel of 61 melanoma cell lines. {T}he {BRAF} gene was found to be mutated in 42 samples (69\%) and intragenic mutations of the {NRAS} gene were detected in seven samples (11\%). {N}o cell line carried mutations of both genes. {U}sing support vector machines, we have built a classifier that differentiates between melanoma cell lines based on {BRAF} mutation status. {A}s few as 83 genes are able to discriminate between {BRAF} mutant and {BRAF} wild-type samples with clear separation observed using hierarchical clustering. {M}ultidimensional scaling was used to visualize the relationship between a {BRAF} mutation signature and that of a generalized mitogen-activated protein kinase ({MAPK}) activation (either {BRAF} or {NRAS} mutation) in the context of the discriminating gene list. {W}e observed that samples carrying {NRAS} mutations lie somewhere between those with or without {BRAF} mutations. {T}hese observations suggest that there are gene-specific mutation signals in addition to a common {MAPK} activation that result from the pleiotropic effects of either {BRAF} or {NRAS} on other signaling pathways, leading to measurably different transcriptional changes.},
  doi      = {10.1038/sj.onc.1207563},
  pdf      = {../local/Pavey2004Microarray.pdf},
  file     = {Pavey2004Microarray.pdf:local/Pavey2004Microarray.pdf:PDF},
  keywords = {biosvm microarray},
  owner    = {jeanphilippevert},
  url      = {http://dx.doi.org/10.1038/sj.onc.1207563}
}
@article{Peng2003Molecular,
  author   = {Peng, S. and Xu, Q. and Ling, X. B. and Peng, X. and Du, W. and Chen, L.},
  title    = {Molecular classification of cancer types from microarray data using the combination of genetic algorithms and support vector machines.},
  journal  = {F{EBS} {L}ett.},
  year     = {2003},
  volume   = {555},
  number   = {2},
  pages    = {358--362},
  abstract = {Simultaneous multiclass classification of tumor types is essential for future clinical implementations of microarray-based cancer diagnosis. {I}n this study, we have combined genetic algorithms ({GA}s) and all paired support vector machines ({SVM}s) for multiclass cancer identification. {T}he predictive features have been selected through iterative {SVM}s/{GA}s, and recursive feature elimination post-processing steps, leading to a very compact cancer-related predictive gene set. {L}eave-one-out cross-validations yielded accuracies of 87.93\% for the eight-class and 85.19\% for the fourteen-class cancer classifications, outperforming the results derived from previously published methods.},
  doi      = {10.1016/S0014-5793(03)01275-4},
  pdf      = {../local/Peng2003Molecular.pdf},
  file     = {Peng2003Molecular.pdf:local/Peng2003Molecular.pdf:PDF},
  keywords = {biosvm microarray},
  owner    = {jeanphilippevert},
  url      = {http://dx.doi.org/10.1016/S0014-5793(03)01275-4}
}
@article{Pilpel2001Identifying,
  author  = {Pilpel, Y. and Sudarsanam, P. and Church, G. M.},
  title   = {Identifying regulatory networks by combinatorial analysis of promoter elements},
  journal = {Nat. Genet.},
  year    = {2001},
  volume  = {29},
  number  = {2},
  pages   = {153--159},
  pdf     = {../local/pilp01.pdf},
  file    = {pilp01.pdf:local/pilp01.pdf:PDF},
  subject = {microarray},
  url     = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/ng/journal/v29/n2/full/ng724.html&filetype=PDF}
}
@article{Pochet2004Systematic,
  author   = {Pochet, N. and De Smet, F. and Suykens, J. A. K. and De Moor, B. L. R.},
  title    = {Systematic benchmarking of microarray data classification: assessing the role of non-linearity and dimensionality reduction},
  journal  = {Bioinformatics},
  year     = {2004},
  volume   = {20},
  pages    = {3185--3195},
  number   = {17},
  month    = nov,
  abstract = {Motivation: {M}icroarrays are capable of determining the expression levels of thousands of genes simultaneously. {I}n combination with classification methods, this technology can be useful to support clinical management decisions for individual patients, e.g. in oncology. {T}he aim of this paper is to systematically benchmark the role of non-linear versus linear techniques and dimensionality reduction methods. {R}esults: {A} systematic benchmarking study is performed by comparing linear versions of standard classification and dimensionality reduction techniques with their non-linear versions based on non-linear kernel functions with a radial basis function ({RBF}) kernel. {A} total of 9 binary cancer classification problems, derived from 7 publicly available microarray datasets, and 20 randomizations of each problem are examined. {C}onclusions: {T}hree main conclusions can be formulated based on the performances on independent test sets. (1) {W}hen performing classification with least squares support vector machines ({LS}-{SVM}s) (without dimensionality reduction), {RBF} kernels can be used without risking too much overfitting. {T}he results obtained with well-tuned {RBF} kernels are never worse and sometimes even statistically significantly better compared to results obtained with a linear kernel in terms of test set receiver operating characteristic and test set accuracy performances. (2) {E}ven for classification with linear classifiers like {LS}-{SVM} with linear kernel, using regularization is very important. (3) {W}hen performing kernel principal component analysis (kernel {PCA}) before classification, using an {RBF} kernel for kernel {PCA} tends to result in overfitting, especially when using supervised feature selection. {I}t has been observed that an optimal selection of a large number of features is often an indication for overfitting. {K}ernel {PCA} with linear kernel gives better results. {A}vailability: {M}atlab scripts are available on request. {S}upplementary information: http://www.esat.kuleuven.ac.be/~npochet/{B}ioinformatics/},
  doi      = {10.1093/bioinformatics/bth383},
  pdf      = {../local/Pochet2004Systematic.pdf},
  file     = {Pochet2004Systematic.pdf:local/Pochet2004Systematic.pdf:PDF},
  keywords = {biosvm microarray},
  owner    = {jeanphilippevert},
  url      = {http://dx.doi.org/10.1093/bioinformatics/bth383}
}
@article{Ramaswamy2001Multiclass,
  author   = {Ramaswamy, S. and Tamayo, P. and Rifkin, R. and Mukherjee, S. and Yeang, C.H. and Angelo, M. and Ladd, C. and Reich, M. and Latulippe, E. and Mesirov, J.P. and Poggio, T. and Gerald, W. and Loda, M. and Lander, E.S. and Golub, T.R.},
  title    = {Multiclass cancer diagnosis using tumor gene expression signatures},
  journal  = {Proc. {N}atl. {A}cad. {S}ci. {USA}},
  year     = {2001},
  volume   = {98},
  pages    = {15149--15154},
  number   = {26},
  month    = dec,
  abstract = {The optimal treatment of patients with cancer depends on establishing accurate diagnoses by using a complex combination of clinical and histopathological data. {I}n some instances, this task is difficult or impossible because of atypical clinical presentation or histopathology. {T}o determine whether the diagnosis of multiple common adult malignancies could be achieved purely by molecular classification, we subjected 218 tumor samples, spanning 14 common tumor types, and 90 normal tissue samples to oligonucleotide microarray gene expression analysis. {T}he expression levels of 16,063 genes and expressed sequence tags were used to evaluate the accuracy of a multiclass classifier based on a support vector machine algorithm. {O}verall classification accuracy was 78\%, far exceeding the accuracy of random classification (9\%). {P}oorly differentiated cancers resulted in low-confidence predictions and could not be accurately classified according to their tissue of origin, indicating that they are molecularly distinct entities with dramatically different gene expression patterns compared with their well differentiated counterparts. {T}aken together, these results demonstrate the feasibility of accurate, multiclass molecular cancer classification and suggest a strategy for future clinical implementation of molecular cancer diagnostics.},
  doi      = {10.1073/pnas.211566398},
  pdf      = {../local/Ramaswamy2001Multiclass.pdf},
  file     = {Ramaswamy2001Multiclass.pdf:local/Ramaswamy2001Multiclass.pdf:PDF},
  keywords = {biosvm microarray},
  owner    = {vert},
  url      = {http://dx.doi.org/10.1073/pnas.211566398}
}
@phdthesis{Reyal2009Analyse,
  author    = {Reyal, F.},
  title     = {Analyse du profil d'expression par la technique des puces {\`a} {ADN}. {A}pplication {\`a} la caract{\'e}risation mol{\'e}culaire et {\`a} la d{\'e}termination du pronostic des cancers canalaires infiltrants du sein.},
  school    = {Universit{\'e} Paris 11},
  year      = {2009},
  keywords  = {breastcancer, microarray},
  owner     = {jp},
  timestamp = {2009.10.31}
}
@article{Schena1995Quantitative,
  author      = {M. Schena and D. Shalon and R. W. Davis and P. O. Brown},
  title       = {Quantitative monitoring of gene expression patterns with a complementary {DNA} microarray.},
  journal     = {Science},
  year        = {1995},
  volume      = {270},
  pages       = {467--470},
  number      = {5235},
  month       = oct,
  abstract    = {A high-capacity system was developed to monitor the expression of many genes in parallel. Microarrays prepared by high-speed robotic printing of complementary DNAs on glass were used for quantitative expression measurements of the corresponding genes. Because of the small format and high density of the arrays, hybridization volumes of 2 microliters could be used that enabled detection of rare transcripts in probe mixtures derived from 2 micrograms of total cellular messenger RNA. Differential expression measurements of 45 Arabidopsis genes were made by means of simultaneous, two-color fluorescence hybridization.},
  doi         = {10.1126/science.270.5235.467},
  pdf         = {../local/Schena1995Quantitative.pdf},
  file        = {Schena1995Quantitative.pdf:Schena1995Quantitative.pdf:PDF},
  institution = {Department of Biochemistry, Beckman Center, Stanford University Medical Center, CA 94305, USA.},
  keywords    = {microarray},
  owner       = {jp},
  pmid        = {7569999},
  timestamp   = {2009.02.08},
  url         = {http://dx.doi.org/10.1126/science.270.5235.467}
}
@article{Segal2005From,
  author   = {Segal, E. and Friedman, N. and Kaminski, N. and Regev, A. and Koller, D.},
  title    = {From signatures to models: understanding cancer using microarrays},
  journal  = {Nat. Genet.},
  year     = {2005},
  volume   = {37},
  pages    = {S38--S45},
  number   = {6 Suppl},
  abstract = {Genomics has the potential to revolutionize the diagnosis and management of cancer by offering an unprecedented comprehensive view of the molecular underpinnings of pathology. {C}omputational analysis is essential to transform the masses of generated data into a mechanistic understanding of disease. {H}ere we review current research aimed at uncovering the modular organization and function of transcriptional networks and responses in cancer. {W}e first describe how methods that analyze biological processes in terms of higher-level modules can identify robust signatures of disease mechanisms. {W}e then discuss methods that aim to identify the regulatory mechanisms underlying these modules and processes. {F}inally, we show how comparative analysis, combining human data with model organisms, can lead to more robust findings. {W}e conclude by discussing the challenges of generalizing these methods from cells to tissues and the opportunities they offer to improve cancer diagnosis and management.},
  doi      = {10.1038/ng1561},
  pdf      = {../local/Segal2005From.pdf},
  file     = {Segal2005From.pdf:Segal2005From.pdf:PDF},
  keywords = {microarray},
  url      = {http://dx.doi.org/10.1038/ng1561}
}
@article{Selinger2000RNA,
  author  = {Douglas W. Selinger and Kevin J. Cheung and Rui Mei and Erik M. Johansson and Craig S. Richmond and Frederick R. Blattner and David J. Lockhart and George M. Church},
  title   = {R{NA} expression analysis using a 30 base pair resolution {E}scherichia coli genome array},
  journal = {Nat. {B}iotechnol.},
  year    = {2000},
  volume  = {18},
  pages   = {1262--1268},
  pdf     = {../local/seli00.pdf},
  file    = {seli00.pdf:local/seli00.pdf:PDF},
  subject = {microarray},
  url     = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nbt/journal/v18/n12/full/nbt1200_1262.html&filetype=PDF}
}
@article{Sherlock2001Stanford,
  author  = {G. Sherlock and T. Hernandez-Boussard and A. Kasarskis and G. Binkley and J.C. Matese and S.S. Dwight and M. Kaloper and S. Weng and H. Jin and C.A. Ball and M.B. Eisen and P.T. Spellman},
  title   = {The {S}tanford {M}icroarray {D}atabase},
  journal = {Nucleic {A}cids {R}es.},
  year    = {2001},
  volume  = {29},
  pages   = {152--155},
  number  = {1},
  month   = jan,
  pdf     = {../local/sher01.pdf},
  file    = {sher01.pdf:local/sher01.pdf:PDF},
  subject = {microarray},
  url     = {http://genome-www5.Stanford.EDU/MicroArray/SMD/SMD.pdf}
}
@article{Spellman1998Comprehensive,
  author  = {Spellman, P.T. and Sherlock, G. and Zhang, M.Q. and Iyer, V.R. and Anders, K. and Eisen, M.B. and Brown, P.O. and Botstein, D. and Futcher, B.},
  title   = {Comprehensive {I}dentification of {C}ell {C}ycle-regulated {G}enes of the {Y}east {S}accharomyces cerevisiae by {M}icroarray {H}ybridization},
  journal = {Mol. {B}iol. {C}ell},
  year    = {1998},
  volume  = {9},
  pages   = {3273--3297},
  pdf     = {../local/spel98.pdf},
  file    = {spel98.pdf:local/spel98.pdf:PDF},
  subject = {microarray},
  url     = {http://www.molbiolcell.org/cgi/reprint/9/12/3273.pdf}
}
@article{Statnikov2005comprehensive,
  author   = {Statnikov, A. and Aliferis, C. F. and Tsamardinos, I. and Hardin, D. and Levy, S.},
  title    = {A comprehensive evaluation of multicategory classification methods for microarray gene expression cancer diagnosis},
  journal  = {Bioinformatics},
  year     = {2005},
  volume   = {21},
  number   = {5},
  pages    = {631--643},
  abstract = {Motivation: {C}ancer diagnosis is one of the most important emerging clinical applications of gene expression microarray technology. {W}e are seeking to develop a computer system for powerful and reliable cancer diagnostic model creation based on microarray data. {T}o keep a realistic perspective on clinical applications we focus on multicategory diagnosis. {I}n order to equip the system with the optimum combination of classifier, gene selection and cross-validation methods, we performed a systematic and comprehensive evaluation of several major algorithms for multicategory classification, several gene selection methods, multiple ensemble classifier methods, and two cross validation designs using 11 datasets spanning 74 diagnostic categories and 41 cancer types and 12 normal tissue types. {R}esults: {M}ulticategory {S}upport {V}ector {M}achines ({MC}-{SVM}s) are the most effective classifiers in performing accurate cancer diagnosis from gene expression data. {T}he {MC}-{SVM} techniques by {C}rammer and {S}inger, {W}eston and {W}atkins, and one-versus-rest were found to be the best methods in this domain. {MC}-{SVM}s outperform other popular machine learning algorithms such as {K}-{N}earest {N}eighbors, {B}ackpropagation and {P}robabilistic {N}eural {N}etworks, often to a remarkable degree. {G}ene selection techniques can significantly improve classification performance of both {MC}-{SVM}s and other non-{SVM} learning algorithms. {E}nsemble classifiers do not generally improve performance of the best non-ensemble models. {T}hese results guided the construction of a software system {GEMS} ({G}ene {E}xpression {M}odel {S}elector) that automates high-quality model construction and enforces sound optimization and performance estimation procedures. {T}his is the first such system to be informed by a rigorous comparative analysis of the available algorithms and datasets. {A}vailability: {T}he software system {GEMS} is available for download from http://www.gems-system.org for non-commercial use.},
  doi      = {10.1093/bioinformatics/bti033},
  pdf      = {../local/Statnikov2005comprehensive.pdf},
  file     = {Statnikov2005comprehensive.pdf:local/Statnikov2005comprehensive.pdf:PDF},
  keywords = {biosvm microarray},
  owner    = {jeanphilippevert},
  url      = {http://bioinformatics.oupjournals.org/cgi/content/abstract/bti033v1}
}
@article{Tavazoie1999Systematic,
  author  = {Tavazoie, S. and Hughes, J. D. and Campbell, M. J. and Cho, R. J. and Church, G. M.},
  title   = {Systematic determination of genetic network architecture},
  journal = {Nat. Genet.},
  year    = {1999},
  volume  = {22},
  pages   = {281--285},
  doi     = {10.1038/10343},
  pdf     = {../local/Tavazoie1999Systematic.pdf},
  file    = {Tavazoie1999Systematic.pdf:local/Tavazoie1999Systematic.pdf:PDF},
  subject = {microarray},
  url     = {http://dx.doi.org/10.1038/10343}
}
@article{Thukral2005Prediction,
  author   = {Sushil K Thukral and Paul J Nordone and Rong Hu and Leah Sullivan and Eric Galambos and Vincent D Fitzpatrick and Laura Healy and Michael B Bass and Mary E Cosenza and Cynthia A Afshari},
  title    = {Prediction of nephrotoxicant action and identification of candidate toxicity-related biomarkers.},
  journal  = {Toxicol {P}athol},
  year     = {2005},
  volume   = {33},
  pages    = {343--355},
  number   = {3},
  abstract = {A vast majority of pharmacological compounds and their metabolites are excreted via the urine, and within the complex structure of the kidney, the proximal tubules are a main target site of nephrotoxic compounds. {W}e used the model nephrotoxicants mercuric chloride, 2-bromoethylamine hydrobromide, hexachlorobutadiene, mitomycin, amphotericin, and puromycin to elucidate time- and dose-dependent global gene expression changes associated with proximal tubular toxicity. {M}ale {S}prague-{D}awley rats were dosed via intraperitoneal injection once daily for mercuric chloride and amphotericin (up to 7 doses), while a single dose was given for all other compounds. {A}nimals were exposed to 2 different doses of these compounds and kidney tissues were collected on day 1, 3, and 7 postdosing. {G}ene expression profiles were generated from kidney {RNA} using 17{K} rat c{DNA} dual dye microarray and analyzed in conjunction with histopathology. {A}nalysis of gene expression profiles showed that the profiles clustered based on similarities in the severity and type of pathology of individual animals. {F}urther, the expression changes were indicative of tubular toxicity showing hallmarks of tubular degeneration/regeneration and necrosis. {U}se of gene expression data in predicting the type of nephrotoxicity was then tested with a support vector machine ({SVM})-based approach. {A} {SVM} prediction module was trained using 120 profiles of total profiles divided into four classes based on the severity of pathology and clustering. {A}lthough mitomycin {C} and amphotericin {B} treatments did not cause toxicity, their expression profiles were included in the {SVM} prediction module to increase the sample size. {U}sing this classifier, the {SVM} predicted the type of pathology of 28 test profiles with 100\% selectivity and 82\% sensitivity. {T}hese data indicate that valid predictions could be made based on gene expression changes from a small set of expression profiles. {A} set of potential biomarkers showing a time- and dose-response with respect to the progression of proximal tubular toxicity were identified. {T}hese include several transporters ({S}lc21a2, {S}lc15, {S}lc34a2), {K}im 1, {IGF}bp-1, osteopontin, alpha-fibrinogen, and {G}stalpha.},
  doi      = {10.1080/01926230590927230},
  keywords = {Algorithms, Animals, Antibiotics, Antineoplastic, Artificial Intelligence, Butadienes, Chloroplasts, Comparative Study, Computer Simulation, Computer-Assisted, Diagnosis, Disinfectants, Dose-Response Relationship, Drug, Drug Toxicity, Electrodes, Electroencephalography, Ethylamines, Expert Systems, Feedback, Fungicides, Gene Expression Profiling, Genes, Genetic Markers, Humans, Implanted, Industrial, Information Storage and Retrieval, Kidney, Kidney Tubules, MEDLINE, Male, Mercuric Chloride, Microarray Analysis, Molecular Biology, Motor Cortex, Movement, Natural Language Processing, Neural Networks (Computer), Non-P.H.S., Non-U.S. Gov't, Plant Proteins, Predictive Value of Tests, Proteins, Proteome, Proximal, Puromycin Aminonucleoside, Rats, Reproducibility of Results, Research Support, Sprague-Dawley, Subcellular Fractions, Terminology, Therapy, Time Factors, Toxicogenetics, U.S. Gov't, User-Computer Interface, 15805072},
  pii      = {X3U2206L2747H31G},
  url      = {http://dx.doi.org/10.1080/01926230590927230}
}
@article{Tothill2005expression-based,
  author   = {Richard W Tothill and Adam Kowalczyk and Danny Rischin and Alex Bousioutas and Izhak Haviv and Ryan K van Laar and Paul M Waring and John Zalcberg and Robyn Ward and Andrew V Biankin and Robert L Sutherland and Susan M Henshall and Kwun Fong and Jonathan R Pollack and David D L Bowtell and Andrew J Holloway},
  title    = {An expression-based site of origin diagnostic method designed for clinical application to cancer of unknown origin.},
  journal  = {Cancer {R}es.},
  year     = {2005},
  volume   = {65},
  pages    = {4031--4040},
  number   = {10},
  month    = may,
  abstract = {Gene expression profiling offers a promising new technique for the diagnosis and prognosis of cancer. {W}e have applied this technology to build a clinically robust site of origin classifier with the ultimate aim of applying it to determine the origin of cancer of unknown primary ({CUP}). {A} single c{DNA} microarray platform was used to profile 229 primary and metastatic tumors representing 14 tumor types and multiple histologic subtypes. {T}his data set was subsequently used for training and validation of a support vector machine ({SVM}) classifier, demonstrating 89\% accuracy using a 13-class model. {F}urther, we show the translation of a five-class classifier to a quantitative {PCR}-based platform. {S}electing 79 optimal gene markers, we generated a quantitative-{PCR} low-density array, allowing the assay of both fresh-frozen and formalin-fixed paraffin-embedded ({FFPE}) tissue. {D}ata generated using both quantitative {PCR} and microarray were subsequently used to train and validate a cross-platform {SVM} model with high prediction accuracy. {F}inally, we applied our {SVM} classifiers to 13 cases of {CUP}. {W}e show that the microarray {SVM} classifier was capable of making high confidence predictions in 11 of 13 cases. {T}hese predictions were supported by comprehensive review of the patients' clinical histories.},
  doi      = {10.1158/0008-5472.CAN-04-3617},
  pdf      = {../local/Tothill2005expression-based.pdf},
  file     = {Tothill2005expression-based.pdf:Tothill2005expression-based.pdf:PDF},
  keywords = {biosvm microarray},
  pii      = {65/10/4031},
  url      = {http://dx.doi.org/10.1158/0008-5472.CAN-04-3617}
}
@article{Tsai2004Gene,
  author   = {Tsai, C.A. and Chen, C.H. and Lee, T.C. and Ho, I.C. and Yang, U.C. and Chen, J.J.},
  title    = {Gene selection for sample classifications in microarray experiments.},
  journal  = {D{NA} {C}ell {B}iol.},
  year     = {2004},
  volume   = {23},
  pages    = {607--614},
  number   = {10},
  abstract = {D{NA} microarray technology provides useful tools for profiling global gene expression patterns in different cell/tissue samples. {O}ne major challenge is the large number of genes relative to the number of samples. {T}he use of all genes can suppress or reduce the performance of a classification rule due to the noise of nondiscriminatory genes. {S}election of an optimal subset from the original gene set becomes an important prestep in sample classification. {I}n this study, we propose a family-wise error ({FWE}) rate approach to selection of discriminatory genes for two-sample or multiple-sample classification. {T}he {FWE} approach controls the probability of the number of one or more false positives at a prespecified level. {A} public colon cancer data set is used to evaluate the performance of the proposed approach for the two classification methods: k nearest neighbors (k-{NN}) and support vector machine ({SVM}). {T}he selected gene sets from the proposed procedure appears to perform better than or comparable to several results reported in the literature using the univariate analysis without performing multivariate search. {I}n addition, we apply the {FWE} approach to a toxicogenomic data set with nine treatments (a control and eight metals, {A}s, {C}d, {N}i, {C}r, {S}b, {P}b, {C}u, and {A}s{V}) for a total of 55 samples for a multisample classification. {T}wo gene sets are considered: the gene set omega{F} formed by the {ANOVA} {F}-test, and a gene set omega{T} formed by the union of one-versus-all t-tests. {T}he predicted accuracies are evaluated using the internal and external crossvalidation. {U}sing the {SVM} classification, the overall accuracies to predict 55 samples into one of the nine treatments are above 80\% for internal crossvalidation. {O}mega{F} has slightly higher accuracy rates than omega{T}. {T}he overall predicted accuracies are above 70\% for the external crossvalidation; the two gene sets omega{T} and omega{F} performed equally well.},
  doi      = {10.1089/1044549042476947},
  pdf      = {../local/Tsai2004Gene.pdf},
  file     = {Tsai2004Gene.pdf:local/Tsai2004Gene.pdf:PDF},
  keywords = {biosvm microarray},
  owner    = {jeanphilippevert},
  url      = {http://dx.doi.org/10.1089/1044549042476947}
}
@article{Wang2005Gene-expression,
  author    = {Wang, Y. and Klijn, J.G.M. and Zhang, Y. and Sieuwerts, A.M. and Look, M.P. and Yang, F. and Talantov, D. and Timmermans, M. and Meijer-van Gelder, M.E. and Yu, J. and Jatkoe, T. and Berns, E.M.J.J. and Atkins, D. and Foekens, J.A.},
  title     = {Gene-expression profiles to predict distant metastasis of lymph-node-negative primary breast cancers},
  journal   = {Lancet},
  year      = {2005},
  volume    = {365},
  pages     = {671--679},
  number    = {9460},
  abstract  = {BACKGROUND: Genome-wide measures of gene expression can identify patterns of gene activity that subclassify tumours and might provide a better means than is currently available for individual risk assessment in patients with lymph-node-negative breast cancer. METHODS: We analysed, with Affymetrix Human U133a GeneChips, the expression of 22000 transcripts from total RNA of frozen tumour samples from 286 lymph-node-negative patients who had not received adjuvant systemic treatment. FINDINGS: In a training set of 115 tumours, we identified a 76-gene signature consisting of 60 genes for patients positive for oestrogen receptors (ER) and 16 genes for ER-negative patients. This signature showed 93\% sensitivity and 48\% specificity in a subsequent independent testing set of 171 lymph-node-negative patients. The gene profile was highly informative in identifying patients who developed distant metastases within 5 years (hazard ratio 5.67 [95\% CI 2.59-12.4]), even when corrected for traditional prognostic factors in multivariate analysis (5.55 [2.46-12.5]). The 76-gene profile also represented a strong prognostic factor for the development of metastasis in the subgroups of 84 premenopausal patients (9.60 [2.28-40.5]), 87 postmenopausal patients (4.04 [1.57-10.4]), and 79 patients with tumours of 10-20 mm (14.1 [3.34-59.2]), a group of patients for whom prediction of prognosis is especially difficult. INTERPRETATION: The identified signature provides a powerful tool for identification of patients at high risk of distant recurrence. The ability to identify patients who have a favourable prognosis could, after independent confirmation, allow clinicians to avoid adjuvant systemic therapy or to choose less aggressive therapeutic options.},
  doi       = {10.1016/S0140-6736(05)17947-1},
  pdf       = {../local/Wang2005Gene-expression.pdf},
  file      = {Wang2005Gene-expression.pdf:local/Wang2005Gene-expression.pdf:PDF},
  keywords  = {microarray, breastcancer},
  owner     = {jp},
  pii       = {S0140673605179471},
  pmid      = {15894094},
  timestamp = {2006.07.06},
  url       = {http://dx.doi.org/10.1016/S0140-6736(05)17947-1}
}
@article{Wang2005Gene,
  author   = {Yu Wang and Igor V Tetko and Mark A Hall and Eibe Frank and Axel Facius and Klaus F X Mayer and Hans W Mewes},
  title    = {Gene selection from microarray data for cancer classification--a machine learning approach.},
  journal  = {Comput. {B}iol. {C}hem.},
  year     = {2005},
  volume   = {29},
  pages    = {37--46},
  number   = {1},
  month    = feb,
  abstract = {A {DNA} microarray can track the expression levels of thousands of genes simultaneously. {P}revious research has demonstrated that this technology can be useful in the classification of cancers. {C}ancer microarray data normally contains a small number of samples which have a large number of gene expression levels as features. {T}o select relevant genes involved in different types of cancer remains a challenge. {I}n order to extract useful gene information from cancer microarray data and reduce dimensionality, feature selection algorithms were systematically investigated in this study. {U}sing a correlation-based feature selector combined with machine learning algorithms such as decision trees, na{\"\i}ve {B}ayes and support vector machines, we show that classification performance at least as good as published results can be obtained on acute leukemia and diffuse large {B}-cell lymphoma microarray data sets. {W}e also demonstrate that a combined use of different classification and feature selection approaches makes it possible to select relevant genes with high confidence. {T}his is also the first paper which discusses both computational and biological evidence for the involvement of zyxin in leukaemogenesis.},
  doi      = {10.1016/j.compbiolchem.2004.11.001},
  pdf      = {../local/Wang2005Gene.pdf},
  file     = {Wang2005Gene.pdf:local/Wang2005Gene.pdf:PDF},
  keywords = {biosvm microarray},
  pii      = {S1476-9271(04)00108-2},
  url      = {http://dx.doi.org/10.1016/j.compbiolchem.2004.11.001}
}
@article{Wirapati2008Meta-analysis,
  author      = {Wirapati, P. and Sotiriou, C. and Kunkel, S. and Farmer, P. and Pradervand, S. and Haibe-Kains, B. and Desmedt, C. and Ignatiadis, M. and Sengstag, T. and Sch{\"u}tz, F. and Goldstein, D. R. and Piccart, M. and Delorenzi, M.},
  title       = {Meta-analysis of gene expression profiles in breast cancer: toward a unified understanding of breast cancer subtyping and prognosis signatures.},
  journal     = {Breast Cancer Res.},
  year        = {2008},
  volume      = {10},
  pages       = {R65},
  number      = {4},
  abstract    = {INTRODUCTION: Breast cancer subtyping and prognosis have been studied extensively by gene expression profiling, resulting in disparate signatures with little overlap in their constituent genes. Although a previous study demonstrated a prognostic concordance among gene expression signatures, it was limited to only one dataset and did not fully elucidate how the different genes were related to one another nor did it examine the contribution of well-known biological processes of breast cancer tumorigenesis to their prognostic performance. METHOD: To address the above issues and to further validate these initial findings, we performed the largest meta-analysis of publicly available breast cancer gene expression and clinical data, which are comprised of 2,833 breast tumors. Gene coexpression modules of three key biological processes in breast cancer (namely, proliferation, estrogen receptor [ER], and HER2 signaling) were used to dissect the role of constituent genes of nine prognostic signatures. RESULTS: Using a meta-analytical approach, we consolidated the signatures associated with ER signaling, ERBB2 amplification, and proliferation. Previously published expression-based nomenclature of breast cancer 'intrinsic' subtypes can be mapped to the three modules, namely, the ER-/HER2- (basal-like), the HER2+ (HER2-like), and the low- and high-proliferation ER+/HER2- subtypes (luminal A and B). We showed that all nine prognostic signatures exhibited a similar prognostic performance in the entire dataset. Their prognostic abilities are due mostly to the detection of proliferation activity. Although ER- status (basal-like) and ERBB2+ expression status correspond to bad outcome, they seem to act through elevated expression of proliferation genes and thus contain only indirect information about prognosis. Clinical variables measuring the extent of tumor progression, such as tumor size and nodal status, still add independent prognostic information to proliferation genes. CONCLUSION: This meta-analysis unifies various results of previous gene expression studies in breast cancer. It reveals connections between traditional prognostic factors, expression-based subtyping, and prognostic signatures, highlighting the important role of proliferation in breast cancer prognosis.},
  doi         = {10.1186/bcr2124},
  pdf         = {../local/Wirapati2008Meta-analysis.pdf},
  file        = {Wirapati2008Meta-analysis.pdf:Wirapati2008Meta-analysis.pdf:PDF},
  institution = {Swiss Institute of Bioinformatics, 'Batiment Genopode', University of Lausanne, 1015 Lausanne, Switzerland. Pratyaksha.Wirapati@isb-sib.ch},
  keywords    = {microarray, breastcancer},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {jp},
  pii         = {bcr2124},
  pmid        = {18662380},
  timestamp   = {2010.10.13},
  url         = {http://dx.doi.org/10.1186/bcr2124}
}
@article{Zhou2005LS,
  author   = {Xin Zhou and K. Z. Mao},
  title    = {L{S} {B}ound based gene selection for {DNA} microarray data.},
  journal  = {Bioinformatics},
  year     = {2005},
  volume   = {21},
  pages    = {1559--1564},
  number   = {8},
  month    = apr,
  abstract = {M{OTIVATION}: {O}ne problem with discriminant analysis of {DNA} microarray data is that each sample is represented by quite a large number of genes, and many of them are irrelevant, insignificant or redundant to the discriminant problem at hand. {M}ethods for selecting important genes are, therefore, of much significance in microarray data analysis. {I}n the present study, a new criterion, called {LS} {B}ound measure, is proposed to address the gene selection problem. {T}he {LS} {B}ound measure is derived from leave-one-out procedure of {LS}-{SVM}s (least squares support vector machines), and as the upper bound for leave-one-out classification results it reflects to some extent the generalization performance of gene subsets. {RESULTS}: {W}e applied this {LS} {B}ound measure for gene selection on two benchmark microarray datasets: colon cancer and leukemia. {W}e also compared the {LS} {B}ound measure with other evaluation criteria, including the well-known {F}isher's ratio and {M}ahalanobis class separability measure, and other published gene selection algorithms, including {W}eighting factor and {SVM} {R}ecursive {F}eature {E}limination. {T}he strength of the {LS} {B}ound measure is that it provides gene subsets leading to more accurate classification results than the filter method while its computational complexity is at the level of the filter method. {AVAILABILITY}: {A} companion website can be accessed at http://www.ntu.edu.sg/home5/pg02776030/lsbound/. {T}he website contains: (1) the source code of the gene selection algorithm; (2) the complete set of tables and figures regarding the experimental study; (3) proof of the inequality (9). {CONTACT}: ekzmao@ntu.edu.sg.},
  doi      = {10.1093/bioinformatics/bti216},
  pdf      = {../local/Zhou2005LS.pdf},
  file     = {Zhou2005LS.pdf:local/Zhou2005LS.pdf:PDF},
  keywords = {biosvm featureselection microarray},
  pii      = {bti216},
  url      = {http://dx.doi.org/10.1093/bioinformatics/bti216}
}
@article{Zhu2000Two,
  author  = {Zhu, G. and Spellman, P. T. and Volpe, T. and Brown, P. O. and Botstein, D. and Davis, T. N. and Futcher, B.},
  title   = {Two yeast forkhead genes regulate the cell cycle and pseudohyphal growth},
  journal = {Nature},
  year    = {2000},
  volume  = {406},
  pages   = {90--94},
  pdf     = {../local/zhu00.pdf},
  file    = {zhu00.pdf:local/zhu00.pdf:PDF},
  subject = {microarray},
  url     = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v406/n6791/full/406090a0_fs.html&content_filetype=pdf}
}
@comment{{jabref-meta: selector_author:}}
@comment{{jabref-meta: selector_journal:Adv. Drug Deliv. Rev.;Am. J. Hum. Genet.;Am. J. Pathol.;Ann. Appl. Stat.;Ann. Math. Statist.;Ann. N. Y. Acad. Sci.;Ann. Probab.;Ann. Stat.;Artif. Intell. Med.;Bernoulli;Biochim. Biophys. Acta;Bioinformatics;Biometrika;BMC Bioinformatics;Br. J. Pharmacol.;Breast Cancer Res.;Cell;Cell. Signal.;Chem. Res. Toxicol.;Clin. Cancer Res.;Combinator. Probab. Comput.;Comm. Pure Appl. Math.;Comput. Chem.;Comput. Comm. Rev.;Comput. Stat. Data An.;Curr. Genom.;Curr. Opin. Chem. Biol.;Curr. Opin. Drug Discov. Devel.;Data Min. Knowl. Discov.;Electron. J. Statist.;Eur. J. Hum. Genet.;FEBS Lett.;Found. Comput. Math.;Genome Biol.;IEEE T. Neural Networ.;IEEE T. Pattern. Anal.;IEEE T. Signal. Proces.;IEEE Trans. Inform. Theory;IEEE Trans. Knowl. Data Eng.;IEEE/ACM Trans. Comput. Biol. Bioinf.;Int. J. Comput. Vision;Int. J. Data Min. Bioinform.;Int. J. Qantum Chem.;J Biol Syst;J. ACM;J. Am. Soc. Inf. Sci. Technol.;J. Am. Stat. Assoc.;J. Bioinform. Comput. Biol.;J. Biol. Chem.;J. Biomed. Inform.;J. Cell. Biochem.;J. Chem. Inf. Comput. Sci.;J. Chem. Inf. Model.;J. Clin. Oncol.;J. Comput. Biol.;J. Comput. Graph. Stat.;J. Eur. Math. Soc.;J. Intell. Inform. Syst.;J. Mach. Learn. Res.;J. Med. Chem.;J. Mol. BIol.;J. R. Stat. Soc. Ser. B;Journal of Statistical Planning and Inference;Mach. Learn.;Math. Program.;Meth. Enzymol.;Mol. Biol. Cell;Mol. Biol. Evol.;Mol. Cell. Biol.;Mol. Syst. Biol.;N. Engl. J. Med.;Nat. Biotechnol.;Nat. Genet.;Nat. Med.;Nat. Methods;Nat. Rev. Cancer;Nat. Rev. Drug Discov.;Nat. Rev. Genet.;Nature;Neural Comput.;Neural Network.;Neurocomputing;Nucleic Acids Res.;Pattern Anal. Appl.;Pattern Recognit.;Phys. Rev. E;Phys. Rev. Lett.;PLoS Biology;PLoS Comput. Biol.;Probab. Theory Relat. Fields;Proc. IEEE;Proc. Natl. Acad. Sci. USA;Protein Eng.;Protein Eng. Des. Sel.;Protein Sci.;Protein. Struct. Funct. Genet.;Random Struct. Algorithm.;Rev. Mod. Phys.;Science;Stat. Probab. Lett.;Statistica Sinica;Theor. Comput. Sci.;Trans. Am. Math. Soc.;Trends Genet.;}}
@comment{{jabref-meta: selector_keywords:biogm;biosvm;breastcancer;cgh;chemogenomics;chemoinformatics;csbcbook;csbcbook-ch1;csbcbook-ch2;csbcbook-ch3;csbcbook-ch4;csbcbook-ch5;csbcbook-ch6;csbcbook-ch7;csbcbook-ch8;csbcbook-ch9;csbcbook-mustread;dimred;featureselection;glycans;herg;hic;highcontentscreening;image;immunoinformatics;kernel-theory;kernelbook;lasso;microarray;ngs;nlp;plasmodium;proteomics;PUlearning;rnaseq;segmentation;sirna;}}
@comment{{jabref-meta: selector_booktitle:Adv. Neural. Inform. Process Syst.;}}
This file was generated by bibtex2html 1.97.