ngs.bib

@comment{{This file has been generated by bib2bib 1.97}}
@comment{{Command line: bib2bib ../bibli.bib -c 'subject:"ngs" or keywords:"ngs"' -ob tmp.bib}}
@article{Alkan2009Personalized,
  author = {Can Alkan and Jeffrey M Kidd and Tomas Marques-Bonet and Gozde Aksay
    and Francesca Antonacci and Fereydoun Hormozdiari and Jacob O Kitzman
    and Carl Baker and Maika Malig and Onur Mutlu and S. Cenk Sahinalp
    and Richard A Gibbs and Evan E Eichler},
  title = {Personalized copy number and segmental duplication maps using next-generation
    sequencing},
  journal = {Nat. Genet.},
  year = {2009},
  volume = {41},
  pages = {1061--1067},
  number = {10},
  month = oct,
  abstract = {Despite their importance in gene innovation and phenotypic variation,
    duplicated regions have remained largely intractable owing to difficulties
    in accurately resolving their structure, copy number and sequence
    content. We present an algorithm (mrFAST) to comprehensively map
    next-generation sequence reads, which allows for the prediction of
    absolute copy-number variation of duplicated segments and genes.
    We examine three human genomes and experimentally validate genome-wide
    copy number differences. We estimate that, on average, 73-87 genes
    vary in copy number between any two individuals and find that these
    genic differences overwhelmingly correspond to segmental duplications
    (odds ratio = 135; $P < 2.2 \times 10^{-16}$). Our method can distinguish
    between different copies of highly identical genes, providing a more
    accurate assessment of gene content and insight into functional constraint
    without the limitations of array-based technology.},
  doi = {10.1038/ng.437},
  pdf = {../local/Alkan2009Personalized.pdf},
  file = {Alkan2009Personalized.pdf:Alkan2009Personalized.pdf:PDF},
  institution = {Department of Genome Sciences, University of Washington School of
    Medicine, Seattle, Washington, USA.},
  keywords = {ngs},
  owner = {jp},
  pii = {ng.437},
  pmid = {19718026},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1038/ng.437}
}
@article{Bashir2008Evaluation,
  author = {Ali Bashir and Stanislav Volik and Colin Collins and Vineet Bafna
    and Benjamin J Raphael},
  title = {Evaluation of paired-end sequencing strategies for detection of genome
    rearrangements in cancer},
  journal = {PLoS Comput. Biol.},
  year = {2008},
  volume = {4},
  pages = {e1000051},
  number = {4},
  month = apr,
  abstract = {Paired-end sequencing is emerging as a key technique for assessing
    genome rearrangements and structural variation on a genome-wide scale.
    This technique is particularly useful for detecting copy-neutral
    rearrangements, such as inversions and translocations, which are
    common in cancer and can produce novel fusion genes. We address the
    question of how much sequencing is required to detect rearrangement
    breakpoints and to localize them precisely using both theoretical
    models and simulation. We derive a formula for the probability that
    a fusion gene exists in a cancer genome given a collection of paired-end
    sequences from this genome. We use this formula to compute fusion
    gene probabilities in several breast cancer samples, and we find
    that we are able to accurately predict fusion genes in these samples
    with a relatively small number of fragments of large size. We further
    demonstrate how the ability to detect fusion genes depends on the
    distribution of gene lengths, and we evaluate how different parameters
    of a sequencing strategy impact breakpoint detection, breakpoint
    localization, and fusion gene detection, even in the presence of
    errors that suggest false rearrangements. These results will be useful
    in calibrating future cancer sequencing efforts, particularly large-scale
    studies of many cancer genomes that are enabled by next-generation
    sequencing technologies.},
  doi = {10.1371/journal.pcbi.1000051},
  pdf = {../local/Bashir2008Evaluation.pdf},
  file = {Bashir2008Evaluation.pdf:Bashir2008Evaluation.pdf:PDF},
  institution = {Bioinformatics Graduate Program, University of California San Diego,
    San Diego, California, United States of America. abashir@ucsd.edu},
  keywords = {ngs},
  owner = {jp},
  pmid = {18404202},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1371/journal.pcbi.1000051}
}
@article{Ben-Elazar2013Spatial,
  author = {Ben-Elazar, S. and Yakhini, Z. and Yanai, I.},
  title = {Spatial localization of co-regulated genes exceeds genomic gene clustering
    in the {Saccharomyces} cerevisiae genome},
  journal = {Nucleic Acids Res},
  year = {2013},
  volume = {41},
  pages = {2191--2201},
  number = {4},
  month = feb,
  abstract = {While it has been long recognized that genes are not randomly positioned
    along the genome, the degree to which its 3D structure influences
    the arrangement of genes has remained elusive. In particular, several
    lines of evidence suggest that actively transcribed genes are spatially
    co-localized, forming transcription factories; however, a generalized
    systematic test has hitherto not been described. Here we reveal transcription
    factories using a rigorous definition of genomic structure based
    on Saccharomyces cerevisiae chromosome conformation capture data,
    coupled with an experimental design controlling for the primary gene
    order. We develop a data-driven method for the interpolation and
    the embedding of such datasets and introduce statistics that enable
    the comparison of the spatial and genomic densities of genes. Combining
    these, we report evidence that co-regulated genes are clustered in
    space, beyond their observed clustering in the context of gene order
    along the genome and show this phenomenon is significant for 64 out
    of 117 transcription factors. Furthermore, we show that those transcription
    factors with high spatially co-localized targets are expressed higher
    than those whose targets are not spatially clustered. Collectively,
    our results support the notion that, at a given time, the physical
    density of genes is intimately related to regulatory activity.},
  doi = {10.1093/nar/gks1360},
  pdf = {../local/Ben-Elazar2013Spatial.pdf},
  file = {Ben-Elazar2013Spatial.pdf:Ben-Elazar2013Spatial.pdf:PDF},
  institution = {Department of Biology, Technion - Israel Institute of Technology,
    Haifa, Israel, Department of Computer Science, Technion - Israel
    Institute of Technology, Haifa, Israel and Agilent Laboratories,
    Tel Aviv, Israel.},
  keywords = {ngs, hic},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gks1360},
  pmid = {23303780},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1093/nar/gks1360}
}
@article{Berkum2010HiC,
  author = {van Berkum, N. L. and Lieberman-Aiden, E. and Williams, L. and Imakaev,
    M. and Gnirke, A. and Mirny, L. A. and Dekker, J. and Lander, E.
    S.},
  title = {{Hi-C}: a method to study the three-dimensional architecture of genomes},
  journal = {J. Vis. Exp.},
  year = {2010},
  volume = {39},
  pages = {e1869},
  abstract = {The three-dimensional folding of chromosomes compartmentalizes the
    genome and can bring distant functional elements, such as promoters
    and enhancers, into close spatial proximity (2-6). Deciphering the
    relationship between chromosome organization and genome activity
    will aid in understanding genomic processes, like transcription and
    replication. However, little is known about how chromosomes fold.
    Microscopy is unable to distinguish large numbers of loci simultaneously
    or at high resolution. To date, the detection of chromosomal interactions
    using chromosome conformation capture (3C) and its subsequent adaptations
    required the choice of a set of target loci, making genome-wide studies
    impossible (7-10). We developed Hi-C, an extension of 3C that is
    capable of identifying long range interactions in an unbiased, genome-wide
    fashion. In Hi-C, cells are fixed with formaldehyde, causing interacting
    loci to be bound to one another by means of covalent DNA-protein
    cross-links. When the DNA is subsequently fragmented with a restriction
    enzyme, these loci remain linked. A biotinylated residue is incorporated
    as the 5' overhangs are filled in. Next, blunt-end ligation is performed
    under dilute conditions that favor ligation events between cross-linked
    DNA fragments. This results in a genome-wide library of ligation
    products, corresponding to pairs of fragments that were originally
    in close proximity to each other in the nucleus. Each ligation product
    is marked with biotin at the site of the junction. The library is
    sheared, and the junctions are pulled-down with streptavidin beads.
    The purified junctions can subsequently be analyzed using a high-throughput
    sequencer, resulting in a catalog of interacting fragments. Direct
    analysis of the resulting contact matrix reveals numerous features
    of genomic organization, such as the presence of chromosome territories
    and the preferential association of small gene-rich chromosomes.
    Correlation analysis can be applied to the contact matrix, demonstrating
    that the human genome is segregated into two compartments: a less
    densely packed compartment containing open, accessible, and active
    chromatin and a more dense compartment containing closed, inaccessible,
    and inactive chromatin regions. Finally, ensemble analysis of the
    contact matrix, coupled with theoretical derivations and computational
    simulations, revealed that at the megabase scale Hi-C reveals features
    consistent with a fractal globule conformation.},
  doi = {10.3791/1869},
  institution = {Program in Gene Function and Expression, Department of Biochemistry
    and Molecular Pharmacology, University of Massachusetts Medical School.},
  keywords = {ngs, hic},
  language = {eng},
  medline-pst = {epublish},
  owner = {philippe},
  pii = {1869},
  pmid = {20461051},
  timestamp = {2010.07.27},
  url = {http://dx.doi.org/10.3791/1869}
}
@article{Boeva2011Control-free,
  author = {Boeva, V. and Zinovyev, A. and Bleakley, K. and Vert, J.-P. and Janoueix-Lerosey,
    I. and Delattre, O. and Barillot, E.},
  title = {Control-free calling of copy number alterations in deep-sequencing
    data using {GC}-content normalization},
  journal = {Bioinformatics},
  year = {2011},
  volume = {27},
  pages = {268--269},
  number = {2},
  month = jan,
  abstract = {We present a tool for control-free copy number alteration (CNA) detection
    using deep-sequencing data, particularly useful for cancer studies.
    The tool deals with two frequent problems in the analysis of cancer
    deep-sequencing data: absence of control sample and possible polyploidy
    of cancer cells. FREEC (control-FREE Copy number caller) automatically
    normalizes and segments copy number profiles (CNPs) and calls CNAs.
    If ploidy is known, FREEC assigns absolute copy number to each predicted
    CNA. To normalize raw CNPs, the user can provide a control dataset
    if available; otherwise GC content is used. We demonstrate that for
    Illumina single-end, mate-pair or paired-end sequencing, GC-content
    normalization provides smooth profiles that can be further segmented
    and analyzed in order to predict CNAs. Source code and sample data
    are available at http://bioinfo-out.curie.fr/projects/freec/. Contact:
    freec@curie.fr. Supplementary data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btq635},
  pdf = {../local/Boeva2011Control-free.pdf},
  file = {Boeva2011Control-free.pdf:Boeva2011Control-free.pdf:PDF},
  institution = {Institut Curie, INSERM, U900, Paris, F-75248, Mines ParisTech, Fontainebleau,
    F-77300 and INSERM, U830, Paris, F-75248 France.},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btq635},
  pmid = {21081509},
  timestamp = {2011.01.25},
  url = {http://dx.doi.org/10.1093/bioinformatics/btq635}
}
@article{Bohnert2009Transcript,
  author = {Bohnert, R. and Behr, J. and R{\"a}tsch, G.},
  title = {Transcript quantification with {RNA-Seq} data},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  number = {Suppl 13},
  pages = {P5},
  doi = {10.1186/1471-2105-10-S13-P5},
  pdf = {../local/Bohnert2009Transcript.pdf},
  file = {Bohnert2009Transcript.pdf:Bohnert2009Transcript.pdf:PDF},
  keywords = {ngs, rnaseq},
  owner = {jp},
  timestamp = {2012.03.06},
  url = {http://dx.doi.org/10.1186/1471-2105-10-S13-P5}
}
@article{Campbell2008Identification,
  author = {Peter J Campbell and Philip J Stephens and Erin D Pleasance and Sarah
    O'Meara and Heng Li and Thomas Santarius and Lucy A Stebbings and
    Catherine Leroy and Sarah Edkins and Claire Hardy and Jon W Teague
    and Andrew Menzies and Ian Goodhead and Daniel J Turner and Christopher
    M Clee and Michael A Quail and Antony Cox and Clive Brown and Richard
    Durbin and Matthew E Hurles and Paul A W Edwards and Graham R Bignell
    and Michael R Stratton and P. Andrew Futreal},
  title = {Identification of somatically acquired rearrangements in cancer using
    genome-wide massively parallel paired-end sequencing},
  journal = {Nat. Genet.},
  year = {2008},
  volume = {40},
  pages = {722--729},
  number = {6},
  month = jun,
  abstract = {Human cancers often carry many somatically acquired genomic rearrangements,
    some of which may be implicated in cancer development. However, conventional
    strategies for characterizing rearrangements are laborious and low-throughput
    and have low sensitivity or poor resolution. We used massively parallel
    sequencing to generate sequence reads from both ends of short DNA
    fragments derived from the genomes of two individuals with lung cancer.
    By investigating read pairs that did not align correctly with respect
    to each other on the reference human genome, we characterized 306
    germline structural variants and 103 somatic rearrangements to the
    base-pair level of resolution. The patterns of germline and somatic
    rearrangement were markedly different. Many somatic rearrangements
    were from amplicons, although rearrangements outside these regions,
    notably including tandem duplications, were also observed. Some somatic
    rearrangements led to abnormal transcripts, including two from internal
    tandem duplications and two fusion transcripts created by interchromosomal
    rearrangements. Germline variants were predominantly mediated by
    retrotransposition, often involving AluY and LINE elements. The results
    demonstrate the feasibility of systematic, genome-wide characterization
    of rearrangements in complex human cancer genomes, raising the prospect
    of a new harvest of genes associated with cancer using this strategy.},
  doi = {10.1038/ng.128},
  pdf = {../local/Campbell2008Identification.pdf},
  file = {Campbell2008Identification.pdf:Campbell2008Identification.pdf:PDF},
  institution = {Wellcome Trust Sanger Institute, Hinxton CB10 1SA, UK.},
  keywords = {ngs},
  owner = {jp},
  pii = {ng.128},
  pmid = {18438408},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1038/ng.128}
}
@article{Chen2008Mapping,
  author = {Wei Chen and Vera Kalscheuer and Andreas Tzschach and Corinna Menzel
    and Reinhard Ullmann and Marcel Holger Schulz and Fikret Erdogan
    and Na Li and Zofia Kijas and Ger Arkesteijn and Isidora Lopez Pajares
    and Margret Goetz-Sothmann and Uwe Heinrich and Imma Rost and Andreas
    Dufke and Ute Grasshoff and Birgitta Glaeser and Martin Vingron and
    H. Hilger Ropers},
  title = {Mapping translocation breakpoints by next-generation sequencing},
  journal = {Genome Res.},
  year = {2008},
  volume = {18},
  pages = {1143--1149},
  number = {7},
  month = jul,
  abstract = {Balanced chromosome rearrangements (BCRs) can cause genetic diseases
    by disrupting or inactivating specific genes, and the characterization
    of breakpoints in disease-associated BCRs has been instrumental in
    the molecular elucidation of a wide variety of genetic disorders.
    However, mapping chromosome breakpoints using traditional methods,
    such as in situ hybridization with fluorescent dye-labeled bacterial
    artificial chromosome clones (BAC-FISH), is rather laborious and
    time-consuming. In addition, the resolution of BAC-FISH is often
    insufficient to unequivocally identify the disrupted gene. To overcome
    these limitations, we have performed shotgun sequencing of flow-sorted
    derivative chromosomes using ``next-generation'' (Illumina/Solexa)
    multiplex sequencing-by-synthesis technology. As shown here for three
    different disease-associated BCRs, the coverage attained by this
    platform is sufficient to bridge the breakpoints by PCR amplification,
    and this procedure allows the determination of their exact nucleotide
    positions within a few weeks. Its implementation will greatly facilitate
    large-scale breakpoint mapping and gene finding in patients with
    disease-associated balanced translocations.},
  doi = {10.1101/gr.076166.108},
  pdf = {../local/Chen2008Mapping.pdf},
  file = {Chen2008Mapping.pdf:Chen2008Mapping.pdf:PDF},
  institution = {Max Planck Institute for Molecular Genetics, 14195 Berlin, Germany.
    wei@molgen.mpg.de},
  keywords = {ngs, csbcbook, csbcbook-ch2},
  owner = {jp},
  pii = {gr.076166.108},
  pmid = {18326688},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1101/gr.076166.108}
}
@article{Chiang2009High-resolution,
  author = {Derek Y Chiang and Gad Getz and David B Jaffe and Michael J T O'Kelly
    and Xiaojun Zhao and Scott L Carter and Carsten Russ and Chad Nusbaum
    and Matthew Meyerson and Eric S Lander},
  title = {High-resolution mapping of copy-number alterations with massively
    parallel sequencing},
  journal = {Nat. Methods},
  year = {2009},
  volume = {6},
  pages = {99--103},
  number = {1},
  month = jan,
  abstract = {Cancer results from somatic alterations in key genes, including point
    mutations, copy-number alterations and structural rearrangements.
    A powerful way to discover cancer-causing genes is to identify genomic
    regions that show recurrent copy-number alterations (gains and losses)
    in tumor genomes. Recent advances in sequencing technologies suggest
    that massively parallel sequencing may provide a feasible alternative
    to DNA microarrays for detecting copy-number alterations. Here we
    present: (i) a statistical analysis of the power to detect copy-number
    alterations of a given size; (ii) SegSeq, an algorithm to segment
    equal copy numbers from massively parallel sequence data; and (iii)
    analysis of experimental data from three matched pairs of tumor and
    normal cell lines. We show that a collection of approximately 14
    million aligned sequence reads from human cell lines has comparable
    power to detect events as the current generation of DNA microarrays
    and has over twofold better precision for localizing breakpoints
    (typically, to within approximately 1 kilobase).},
  doi = {10.1038/nmeth.1276},
  pdf = {../local/Chiang2009High-resolution.pdf},
  file = {Chiang2009High-resolution.pdf:Chiang2009High-resolution.pdf:PDF},
  institution = {Broad Institute, Massachusetts Institute of Technology, 7 Cambridge
    Center, Cambridge, MA 02142, USA.},
  keywords = {ngs},
  owner = {jp},
  pii = {nmeth.1276},
  pmid = {19043412},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1038/nmeth.1276}
}
@article{Dixon2012Topological,
  author = {Dixon, J. R. and Selvaraj, S. and Yue, F. and Kim, A. and Li, Y.
    and Shen, Y. and Hu, M. and Liu, J. S. and Ren, B.},
  title = {Topological domains in mammalian genomes identified by analysis of
    chromatin interactions},
  journal = {Nature},
  year = {2012},
  volume = {485},
  pages = {376--380},
  number = {7398},
  month = may,
  doi = {10.1038/nature11082},
  pdf = {../local/Dixon2012Topological.pdf},
  file = {Dixon2012Topological.pdf:Dixon2012Topological.pdf:PDF},
  keywords = {ngs, hic},
  owner = {nelle},
  timestamp = {2013.03.30},
  url = {http://dx.doi.org/10.1038/nature11082}
}
@article{Girirajan2009Sequencing,
  author = {Santhosh Girirajan and Lin Chen and Tina Graves and Tomas Marques-Bonet
    and Mario Ventura and Catrina Fronick and Lucinda Fulton and Mariano
    Rocchi and Robert S Fulton and Richard K Wilson and Elaine R Mardis
    and Evan E Eichler},
  title = {Sequencing human-gibbon breakpoints of synteny reveals mosaic new
    insertions at rearrangement sites},
  journal = {Genome Res.},
  year = {2009},
  volume = {19},
  pages = {178--190},
  number = {2},
  month = feb,
  abstract = {The gibbon genome exhibits extensive karyotypic diversity with an
    increased rate of chromosomal rearrangements during evolution. In
    an effort to understand the mechanistic origin and implications of
    these rearrangement events, we sequenced 24 synteny breakpoint regions
    in the white-cheeked gibbon (Nomascus leucogenys, NLE) in the form
    of high-quality BAC insert sequences (4.2 Mbp). While there is a
    significant deficit of breakpoints in genes, we identified seven
    human gene structures involved in signaling pathways (DEPDC4, GNG10),
    phospholipid metabolism (ENPP5, PLSCR2), beta-oxidation (ECH1), cellular
    structure and transport (HEATR4), and transcription (ZNF461), that
    have been disrupted in the NLE gibbon lineage. Notably, only three
    of these genes show the expected evolutionary signatures of pseudogenization.
    Sequence analysis of the breakpoints suggested both nonclassical
    nonhomologous end-joining (NHEJ) and replication-based mechanisms
    of rearrangement. A substantial number (11/24) of human-NLE gibbon
    breakpoints showed new insertions of gibbon-specific repeats and
    mosaic structures formed from disparate sequences including segmental
    duplications, LINE, SINE, and LTR elements. Analysis of these sites
    provides a model for a replication-dependent repair mechanism for
    double-strand breaks (DSBs) at rearrangement sites and insights into
    the structure and formation of primate segmental duplications at
    sites of genomic rearrangements during evolution.},
  doi = {10.1101/gr.086041.108},
  pdf = {../local/Girirajan2009Sequencing.pdf},
  file = {Girirajan2009Sequencing.pdf:Girirajan2009Sequencing.pdf:PDF},
  institution = {Department of Genome Sciences, Howard Hughes Medical Institute, University
    of Washington School of Medicine, Seattle, Washington 98195, USA.},
  keywords = {ngs},
  owner = {jp},
  pii = {gr.086041.108},
  pmid = {19029537},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1101/gr.086041.108}
}
@article{Harismendy2009Evaluation,
  author = {Harismendy, O. and Ng, P. C. and Strausberg, R. L. and Wang, X. and
    Stockwell, T. B. and Beeson, K. Y. and Schork, N. J. and Murray,
    S. S. and Topol, E. J. and Levy, S. and Frazer, K. A.},
  title = {Evaluation of next generation sequencing platforms for population
    targeted sequencing studies},
  journal = {Genome Biol.},
  year = {2009},
  volume = {10},
  pages = {R32},
  number = {3},
  abstract = {Next generation sequencing (NGS) platforms are currently being utilized
    for targeted sequencing of candidate genes or genomic intervals to
    perform sequence-based association studies. To evaluate these platforms
    for this application, we analyzed human sequence generated by the
    Roche 454, Illumina GA, and the ABI SOLiD technologies for the same
    260 kb in four individuals. Local sequence characteristics contribute
    to systematic variability in sequence coverage ($>$100-fold difference
    in per-base coverage), resulting in patterns for each NGS technology
    that are highly correlated between samples. A comparison of the base
    calls to 88 kb of overlapping ABI 3730xL Sanger sequence generated
    for the same samples showed that the NGS platforms all have high
    sensitivity, identifying $>$95\% of variant sites. At high coverage,
    depth base calling errors are systematic, resulting from local sequence
    contexts; as the coverage is lowered additional `random sampling'
    errors in base calling occur. Our study provides important insights
    into systematic biases and data variability that need to be considered
    when utilizing NGS platforms for population targeted sequencing studies.},
  doi = {10.1186/gb-2009-10-3-r32},
  pdf = {../local/Harismendy2009Evaluation.pdf},
  file = {Harismendy2009Evaluation.pdf:Harismendy2009Evaluation.pdf:PDF},
  institution = {Scripps Genomic Medicine, Scripps Translational Science Institute,
    The Scripps Research Institute, La Jolla, CA 92037, USA. oharis@scripps.edu},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gb-2009-10-3-r32},
  pmid = {19327155},
  timestamp = {2011.10.28},
  url = {http://dx.doi.org/10.1186/gb-2009-10-3-r32}
}
@article{Homouz20133D,
  author = {Homouz, D. and Kudlicki, A. S.},
  title = {The {3D} Organization of the Yeast Genome Correlates with Co-Expression
    and Reflects Functional Relations between Genes},
  journal = {PLoS ONE},
  year = {2013},
  volume = {8},
  pages = {e54699},
  number = {1},
  month = jan,
  abstract = {The spatial organization of eukaryotic genomes is thought to play
    an important role in regulating gene expression. The recent advances
    in experimental methods including chromatin capture techniques, as
    well as the large amounts of accumulated gene expression data allow
    studying the relationship between spatial organization of the genome
    and co-expression of protein-coding genes. To analyse this genome-wide
    relationship at a single gene resolution, we combined the interchromosomal
    DNA contacts in the yeast genome measured by Duan et al. with a comprehensive
    collection of 1,496 gene expression datasets. We find significant
    enhancement of co-expression among genes with contact links. The
    co-expression is most prominent when two gene loci fall within 1,000
    base pairs from the observed contact. We also demonstrate an enrichment
    of inter-chromosomal links between functionally related genes, which
    suggests that the non random nature of the genome organization serves
    to facilitate coordinated transcription in groups of genes.},
  doi = {10.1371/journal.pone.0054699},
  pdf = {../local/Homouz20133D.pdf},
  file = {Homouz20133D.pdf:Homouz20133D.pdf:PDF},
  keywords = {hic, ngs},
  owner = {nelle},
  publisher = {Public Library of Science},
  timestamp = {2013.03.30},
  url = {http://dx.doi.org/10.1371/journal.pone.0054699}
}
@article{Hormozdiari2009Combinatorial,
  author = {Fereydoun Hormozdiari and Can Alkan and Evan E Eichler and S. Cenk
    Sahinalp},
  title = {Combinatorial algorithms for structural variation detection in high-throughput
    sequenced genomes},
  journal = {Genome Res.},
  year = {2009},
  volume = {19},
  pages = {1270--1278},
  number = {7},
  month = jul,
  abstract = {Recent studies show that along with single nucleotide polymorphisms
    and small indels, larger structural variants among human individuals
    are common. The Human Genome Structural Variation Project aims to
    identify and classify deletions, insertions, and inversions ($>$5 Kbp)
    in a small number of normal individuals with a fosmid-based paired-end
    sequencing approach using traditional sequencing technologies. The
    realization of new ultra-high-throughput sequencing platforms now
    makes it feasible to detect the full spectrum of genomic variation
    among many individual genomes, including cancer patients and others
    suffering from diseases of genomic origin. Unfortunately, existing
    algorithms for identifying structural variation (SV) among individuals
    have not been designed to handle the short read lengths and the errors
    implied by the ``next-gen'' sequencing (NGS) technologies. In this
    paper, we give combinatorial formulations for the SV detection between
    a reference genome sequence and a next-gen-based, paired-end, whole
    genome shotgun-sequenced individual. We describe efficient algorithms
    for each of the formulations we give, which all turn out to be fast
    and quite reliable; they are also applicable to all next-gen sequencing
    methods (Illumina, 454 Life Sciences [Roche], ABI SOLiD, etc.) and
    traditional capillary sequencing technology. We apply our algorithms
    to identify SV among individual genomes very recently sequenced by
    Illumina technology.},
  doi = {10.1101/gr.088633.108},
  pdf = {../local/Hormozdiari2009Combinatorial.pdf},
  file = {Hormozdiari2009Combinatorial.pdf:Hormozdiari2009Combinatorial.pdf:PDF},
  institution = {School of Computing Science, Simon Fraser University, Burnaby, British
    Columbia, Canada V5A 1S6.},
  keywords = {ngs},
  owner = {jp},
  pii = {gr.088633.108},
  pmid = {19447966},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1101/gr.088633.108}
}
@article{Horner2009Bioinformatics,
  author = {Horner, D. S. and Pavesi, G. and Castrignan{\`o}, T. and De Meo,
    P. D. and Liuni, S. and Sammeth, M. and Picardi, E. and Pesole, G.},
  title = {Bioinformatics approaches for genomics and post genomics applications
    of next-generation sequencing},
  journal = {Brief Bioinform},
  year = {2009},
  month = oct,
  abstract = {Technical advances such as the development of molecular cloning, Sanger
    sequencing, PCR and oligonucleotide microarrays are key to our current
    capacity to sequence, annotate and study complete organismal genomes.
    Recent years have seen the development of a variety of so-called
    `next-generation' sequencing platforms, with several others anticipated
    to become available shortly. The previously unimaginable scale and
    economy of these methods, coupled with their enthusiastic uptake
    by the scientific community and the potential for further improvements
    in accuracy and read length, suggest that these technologies are
    destined to make a huge and ongoing impact upon genomic and post-genomic
    biology. However, like the analysis of microarray data and the assembly
    and annotation of complete genome sequences from conventional sequencing
    data, the management and analysis of next-generation sequencing data
    requires (and indeed has already driven) the development of informatics
    tools able to assemble, map, and interpret huge quantities of relatively
    or extremely short nucleotide sequence data. Here we provide a broad
    overview of bioinformatics approaches that have been introduced for
    several genomics and functional genomics applications of next-generation
    sequencing.},
  doi = {10.1093/bib/bbp046},
  pdf = {../local/Horner2009Bioinformatics.pdf},
  file = {Horner2009Bioinformatics.pdf:Horner2009Bioinformatics.pdf:PDF},
  keywords = {ngs},
  language = {eng},
  medline-pst = {aheadofprint},
  owner = {jp},
  pii = {bbp046},
  pmid = {19864250},
  timestamp = {2010.01.07},
  url = {http://dx.doi.org/10.1093/bib/bbp046}
}
@article{Jiang2009Statistical,
  author = {Jiang, H. and Wong, W. H.},
  title = {Statistical inferences for isoform expression in {RNA-Seq}},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  pages = {1026--1032},
  number = {8},
  month = apr,
  abstract = {SUMMARY: The development of RNA sequencing (RNA-Seq) makes it possible
    for us to measure transcription at an unprecedented precision and
    throughput. However, challenges remain in understanding the source
    and distribution of the reads, modeling the transcript abundance
    and developing efficient computational methods. In this article,
    we develop a method to deal with the isoform expression estimation
    problem. The count of reads falling into a locus on the genome annotated
    with multiple isoforms is modeled as a Poisson variable. The expression
    of each individual isoform is estimated by solving a convex optimization
    problem and statistical inferences about the parameters are obtained
    from the posterior distribution by importance sampling. Our results
    show that isoform expression inference in RNA-Seq is possible by
    employing appropriate statistical methods.},
  doi = {10.1093/bioinformatics/btp113},
  pdf = {../local/Jiang2009Statistical.pdf},
  file = {Jiang2009Statistical.pdf:Jiang2009Statistical.pdf:PDF},
  institution = {Institute for Computational and Mathematical Engineering and Department
    of Statistics, Stanford University, Stanford, CA 94305, USA.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btp113},
  pmid = {19244387},
  timestamp = {2012.03.06},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp113}
}
@article{Korbel2009PEMer,
  author = {Korbel, J. and Abyzov, A. and Mu, X. and Carriero, N. and Cayting,
	P. and Zhang, Z. and Snyder, M. and Gerstein, M.},
  title = {{PEMer}: a computational framework with simulation-based error models
	for inferring genomic structural variants from massive paired-end
	sequencing data.},
  journal = {Genome Biol.},
  year = {2009},
  volume = {10},
  pages = {R23},
  number = {2},
  month = feb,
  abstract = {ABSTRACT: Personal-genomics endeavors, such as the 1000 Genomes project,
	are generating maps of genomic structural variants by analyzing ends
	of massively sequenced genome fragments. To process these we developed
	Paired-End Mapper (PEMer; http://sv.gersteinlab.org/pemer). This
	comprises an analysis pipeline, compatible with several next-generation
	sequencing platforms; simulation-based error models, yielding confidence-values
	for each structural variant; and a back-end database. The simulations
	demonstrated high structural variant reconstruction efficiency for
	PEMer's coverage-adjusted multi-cutoff scoring-strategy and showed
	its relative insensitivity to base-calling errors.},
  doi = {10.1186/gb-2009-10-2-r23},
  pdf = {../local/Korbel2009PEMer.pdf},
  file = {Korbel2009PEMer.pdf:Korbel2009PEMer.pdf:PDF},
  institution = {Gene Expression Unit, European Molecular Biology Laboratory (EMBL),
	Meyerhofstr, Heidelberg, 69117, Germany. korbel@embl.de.},
  keywords = {ngs},
  owner = {jp},
  pii = {gb-2009-10-2-r23},
  pmid = {19236709},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1186/gb-2009-10-2-r23}
}
@article{Korbel2007Paired-end,
  author = {Jan O Korbel and Alexander Eckehart Urban and Jason P Affourtit and
	Brian Godwin and Fabian Grubert and Jan Fredrik Simons and Philip
	M Kim and Dean Palejev and Nicholas J Carriero and Lei Du and Bruce
	E Taillon and Zhoutao Chen and Andrea Tanzer and A. C. Eugenia Saunders
	and Jianxiang Chi and Fengtang Yang and Nigel P Carter and Matthew
	E Hurles and Sherman M Weissman and Timothy T Harkins and Mark B
	Gerstein and Michael Egholm and Michael Snyder},
  title = {Paired-end mapping reveals extensive structural variation in the
	human genome.},
  journal = {Science},
  year = {2007},
  volume = {318},
  pages = {420--426},
  number = {5849},
  month = oct,
  abstract = {Structural variation of the genome involves kilobase- to megabase-sized
	deletions, duplications, insertions, inversions, and complex combinations
	of rearrangements. We introduce high-throughput and massive paired-end
	mapping (PEM), a large-scale genome-sequencing method to identify
	structural variants (SVs) approximately 3 kilobases (kb) or larger
	that combines the rescue and capture of paired ends of 3-kb fragments,
	massive 454 sequencing, and a computational approach to map DNA reads
	onto a reference genome. PEM was used to map SVs in an African and
	in a putatively European individual and identified shared and divergent
	SVs relative to the reference genome. Overall, we fine-mapped more
	than 1000 SVs and documented that the number of SVs among humans
	is much larger than initially hypothesized; many of the SVs potentially
	affect gene function. The breakpoint junction sequences of more than
	200 SVs were determined with a novel pooling strategy and computational
	analysis. Our analysis provided insights into the mechanisms of SV
	formation in humans.},
  doi = {10.1126/science.1149504},
  pdf = {../local/Korbel2007Paired-end.pdf},
  file = {Korbel2007Paired-end.pdf:Korbel2007Paired-end.pdf:PDF},
  institution = {Molecular Biophysics and Biochemistry Department, Yale University,
	New Haven, CT 06520, USA.},
  keywords = {ngs},
  owner = {jp},
  pii = {1149504},
  pmid = {17901297},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1126/science.1149504}
}
@article{Langmead2009Ultrafast,
  author = {Langmead, B. and Trapnell, C. and Pop, M. and Salzberg, S. L.},
  title = {Ultrafast and memory-efficient alignment of short {DNA} sequences
	to the human genome.},
  journal = {Genome Biol.},
  year = {2009},
  volume = {10},
  pages = {R25},
  number = {3},
  __markedentry = {[jp:]},
  abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning
	short DNA sequence reads to large genomes. For the human genome,
	Burrows-Wheeler indexing allows Bowtie to align more than 25 million
	reads per CPU hour with a memory footprint of approximately 1.3 gigabytes.
	Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware
	backtracking algorithm that permits mismatches. Multiple processor
	cores can be used simultaneously to achieve even greater alignment
	speeds. Bowtie is open source (http://bowtie.cbcb.umd.edu).},
  doi = {10.1186/gb-2009-10-3-r25},
  pdf = {../local/Langmead2009Ultrafast.pdf},
  file = {Langmead2009Ultrafast.pdf:Langmead2009Ultrafast.pdf:PDF},
  institution = {Center for Bioinformatics and Computational Biology, Institute for
	Advanced Computer Studies, University of Maryland, College Park,
	MD 20742, USA. langmead@cs.umd.edu},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gb-2009-10-3-r25},
  pmid = {19261174},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1186/gb-2009-10-3-r25}
}
@article{Li2008Mapping,
  author = {Li, H. and Ruan, J. and Durbin, R.},
  title = {Mapping short {DNA} sequencing reads and calling variants using mapping
	quality scores.},
  journal = {Genome Res.},
  year = {2008},
  volume = {18},
  pages = {1851--1858},
  number = {11},
  month = nov,
  abstract = {New sequencing technologies promise a new era in the use of DNA sequence.
	However, some of these technologies produce very short reads, typically
	of a few tens of base pairs, and to use these reads effectively requires
	new algorithms and software. In particular, there is a major issue
	in efficiently aligning short reads to a reference genome and handling
	ambiguity or lack of accuracy in this alignment. Here we introduce
	the concept of mapping quality, a measure of the confidence that
	a read actually comes from the position it is aligned to by the mapping
	algorithm. We describe the software MAQ that can build assemblies
	by mapping shotgun short reads to a reference genome, using quality
	scores to derive genotype calls of the consensus sequence of a diploid
	genome, e.g., from a human sample. MAQ makes full use of mate-pair
	information and estimates the error probability of each read alignment.
	Error probabilities are also derived for the final genotype calls,
	using a Bayesian statistical model that incorporates the mapping
	qualities, error probabilities from the raw sequence quality scores,
	sampling of the two haplotypes, and an empirical model for correlated
	errors at a site. Both read mapping and genotype calling are evaluated
	on simulated data and real data. MAQ is accurate, efficient, versatile,
	and user-friendly. It is freely available at http://maq.sourceforge.net.},
  doi = {10.1101/gr.078212.108},
  pdf = {../local/Li2008Mapping.pdf},
  file = {Li2008Mapping.pdf:Li2008Mapping.pdf:PDF},
  institution = {The Wellcome Trust Sanger Institute, Hinxton CB10 1SA, United Kingdom.},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gr.078212.108},
  pmid = {18714091},
  timestamp = {2011.10.28},
  url = {http://dx.doi.org/10.1101/gr.078212.108}
}
@article{Li2011Sparse,
  author = {Li, J. J. and Jiang, C.-R. and Brown, J. B. and Huang, H. and Bickel,
	P. J.},
  title = {Sparse linear modeling of next-generation {mRNA} sequencing ({RNA-Seq})
	data for isoform discovery and abundance estimation},
  journal = {Proc. Natl. Acad. Sci. USA},
  year = {2011},
  volume = {108},
  pages = {19867--19872},
  number = {50},
  month = dec,
  abstract = {Since the inception of next-generation mRNA sequencing (RNA-Seq)
	technology, various attempts have been made to utilize RNA-Seq data
	in assembling full-length mRNA isoforms de novo and estimating abundance
	of isoforms. However, for genes with more than a few exons, the problem
	tends to be challenging and often involves identifiability issues
	in statistical modeling. We have developed a statistical method called
	``sparse linear modeling of RNA-Seq data for isoform discovery
	and abundance estimation'' (SLIDE) that takes exon boundaries and
	RNA-Seq data as input to discern the set of mRNA isoforms that are
	most likely to present in an RNA-Seq sample. SLIDE is based on a
	linear model with a design matrix that models the sampling probability
	of RNA-Seq reads from different mRNA isoforms. To tackle the model
	unidentifiability issue, SLIDE uses a modified Lasso procedure for
	parameter estimation. Compared with deterministic isoform assembly
	algorithms (e.g., Cufflinks), SLIDE considers the stochastic aspects
	of RNA-Seq reads in exons from different isoforms and thus has increased
	power in detecting more novel isoforms. Another advantage of SLIDE
	is its flexibility of incorporating other transcriptomic data such
	as RACE, CAGE, and EST into its model to further increase isoform
	discovery accuracy. SLIDE can also work downstream of other RNA-Seq
	assembly algorithms to integrate newly discovered genes and exons.
	Besides isoform discovery, SLIDE sequentially uses the same linear
	model to estimate the abundance of discovered isoforms. Simulation
	and real data studies show that SLIDE performs as well as or better
	than major competitors in both isoform discovery and abundance estimation.
	The SLIDE software package is available at https://sites.google.com/site/jingyijli/SLIDE.zip.},
  citeulike-article-id = {10102447},
  citeulike-linkout-0 = {http://dx.doi.org/10.1073/pnas.1113972108},
  citeulike-linkout-1 = {http://www.pnas.org/content/early/2011/11/23/1113972108.abstract},
  citeulike-linkout-2 = {http://www.pnas.org/content/early/2011/11/23/1113972108.full.pdf},
  citeulike-linkout-3 = {http://www.pnas.org/cgi/content/abstract/108/50/19867},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/22135461},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=22135461},
  day = {13},
  doi = {10.1073/pnas.1113972108},
  pdf = {../local/Li2011Sparse.pdf},
  file = {Li2011Sparse.pdf:Li2011Sparse.pdf:PDF},
  issn = {1091-6490},
  keywords = {ngs, rnaseq},
  pmid = {22135461},
  posted-at = {2011-12-16 22:07:32},
  priority = {2},
  publisher = {National Academy of Sciences},
  url = {http://dx.doi.org/10.1073/pnas.1113972108}
}
@article{Li2009SNP,
  author = {Li, R. and Li, Y. and Fang, X. and Yang, H. and Wang, J. and Kristiansen,
	K. and Wang, J.},
  title = {{SNP} detection for massively parallel whole-genome resequencing.},
  journal = {Genome Res.},
  year = {2009},
  volume = {19},
  pages = {1124--1132},
  number = {6},
  month = jun,
  abstract = {Next-generation massively parallel sequencing technologies provide
	ultrahigh throughput at two orders of magnitude lower unit cost than
	capillary Sanger sequencing technology. One of the key applications
	of next-generation sequencing is studying genetic variation between
	individuals using whole-genome or target region resequencing. Here,
	we have developed a consensus-calling and SNP-detection method for
	sequencing-by-synthesis Illumina Genome Analyzer technology. We designed
	this method by carefully considering the data quality, alignment,
	and experimental errors common to this technology. All of this information
	was integrated into a single quality score for each base under Bayesian
	theory to measure the accuracy of consensus calling. We tested this
	methodology using a large-scale human resequencing data set of 36x
	coverage and assembled a high-quality nonrepetitive consensus sequence
	for 92.25\% of the diploid autosomes and 88.07\% of the haploid X
	chromosome. Comparison of the consensus sequence with Illumina human
	1M BeadChip genotyped alleles from the same DNA sample showed that
	98.6\% of the 37,933 genotyped alleles on the X chromosome and 98\%
	of 999,981 genotyped alleles on autosomes were covered at 99.97\%
	and 99.84\% consistency, respectively. At a low sequencing depth,
	we used prior probability of dbSNP alleles and were able to improve
	coverage of the dbSNP sites significantly as compared to that obtained
	using a nonimputation model. Our analyses demonstrate that our method
	has a very low false call rate at any sequencing depth and excellent
	genome coverage at a high sequencing depth.},
  doi = {10.1101/gr.088013.108},
  pdf = {../local/Li2009SNP.pdf},
  file = {Li2009SNP.pdf:Li2009SNP.pdf:PDF},
  institution = {Beijing Genomics Institute at Shenzhen, Shenzhen 518000, China},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gr.088013.108},
  pmid = {19420381},
  timestamp = {2011.10.28},
  url = {http://dx.doi.org/10.1101/gr.088013.108}
}
@article{Li2011IsoLasso,
  author = {Li, W. and Feng, J. and Jiang, T.},
  title = {{IsoLasso}: a {LASSO} regression approach to {RNA-Seq} based transcriptome
	assembly.},
  journal = {J. Comput. Biol.},
  year = {2011},
  volume = {18},
  pages = {1693--1707},
  number = {11},
  month = nov,
  __markedentry = {[jp:6]},
  abstract = {The new second generation sequencing technology revolutionizes many
	biology-related research fields and poses various computational biology
	challenges. One of them is transcriptome assembly based on RNA-Seq
	data, which aims at reconstructing all full-length mRNA transcripts
	simultaneously from millions of short reads. In this article, we
	consider three objectives in transcriptome assembly: the maximization
	of prediction accuracy, minimization of interpretation, and maximization
	of completeness. The first objective, the maximization of prediction
	accuracy, requires that the estimated expression levels based on
	assembled transcripts should be as close as possible to the observed
	ones for every expressed region of the genome. The minimization of
	interpretation follows the parsimony principle to seek as few transcripts
	in the prediction as possible. The third objective, the maximization
	of completeness, requires that the maximum number of mapped reads
	(or ``expressed segments'' in gene models) be explained by (i.e., contained
	in) the predicted transcripts in the solution. Based on the above
	three objectives, we present IsoLasso, a new RNA-Seq based transcriptome
	assembly tool. IsoLasso is based on the well-known LASSO algorithm,
	a multivariate regression method designated to seek a balance between
	the maximization of prediction accuracy and the minimization of interpretation.
	By including some additional constraints in the quadratic program
	involved in LASSO, IsoLasso is able to make the set of assembled
	transcripts as complete as possible. Experiments on simulated and
	real RNA-Seq datasets show that IsoLasso achieves, simultaneously,
	higher sensitivity and precision than the state-of-art transcript
	assembly tools.},
  doi = {10.1089/cmb.2011.0171},
  pdf = {../local/Li2011IsoLasso.pdf},
  file = {Li2011IsoLasso.pdf:Li2011IsoLasso.pdf:PDF},
  institution = {Department of Computer Science and Engineering, University of California,
	Riverside, Riverside, CA 92507, USA. liw@cs.ucr.edu},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pmid = {21951053},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1089/cmb.2011.0171}
}
@article{Lieberman-Aiden2009Comprehensive,
  author = {Erez Lieberman-Aiden and Nynke L van Berkum and Louise Williams and
	Maxim Imakaev and Tobias Ragoczy and Agnes Telling and Ido Amit and
	Bryan R Lajoie and Peter J Sabo and Michael O Dorschner and Richard
	Sandstrom and Bradley Bernstein and M. A. Bender and Mark Groudine
	and Andreas Gnirke and John Stamatoyannopoulos and Leonid A Mirny
	and Eric S Lander and Job Dekker},
  title = {Comprehensive mapping of long-range interactions reveals folding
	principles of the human genome.},
  journal = {Science},
  year = {2009},
  volume = {326},
  pages = {289--293},
  number = {5950},
  month = oct,
  abstract = {We describe Hi-C, a method that probes the three-dimensional architecture
	of whole genomes by coupling proximity-based ligation with massively
	parallel sequencing. We constructed spatial proximity maps of the
	human genome with Hi-C at a resolution of 1 megabase. These maps
	confirm the presence of chromosome territories and the spatial proximity
	of small, gene-rich chromosomes. We identified an additional level
	of genome organization that is characterized by the spatial segregation
	of open and closed chromatin to form two genome-wide compartments.
	At the megabase scale, the chromatin conformation is consistent with
	a fractal globule, a knot-free, polymer conformation that enables
	maximally dense packing while preserving the ability to easily fold
	and unfold any genomic locus. The fractal globule is distinct from
	the more commonly used globular equilibrium model. Our results demonstrate
	the power of Hi-C to map the dynamic conformations of whole genomes.},
  doi = {10.1126/science.1181369},
  pdf = {../local/Lieberman-Aiden2009Comprehensive.pdf},
  file = {Lieberman-Aiden2009Comprehensive.pdf:Lieberman-Aiden2009Comprehensive.pdf:PDF},
  institution = {Broad Institute of Harvard and Massachusetts Institute of Technology
	(MIT), MA 02139, USA.},
  keywords = {hic, ngs},
  owner = {phupe},
  pii = {326/5950/289},
  pmid = {19815776},
  timestamp = {2010.08.26},
  url = {http://dx.doi.org/10.1126/science.1181369}
}
@article{McKernan2009Sequence,
  author = {Kevin Judd McKernan and Heather E Peckham and Gina L Costa and Stephen
	F McLaughlin and Yutao Fu and Eric F Tsung and Christopher R Clouser
	and Cisyla Duncan and Jeffrey K Ichikawa and Clarence C Lee and Zheng
	Zhang and Swati S Ranade and Eileen T Dimalanta and Fiona C Hyland
	and Tanya D Sokolsky and Lei Zhang and Andrew Sheridan and Haoning
	Fu and Cynthia L Hendrickson and Bin Li and Lev Kotler and Jeremy
	R Stuart and Joel A Malek and Jonathan M Manning and Alena A Antipova
	and Damon S Perez and Michael P Moore and Kathleen C Hayashibara
	and Michael R Lyons and Robert E Beaudoin and Brittany E Coleman
	and Michael W Laptewicz and Adam E Sannicandro and Michael D Rhodes
	and Rajesh K Gottimukkala and Shan Yang and Vineet Bafna and Ali
	Bashir and Andrew MacBride and Can Alkan and Jeffrey M Kidd and Evan
	E Eichler and Martin G Reese and Francisco M {De La Vega} and Alan
	P Blanchard},
  title = {Sequence and structural variation in a human genome uncovered by
	short-read, massively parallel ligation sequencing using two-base
	encoding.},
  journal = {Genome Res.},
  year = {2009},
  volume = {19},
  pages = {1527--1541},
  number = {9},
  month = sep,
  abstract = {We describe the genome sequencing of an anonymous individual of African
	origin using a novel ligation-based sequencing assay that enables
	a unique form of error correction that improves the raw accuracy
	of the aligned reads to >99.9\%, allowing us to accurately call SNPs
	with as few as two reads per allele. We collected several billion
	mate-paired reads yielding approximately 18x haploid coverage of
	aligned sequence and close to 300x clone coverage. Over 98\% of the
	reference genome is covered with at least one uniquely placed read,
	and 99.65\% is spanned by at least one uniquely placed mate-paired
	clone. We identify over 3.8 million SNPs, 19\% of which are novel.
	Mate-paired data are used to physically resolve haplotype phases
	of nearly two-thirds of the genotypes obtained and produce phased
	segments of up to 215 kb. We detect 226,529 intra-read indels, 5590
	indels between mate-paired reads, 91 inversions, and four gene fusions.
	We use a novel approach for detecting indels between mate-paired
	reads that are smaller than the standard deviation of the insert
	size of the library and discover deletions in common with those detected
	with our intra-read approach. Dozens of mutations previously described
	in OMIM and hundreds of nonsynonymous single-nucleotide and structural
	variants in genes previously implicated in disease are identified
	in this individual. There is more genetic variation in the human
	genome still to be uncovered, and we provide guidance for future
	surveys in populations and cancer biopsies.},
  doi = {10.1101/gr.091868.109},
  pdf = {../local/McKernan2009Sequence.pdf},
  file = {McKernan2009Sequence.pdf:McKernan2009Sequence.pdf:PDF},
  institution = {Life Technologies, Beverly, Massachusetts 01915, USA. Kevin.McKernan@appliedbiosystems.com},
  keywords = {ngs},
  owner = {jp},
  pii = {gr.091868.109},
  pmid = {19546169},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1101/gr.091868.109}
}
@article{Metzker2010Sequencing,
  author = {Metzker, M. L.},
  title = {Sequencing technologies --- the next generation.},
  journal = {Nat. Rev. Genet.},
  year = {2010},
  volume = {11},
  pages = {31--46},
  number = {1},
  month = jan,
  abstract = {Demand has never been greater for revolutionary technologies that
	deliver fast, inexpensive and accurate genome information. This challenge
	has catalysed the development of next-generation sequencing (NGS)
	technologies. The inexpensive production of large volumes of sequence
	data is the primary advantage over conventional methods. Here, I
	present a technical review of template preparation, sequencing and
	imaging, genome alignment and assembly approaches, and recent advances
	in current and near-term commercially available NGS instruments.
	I also outline the broad range of applications for NGS technologies,
	in addition to providing guidelines for platform selection to address
	biological questions of interest.},
  doi = {10.1038/nrg2626},
  institution = {Human Genetics, Baylor College of Medicine, Houston, Texas 77030,
	USA. mmetzker@bcm.edu},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {philippe},
  pii = {nrg2626},
  pmid = {19997069},
  timestamp = {2010.07.27},
  url = {http://dx.doi.org/10.1038/nrg2626}
}
@article{Mezlini2013iReckon,
  author = {Mezlini, A. M. and Smith, E. J. M. and Fiume, M. and Buske, O. and
	Savich, G. L. and Shah, S. and Aparicio, S. and Chiang, D. Y. and
	Goldenberg, A. and Brudno, M.},
  title = {{iReckon}: Simultaneous isoform discovery and abundance estimation
	from {RNA}-seq data.},
  journal = {Genome Res.},
  year = {2013},
  volume = {23},
  pages = {519--529},
  number = {3},
  month = mar,
  abstract = {High-throughput RNA sequencing (RNA-seq) promises to revolutionize
	our understanding of genes and their role in human disease by characterizing
	the RNA content of tissues and cells. The realization of this promise,
	however, is conditional on the development of effective computational
	methods for the identification and quantification of transcripts
	from incomplete and noisy data. In this article, we introduce iReckon,
	a method for simultaneous determination of the isoforms and estimation
	of their abundances. Our probabilistic approach incorporates multiple
	biological and technical phenomena, including novel isoforms, intron
	retention, unspliced pre-mRNA, PCR amplification biases, and multimapped
	reads. iReckon utilizes regularized expectation-maximization to accurately
	estimate the abundances of known and novel isoforms. Our results
	on simulated and real data demonstrate a superior ability to discover
	novel isoforms with a significantly reduced number of false-positive
	predictions, and our abundance accuracy prediction outmatches that
	of other state-of-the-art tools. Furthermore, we have applied iReckon
	to two cancer transcriptome data sets, a triple-negative breast cancer
	patient sample and the MCF7 breast cancer cell line, and show that
	iReckon is able to reconstruct the complex splicing changes that
	were not previously identified. QT-PCR validations of the isoforms
	detected in the MCF7 cell line confirmed all of iReckon's predictions
	and also showed strong agreement (r = 0.94) with the predicted abundances.},
  doi = {10.1101/gr.142232.112},
  pdf = {../local/Mezlini2013iReckon.pdf},
  file = {Mezlini2013iReckon.pdf:Mezlini2013iReckon.pdf:PDF},
  institution = {Department of Computer Science, University of Toronto, Ontario M5S
	2E4, Canada.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gr.142232.112},
  pmid = {23204306},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1101/gr.142232.112}
}
@article{Morin2008Application,
  author = {Ryan D Morin and Michael D O'Connor and Malachi Griffith and Florian
	Kuchenbauer and Allen Delaney and Anna-Liisa Prabhu and Yongjun Zhao
	and Helen McDonald and Thomas Zeng and Martin Hirst and Connie J
	Eaves and Marco A Marra},
  title = {Application of massively parallel sequencing to microRNA profiling
	and discovery in human embryonic stem cells.},
  journal = {Genome Res.},
  year = {2008},
  volume = {18},
  pages = {610--621},
  number = {4},
  month = apr,
  abstract = {MicroRNAs (miRNAs) are emerging as important, albeit poorly characterized,
	regulators of biological processes. Key to further elucidation of
	their roles is the generation of more complete lists of their numbers
	and expression changes in different cell states. Here, we report
	a new method for surveying the expression of small RNAs, including
	microRNAs, using Illumina sequencing technology. We also present
	a set of methods for annotating sequences deriving from known miRNAs,
	identifying variability in mature miRNA sequences, and identifying
	sequences belonging to previously unidentified miRNA genes. Application
	of this approach to RNA from human embryonic stem cells obtained
	before and after their differentiation into embryoid bodies revealed
	the sequences and expression levels of 334 known plus 104 novel miRNA
	genes. One hundred seventy-one known and 23 novel microRNA sequences
	exhibited significant expression differences between these two developmental
	states. Owing to the increased number of sequence reads, these libraries
	represent the deepest miRNA sampling to date, spanning nearly six
	orders of magnitude of expression. The predicted targets of those
	miRNAs enriched in either sample shared common features. Included
	among the high-ranked predicted gene targets are those implicated
	in differentiation, cell cycle control, programmed cell death, and
	transcriptional regulation.},
  doi = {10.1101/gr.7179508},
  pdf = {../local/Morin2008Application.pdf},
  file = {Morin2008Application.pdf:Morin2008Application.pdf:PDF},
  institution = {Genome Sciences Centre, BC Cancer Agency, Vancouver, British Columbia
	V5Z 1L3, Canada.},
  keywords = {ngs, sirna},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gr.7179508},
  pmid = {18285502},
  timestamp = {2009.10.28},
  url = {http://dx.doi.org/10.1101/gr.7179508}
}
@article{Nielsen2011Genotype,
  author = {Nielsen, R. and Paul, J. S. and Albrechtsen, A. and Song, Y. S.},
  title = {Genotype and {SNP} calling from next-generation sequencing data.},
  journal = {Nat. Rev. Genet.},
  year = {2011},
  volume = {12},
  pages = {443--451},
  number = {6},
  month = jun,
  abstract = {Meaningful analysis of next-generation sequencing (NGS) data, which
	are produced extensively by genetics and genomics studies, relies
	crucially on the accurate calling of SNPs and genotypes. Recently
	developed statistical methods both improve and quantify the considerable
	uncertainty associated with genotype calling, and will especially
	benefit the growing number of studies using low- to medium-coverage
	data. We review these methods and provide a guide for their use in
	NGS studies.},
  doi = {10.1038/nrg2986},
  pdf = {../local/Nielsen2011Genotype.pdf},
  file = {Nielsen2011Genotype.pdf:Nielsen2011Genotype.pdf:PDF},
  institution = {Department of Integrative Biology, University of California, Berkeley,
	CA 94720, USA. rasmus\_nielsen@berkeley.edu},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nrg2986},
  pmid = {21587300},
  timestamp = {2011.10.27},
  url = {http://dx.doi.org/10.1038/nrg2986}
}
@article{Praz2009CleanEx:,
  author = {Viviane Praz and Philipp Bucher},
  title = {{CleanEx}: new data extraction and merging tools based on {MeSH} term
	annotation.},
  journal = {Nucleic Acids Res.},
  year = {2009},
  volume = {37},
  pages = {D880--D884},
  number = {Database issue},
  month = jan,
  abstract = {The CleanEx expression database (http://www.cleanex.isb-sib.ch) provides
	access to public gene expression data via unique gene names as well
	as via experiments biomedical characteristics. To reach this, a dual
	annotation of both sequences and experiments has been generated.
	First, the system links official gene symbols to any kind of sequences
	used for gene expression measurements (cDNA, Affymetrix, oligonucleotide
	arrays, SAGE or MPSS tags, Expressed Sequence Tags or other mRNA
	sequences, etc.). For the biomedical annotation, we re-annotate each
	experiment from the CleanEx database with the MeSH (Medical Subject
	Headings) terms, primarily used by NLM (National Library of Medicine)
	for indexing articles for the MEDLINE/PubMED database. This annotation
	allows a fast and easy retrieval of expression data with common biological
	or medical features. The numerical data can then be exported as matrix-like
	tab-delimited text files. Data can be extracted from either one dataset
	or from heterogeneous datasets.},
  doi = {10.1093/nar/gkn878},
  institution = {ISREC, Swiss Institute of Bioinformatics, Boveresses 155, Epalinges,
	VD 1066, Switzerland. viviane.praz@unil.ch},
  keywords = {Animals; Chromosome Mapping; Databases, Genetic; Gene Expression Profiling;
	Humans; Medical Subject Headings; Mice; Oligonucleotide Array Sequence
	Analysis; Software},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gkn878},
  pmid = {19073704},
  timestamp = {2011.09.21},
  url = {http://dx.doi.org/10.1093/nar/gkn878}
}
@article{Roberts2011Identification,
  author = {Roberts, A. and Pimentel, H. and Trapnell, C. and Pachter, L.},
  title = {Identification of novel transcripts in annotated genomes using {RNA-Seq}.},
  journal = {Bioinformatics},
  year = {2011},
  volume = {27},
  pages = {2325--2329},
  number = {17},
  month = sep,
  abstract = {We describe a new 'reference annotation based transcript assembly'
	problem for RNA-Seq data that involves assembling novel transcripts
	in the context of an existing annotation. This problem arises in
	the analysis of expression in model organisms, where it is desirable
	to leverage existing annotations for discovering novel transcripts.
	We present an algorithm for reference annotation-based transcript
	assembly and show how it can be used to rapidly investigate novel
	transcripts revealed by RNA-Seq in comparison with a reference annotation.
	The methods described in this article are implemented in the Cufflinks
	suite of software for RNA-Seq, freely available from http://bio.math.berkeley.edu/cufflinks.
	The software is released under the BOOST license. Contact: cole@broadinstitute.org;
	lpachter@math.berkeley.edu. Supplementary data are available at Bioinformatics
	online.},
  doi = {10.1093/bioinformatics/btr355},
  pdf = {../local/Roberts2011Identification.pdf},
  file = {Roberts2011Identification.pdf:Roberts2011Identification.pdf:PDF},
  institution = {Department of Computer Science, UC Berkeley, Berkeley, CA, USA.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btr355},
  pmid = {21697122},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1093/bioinformatics/btr355}
}
@article{Roberts2011Improving,
  author = {Roberts, A. and Trapnell, C. and Donaghey, J. and Rinn, J. L. and
	Pachter, L.},
  title = {Improving {RNA-Seq} expression estimates by correcting for fragment
	bias.},
  journal = {Genome Biol.},
  year = {2011},
  volume = {12},
  pages = {R22},
  number = {3},
  abstract = {The biochemistry of RNA-Seq library preparation results in cDNA fragments
	that are not uniformly distributed within the transcripts they represent.
	This non-uniformity must be accounted for when estimating expression
	levels, and we show how to perform the needed corrections using a
	likelihood based approach. We find improvements in expression estimates
	as measured by correlation with independently performed qRT-PCR and
	show that correction of bias leads to improved replicability of results
	across libraries and sequencing technologies.},
  doi = {10.1186/gb-2011-12-3-r22},
  pdf = {../local/Roberts2011Improving.pdf},
  file = {Roberts2011Improving.pdf:Roberts2011Improving.pdf:PDF},
  institution = {Department of Computer Science, 387 Soda Hall, UC Berkeley, Berkeley,
	CA 94720, USA.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gb-2011-12-3-r22},
  pmid = {21410973},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1186/gb-2011-12-3-r22}
}
@article{Shah2009Mutational,
  author = {Shah, S. P. and Morin, R. D. and Khattra, J. and Prentice, L. and
	Pugh, T. and Burleigh, A. and Delaney, A. and Gelmon, K. and Guliany,
	R. and Senz, J. and Steidl, C. and Holt, R. A. and Jones, S. and
	Sun, M. and Leung, G. and Moore, R. and Severson, T. and Taylor,
	G. A. and Teschendorff, A. E. and Tse, K. and Turashvili, G. and
	Varhol, R. and Warren, R. L. and Watson, P. and Zhao, Y. and Caldas,
	C. and Huntsman, D. and Hirst, M. and Marra, M. A. and Aparicio,
	S.},
  title = {Mutational evolution in a lobular breast tumour profiled at single
	nucleotide resolution},
  journal = {Nature},
  year = {2009},
  volume = {461},
  pages = {809--813},
  number = {7265},
  month = {Oct},
  abstract = {Recent advances in next generation sequencing have made it possible
	to precisely characterize all somatic coding mutations that occur
	during the development and progression of individual cancers. Here
	we used these approaches to sequence the genomes (>43-fold coverage)
	and transcriptomes of an oestrogen-receptor-alpha-positive metastatic
	lobular breast cancer at depth. We found 32 somatic non-synonymous
	coding mutations present in the metastasis, and measured the frequency
	of these somatic mutations in DNA from the primary tumour of the
	same patient, which arose 9 years earlier. Five of the 32 mutations
	(in ABCB11, HAUS3, SLC24A4, SNX4 and PALB2) were prevalent in the
	DNA of the primary tumour removed at diagnosis 9 years earlier, six
	(in KIF1C, USP28, MYH8, MORC1, KIAA1468 and RNASEH2A) were present
	at lower frequencies (1-13\%), 19 were not detected in the primary
	tumour, and two were undetermined. The combined analysis of genome
	and transcriptome data revealed two new RNA-editing events that recode
	the amino acid sequence of SRP9 and COG3. Taken together, our data
	show that single nucleotide mutational heterogeneity can be a property
	of low or intermediate grade primary breast cancers and that significant
	evolution can occur with disease progression.},
  doi = {10.1038/nature08489},
  pdf = {../local/Shah2009Mutational.pdf},
  file = {Shah2009Mutational.pdf:Shah2009Mutational.pdf:PDF},
  institution = {Molecular Oncology, BC Cancer Agency, 675 West 10th Avenue, Vancouver
	V5Z 1L3, Canada.},
  keywords = {ngs},
  owner = {jp},
  pii = {nature08489},
  pmid = {19812674},
  timestamp = {2009.10.12},
  url = {http://dx.doi.org/10.1038/nature08489}
}
@article{Spyrou2009BayesPeak,
  author = {Christiana Spyrou and Rory Stark and Andy G Lynch and Simon Tavar{\'e}},
  title = {{BayesPeak}: {Bayesian} analysis of {ChIP-seq} data.},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {299},
  abstract = {BACKGROUND: High-throughput sequencing technology has become popular
	and widely used to study protein and DNA interactions. Chromatin
	immunoprecipitation, followed by sequencing of the resulting samples,
	produces large amounts of data that can be used to map genomic features
	such as transcription factor binding sites and histone modifications.
	METHODS: Our proposed statistical algorithm, BayesPeak, uses a fully
	Bayesian hidden Markov model to detect enriched locations in the
	genome. The structure accommodates the natural features of the Solexa/Illumina
	sequencing data and allows for overdispersion in the abundance of
	reads in different regions. Moreover, a control sample can be incorporated
	in the analysis to account for experimental and sequence biases.
	Markov chain Monte Carlo algorithms are applied to estimate the posterior
	distributions of the model parameters, and posterior probabilities
	are used to detect the sites of interest. CONCLUSION: We have presented
	a flexible approach for identifying peaks from ChIP-seq reads, suitable
	for use on both transcription factor binding and histone modification
	data. Our method estimates probabilities of enrichment that can be
	used in downstream analysis. The method is assessed using experimentally
	verified data and is shown to provide high-confidence calls with
	low false positive rates.},
  doi = {10.1186/1471-2105-10-299},
  pdf = {../local/Spyrou2009BayesPeak.pdf},
  file = {Spyrou2009BayesPeak.pdf:Spyrou2009BayesPeak.pdf:PDF},
  institution = {Statistical Laboratory, Centre for Mathematical Sciences, Wilberforce
	Road, Cambridge, UK. C.Spyrou@statslab.cam.ac.uk},
  keywords = {ngs},
  language = {eng},
  medline-pst = {epublish},
  owner = {jp},
  pii = {1471-2105-10-299},
  pmid = {19772557},
  timestamp = {2009.10.29},
  url = {http://dx.doi.org/10.1186/1471-2105-10-299}
}
@article{Trapnell2013Differential,
  author = {Trapnell, C. and Hendrickson, D. G. and Sauvageau, M. and Goff, L.
	and Rinn, J. L. and Pachter, L.},
  title = {Differential analysis of gene regulation at transcript resolution
	with {RNA-seq}.},
  journal = {Nat. Biotechnol.},
  year = {2013},
  volume = {31},
  pages = {46--53},
  number = {1},
  month = {Jan},
  abstract = {Differential analysis of gene and transcript expression using high-throughput
	RNA sequencing (RNA-seq) is complicated by several sources of measurement
	variability and poses numerous statistical challenges. We present
	Cuffdiff 2, an algorithm that estimates expression at transcript-level
	resolution and controls for variability evident across replicate
	libraries. Cuffdiff 2 robustly identifies differentially expressed
	transcripts and genes and reveals differential splicing and promoter-preference
	changes. We demonstrate the accuracy of our approach through differential
	analysis of lung fibroblasts in response to loss of the developmental
	transcription factor HOXA1, which we show is required for lung fibroblast
	and HeLa cell cycle progression. Loss of HOXA1 results in significant
	expression level changes in thousands of individual transcripts,
	along with isoform switching events in key regulators of the cell
	cycle. Cuffdiff 2 performs robust differential analysis in RNA-seq
	experiments at transcript resolution, revealing a layer of regulation
	not readily observable with other high-throughput technologies.},
  doi = {10.1038/nbt.2450},
  pdf = {../local/Trapnell2013Differential.pdf},
  file = {Trapnell2013Differential.pdf:Trapnell2013Differential.pdf:PDF},
  institution = {Department of Stem Cell and Regenerative Biology, Harvard University,
	Cambridge, Massachusetts, USA.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nbt.2450},
  pmid = {23222703},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1038/nbt.2450}
}
@article{Trapnell2009TopHat,
  author = {Trapnell, C. and Pachter, L. and Salzberg, S. L.},
  title = {{TopHat}: discovering splice junctions with {RNA-Seq}.},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  pages = {1105--1111},
  number = {9},
  month = {May},
  abstract = {A new protocol for sequencing the messenger RNA in a cell, known as
	RNA-Seq, generates millions of short sequence fragments in a single
	run. These fragments, or 'reads', can be used to measure levels of
	gene expression and to identify novel splice variants of genes. However,
	current software for aligning RNA-Seq data to a genome relies on
	known splice junctions and cannot identify novel ones. TopHat is
	an efficient read-mapping algorithm designed to align reads from
	an RNA-Seq experiment to a reference genome without relying on known
	splice sites. We mapped the RNA-Seq reads from a recent mammalian
	RNA-Seq experiment and recovered more than 72\% of the splice junctions
	reported by the annotation-based software from that study, along
	with nearly 20,000 previously unreported junctions. The TopHat pipeline
	is much faster than previous systems, mapping nearly 2.2 million
	reads per CPU hour, which is sufficient to process an entire RNA-Seq
	experiment in less than a day on a standard desktop computer. We
	describe several challenges unique to ab initio splice site discovery
	from RNA-Seq reads that will require further algorithm development.
	TopHat is free, open-source software available from http://tophat.cbcb.umd.edu.
	Supplementary data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btp120},
  pdf = {../local/Trapnell2009TopHat.pdf},
  file = {Trapnell2009TopHat.pdf:Trapnell2009TopHat.pdf:PDF},
  institution = {Center for Bioinformatics and Computational Biology, University of
	Maryland, College Park, MD 20742, USA. cole@cs.umd.edu},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {btp120},
  pmid = {19289445},
  timestamp = {2013.03.29},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp120}
}
@article{Trapnell2012Differential,
  author = {Trapnell, C. and Roberts, A. and Goff, L. and Pertea, G. and Kim,
	D. and Kelley, D. R. and Pimentel, H. and Salzberg, S. L. and Rinn,
	J. L. and Pachter, L.},
  title = {Differential gene and transcript expression analysis of {RNA-seq}
	experiments with {TopHat} and {Cufflinks}.},
  journal = {Nat. Protoc.},
  year = {2012},
  volume = {7},
  pages = {562--578},
  number = {3},
  month = {Mar},
  abstract = {Recent advances in high-throughput cDNA sequencing (RNA-seq) can reveal
	new genes and splice variants and quantify expression genome-wide
	in a single assay. The volume and complexity of data from RNA-seq
	experiments necessitate scalable, fast and mathematically principled
	analysis software. TopHat and Cufflinks are free, open-source software
	tools for gene discovery and comprehensive expression analysis of
	high-throughput mRNA sequencing (RNA-seq) data. Together, they allow
	biologists to identify new genes and new splice variants of known
	ones, as well as compare gene and transcript expression under two
	or more conditions. This protocol describes in detail how to use
	TopHat and Cufflinks to perform such analyses. It also covers several
	accessory tools and utilities that aid in managing data, including
	CummeRbund, a tool for visualizing RNA-seq analysis results. Although
	the procedure assumes basic informatics skills, these tools assume
	little to no background with RNA-seq analysis and are meant for novices
	and experts alike. The protocol begins with raw sequencing reads
	and produces a transcriptome assembly, lists of differentially expressed
	and regulated genes and transcripts, and publication-quality visualizations
	of analysis results. The protocol's execution time depends on the
	volume of transcriptome sequencing data and available computing resources
	but takes less than 1 d of computer time for typical experiments
	and $\sim$1 h of hands-on time.},
  doi = {10.1038/nprot.2012.016},
  pdf = {../local/Trapnell2012Differential.pdf},
  file = {Trapnell2012Differential.pdf:Trapnell2012Differential.pdf:PDF},
  institution = {Broad Institute of MIT and Harvard, Cambridge, Massachusetts, USA.
	cole@broadinstitute.org},
  keywords = {ngs, rnaseq},
  owner = {laurent},
  pii = {nprot.2012.016},
  pmid = {22383036},
  timestamp = {2012.04.11},
  url = {http://dx.doi.org/10.1038/nprot.2012.016}
}
@article{Trapnell2010Transcript,
  author = {Trapnell, C. and Williams, B. A. and Pertea, G. and Mortazavi, A.
	and Kwan, G. and {van Baren}, M. J. and Salzberg, S. L. and Wold,
	B. J. and Pachter, L.},
  title = {Transcript assembly and quantification by {RNA-Seq} reveals unannotated
	transcripts and isoform switching during cell differentiation.},
  journal = {Nat. Biotechnol.},
  year = {2010},
  volume = {28},
  pages = {511--515},
  number = {5},
  month = {May},
  abstract = {High-throughput mRNA sequencing (RNA-Seq) promises simultaneous transcript
	discovery and abundance estimation. However, this would require algorithms
	that are not restricted by prior gene annotations and that account
	for alternative transcription and splicing. Here we introduce such
	algorithms in an open-source software program called Cufflinks. To
	test Cufflinks, we sequenced and analyzed >430 million paired 75-bp
	RNA-Seq reads from a mouse myoblast cell line over a differentiation
	time series. We detected 13,692 known transcripts and 3,724 previously
	unannotated ones, 62\% of which are supported by independent expression
	data or by homologous genes in other species. Over the time series,
	330 genes showed complete switches in the dominant transcription
	start site (TSS) or splice isoform, and we observed more subtle shifts
	in 1,304 other genes. These results suggest that Cufflinks can illuminate
	the substantial regulatory flexibility and complexity in even this
	well-studied model of muscle development and that it can improve
	transcriptome-based genome annotation.},
  doi = {10.1038/nbt.1621},
  pdf = {../local/Trapnell2010Transcript.pdf},
  file = {Trapnell2010Transcript.pdf:Trapnell2010Transcript.pdf:PDF},
  institution = {Department of Computer Science, University of Maryland, College Park,
	Maryland, USA.},
  keywords = {ngs, rnaseq},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nbt.1621},
  pmid = {20436464},
  timestamp = {2012.03.06},
  url = {http://dx.doi.org/10.1038/nbt.1621}
}
@article{Tuteja2009Extracting,
  author = {Geetu Tuteja and Peter White and Jonathan Schug and Klaus H Kaestner},
  title = {Extracting transcription factor targets from {ChIP-Seq} data.},
  journal = {Nucleic Acids Res.},
  year = {2009},
  volume = {37},
  pages = {e113},
  number = {17},
  month = {Sep},
  abstract = {ChIP-Seq technology, which combines chromatin immunoprecipitation
	(ChIP) with massively parallel sequencing, is rapidly replacing ChIP-on-chip
	for the genome-wide identification of transcription factor binding
	events. Identifying bound regions from the large number of sequence
	tags produced by ChIP-Seq is a challenging task. Here, we present
	GLITR (GLobal Identifier of Target Regions), which accurately identifies
	enriched regions in target data by calculating a fold-change based
	on random samples of control (input chromatin) data. GLITR uses a
	classification method to identify regions in ChIP data that have
	a peak height and fold-change which do not resemble regions in an
	input sample. We compare GLITR to several recent methods and show
	that GLITR has improved sensitivity for identifying bound regions
	closely matching the consensus sequence of a given transcription
	factor, and can detect bona fide transcription factor targets missed
	by other programs. We also use GLITR to address the issue of sequencing
	depth, and show that sequencing biological replicates identifies
	far more binding regions than re-sequencing the same sample.},
  doi = {10.1093/nar/gkp536},
  pdf = {../local/Tuteja2009Extracting.pdf},
  file = {Tuteja2009Extracting.pdf:Tuteja2009Extracting.pdf:PDF},
  institution = {Department of Genetics and Institute of Diabetes, Obesity and Metabolism,
	University of Pennsylvania School of Medicine, Philadelphia, PA 19104,
	USA.},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {gkp536},
  pmid = {19553195},
  timestamp = {2009.10.30},
  url = {http://dx.doi.org/10.1093/nar/gkp536}
}
@article{Wang2008diploid,
  author = {Wang, Jun and Wang, Wei and Li, Ruiqiang and Li, Yingrui and Tian,
	Geng and Goodman, Laurie and Fan, Wei and Zhang, Junqing and Li,
	Jun and Zhang, Juanbin and Guo, Yiran and Feng, Binxiao and Li, Heng
	and Lu, Yao and Fang, Xiaodong and Liang, Huiqing and Du, Zhenglin
	and Li, Dong and Zhao, Yiqing and Hu, Yujie and Yang, Zhenzhen and
	Zheng, Hancheng and Hellmann, Ines and Inouye, Michael and Pool,
	John and Yi, Xin and Zhao, Jing and Duan, Jinjie and Zhou, Yan and
	Qin, Junjie and Ma, Lijia and Li, Guoqing and Yang, Zhentao and Zhang,
	Guojie and Yang, Bin and Yu, Chang and Liang, Fang and Li, Wenjie
	and Li, Shaochuan and Li, Dawei and Ni, Peixiang and Ruan, Jue and
	Li, Qibin and Zhu, Hongmei and Liu, Dongyuan and Lu, Zhike and Li,
	Ning and Guo, Guangwu and Zhang, J. and Ye, J. and Fang, L. and Hao,
	Q. and Chen, Q. and Liang, Y. and Su, Y. and San, A. and Ping, C.
	and Yang, S. and Chen, F. and Li, L. and Zhou, K. and Zheng, H. and
	Ren, Y. and Yang, L. and Gao, Y. and Yang, G. and Li, Z. and Feng,
	X. and Kristiansen, K. and Wong, G. K.-S. and Nielsen, R. and Durbin,
	R. and Bolund, L. and Zhang, X. and Li, S. and Yang, H. and Wang,
	J.},
  title = {The diploid genome sequence of an {Asian} individual.},
  journal = {Nature},
  year = {2008},
  volume = {456},
  pages = {60--65},
  number = {7218},
  month = {Nov},
  abstract = {Here we present the first diploid genome sequence of an Asian individual.
	The genome was sequenced to 36-fold average coverage using massively
	parallel sequencing technology. We aligned the short reads onto the
	NCBI human reference genome to 99.97\% coverage, and guided by the
	reference genome, we used uniquely mapped reads to assemble a high-quality
	consensus sequence for 92\% of the Asian individual's genome. We
	identified approximately 3 million single-nucleotide polymorphisms
	(SNPs) inside this region, of which 13.6\% were not in the dbSNP
	database. Genotyping analysis showed that SNP identification had
	high accuracy and consistency, indicating the high sequence quality
	of this assembly. We also carried out heterozygote phasing and haplotype
	prediction against HapMap CHB and JPT haplotypes (Chinese and Japanese,
	respectively), sequence comparison with the two available individual
	genomes (J. D. Watson and J. C. Venter), and structural variation
	identification. These variations were considered for their potential
	biological impact. Our sequence data and analyses demonstrate the
	potential usefulness of next-generation sequencing technologies for
	personal genomics.},
  doi = {10.1038/nature07484},
  institution = {Beijing Genomics Institute at Shenzhen, Shenzhen 518000, China. wangj@genomics.org.cn},
  keywords = {ngs},
  language = {eng},
  medline-pst = {ppublish},
  owner = {jp},
  pii = {nature07484},
  pmid = {18987735},
  timestamp = {2011.10.28},
  url = {http://dx.doi.org/10.1038/nature07484}
}
@article{Wang2009RNA,
  author      = {Wang, Z. and Gerstein, M. and Snyder, M.},
  title       = {{RNA-Seq}: a revolutionary tool for transcriptomics.},
  journal     = {Nat. Rev. Genet.},
  year        = {2009},
  month       = {Jan},
  volume      = {10},
  number      = {1},
  pages       = {57--63},
  doi         = {10.1038/nrg2484},
  url         = {http://dx.doi.org/10.1038/nrg2484},
  pmid        = {19015660},
  pii         = {nrg2484},
  institution = {Department of Molecular, Cellular and Developmental Biology,
    Yale University, 219 Prospect Street, New Haven, Connecticut 06520, USA.},
  keywords    = {ngs, rnaseq},
  abstract    = {RNA-Seq is a recently developed approach to transcriptome
    profiling that uses deep-sequencing technologies. Studies using this
    method have already altered our view of the extent and complexity of
    eukaryotic transcriptomes. RNA-Seq also provides a far more precise
    measurement of levels of transcripts and their isoforms than other
    methods. This article describes the RNA-Seq approach, the challenges
    associated with its application, and the advances made so far in
    characterizing several eukaryote transcriptomes.},
  pdf         = {../local/Wang2009RNA.pdf},
  file        = {Wang2009RNA.pdf:Wang2009RNA.pdf:PDF},
  owner       = {ljacob},
  timestamp   = {2009.09.14}
}
@article{Xie2009CNV-seq,
  author = {Chao Xie and Martti T Tammi},
  title = {{CNV-seq}, a new method to detect copy number variation using high-throughput
	sequencing.},
  journal = {BMC Bioinformatics},
  year = {2009},
  volume = {10},
  pages = {80},
  abstract = {BACKGROUND: DNA copy number variation (CNV) has been recognized as
	an important source of genetic variation. Array comparative genomic
	hybridization (aCGH) is commonly used for CNV detection, but the
	microarray platform has a number of inherent limitations. RESULTS:
	Here, we describe a method to detect copy number variation using
	shotgun sequencing, CNV-seq. The method is based on a robust statistical
	model that describes the complete analysis procedure and allows the
	computation of essential confidence values for detection of CNV.
	Our results show that the number of reads, not the length of the
	reads is the key factor determining the resolution of detection.
	This favors the next-generation sequencing methods that rapidly produce
	large amount of short reads. CONCLUSION: Simulation of various sequencing
	methods with coverage between 0.1x to 8x show overall specificity
	between 91.7 - 99.9\%, and sensitivity between 72.2 - 96.5\%. We
	also show the results for assessment of CNV between two individual
	human genomes.},
  doi = {10.1186/1471-2105-10-80},
  pdf = {../local/Xie2009CNV-seq.pdf},
  file = {Xie2009CNV-seq.pdf:Xie2009CNV-seq.pdf:PDF},
  institution = {Department of Biological Sciences, National University of Singapore,
	Singapore. xie@nus.edu.sg},
  keywords = {ngs},
  owner = {jp},
  pii = {1471-2105-10-80},
  pmid = {19267900},
  timestamp = {2009.10.09},
  url = {http://dx.doi.org/10.1186/1471-2105-10-80}
}
@article{Yaffe2011Probabilistic,
  author   = {Yaffe, E. and Tanay, A.},
  title    = {Probabilistic modeling of {Hi-C} contact maps eliminates
    systematic biases to characterize global chromosomal architecture},
  journal  = {Nat. Genet.},
  year     = {2011},
  volume   = {43},
  number   = {11},
  pages    = {1059--1065},
  issn     = {1061-4036},
  doi      = {10.1038/ng.947},
  url      = {http://dx.doi.org/10.1038/ng.947},
  urldate  = {2012-01-11},
  keywords = {hic, ngs},
  abstract = {Hi-C experiments measure the probability of physical proximity
    between pairs of chromosomal loci on a genomic scale. We report on
    several systematic biases that substantially affect the Hi-C
    experimental procedure, including the distance between restriction
    sites, the GC content of trimmed ligation junctions and sequence
    uniqueness. To address these biases, we introduce an integrated
    probabilistic background model and develop algorithms to estimate its
    parameters and renormalize Hi-C data. Analysis of corrected human
    lymphoblast contact maps provides genome-wide evidence for
    interchromosomal aggregation of active chromatin marks, including
    DNase-hypersensitive sites and transcriptionally active foci. We observe
    extensive long-range (up to 400 kb) cis interactions at active promoters
    and derive asymmetric contact profiles next to transcription start sites
    and CTCF binding sites. Clusters of interacting chromosomal domains
    suggest physical separation of centromere-proximal and centromere-distal
    regions. These results provide a computational basis for the inference
    of chromosomal architectures from Hi-C experiments.},
  pdf      = {../local/Yaffe2011Probabilistic.pdf},
  file     = {Yaffe2011Probabilistic.pdf:Yaffe2011Probabilistic.pdf:PDF},
  owner    = {nelle}
}
@article{Yoon2009Sensitive,
  author      = {Seungtai Yoon and Zhenyu Xuan and Vladimir Makarov and
    Kenny Ye and Jonathan Sebat},
  title       = {Sensitive and accurate detection of copy number variants
    using read depth of coverage.},
  journal     = {Genome Res.},
  year        = {2009},
  month       = {Sep},
  volume      = {19},
  number      = {9},
  pages       = {1586--1592},
  doi         = {10.1101/gr.092981.109},
  url         = {http://dx.doi.org/10.1101/gr.092981.109},
  pmid        = {19657104},
  pii         = {gr.092981.109},
  institution = {Cold Spring Harbor Laboratory, Cold Spring Harbor, New York
    11724, USA.},
  keywords    = {ngs},
  abstract    = {Methods for the direct detection of copy number variation
    (CNV) genome-wide have become effective instruments for identifying
    genetic risk factors for disease. The application of next-generation
    sequencing platforms to genetic studies promises to improve sensitivity
    to detect CNVs as well as inversions, indels, and SNPs. New computational
    approaches are needed to systematically detect these variants from
    genome sequence data. Existing sequence-based approaches for CNV
    detection are primarily based on paired-end read mapping (PEM) as
    reported previously by Tuzun et al. and Korbel et al. Due to limitations
    of the PEM approach, some classes of CNVs are difficult to ascertain,
    including large insertions and variants located within complex genomic
    regions. To overcome these limitations, we developed a method for CNV
    detection using read depth of coverage. Event-wise testing (EWT) is a
    method based on significance testing. In contrast to standard
    segmentation algorithms that typically operate by performing likelihood
    evaluation for every point in the genome, EWT works on intervals of data
    points, rapidly searching for specific classes of events. Overall
    false-positive rate is controlled by testing the significance of each
    possible event and adjusting for multiple testing. Deletions and
    duplications detected in an individual genome by EWT are examined across
    multiple genomes to identify polymorphism between individuals. We
    estimated error rates using simulations based on real data, and we
    applied EWT to the analysis of chromosome 1 from paired-end shotgun
    sequence data (30x) on five individuals. Our results suggest that
    analysis of read depth is an effective approach for the detection of
    CNVs, and it captures structural variants that are refractory to
    established PEM-based methods.},
  pdf         = {../local/Yoon2009Sensitive.pdf},
  file        = {Yoon2009Sensitive.pdf:Yoon2009Sensitive.pdf:PDF},
  owner       = {jp},
  timestamp   = {2009.10.09}
}
@article{Zhang2012Spatial,
  author = {Zhang, Y. and McCord, R. A. and Ho, Y.-J. and Lajoie, B. R. and Hildebrand,
	D. G. and Simon, A. C. and Becker, M. S. and Alt, F. W. and Dekker,
	J.},
  title = {Spatial Organization of the Mouse Genome and Its Role in Recurrent
	Chromosomal Translocations},
  journal = {Cell},
  year = {2012},
  volume = {148},
  pages = {908--921},
  number = {5},
  abstract = {The extent to which the three-dimensional organization of
	the genome contributes to chromosomal translocations is an important
	question in cancer genomics. We generated a high-resolution Hi-C
	spatial organization map of the G1-arrested mouse pro-B cell genome
	and used high-throughput genome-wide translocation sequencing to
	map translocations from target DNA double-strand breaks (DSBs) within
	it. RAG endonuclease-cleaved antigen-receptor loci are dominant translocation
	partners for target DSBs regardless of genomic position, reflecting
	high-frequency DSBs at these loci and their colocalization in a fraction
	of cells. To directly assess spatial proximity contributions, we
	normalized genomic DSBs via ionizing radiation. Under these conditions,
	translocations were highly enriched in cis along single chromosomes
	containing target DSBs and within other chromosomes and subchromosomal
	domains in a manner directly related to pre-existing spatial proximity.
	By combining two high-throughput genomic methods in a genetically
	tractable system, we provide a new lens for viewing cancer genomes.},
  doi = {10.1016/j.cell.2012.02.002},
  pdf = {../local/Zhang2012Spatial.pdf},
  file = {Zhang2012Spatial.pdf:Zhang2012Spatial.pdf:PDF},
  issn = {0092-8674},
  keywords = {hic, ngs},
  owner = {nelle},
  url = {http://www.sciencedirect.com/science/article/pii/S0092867412001584}
}
@comment{{jabref-meta: selector_author:}}
@comment{{jabref-meta: selector_journal:Adv. Drug Deliv. Rev.;Am. J. Hu
m. Genet.;Am. J. Pathol.;Ann. Appl. Stat.;Ann. Math. Statist.;Ann. N. 
Y. Acad. Sci.;Ann. Probab.;Ann. Stat.;Artif. Intell. Med.;Bernoulli;Bi
ochim. Biophys. Acta;Bioinformatics;Biometrika;BMC Bioinformatics;Br. 
J. Pharmacol.;Breast Cancer Res.;Cell;Cell. Signal.;Chem. Res. Toxicol
.;Clin. Cancer Res.;Combinator. Probab. Comput.;Comm. Pure Appl. Math.
;Comput. Chem.;Comput. Comm. Rev.;Comput. Stat. Data An.;Curr. Genom.;
Curr. Opin. Chem. Biol.;Curr. Opin. Drug Discov. Devel.;Data Min. Know
l. Discov.;Electron. J. Statist.;Eur. J. Hum. Genet.;FEBS Lett.;Found.
 Comput. Math.;Genome Biol.;IEEE T. Neural Networ.;IEEE T. Pattern. An
al.;IEEE T. Signal. Proces.;IEEE Trans. Inform. Theory;IEEE Trans. Kno
wl. Data Eng.;IEEE/ACM Trans. Comput. Biol. Bioinf.;Int. J. Comput. Vi
sion;Int. J. Data Min. Bioinform.;Int. J. Qantum Chem.;J Biol Syst;J. 
ACM;J. Am. Soc. Inf. Sci. Technol.;J. Am. Stat. Assoc.;J. Bioinform. C
omput. Biol.;J. Biol. Chem.;J. Biomed. Inform.;J. Cell. Biochem.;J. Ch
em. Inf. Comput. Sci.;J. Chem. Inf. Model.;J. Clin. Oncol.;J. Comput. 
Biol.;J. Comput. Graph. Stat.;J. Eur. Math. Soc.;J. Intell. Inform. Sy
st.;J. Mach. Learn. Res.;J. Med. Chem.;J. Mol. BIol.;J. R. Stat. Soc. 
Ser. B;Journal of Statistical Planning and Inference;Mach. Learn.;Math
. Program.;Meth. Enzymol.;Mol. Biol. Cell;Mol. Biol. Evol.;Mol. Cell. 
Biol.;Mol. Syst. Biol.;N. Engl. J. Med.;Nat. Biotechnol.;Nat. Genet.;N
at. Med.;Nat. Methods;Nat. Rev. Cancer;Nat. Rev. Drug Discov.;Nat. Rev
. Genet.;Nature;Neural Comput.;Neural Network.;Neurocomputing;Nucleic 
Acids Res.;Pattern Anal. Appl.;Pattern Recognit.;Phys. Rev. E;Phys. Re
v. Lett.;PLoS Biology;PLoS Comput. Biol.;Probab. Theory Relat. Fields;
Proc. IEEE;Proc. Natl. Acad. Sci. USA;Protein Eng.;Protein Eng. Des. S
el.;Protein Sci.;Protein. Struct. Funct. Genet.;Random Struct. Algorit
hm.;Rev. Mod. Phys.;Science;Stat. Probab. Lett.;Statistica Sinica;Theo
r. Comput. Sci.;Trans. Am. Math. Soc.;Trends Genet.;}}
@comment{{jabref-meta: selector_keywords:biogm;biosvm;breastcancer;cgh;
chemogenomics;chemoinformatics;csbcbook;csbcbook-ch1;csbcbook-ch2;csbc
book-ch3;csbcbook-ch4;csbcbook-ch5;csbcbook-ch6;csbcbook-ch7;csbcbook-
ch8;csbcbook-ch9;csbcbook-mustread;dimred;featureselection;glycans;her
g;hic;highcontentscreening;image;immunoinformatics;kernel-theory;kerne
lbook;lasso;microarray;ngs;nlp;plasmodium;proteomics;PUlearning;rnaseq
;segmentation;sirna;}}
@comment{{jabref-meta: selector_booktitle:Adv. Neural. Inform. Process 
Syst.;}}

This file was generated by bibtex2html 1.97.