PacBio Data

Revision as of 2 April 2014 03:02 by admin (Comments | Contribs) | (First Set)
Contents

Dataset 5, E. coli K-12 MG1655, 17 SMRT cells

All Data

The 17 SMRT cells of E. coli K-12 MG1655 were downloaded from NCBI SRA, as described in Data.
m120208_071634_42139_c100288480630000001523009507231245_s1_p0.bas.h5
m120228_190630_42139_c100301722550000001523012308061206_s1_p0.bas.h5
m120208_122534_42139_c100290260310000001523009507231262_s1_p0.bas.h5
m120228_192221_42129_c100298890010000001523009207231260_s1_p0.bas.h5
m120208_160812_42139_c100290260310000001523009507231264_s1_p0.bas.h5
m120228_205404_42139_c100301722550000001523012308061207_s1_p0.bas.h5
m120228_082105_42139_c100301722550000001523012308061200_s1_p0.bas.h5
m120228_210845_42129_c000304152550000001500000112311370_s1_p0.bas.h5
m120228_100807_42139_c100301722550000001523012308061201_s1_p0.bas.h5
m120228_223624_richard_c001202352550000001500000112311330_s1_p0.bas.h5
m120228_115504_42139_c100301722550000001523012308061202_s1_p0.bas.h5
m120229_004752_42129_c000304192550000001500000112311350_s1_p0.bas.h5
m120228_134222_42139_c100301722550000001523012308061203_s1_p0.bas.h5
m120229_012852_42139_c000301732550000001500000112311360_s1_p0.bas.h5
m120228_152936_42139_c100301722550000001523012308061204_s1_p0.bas.h5
m120229_193409_42129_c000304212550000001500000112311380_s1_p0.bas.h5
m120228_171636_42139_c100301722550000001523012308061205_s1_p0.bas.h5

We have run smrtpipe.py (SMRT analysis) with the following params.xml to get filtered subreads of continuous long reads (CLR).

 <param name="minLength">
   <value>50</value>
 </param>
 <param name="readScore">
   <value>0.75</value>
 </param>
 <param name="minSubReadLength">
   <value>50</value>
 </param>

Randomly Selected Four SMRT cells

First Set

m120208_122534_42139_c100290260310000001523009507231262_s1_p0
m120228_205404_42139_c100301722550000001523012308061207_s1_p0
m120228_210845_42129_c000304152550000001500000112311370_s1_p0
m120228_152936_42139_c100301722550000001523012308061204_s1_p0

Filtered_four.fastq (Genome size: 4.65Mb)
seqs amount:142746
seq avg len:2286.987698
total:326.46 Mb
depth: 70.21X

Second Set

m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_152936_42139_c100301722550000001523012308061204_s1_p0
m120228_192221_42129_c100298890010000001523009207231260_s1_p0
m120228_100807_42139_c100301722550000001523012308061201_s1_p0

Filtered_four.fastq
seqs amount:157329
seq avg len:2274.563278
total:357.85 Mb
depth: 76.96X

Third Set

m120228_192221_42129_c100298890010000001523009207231260_s1_p0
m120228_223624_richard_c001202352550000001500000112311330_s1_p0
m120228_082105_42139_c100301722550000001523012308061200_s1_p0
m120229_193409_42129_c000304212550000001500000112311380_s1_p0

Filtered_four.fastq
seqs amount:146090
seq avg len:2309.804908
total:337.44 Mb
depth: 72.57X

Randomly Selected Six SMRT cells

First Set

m120228_190630_42139_c100301722550000001523012308061206_s1_p0
m120228_210845_42129_c000304152550000001500000112311370_s1_p0
m120228_205404_42139_c100301722550000001523012308061207_s1_p0
m120228_115504_42139_c100301722550000001523012308061202_s1_p0
m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_100807_42139_c100301722550000001523012308061201_s1_p0

Filtered_six.fastq (Genome size: 4.65Mb)
seqs amount:222213
seq avg len:2293.543920
total:509.66 Mb
depth: 109.60X

Second Set

m120208_071634_42139_c100288480630000001523009507231245_s1_p0
m120228_171636_42139_c100301722550000001523012308061205_s1_p0
m120208_160812_42139_c100290260310000001523009507231264_s1_p0
m120228_190630_42139_c100301722550000001523012308061206_s1_p0
m120208_122534_42139_c100290260310000001523009507231262_s1_p0
m120228_210845_42129_c000304152550000001500000112311370_s1_p0

Filtered_six.fastq
seqs amount:204851
seq avg len:2225.530273
total:455.90 Mb
depth: 98.04X

Third Set

m120228_223624_richard_c001202352550000001500000112311330_s1_p0
m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_082105_42139_c100301722550000001523012308061200_s1_p0
m120228_115504_42139_c100301722550000001523012308061202_s1_p0
m120228_190630_42139_c100301722550000001523012308061206_s1_p0
m120208_122534_42139_c100290260310000001523009507231262_s1_p0

Filtered_six.fastq
seqs amount:218732
seq avg len:2255.988808
total:493.46 Mb
depth: 106.12X

Randomly Selected Eight SMRT cells

First Set

m120228_210845_42129_c000304152550000001500000112311370_s1_p0
m120208_122534_42139_c100290260310000001523009507231262_s1_p0
m120228_152936_42139_c100301722550000001523012308061204_s1_p0
m120228_223624_richard_c001202352550000001500000112311330_s1_p0
m120228_134222_42139_c100301722550000001523012308061203_s1_p0
m120208_160812_42139_c100290260310000001523009507231264_s1_p0
m120208_071634_42139_c100288480630000001523009507231245_s1_p0
m120228_192221_42129_c100298890010000001523009207231260_s1_p0

Filtered_eight.fastq (Genome size: 4.65Mb)
seqs amount:276503
seq avg len:2270.814845
total:627.89 Mb
depth: 135.03X

Second Set

m120228_205404_42139_c100301722550000001523012308061207_s1_p0
m120229_193409_42129_c000304212550000001500000112311380_s1_p0
m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_171636_42139_c100301722550000001523012308061205_s1_p0
m120228_100807_42139_c100301722550000001523012308061201_s1_p0
m120229_004752_42129_c000304192550000001500000112311350_s1_p0
m120228_082105_42139_c100301722550000001523012308061200_s1_p0
m120208_160812_42139_c100290260310000001523009507231264_s1_p0

Third Set

m120208_160812_42139_c100290260310000001523009507231264_s1_p0
m120228_190630_42139_c100301722550000001523012308061206_s1_p0
m120228_205404_42139_c100301722550000001523012308061207_s1_p0
m120228_115504_42139_c100301722550000001523012308061202_s1_p0
m120229_004752_42129_c000304192550000001500000112311350_s1_p0
m120228_171636_42139_c100301722550000001523012308061205_s1_p0
m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_192221_42129_c100298890010000001523009207231260_s1_p0

Dataset 6, E. coli K-12 MG1655, 8 SMRT cells

All Data

The eight SMRT cells of E. coli K-12 MG1655 were downloaded from HGAP. In our study we used all eight SMRT cells, and we also randomly selected subsets of four and of six SMRT cells, three times each.

m121023_202553_42178_c100389662550000001523034410251200_s1_p0.bas.h5
m121023_224605_42178_c100389662550000001523034410251201_s1_p0.bas.h5
m121024_010654_42178_c100389662550000001523034410251202_s1_p0.bas.h5
m121024_032737_42178_c100389662550000001523034410251203_s1_p0.bas.h5
m121024_074656_42178_c100389662550000001523034410251204_s1_p0.bas.h5
m121024_100442_42178_c100389662550000001523034410251205_s1_p0.bas.h5
m121024_122509_42178_c100389662550000001523034410251206_s1_p0.bas.h5
m121024_144608_42178_c100389662550000001523034410251207_s1_p0.bas.h5

We have run smrtpipe.py (SMRT analysis) with the following params.xml to get filtered subreads of continuous long reads (CLR).

 <param name="minLength">
   <value>50</value>
 </param>
 <param name="readScore">
   <value>0.75</value>
 </param>
 <param name="minSubReadLength">
   <value>50</value>
 </param>

Randomly Selected Four SMRT cells

First Set

m121024_100442_42178_c100389662550000001523034410251205_s1_p0
m121024_122509_42178_c100389662550000001523034410251206_s1_p0
m121023_202553_42178_c100389662550000001523034410251200_s1_p0
m121024_010654_42178_c100389662550000001523034410251202_s1_p0

Second Set

m121024_122509_42178_c100389662550000001523034410251206_s1_p0
m121023_202553_42178_c100389662550000001523034410251200_s1_p0
m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_144608_42178_c100389662550000001523034410251207_s1_p0

Third Set

m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_010654_42178_c100389662550000001523034410251202_s1_p0
m121023_224605_42178_c100389662550000001523034410251201_s1_p0
m121024_074656_42178_c100389662550000001523034410251204_s1_p0

Randomly Selected Six SMRT cells

First Set

m121024_100442_42178_c100389662550000001523034410251205_s1_p0
m121023_224605_42178_c100389662550000001523034410251201_s1_p0
m121023_202553_42178_c100389662550000001523034410251200_s1_p0
m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_074656_42178_c100389662550000001523034410251204_s1_p0
m121024_144608_42178_c100389662550000001523034410251207_s1_p0

Second Set

m121024_074656_42178_c100389662550000001523034410251204_s1_p0
m121023_224605_42178_c100389662550000001523034410251201_s1_p0
m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_144608_42178_c100389662550000001523034410251207_s1_p0
m121024_010654_42178_c100389662550000001523034410251202_s1_p0
m121024_100442_42178_c100389662550000001523034410251205_s1_p0

Third Set

m121023_224605_42178_c100389662550000001523034410251201_s1_p0
m121023_202553_42178_c100389662550000001523034410251200_s1_p0
m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_122509_42178_c100389662550000001523034410251206_s1_p0
m121024_010654_42178_c100389662550000001523034410251202_s1_p0
m121024_144608_42178_c100389662550000001523034410251207_s1_p0

Dataset 7, M. ruber DSM1279, 4 SMRT cells

The four SMRT cells of M. ruber were downloaded from HGAP.

m120803_022519_42141_c100388772550000001523034210251234_s1_p0.bas.h5
m120803_041200_42141_c100388772550000001523034210251235_s1_p0.bas.h5
m120803_055858_42141_c100388772550000001523034210251236_s1_p0.bas.h5
m120803_074648_42141_c100388772550000001523034210251237_s1_p0.bas.h5

We have run smrtpipe.py (SMRT analysis) with the following params.xml to get filtered subreads of continuous long reads (CLR).

 <param name="minLength">
   <value>50</value>
 </param>
 <param name="readScore">
   <value>0.75</value>
 </param>
 <param name="minSubReadLength">
   <value>50</value>
 </param>

filtered_subreads.fastq (genome size: 3.1Mb)
seqs amount:156840
seq avg len:2444.683308
total:383.42 Mb
depth: 123.69X

Dataset 8, P. heparinus DSM2366, 7 SMRT cells

The seven SMRT cells of P. heparinus were downloaded from HGAP. In our study we used all seven SMRT cells, and we also randomly selected subsets of four SMRT cells three times.

All Data

m120802_210418_42142_c100388622550000001523034210251211_s1_p0.bas.h5
m120802_225401_42142_c100388622550000001523034210251212_s1_p0.bas.h5
m120803_004251_42142_c100388622550000001523034210251213_s1_p0.bas.h5
m120803_023226_42142_c100388622550000001523034210251214_s1_p0.bas.h5
m120803_041958_42142_c100388622550000001523034210251215_s1_p0.bas.h5
m120803_060809_42142_c100388622550000001523034210251216_s1_p0.bas.h5
m120803_075818_42142_c100388622550000001523034210251217_s1_p0.bas.h5

We have run smrtpipe.py (SMRT analysis) with the following params.xml to get filtered subreads of continuous long reads (CLR).

 <param name="minLength">
   <value>50</value>
 </param>
 <param name="readScore">
   <value>0.75</value>
 </param>
 <param name="minSubReadLength">
   <value>50</value>
 </param>

Randomly Selected Four SMRT cells

First Set

m120802_225401_42142_c100388622550000001523034210251212_s1_p0
m120803_060809_42142_c100388622550000001523034210251216_s1_p0
m120803_004251_42142_c100388622550000001523034210251213_s1_p0
m120802_210418_42142_c100388622550000001523034210251211_s1_p0

Second Set

m120803_023226_42142_c100388622550000001523034210251214_s1_p0
m120803_075818_42142_c100388622550000001523034210251217_s1_p0
m120802_210418_42142_c100388622550000001523034210251211_s1_p0
m120803_060809_42142_c100388622550000001523034210251216_s1_p0

Third Set

m120803_041958_42142_c100388622550000001523034210251215_s1_p0
m120803_075818_42142_c100388622550000001523034210251217_s1_p0
m120803_023226_42142_c100388622550000001523034210251214_s1_p0
m120802_210418_42142_c100388622550000001523034210251211_s1_p0

Dataset 9, E. coli P4-C2 chemistry, 20Kbp library, 1 SMRT cell

This dataset includes one SMRT cell of data gathered with a PacBio RS II System and P4-C2 chemistry on a size-selected 20 kb library of E. coli K-12. The data were downloaded from PacBio DevNet.

m130404_014004_sidney_c100506902550000001823076808221337_s1_p0.1.bax.h5
m130404_014004_sidney_c100506902550000001823076808221337_s1_p0.2.bax.h5
m130404_014004_sidney_c100506902550000001823076808221337_s1_p0.3.bax.h5

We have run smrtpipe.py (SMRT analysis) with the following params.xml to get filtered subreads of continuous long reads (CLR).

 <param name="minLength">
   <value>50</value>
 </param>
 <param name="readScore">
   <value>0.75</value>
 </param>
 <param name="minSubReadLength">
   <value>50</value>