PacBio Data

Revision as of 13 March 2014 03:04 by admin (Comments | Contribs) | (Dataset 7, M. ruber DSM1279, 4 SMRT cells)
Contents

Dataset 5, E. coli K-12 MG1655, 17 SMRT cells

All Data

The 17 SMRT cells of E. coli K-12 MG1655 were downloaded from NCBI SRA, as described in Data.
m120208_071634_42139_c100288480630000001523009507231245_s1_p0.bas.h5
m120228_190630_42139_c100301722550000001523012308061206_s1_p0.bas.h5
m120208_122534_42139_c100290260310000001523009507231262_s1_p0.bas.h5
m120228_192221_42129_c100298890010000001523009207231260_s1_p0.bas.h5
m120208_160812_42139_c100290260310000001523009507231264_s1_p0.bas.h5
m120228_205404_42139_c100301722550000001523012308061207_s1_p0.bas.h5
m120228_082105_42139_c100301722550000001523012308061200_s1_p0.bas.h5
m120228_210845_42129_c000304152550000001500000112311370_s1_p0.bas.h5
m120228_100807_42139_c100301722550000001523012308061201_s1_p0.bas.h5
m120228_223624_richard_c001202352550000001500000112311330_s1_p0.bas.h5
m120228_115504_42139_c100301722550000001523012308061202_s1_p0.bas.h5
m120229_004752_42129_c000304192550000001500000112311350_s1_p0.bas.h5
m120228_134222_42139_c100301722550000001523012308061203_s1_p0.bas.h5
m120229_012852_42139_c000301732550000001500000112311360_s1_p0.bas.h5
m120228_152936_42139_c100301722550000001523012308061204_s1_p0.bas.h5
m120229_193409_42129_c000304212550000001500000112311380_s1_p0.bas.h5
m120228_171636_42139_c100301722550000001523012308061205_s1_p0.bas.h5

Randomly Selected Four SMRT cells

First Set

m120208_122534_42139_c100290260310000001523009507231262_s1_p0
m120228_205404_42139_c100301722550000001523012308061207_s1_p0
m120228_210845_42129_c000304152550000001500000112311370_s1_p0
m120228_152936_42139_c100301722550000001523012308061204_s1_p0

Second Set

m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_152936_42139_c100301722550000001523012308061204_s1_p0
m120228_192221_42129_c100298890010000001523009207231260_s1_p0
m120228_100807_42139_c100301722550000001523012308061201_s1_p0

Third Set

m120228_192221_42129_c100298890010000001523009207231260_s1_p0
m120228_223624_richard_c001202352550000001500000112311330_s1_p0
m120228_082105_42139_c100301722550000001523012308061200_s1_p0
m120229_193409_42129_c000304212550000001500000112311380_s1_p0

Randomly Selected Six SMRT cells

First Set

m120228_190630_42139_c100301722550000001523012308061206_s1_p0
m120228_210845_42129_c000304152550000001500000112311370_s1_p0
m120228_205404_42139_c100301722550000001523012308061207_s1_p0
m120228_115504_42139_c100301722550000001523012308061202_s1_p0
m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_100807_42139_c100301722550000001523012308061201_s1_p0

Second Set

m120208_071634_42139_c100288480630000001523009507231245_s1_p0
m120228_171636_42139_c100301722550000001523012308061205_s1_p0
m120208_160812_42139_c100290260310000001523009507231264_s1_p0
m120228_190630_42139_c100301722550000001523012308061206_s1_p0
m120208_122534_42139_c100290260310000001523009507231262_s1_p0
m120228_210845_42129_c000304152550000001500000112311370_s1_p0

Third Set

m120228_223624_richard_c001202352550000001500000112311330_s1_p0
m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_082105_42139_c100301722550000001523012308061200_s1_p0
m120228_115504_42139_c100301722550000001523012308061202_s1_p0
m120228_190630_42139_c100301722550000001523012308061206_s1_p0
m120208_122534_42139_c100290260310000001523009507231262_s1_p0

Randomly Selected Eight SMRT cells

First Set

m120228_210845_42129_c000304152550000001500000112311370_s1_p0
m120208_122534_42139_c100290260310000001523009507231262_s1_p0
m120228_152936_42139_c100301722550000001523012308061204_s1_p0
m120228_223624_richard_c001202352550000001500000112311330_s1_p0
m120228_134222_42139_c100301722550000001523012308061203_s1_p0
m120208_160812_42139_c100290260310000001523009507231264_s1_p0
m120208_071634_42139_c100288480630000001523009507231245_s1_p0
m120228_192221_42129_c100298890010000001523009207231260_s1_p0

Second Set

m120228_205404_42139_c100301722550000001523012308061207_s1_p0
m120229_193409_42129_c000304212550000001500000112311380_s1_p0
m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_171636_42139_c100301722550000001523012308061205_s1_p0
m120228_100807_42139_c100301722550000001523012308061201_s1_p0
m120229_004752_42129_c000304192550000001500000112311350_s1_p0
m120228_082105_42139_c100301722550000001523012308061200_s1_p0
m120208_160812_42139_c100290260310000001523009507231264_s1_p0

Third Set

m120208_160812_42139_c100290260310000001523009507231264_s1_p0
m120228_190630_42139_c100301722550000001523012308061206_s1_p0
m120228_205404_42139_c100301722550000001523012308061207_s1_p0
m120228_115504_42139_c100301722550000001523012308061202_s1_p0
m120229_004752_42129_c000304192550000001500000112311350_s1_p0
m120228_171636_42139_c100301722550000001523012308061205_s1_p0
m120229_012852_42139_c000301732550000001500000112311360_s1_p0
m120228_192221_42129_c100298890010000001523009207231260_s1_p0

Dataset 6, E. coli K-12 MG1655, 8 SMRT cells

All Data

The eight SMRT cells of E. coli K-12 MG1655 were downloaded from HGAP. In our study we used all eight SMRT cells, and we also drew three random subsets of four SMRT cells and three random subsets of six SMRT cells.

m121023_202553_42178_c100389662550000001523034410251200_s1_p0.bas.h5
m121023_224605_42178_c100389662550000001523034410251201_s1_p0.bas.h5
m121024_010654_42178_c100389662550000001523034410251202_s1_p0.bas.h5
m121024_032737_42178_c100389662550000001523034410251203_s1_p0.bas.h5
m121024_074656_42178_c100389662550000001523034410251204_s1_p0.bas.h5
m121024_100442_42178_c100389662550000001523034410251205_s1_p0.bas.h5
m121024_122509_42178_c100389662550000001523034410251206_s1_p0.bas.h5
m121024_144608_42178_c100389662550000001523034410251207_s1_p0.bas.h5

We have run smrtpipe.py (SMRT analysis) with the following params.xml to get filtered subreads of continuous long reads (CLR).

 <param name="minLength">
   <value>50</value>
 </param>
 <param name="readScore">
   <value>0.75</value>
 </param>
 <param name="minSubReadLength">
   <value>50</value>
 </param>

Randomly Selected Four SMRT cells

First Set

m121024_100442_42178_c100389662550000001523034410251205_s1_p0
m121024_122509_42178_c100389662550000001523034410251206_s1_p0
m121023_202553_42178_c100389662550000001523034410251200_s1_p0
m121024_010654_42178_c100389662550000001523034410251202_s1_p0

Second Set

m121024_122509_42178_c100389662550000001523034410251206_s1_p0
m121023_202553_42178_c100389662550000001523034410251200_s1_p0
m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_144608_42178_c100389662550000001523034410251207_s1_p0

Third Set

m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_010654_42178_c100389662550000001523034410251202_s1_p0
m121023_224605_42178_c100389662550000001523034410251201_s1_p0
m121024_074656_42178_c100389662550000001523034410251204_s1_p0

Randomly Selected Six SMRT cells

First Set

m121024_100442_42178_c100389662550000001523034410251205_s1_p0
m121023_224605_42178_c100389662550000001523034410251201_s1_p0
m121023_202553_42178_c100389662550000001523034410251200_s1_p0
m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_074656_42178_c100389662550000001523034410251204_s1_p0
m121024_144608_42178_c100389662550000001523034410251207_s1_p0

Second Set

m121024_074656_42178_c100389662550000001523034410251204_s1_p0
m121023_224605_42178_c100389662550000001523034410251201_s1_p0
m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_144608_42178_c100389662550000001523034410251207_s1_p0
m121024_010654_42178_c100389662550000001523034410251202_s1_p0
m121024_100442_42178_c100389662550000001523034410251205_s1_p0

Third Set

m121023_224605_42178_c100389662550000001523034410251201_s1_p0
m121023_202553_42178_c100389662550000001523034410251200_s1_p0
m121024_032737_42178_c100389662550000001523034410251203_s1_p0
m121024_122509_42178_c100389662550000001523034410251206_s1_p0
m121024_010654_42178_c100389662550000001523034410251202_s1_p0
m121024_144608_42178_c100389662550000001523034410251207_s1_p0

Dataset 7, M. ruber DSM1279, 4 SMRT cells

The four SMRT cells of M. ruber DSM1279 were downloaded from HGAP. The reference genome was downloaded from NCBI.

m120803_022519_42141_c100388772550000001523034210251234_s1_p0.bas.h5
m120803_041200_42141_c100388772550000001523034210251235_s1_p0.bas.h5
m120803_055858_42141_c100388772550000001523034210251236_s1_p0.bas.h5
m120803_074648_42141_c100388772550000001523034210251237_s1_p0.bas.h5

Dataset 8, P. heparinus DSM2366, 7 SMRT cells

The seven SMRT cells of P. heparinus DSM2366 were downloaded from HGAP. In our study we used all seven SMRT cells, and we also drew three random subsets of four SMRT cells.
m120802_210418_42142_c100388622550000001523034210251211_s1_p0.bas.h5
m120802_225401_42142_c100388622550000001523034210251212_s1_p0.bas.h5
m120803_004251_42142_c100388622550000001523034210251213_s1_p0.bas.h5
m120803_023226_42142_c100388622550000001523034210251214_s1_p0.bas.h5
m120803_041958_42142_c100388622550000001523034210251215_s1_p0.bas.h5
m120803_060809_42142_c100388622550000001523034210251216_s1_p0.bas.h5
m120803_075818_42142_c100388622550000001523034210251217_s1_p0.bas.h5

Randomly Selected Four SMRT cells

First Set

m120802_225401_42142_c100388622550000001523034210251212_s1_p0
m120803_060809_42142_c100388622550000001523034210251216_s1_p0
m120803_004251_42142_c100388622550000001523034210251213_s1_p0
m120802_210418_42142_c100388622550000001523034210251211_s1_p0

Second Set

m120803_023226_42142_c100388622550000001523034210251214_s1_p0
m120803_075818_42142_c100388622550000001523034210251217_s1_p0
m120802_210418_42142_c100388622550000001523034210251211_s1_p0
m120803_060809_42142_c100388622550000001523034210251216_s1_p0

Third Set

m120803_041958_42142_c100388622550000001523034210251215_s1_p0
m120803_075818_42142_c100388622550000001523034210251217_s1_p0
m120803_023226_42142_c100388622550000001523034210251214_s1_p0
m120802_210418_42142_c100388622550000001523034210251211_s1_p0

Dataset 9, E. coli P4-C2 chemistry, 20Kbp library, 1 SMRT cell

The single SMRT cell of E. coli (P4-C2 chemistry, 20 kbp library), stored as the three .bax.h5 files listed below, was downloaded from HGAP. We used the whole SMRT cell in our study.

m130404_014004_sidney_c100506902550000001823076808221337_s1_p0.1.bax.h5
m130404_014004_sidney_c100506902550000001823076808221337_s1_p0.2.bax.h5
m130404_014004_sidney_c100506902550000001823076808221337_s1_p0.3.bax.h5