Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
genotoul-bioinfo
ng6
Commits
55900cc1
Commit
55900cc1
authored
Oct 17, 2013
by
Jerome Mariette
Browse files
add some parameters to the miseq workflow
parent
9cb7ea96
Changes
2
Hide whitespace changes
Inline
Side-by-side
workflows/miseq_diversity/__init__.py
View file @
55900cc1
...
...
@@ -85,7 +85,8 @@ class MiSeqDiversity (NG6Workflow):
addrawfiles
=
self
.
add_component
(
"AddRawFiles"
,
[
self
.
runobj
,
saved_files
,
self
.
args
[
"compression"
]])
# make some statistics on raw file
fastqc
=
self
.
add_component
(
"FastQC"
,
[
filtered_read1_files
+
filtered_read2_files
,
(
group_prefix
is
not
None
),
True
,
run_name
+
"_fastqc.tar.gz"
],
parent
=
fastqilluminafilter
)
fastqc
=
self
.
add_component
(
"FastQC"
,
[
filtered_read1_files
+
filtered_read2_files
,
(
group_prefix
is
not
None
),
True
,
run_name
+
"_fastqc.tar.gz"
],
parent
=
fastqilluminafilter
)
# list concatenated files
if
is_paired_end
and
(
group_prefix
is
not
None
):
...
...
@@ -105,34 +106,81 @@ class MiSeqDiversity (NG6Workflow):
fileExtension
=
os
.
path
.
splitext
(
concat_read1_files
[
0
])[
1
]
if
fileExtension
==
fileFormat
:
gunzip
=
self
.
add_component
(
"GunZipFiles"
,[
concat_read1_files
,
concat_read2_files
])
makecontigs
=
self
.
add_component
(
"MothurMakeContigs"
,
kwargs
=
{
'read1_files'
:
gunzip
.
fastq_R1
,
'read2_files'
:
gunzip
.
fastq_R2
,
\
'sample_name'
:
self
.
args
[
"sample_name"
],
'maxambig'
:
'0'
,
'maxlength'
:
self
.
args
[
"max_contigs_length"
]},
component_prefix
=
"with_gz"
,
parent
=
fastqilluminafilter
)
makecontigs
=
self
.
add_component
(
"MothurMakeContigs"
,
kwargs
=
{
'read1_files'
:
gunzip
.
fastq_R1
,
'read2_files'
:
gunzip
.
fastq_R2
,
'sample_name'
:
self
.
args
[
"sample_name"
],
'maxambig'
:
self
.
args
[
"max_ambiguous"
],
'maxlength'
:
self
.
args
[
"max_contigs_length"
],
'processors'
:
self
.
args
[
"processors"
]},
component_prefix
=
"with_gz"
,
parent
=
fastqilluminafilter
)
else
:
makecontigs
=
self
.
add_component
(
"MothurMakeContigs"
,
kwargs
=
{
'read1_files'
:
concat_read1_files
,
'read2_files'
:
concat_read2_files
,
\
'sample_name'
:
self
.
args
[
"sample_name"
],
'maxambig'
:
'0'
,
'maxlength'
:
self
.
args
[
"max_contigs_length"
]},
component_prefix
=
"without_gz"
,
parent
=
fastqilluminafilter
)
makecontigs
=
self
.
add_component
(
"MothurMakeContigs"
,
kwargs
=
{
'read1_files'
:
concat_read1_files
,
'read2_files'
:
concat_read2_files
,
'sample_name'
:
self
.
args
[
"sample_name"
],
'maxambig'
:
self
.
args
[
"max_ambiguous"
],
'maxlength'
:
self
.
args
[
"max_contigs_length"
],
'processors'
:
self
.
args
[
"processors"
]},
component_prefix
=
"without_gz"
,
parent
=
fastqilluminafilter
)
uniqueseqs
=
self
.
add_component
(
"MothurUniqueSeqs"
,
[
makecontigs
.
good_fasta_files
])
countseqs
=
self
.
add_component
(
"MothurCountSeqs"
,
[
uniqueseqs
.
unique_names_files
,
makecontigs
.
good_groups_files
])
pcrseqs
=
self
.
add_component
(
"MothurPcrSeqs"
,
kwargs
=
{
'fasta_files'
:
self
.
args
[
"reference_alignment"
],
'forward_primer'
:
self
.
args
[
"forward_primer"
],
\
'reverse_primer'
:
self
.
args
[
"reverse_primer"
]})
alignseqs
=
self
.
add_component
(
"MothurAlign"
,
kwargs
=
{
'fasta_files'
:
uniqueseqs
.
unique_fasta_files
,
'reference_alignment_files'
:
pcrseqs
.
pcr_fasta_files
,
\
'count_table_files'
:
countseqs
.
count_table_files
,
'maxhomop'
:
8
},
parent
=
makecontigs
)
filterseqs
=
self
.
add_component
(
"MothurFilterSeqs"
,[
alignseqs
.
good_fasta_files
])
uniqueseqs_filter
=
self
.
add_component
(
"MothurUniqueSeqs"
,
kwargs
=
{
'fasta_files'
:
filterseqs
.
filtered_fasta_files
,
\
'count_table_files'
:
alignseqs
.
good_count_table_files
},
component_prefix
=
"after_filter"
)
precluster
=
self
.
add_component
(
"MothurPreCluster"
,
kwargs
=
{
'fasta_files'
:
uniqueseqs_filter
.
unique_fasta_files
,
\
'count_table_files'
:
uniqueseqs_filter
.
unique_count_table_files
})
chimerauchime
=
self
.
add_component
(
"MothurChimeraUchime"
,
kwargs
=
{
'fasta_files'
:
precluster
.
precluster_fasta_files
,
\
'count_table_files'
:
precluster
.
precluster_count_table_files
},
parent
=
alignseqs
)
pcrseqs
=
self
.
add_component
(
"MothurPcrSeqs"
,
kwargs
=
{
'fasta_files'
:
self
.
args
[
"reference_alignment"
],
'forward_primer'
:
self
.
args
[
"forward_primer"
],
'reverse_primer'
:
self
.
args
[
"reverse_primer"
]})
alignseqs
=
self
.
add_component
(
"MothurAlign"
,
kwargs
=
{
'fasta_files'
:
uniqueseqs
.
unique_fasta_files
,
'reference_alignment_files'
:
pcrseqs
.
pcr_fasta_files
,
'count_table_files'
:
countseqs
.
count_table_files
,
'maxhomop'
:
self
.
args
[
"max_homopolymers"
],
'processors'
:
self
.
args
[
"processors"
]},
parent
=
makecontigs
)
filterseqs
=
self
.
add_component
(
"MothurFilterSeqs"
,
kwargs
=
{
'align_files'
:
alignseqs
.
good_fasta_files
,
'processors'
:
self
.
args
[
"processors"
]})
uniqueseqs_filter
=
self
.
add_component
(
"MothurUniqueSeqs"
,
kwargs
=
{
'fasta_files'
:
filterseqs
.
filtered_fasta_files
,
'count_table_files'
:
alignseqs
.
good_count_table_files
},
component_prefix
=
"after_filter"
)
precluster
=
self
.
add_component
(
"MothurPreCluster"
,
kwargs
=
{
'fasta_files'
:
uniqueseqs_filter
.
unique_fasta_files
,
'count_table_files'
:
uniqueseqs_filter
.
unique_count_table_files
,
'processors'
:
self
.
args
[
"processors"
]})
chimerauchime
=
self
.
add_component
(
"MothurChimeraUchime"
,
kwargs
=
{
'fasta_files'
:
precluster
.
precluster_fasta_files
,
'count_table_files'
:
precluster
.
precluster_count_table_files
,
'processors'
:
self
.
args
[
"processors"
]},
parent
=
alignseqs
)
# classify reads with the provided taxonomy
classifyseqs
=
self
.
add_component
(
"MothurClassifySeqs"
,
kwargs
=
{
'fasta_files'
:
chimerauchime
.
pick_fasta_files
,
'template_files'
:
self
.
args
[
"classify_template"
],
'taxonomy_files'
:
self
.
args
[
"classify_taxonomy"
],
'count_table_files'
:
chimerauchime
.
good_count_table_files
,
'without_krona'
:
self
.
args
[
"without_krona"
]},
parent
=
chimerauchime
)
classifyseqs
=
self
.
add_component
(
"MothurClassifySeqs"
,
kwargs
=
{
'fasta_files'
:
chimerauchime
.
pick_fasta_files
,
'template_files'
:
self
.
args
[
"classify_template"
],
'taxonomy_files'
:
self
.
args
[
"classify_taxonomy"
],
'count_table_files'
:
chimerauchime
.
good_count_table_files
,
'without_krona'
:
self
.
args
[
"without_krona"
],
'processors'
:
self
.
args
[
"processors"
]},
parent
=
chimerauchime
)
# OTUs approach
distseqs
=
self
.
add_component
(
"MothurDistSeqs"
,
[
chimerauchime
.
pick_fasta_files
])
cluster
=
self
.
add_component
(
"MothurCluster"
,
kwargs
=
{
'dist_files'
:
distseqs
.
dist_files
,
'count_table_files'
:
chimerauchime
.
good_count_table_files
})
distseqs
=
self
.
add_component
(
"MothurDistSeqs"
,
kwargs
=
{
'fasta_files'
:
chimerauchime
.
pick_fasta_files
,
'processors'
:
self
.
args
[
"processors"
]})
cluster
=
self
.
add_component
(
"MothurCluster"
,
kwargs
=
{
'dist_files'
:
distseqs
.
dist_files
,
'count_table_files'
:
chimerauchime
.
good_count_table_files
})
otuanalysis
=
self
.
add_component
(
"MothurOTUAnalysis"
,
kwargs
=
{
'an_list_files'
:
cluster
.
an_list_files
,
'count_table_files'
:
chimerauchime
.
good_count_table_files
,
\
'taxonomy_files'
:
classifyseqs
.
taxonomy_files
,
'label'
:
self
.
args
[
"labels"
],
'tree_label'
:
self
.
args
[
"labels"
],
'without_krona'
:
self
.
args
[
"without_krona"
]},
parent
=
chimerauchime
)
\ No newline at end of file
otuanalysis
=
self
.
add_component
(
"MothurOTUAnalysis"
,
kwargs
=
{
'an_list_files'
:
cluster
.
an_list_files
,
'count_table_files'
:
chimerauchime
.
good_count_table_files
,
'taxonomy_files'
:
classifyseqs
.
taxonomy_files
,
'label'
:
self
.
args
[
"labels"
],
'tree_label'
:
self
.
args
[
"labels"
],
'without_krona'
:
self
.
args
[
"without_krona"
]},
parent
=
chimerauchime
)
workflows/miseq_diversity/workflow.properties
View file @
55900cc1
...
...
@@ -74,6 +74,21 @@ max_contigs_length.name = max_contigs_length
max_contigs_length.flag
=
--max-contigs-length
max_contigs_length.help
=
Maximum sequence length allowed after making contigs
max_ambiguous.flag
=
--max-ambiguous
max_ambiguous.help
=
How many ambiguous bases (N) are allowed in a sequence
max_ambiguous.default
=
0
max_ambiguous.type
=
int
max_homopolymers.flag
=
--max-homopolymers
max_homopolymers.help
=
Maximum homopolymer length allowed in a sequence
max_homopolymers.default
=
8
max_homopolymers.type
=
int
processors.flag
=
--processors
processors.help
=
How many CPUs can be used during the process
processors.default
=
1
processors.type
=
int
reference_alignment.name
=
reference_alignment
reference_alignment.flag
=
--reference-alignment
reference_alignment.help
=
Where the reference alignment is stored
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment