Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
genotoul-bioinfo
ng6
Commits
940ecf37
Commit
940ecf37
authored
Aug 19, 2014
by
Penom Nom
Browse files
generic splitbc component
parent
1de1764c
Changes
3
Hide whitespace changes
Inline
Side-by-side
workflows/
radseq/
components/splitbc.py
→
workflows/components/splitbc.py
View file @
940ecf37
...
...
@@ -19,54 +19,58 @@ import os
from
jflow.iotypes
import
OutputFileList
,
OutputFile
,
InputFile
,
InputFileList
,
Formats
from
jflow.abstraction
import
MultiMap
from
jflow.component
import
Component
from
weaver.function
import
ShellFunction
from
ng6.analysis
import
Analysis
class
SplitBC
(
Analysis
):
ENZYMES
=
{
'sbfI'
:
{
'rad'
:
'CCTGCAGG'
,
'radtag'
:
'TGCAGG'
}
}
def get_enzyme(self, name):
    """
    Return the recognition sequences registered for a restriction enzyme.

    @param name: enzyme name, looked up as a key of self.ENZYMES
    @return: tuple (rad, radtag) read from the 'rad' and 'radtag' entries of that enzyme
    @raise ValueError: if name is not a key of self.ENZYMES
    """
    # Membership test with `in`: dict.has_key() is deprecated in Python 2 and
    # no longer exists in Python 3.
    if name not in self.ENZYMES:
        # list(...) renders as "['sbfI']" on both Python 2 and Python 3
        # (keys() is a view object on Python 3).
        raise ValueError("The enzyme name " + str(name) + " does not exists. Accepted names are " + str(list(self.ENZYMES)))
    enzyme = self.ENZYMES[name]
    return (enzyme['rad'], enzyme['radtag'])
class
SplitBC
(
Component
):
def
define_parameters
(
self
,
fastq_file1
,
fastq_file2
,
matrix_indiv_name
,
barcode_file
,
enzyme
,
mismatches
,
tag_mismatch
,
trim
=
True
,
forward
=
True
):
def
define_parameters
(
self
,
fastq_file1
,
barcode_file
,
matrix_indiv_name
,
fastq_file2
=
[],
rad
=
None
,
rad_tag
=
None
,
mismatches
=
None
,
tag_mismatch
=
None
,
trim_barcode
=
False
,
trim_reads2
=
False
,
bol
=
True
,
eol
=
False
,
partial
=
None
,
no_adapt
=
False
):
"""
@param fastq_file1: list of fastq_files path
@param barcode_file: list of barcode file path
@param matrix_indiv_name: list of list of individual names (example : [[], [] , []]). The length of this
matrix_indiv_name must be the same as the length of fastq_file1 (and fastq_file2 if provided)
@param fastq_file2: list of fastq_files path
@param
matrix_indiv_name: list of list of individual names
@param
barcode_file: list of bascode file path
@param enzyme: enzyme name
@param
rad:
@param
rad_tag:
...
"""
check_len
=
len
(
fastq_file1
)
==
len
(
matrix_indiv_name
)
==
len
(
barcode_file
)
self
.
fastq1
=
OutputFileList
(
fastq_file1
,
Formats
.
FASTQ
)
self
.
fastq2
=
None
if
fastq_file2
is
not
None
:
if
fastq_file2
:
check_len
=
len
(
fastq_file1
)
==
len
(
matrix_indiv_name
)
==
len
(
fastq_file1
)
==
len
(
barcode_file
)
self
.
fastq2
=
OutputFileList
(
fastq_file2
,
Formats
.
FASTQ
)
if
not
check_len
:
raise
Exception
(
"length of fastq_file1, fastq_file2, matrix_indiv_name and barcode_file must be the same"
)
if
trim_barcode
and
trim_reads2
:
raise
Exception
(
"you must specify either trim_barcode or trim_reads2, but not both"
)
if
(
bol
==
eol
==
True
)
or
(
bol
==
eol
==
False
)
:
raise
Exception
(
"one of bol, eol must be specified, but not both"
)
if
(
rad
is
not
None
and
rad_tag
is
None
)
or
(
rad
is
None
and
rad_tag
is
not
None
)
:
raise
Exception
(
"you must specify rad with rad_tag"
)
self
.
barcode_file
=
OutputFileList
(
barcode_file
)
self
.
mismatches
=
mismatches
self
.
rad
,
self
.
rad_tag
=
self
.
get_enzyme
(
enzyme
)
self
.
tag_mismatch
=
tag_mismatch
self
.
trim
=
trim
self
.
forward
=
forward
self
.
prefix_r1
=
os
.
path
.
join
(
self
.
output_directory
,
"%_1.fq"
)
self
.
prefix_r2
=
os
.
path
.
join
(
self
.
output_directory
,
"%_2.fq"
)
self
.
trim_barcode
=
trim_barcode
self
.
trim_reads2
=
trim_reads2
self
.
bol
=
bol
self
.
eol
=
eol
self
.
partial
=
partial
self
.
no_adapt
=
no_adapt
self
.
rad
=
rad
self
.
rad_tag
=
rad_tag
self
.
matrix_read1
=
[]
self
.
matrix_read2
=
[]
self
.
output_read1
=
[]
...
...
@@ -75,46 +79,54 @@ class SplitBC (Analysis):
for
id
,
inames
in
enumerate
(
matrix_indiv_name
)
:
outr1
=
OutputFileList
(
self
.
get_outputs
(
'{basename_woext}_1.fq'
,
inames
),
Formats
.
FASTQ
)
outr2
=
OutputFileList
(
self
.
get_outputs
(
'{basename_woext}_2.fq'
,
inames
),
Formats
.
FASTQ
)
self
.
matrix_read1
.
append
(
outr1
)
self
.
matrix_read2
.
append
(
outr2
)
self
.
output_read1
+=
outr1
self
.
output_read2
+=
outr2
self
.
stdout
.
append
(
OutputFile
(
os
.
path
.
join
(
self
.
output_directory
,
"splitbc"
+
str
(
id
)
+
".stdout"
)))
def
get_version
(
self
):
return
"1.1"
def
define_analysis
(
self
):
self
.
name
=
"splitbc"
self
.
description
=
"Demultiplex individual"
self
.
software
=
"splitbc.pl"
self
.
options
=
' '
if
self
.
fastq2
is
not
None
:
outr2
=
OutputFileList
(
self
.
get_outputs
(
'{basename_woext}_2.fq'
,
inames
),
Formats
.
FASTQ
)
self
.
matrix_read2
.
append
(
outr2
)
self
.
output_read2
+=
outr2
self
.
stdout
.
append
(
OutputFile
(
os
.
path
.
join
(
self
.
output_directory
,
"splitbc"
+
str
(
id
)
+
".stdout"
)))
def
process
(
self
):
strand
=
"--bol"
if
self
.
forward
else
"--eol"
prefix_r1
=
os
.
path
.
join
(
self
.
output_directory
,
"%_1.fq"
)
command
=
[
self
.
get_exec_path
(
"splitbc.pl"
),
"--bcfile"
,
"$1"
,
"--prefix-r1"
,
prefix_r1
]
if
self
.
bol
:
command
.
append
(
'--bol'
)
elif
self
.
eol
:
command
.
append
(
'--eol'
)
if
self
.
mismatches
is
not
None
:
command
.
extend
([
"--mismatches"
,
self
.
mismatches
])
if
self
.
tag_mismatch
is
not
None
:
command
.
extend
([
"--TAG_mismatch"
,
self
.
tag_mismatch
])
if
self
.
partial
is
not
None
:
command
.
extend
([
"--partial"
,
self
.
partial
])
if
self
.
trim_barcode
:
command
.
append
(
"--trim"
)
elif
self
.
trim_reads2
:
command
.
append
(
"--trim2"
)
if
self
.
no_adapt
is
not
None
:
command
.
append
(
"--no_adapt"
)
if
self
.
rad
is
not
None
and
self
.
rad_tag
is
not
None
:
command
.
extend
([
"--rad"
,
self
.
rad
,
"--radTAG"
,
self
.
rad_tag
])
if
self
.
fastq2
is
not
None
:
command
=
" "
.
join
([
self
.
get_exec_path
(
"splitbc.pl"
),
"$1"
,
"$2"
,
"--mismatches"
,
self
.
mismatches
,
"--bcfile"
,
"$3"
,
"--rad"
,
self
.
rad
,
"--radTAG"
,
self
.
rad_tag
,
"--TAG_mismatch"
,
self
.
tag_mismatch
,
"--trim"
if
self
.
trim
else
""
,
"--prefix-r1"
,
self
.
prefix_r1
,
"--prefix-r2"
,
self
.
prefix_r2
,
strand
,
' 2>&1 >> $4 '
])
prefix_r2
=
os
.
path
.
join
(
self
.
output_directory
,
"%_2.fq"
)
command
.
extend
([
"--prefix-r2"
,
prefix_r2
,
"$2"
,
"$3"
,
'2>&1 >> $4'
])
command
=
' '
.
join
(
command
)
splitbc
=
ShellFunction
(
command
,
cmd_format
=
'{EXE} {IN} {OUT}'
)
MultiMap
(
splitbc
,
inputs
=
[
self
.
fastq1
,
self
.
fastq2
,
self
.
barcode_file
],
outputs
=
[
self
.
stdout
,
self
.
matrix_read1
,
self
.
matrix_read2
])
MultiMap
(
splitbc
,
inputs
=
[
self
.
barcode_file
,
self
.
fastq1
,
self
.
fastq2
],
outputs
=
[
self
.
stdout
,
self
.
matrix_read1
,
self
.
matrix_read2
])
else
:
command
=
" "
.
join
([
self
.
get_exec_path
(
"splitbc.pl"
),
"$1"
,
"--mismatches"
,
self
.
mismatches
,
"--bcfile"
,
"$2"
,
"--rad"
,
self
.
rad
,
"--radTAG"
,
self
.
rad_tag
,
"--TAG_mismatch"
,
self
.
tag_mismatch
,
"--trim"
if
self
.
trim
else
""
,
"--prefix-r1"
,
self
.
prefix_r1
,
strand
,
' 2>&1 > $3 '
])
command
.
extend
([
"$2"
,
'2>&1 >> $3'
])
command
=
' '
.
join
(
command
)
splitbc
=
ShellFunction
(
command
,
cmd_format
=
'{EXE} {IN} {OUT}'
)
MultiMap
(
splitbc
,
inputs
=
[
self
.
fastq1
,
self
.
barcode_file
],
outputs
=
[
self
.
stdout
,
self
.
matrix_read1
])
def
post_process
(
self
):
print
""
\ No newline at end of file
MultiMap
(
splitbc
,
inputs
=
[
self
.
barcode_file
,
self
.
fastq1
],
outputs
=
[
self
.
stdout
,
self
.
matrix_read1
])
workflows/radseq/__init__.py
View file @
940ecf37
...
...
@@ -24,8 +24,23 @@ from ng6.ng6workflow import NG6Workflow
class
RADseq
(
NG6Workflow
):
ENZYMES
=
{
'sbfI'
:
{
'rad'
:
'CCTGCAGG'
,
'radtag'
:
'TGCAGG'
}
}
def get_enzyme(self, name):
    """
    Return the recognition sequences registered for a restriction enzyme.

    @param name: enzyme name, looked up as a key of self.ENZYMES
    @return: tuple (rad, radtag) read from the 'rad' and 'radtag' entries of that enzyme
    @raise ValueError: if name is not a key of self.ENZYMES
    """
    # Membership test with `in`: dict.has_key() is deprecated in Python 2 and
    # no longer exists in Python 3.
    if name not in self.ENZYMES:
        # list(...) renders as "['sbfI']" on both Python 2 and Python 3
        # (keys() is a view object on Python 3).
        raise ValueError("The enzyme name " + str(name) + " does not exists. Accepted names are " + str(list(self.ENZYMES)))
    enzyme = self.ENZYMES[name]
    return (enzyme['rad'], enzyme['radtag'])
def
process
(
self
):
rad
,
rad_tag
=
self
.
get_enzyme
(
self
.
args
[
'enzyme'
])
# group all individual by pool
pools
=
{}
for
p
in
self
.
args
[
'pool'
]
:
...
...
@@ -65,10 +80,11 @@ class RADseq (NG6Workflow):
ff
.
write
(
indiv
[
'indiv_name'
]
+
"
\t
"
+
indiv
[
'barcode'
]
+
"
\n
"
)
indiv_names
.
append
(
inames
)
splitbc
=
self
.
add_component
(
"SplitBC"
,
[
fastq_files_1
,
fastq_files_2
if
fastq_files_2
else
None
,
indiv_names
,
barcode_files
,
self
.
args
[
'enzyme'
],
self
.
args
[
'mismatches'
],
self
.
args
[
'tag_mismatch'
],
self
.
args
[
'trim'
],
self
.
args
[
'forward'
]])
splitbc
=
self
.
add_component
(
"SplitBC"
,
[
fastq_files_1
,
barcode_files
,
indiv_names
,
fastq_files_2
,
rad
,
rad_tag
,
self
.
args
[
'mismatches'
],
self
.
args
[
'tag_mismatch'
],
self
.
args
[
'trim_barcode'
],
self
.
args
[
'trim_reads2'
]])
ustacks
=
self
.
add_component
(
"Ustacks"
,
[],
{
"indiv_dic"
:
indivs_by_name
,
"read1_files"
:
splitbc
.
output_read1
,
"max_locus"
:
3
}
)
#
ustacks = self.add_component("Ustacks" , [], {"indiv_dic": indivs_by_name, "read1_files" : splitbc.output_read1 , "max_locus" : 3 } )
#cstacks = self.add_component("Cstacks", [ustacks.alleles, ustacks.snps, ustacks.tags, self.args["catalog_mismatches"]])
workflows/radseq/workflow.properties
View file @
940ecf37
...
...
@@ -94,17 +94,19 @@ mismatches.flag = --mismatches
mismatches.help
=
Max. number of mismatches allowed.
mismatches.default
=
1
forward.name
=
forward
forward.flag
=
--forward
forward.help
=
Match barcode at the beginning of the sequence (5' 3')
forward.default
=
True
forward.type
=
bool
trim_barcode.name
=
trim_barcode
trim_barcode.flag
=
--trim-barcode
trim_barcode.help
=
Should the barcode be trimmed
trim_barcode.default
=
True
trim_barcode.exclude
=
trim_reads2
trim_barcode.type
=
bool
trim.name
=
trim
trim.flag
=
--trim
trim.help
=
Should the barcode be trimmed.
trim.default
=
True
trim.type
=
bool
trim_reads2.name
=
trim_reads2
trim_reads2.flag
=
--trim-reads2
trim_reads2.help
=
Should the read 2 be trimmed to have the same length as the read 1
trim_reads2.default
=
False
trim_reads2.exclude
=
trim_barcode
trim_reads2.type
=
bool
catalog_mismatches.name
=
catalog_mismatches
catalog_mismatches.flag
=
--catalog-mismatches
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment