Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Maintenance - Mise à jour mensuelle Lundi 6 Février entre 7h00 et 9h00
Open sidebar
genotoul-bioinfo
ng6
Commits
f01a98e3
Commit
f01a98e3
authored
Mar 01, 2018
by
Maxime Manno
🍜
Browse files
Add trimporchop component
parent
6d118a0b
Changes
1
Hide whitespace changes
Inline
Side-by-side
workflows/ont_qc/components/trimporechop.py
0 → 100644
View file @
f01a98e3
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import
re
,
os
from
subprocess
import
Popen
,
PIPE
import
logging
import
time
from
ng6.analysis
import
Analysis
from
ng6.utils
import
Utils
from
jflow.utils
import
get_argument_pattern
class Trim_porechop (Analysis):
    """
    Trim ONT reads with porechop and collect the trimming statistics.

    One porechop command is scheduled per input file. The stdout of each
    command is kept in a ``<basename>.stdout`` file, which is parsed during
    post-processing to extract the adapter trimming counters.
    """

    def define_parameters(self, fastq_files, nbthreads=4, formatfile="fastq",
                          discard_middle="discard_middle", archivename="porechop_archive"):
        """
        Declare the inputs, parameters and outputs of the component.

        @param fastq_files    : the read files to trim
        @param nbthreads      : number of threads given to porechop (--threads)
        @param formatfile     : value given to porechop --format, also used as
                                extension of the trimmed files
        @param discard_middle : "discard_middle" to run porechop with
                                --discard_middle, "do_not_discard_middle" otherwise
        @param archivename    : name of the archive built in post_process
        """
        self.add_input_file_list("fastq_files", "fastq_files", default=fastq_files,
                                 required=True, file_format='fastq')
        self.add_parameter("nbthreads", "number of threads to use", default=nbthreads, type='int')
        self.add_parameter("formatfile", "format of the input files", default=formatfile, type='str')
        self.add_parameter("discard_middle", "discard_middle", default=discard_middle,
                           choices=["discard_middle", "do_not_discard_middle"])
        self.add_parameter("archive_name", "Name of the archive", default=archivename, type='str')

        # One trimmed file and one stdout file are declared per input file.
        output_ext = '_trim.' + self.formatfile
        self.add_output_file_list("files_trimmed", "files_trimmed",
                                  pattern='{basename_woext}' + output_ext,
                                  items=self.fastq_files, file_format=self.formatfile)
        self.add_output_file_list("stdouts", "stdouts", pattern='{basename_woext}.stdout',
                                  items=self.fastq_files)

    def define_analysis(self):
        """
        Set the name, description, software and options of the analysis.
        """
        self.name = "TrimPorechop"
        self.description = "Trim the reads generated by Albacore and remove ONT adapters"
        self.software = "porechop"
        # Always assign the options: process() concatenates them into the
        # command line whatever the value of discard_middle is.
        if self.discard_middle == "discard_middle":
            self.options = "--discard_middle"
        else:
            self.options = ""

    def __parse_stat_file(self, stat_file):
        """
        Parse the stat file
          @param stat_file : the stdout porechop
          @return          : [read_trim_start, read_total_start, bp_removed_start,
                              read_trim_end, read_total_end, bp_removed_end]
                             Each value is the raw token read from the file, or 0
                             when the matching line was not found.
        """
        logger = logging.getLogger("jflow")
        logger.debug("Begin Trimporechop.__parse_stat_file! file = %s", stat_file)

        read_trim_start = 0
        read_total_start = 0
        bp_removed_start = 0
        read_trim_end = 0
        read_total_end = 0
        bp_removed_end = 0

        # Wait for the file to be filled, but never forever: after max_wait
        # seconds the file is parsed as it is (all counters stay at 0).
        poll_interval = 10
        max_wait = 600
        deadline = time.monotonic() + max_wait
        while os.stat(stat_file).st_size == 0:
            if time.monotonic() >= deadline:
                logger.warning("Trimporechop.__parse_stat_file! file still empty after %s s : %s",
                               max_wait, stat_file)
                break
            logger.debug("Trimporechop.__parse_stat_file! file empty : " + stat_file)
            logger.debug("Trimporechop.__parse_stat_file! spleep 10...")
            time.sleep(poll_interval)

        start_pattern = re.compile(r"(.*) reads had adapters trimmed from their start (.*)")
        end_pattern = re.compile(r"(.*) reads had adapters trimmed from their end (.*)")
        with open(stat_file, "r") as handle:
            for line in handle:
                line = line.strip()
                if start_pattern.search(line) is not None:
                    logger.debug("Trimporechop.__parse_stat_file : line start " + str(line))
                    # Tokens 0, 2 and 10 of the space separated line hold the
                    # trimmed reads, the total reads and the removed bp.
                    fields = line.split(" ")
                    read_trim_start = fields[0]
                    read_total_start = fields[2]
                    bp_removed_start = fields[10].replace("(", "")
                if end_pattern.search(line) is not None:
                    fields = line.split(" ")
                    read_trim_end = fields[0]
                    read_total_end = fields[2]
                    bp_removed_end = fields[10].replace("(", "")

        logger.debug("Trimporechop.__parse_stat_file : read_trim_start " + str(read_trim_start))
        logger.debug("Trimporechop.__parse_stat_file : read_trim_end " + str(read_trim_end))
        logger.debug("End Trimporechop.__parse_stat_file! ")
        return [read_trim_start, read_total_start, bp_removed_start,
                read_trim_end, read_total_end, bp_removed_end]

    def post_process(self):
        """
        Store the trimming statistics of every stdout file found in the output
        directory, then archive the trimmed files and the stdout files.
        """
        logger = logging.getLogger("jflow")
        logger.debug("Begin Trimporechop.post_process! ont_qc")

        # Paths of the files to put in the archive
        results_files = []
        stat_names = ["read_trim_start", "read_total_start", "bp_removed_start",
                      "read_trim_end", "read_total_end", "bp_removed_end"]

        # add header of stats
        group = "statsporechop"
        self._add_result_element("metrics", "headers", ','.join(stat_names), group)

        # Same extension as the one declared for files_trimmed in define_parameters.
        trimmed_ext = "." + self.formatfile
        output_content = os.listdir(self.output_directory)
        logger.debug("Trimporechop.post_process : output directory content %s", output_content)

        for file in output_content:
            full_file_path = os.path.join(self.output_directory, file)
            logger.debug("Trimporechop.post_process : full_file_path " + full_file_path)
            if file.endswith(trimmed_ext):
                logger.debug("Trimporechop.post_process match .fastq : full_file_path " + full_file_path)
                results_files.append(full_file_path)
            elif file.endswith(".stdout"):
                logger.debug("Trimporechop.post_process match .stdout: full_file_path " + full_file_path)
                results_files.append(full_file_path)
                filename = os.path.basename(file).split(".stdout")[0]
                stat_values = self.__parse_stat_file(full_file_path)
                # add stats for each fastq file; values are in stat_names order
                for stat_name, stat_value in zip(stat_names, stat_values):
                    self._add_result_element("ont_sample", stat_name, stat_value, filename)

        # Finally create and add the archive to the analysis
        self._create_and_archive(results_files, self.archive_name)
        logger.debug("End Trimporechop.post_process! ")

    def get_version(self):
        """
        Run "porechop --version".
          @return : the raw standard output of the command
        """
        cmd = [self.get_exec_path("porechop"), "--version"]
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        return stdout

    def process(self):
        """
        Schedule one porechop execution per declared stdout file. The command
        stdout is redirected to that file and the reads are written to the
        trimmed file found at the same position in files_trimmed.
        """
        logger = logging.getLogger("jflow")
        logger.debug("Begin Trimporechop.process! ont_qc")

        for output_pos, output in enumerate(self.stdouts):
            # Set prefix. The stdout files are declared with the
            # '{basename_woext}.stdout' pattern, so the search always matches.
            reg = re.search(r"(.+)\.stdout$", output)
            basename = os.path.basename(reg.group(1))

            # Build fastq list for sample read
            # NOTE(review): only inputs named <basename>.<formatfile> are
            # selected; confirm inputs always carry that extension.
            expected_name = basename + "." + self.formatfile
            file_group = [file for file in self.fastq_files
                          if os.path.basename(file) == expected_name]

            # Create cmd: the shell arguments are numbered after the inputs
            _, next_arg_number = get_argument_pattern(file_group, 0)
            command = (self.get_exec_path("porechop") + " " + self.options
                       + " --input ${" + str(next_arg_number) + "}"
                       + " --output ${" + str(next_arg_number + 1) + "}"
                       + " --format " + self.formatfile
                       + " --threads " + str(self.nbthreads)
                       + " > " + " ${" + str(next_arg_number + 2) + "}")
            self.add_shell_execution(command, cmd_format='{EXE} {IN} {OUT}', map=False,
                                     inputs=file_group,
                                     outputs=[self.files_trimmed[output_pos], self.stdouts[output_pos]])

        logger.debug("End Trimporechop.process! ")
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment