Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
genotoul-bioinfo
ng6
Commits
09e84fbd
Commit
09e84fbd
authored
Aug 02, 2016
by
Gerald Salin
Browse files
Change barcode file add
parent
9cd2e910
Changes
1
Hide whitespace changes
Inline
Side-by-side
workflows/pacbio_qc/components/rs_subreads.py
View file @
09e84fbd
...
...
@@ -18,6 +18,7 @@
import
os
import
json
import
logging
from
workflows.pacbio_qc.lib.pacbiolib
import
h5file
...
...
@@ -29,10 +30,11 @@ def rs_subreads(inputfile, stdout, componentdir, smrtpipe, fofnToSmrtpipeInput,
import
subprocess
import
os
import
logging
from
workflows.pacbio_qc.lib.pacbiolib
import
PacbioH5Reader
# crate directory
logging
.
getLogger
(
"jflow"
).
debug
(
"Begin rs_subreads!"
)
# cr
e
ate directory
filebase
=
os
.
path
.
basename
(
os
.
path
.
splitext
(
os
.
path
.
splitext
(
inputfile
)[
0
])[
0
])
outputdir
=
os
.
path
.
join
(
componentdir
,
filebase
)
settings_xml
=
os
.
path
.
join
(
componentdir
,
filebase
+
"_settings.xml"
)
...
...
@@ -52,9 +54,9 @@ def rs_subreads(inputfile, stdout, componentdir, smrtpipe, fofnToSmrtpipeInput,
</param>
<param name='score'><value>{1}</value></param>
</module>"""
.
format
(
barcode_file
,
barcode_score
)
# write settings
with
open
(
settings_xml
,
"w"
)
as
fh
:
fh
.
write
((
"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
# write settings
with
open
(
settings_xml
,
"w"
)
as
fh
:
fh
.
write
((
"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<smrtpipeSettings>
<module id="P_Fetch" />
<module id="P_Filter" >
...
...
@@ -73,6 +75,27 @@ def rs_subreads(inputfile, stdout, componentdir, smrtpipe, fofnToSmrtpipeInput,
</smrtpipeSettings>
"""
).
format
(
min_subreads_length
,
polymerase_read_qual
,
polymerase_read_length
))
if
barcode_file
==
'None'
or
barcode_file
is
None
:
# write settings
with
open
(
settings_xml
,
"w"
)
as
fh
:
fh
.
write
((
"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<smrtpipeSettings>
<module id="P_Fetch" />
<module id="P_Filter" >
<param name="minSubReadLength">
<value>{0}</value>
</param>
<param name="readScore">
<value>{1}</value>
</param>
<param name="minLength">
<value>{2}</value>
</param>
</module>
<module id="P_FilterReports"/>
</smrtpipeSettings>
"""
).
format
(
min_subreads_length
,
polymerase_read_qual
,
polymerase_read_length
))
# write fofn and xml
reader
=
PacbioH5Reader
(
inputfile
)
with
open
(
inputs_fofn
,
"w"
)
as
fh
:
...
...
@@ -104,6 +127,7 @@ class RS_Subreads (Analysis):
"""
def
define_parameters
(
self
,
sample_names
,
input_files
,
min_subreads_length
=
0
,
polymerase_read_qual
=
0
,
polymerase_read_length
=
0
,
barcode_file
=
None
,
barcode_score
=
22
):
logging
.
getLogger
(
"jflow"
).
debug
(
"Begin RS_Subreads.define_parameters!"
)
self
.
add_parameter_list
(
"sample_names"
,
"sample names, each sample name must correspond to an input file"
,
default
=
sample_names
,
required
=
True
)
self
.
add_input_file_list
(
"input_files"
,
"Input pacbio bas.h5 files"
,
default
=
input_files
,
file_format
=
h5file
,
required
=
True
)
self
.
add_parameter
(
"min_subreads_length"
,
"Subreads shorter than this value (in base pairs) are filtered out and excluded from analysis"
,
default
=
min_subreads_length
,
type
=
'int'
)
...
...
@@ -117,12 +141,25 @@ class RS_Subreads (Analysis):
self
.
add_output_file_list
(
'stdouts'
,
"logs"
,
pattern
=
"{basename}.stdout"
,
items
=
items
)
def process(self):
    """Run ``rs_subreads`` once per input file through a ``PythonFunction``.

    For the input file at each position, the call receives that file as
    input, the stdout entry at the same position as its only output, and an
    argument list built from the output directory, the two executable paths
    and the filtering / barcode parameters stored on this component.
    Progress is written both to the "jflow" logger and to standard output.
    """
    log = logging.getLogger("jflow")
    log.debug("Begin RS_Subreads.process!")

    run_subreads = PythonFunction(rs_subreads, cmd_format="{EXE} {IN} {OUT} {ARG}")

    for idx, input_path in enumerate(self.input_files):
        log.debug("Begin RS_Subreads.process! " + input_path)
        print('START FOR ', idx, '------>', input_path)

        # Evaluated in the same order as the original keyword arguments.
        source = self.input_files[idx]
        log_targets = [self.stdouts[idx]]
        run_args = [
            self.output_directory,
            self.get_exec_path("smrtpipe"),
            self.get_exec_path("fofnToSmrtpipeInput.py"),
            self.min_subreads_length,
            self.polymerase_read_qual,
            self.polymerase_read_length,
            self.barcode_file,
            self.barcode_score,
        ]
        run_subreads(inputs=source, outputs=log_targets, arguments=run_args)

    # NOTE(review): the closing log/print are taken to run once, after the
    # loop - confirm against the original file's indentation.
    log.debug("End RS_Subreads.process! ")
    print('END PROCESS')
def get_version(self):
    """Return the version string of this component (the constant "1.0")."""
    version = "1.0"
    return version
...
...
@@ -135,11 +172,15 @@ class RS_Subreads (Analysis):
def
post_process
(
self
):
metrics
=
[]
metrics2
=
[]
for
i
,
samplefile
in
enumerate
(
self
.
input_files
)
:
sample
=
self
.
sample_names
[
i
]
sdir
=
os
.
path
.
basename
(
os
.
path
.
splitext
(
os
.
path
.
splitext
(
samplefile
)[
0
])[
0
])
sample_outdir
=
os
.
path
.
join
(
self
.
output_directory
,
sdir
)
'''print ('sample -------> ',sample,'
\n
')
print ('self.sample_names -------> ',self.sample_names,'
\n
')'''
# loading
jsonfile
=
os
.
path
.
join
(
sample_outdir
,
'results'
,
'filter_reports_loading.json'
)
...
...
@@ -213,6 +254,21 @@ class RS_Subreads (Analysis):
image
=
os
.
path
.
join
(
sample_outdir
,
'results'
,
'filtered_subread_report.png'
)
self
.
_add_result_element
(
sample
,
"report"
,
self
.
_save_file
(
image
,
sample
+
".filtered_subread_report.png"
),
group
)
# Barcode report
jsonfile
=
os
.
path
.
join
(
sample_outdir
,
'results'
,
'barcode_report.json'
)
if
os
.
path
.
isfile
(
jsonfile
):
group
=
'barcode_results'
if
group
not
in
metrics2
:
metrics2
.
append
(
group
)
self
.
_add_result_element
(
"metrics2"
,
"headers"
,
','
.
join
([
'Reads'
,
'Bases'
]),
group
)
stats
=
self
.
parse_barcode_report_json
(
jsonfile
)
for
i
,
barcode
in
enumerate
(
stats
[
'Barcode Name'
]):
self
.
_add_result_element
(
barcode
,
'Reads'
,
stats
[
'Reads'
][
i
]
,
group
)
self
.
_add_result_element
(
barcode
,
'Bases'
,
stats
[
'Bases'
][
i
]
,
group
)
# ------ parsing of json files ------
...
...
@@ -292,6 +348,25 @@ class RS_Subreads (Analysis):
res
[
"Number of Reads"
]
=
e
[
'value'
]
return
res
def parse_barcode_report_json(self, jsonfile):
    """Collect the barcode columns of a barcode report JSON file.

    The report is loaded with ``self.load_json``; every column of every
    entry in its ``tables`` list is inspected, and the ``values`` of the
    columns with a known ``id`` are stored under a display name.

    :param jsonfile: path of the report, passed unchanged to
        ``self.load_json`` (assumed to return the parsed JSON object as a
        dict - confirm against ``load_json``).
    :return: dict with the keys 'Barcode Name', 'Reads' and 'Bases'. Each
        maps to the ``values`` of the matching column, or to an empty list
        when the report has no such column, so callers can always index
        the three keys.
    """
    # Column id in the report -> key used in the returned dict.
    column_keys = {
        'barcode.barcode_table.barcode': 'Barcode Name',
        'barcode.barcode_table.number_of_reads': 'Reads',
        'barcode.barcode_table.number_of_bases': 'Bases',
    }
    # Start from empty lists so a report without barcode columns yields an
    # iterable result instead of missing keys.
    res = dict((key, []) for key in column_keys.values())

    obj = self.load_json(jsonfile)
    for table in obj.get('tables', []):
        for column in table.get('columns', []):
            key = column_keys.get(column['id'])
            if key is not None:
                res[key] = column['values']
    return res
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment