genotoul-bioinfo / ng6 · Commits

Commit e52960af, authored Jan 04, 2019 by Gerald Salin

Merge branch 'count_files' into 'master'

Count reads and bases too early

See merge request !18

Parents: 82d05c7c, b143425b
Changes: 5 files
src/ng6/analysis.py
...
...
@@ -52,6 +52,7 @@ def add_analysis(parent_id, analysis_cfg, *input_files):
     import pickle
     import logging
     logging.getLogger("Analysis").debug("Start. Imports went good.")
+    logging.getLogger("Analysis").debug("Start. working for analysis " + analysis_cfg)
     # get inputs from parameters
     analysis_serialized_path = input_files[0]
...
...
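The pattern this merge request adds throughout is a named module-level logger, logging.getLogger("Analysis").debug(...). For those .debug() calls to produce any output, the workflow process must configure logging at DEBUG level once; a minimal sketch (the handler and format here are illustrative, not part of this commit):

    import logging

    # One-time configuration, e.g. at workflow startup; without a DEBUG-level
    # handler, the .debug() calls added in this commit stay silent.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
    )

    # getLogger() with the same name always returns the same logger object,
    # so components never need to pass logger handles around.
    logging.getLogger("Analysis").debug("Start. Imports went good.")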
@@ -366,6 +367,7 @@ class Analysis (Component):
     def _create_and_archive(self, files, archive_name=None, prefix="dir"):
+        logging.getLogger("Analysis").debug("_create_and_archive entering")
         """
         return the web path to the archive files
         If there are samples, datas will be organised by samples
...
...
@@ -391,13 +393,14 @@ class Analysis (Component):
         if len(file_basenames) != len(set(file_basenames)):
             try:
                 # If there are samples
                 if self.run:
                     samples = self.run.get_samples()
                 else:
                     run_id = self.__get_run_id()
                     my_run = Run.get_from_id(run_id)
                     samples = my_run.get_samples()
+                logging.getLogger("Analysis").debug("_create_and_archive with samples")
                 gfiles = {}
                 ungrouped_files = []
                 what_left = []
...
...
@@ -471,14 +474,20 @@ class Analysis (Component):
                 for file in what_left:
                     copyfile(file, os.path.join(tmp_dir, os.path.basename(file)))
+                logging.getLogger("Analysis").debug("_create_and_archive before tarf")
                 tarf = Utils.tar_dir(tmp_dir, os.path.join(self.__get_work_directory(), archive_name))
+                logging.getLogger("Analysis").debug("_create_and_archive before targzf")
                 targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
                 # Then delete temporary files
+                logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
                 os.remove(tarf)
+                logging.getLogger("Analysis").debug("_create_and_archive before rmtree(tmp_dir)")
                 rmtree(tmp_dir)
+                logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
                 return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
             except:
+                logging.getLogger("Analysis").debug("_create_and_archive in execpt, without samples?")
                 gfiles = {}
                 # Group files by folders
                 for file in files:
...
...
@@ -499,19 +508,29 @@ class Analysis (Component):
                         os.makedirs(dir)
                     for file in gfiles[dir_name]:
                         copyfile(file, os.path.join(dir, os.path.basename(file)))
+                logging.getLogger("Analysis").debug("_create_and_archive before tarf")
                 tarf = Utils.tar_dir(tmp_dir, os.path.join(self.__get_work_directory(), archive_name))
+                logging.getLogger("Analysis").debug("_create_and_archive before targzf")
                 targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
                 # Then delete temporary files
+                logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
                 os.remove(tarf)
+                logging.getLogger("Analysis").debug("_create_and_archive before rmtree(tmp_dir)")
                 rmtree(tmp_dir)
+                logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
                 return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
         else:
+            logging.getLogger("Analysis").debug("_create_and_archive, length differs")
+            logging.getLogger("Analysis").debug("_create_and_archive before tarf")
             tarf = Utils.tar_files(files, os.path.join(self.__get_work_directory(), archive_name))
+            logging.getLogger("Analysis").debug("_create_and_archive before targzf")
             targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
             # Then delete temporary files
+            logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
             os.remove(tarf)
+            logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
             return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
...
...
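Utils.tar_dir and Utils.gzip are ng6 helpers whose implementations are outside this diff; the sequence above (tar a temporary directory, gzip the tar, remove the intermediate tar, then remove the temporary directory) can be sketched with the standard library alone. The function and paths below are hypothetical:

    import os
    import tarfile
    import gzip
    import shutil

    def tar_and_gzip_dir(tmp_dir, tar_path):
        """Roughly the tar_dir / gzip / cleanup sequence used above (illustrative)."""
        # Tar the temporary directory, e.g. tar_path = <work_dir>/<archive_name>
        with tarfile.open(tar_path, "w") as tar:
            tar.add(tmp_dir, arcname=os.path.basename(tmp_dir))
        # Gzip the tar without deleting it (delete=False in the diff) ...
        with open(tar_path, "rb") as src, gzip.open(tar_path + ".gz", "wb") as dst:
            shutil.copyfileobj(src, dst)
        # ... then delete the intermediate tar and the temporary directory
        os.remove(tar_path)
        shutil.rmtree(tmp_dir)
        return tar_path + ".gz"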
src/ng6/t3MySQLdb.py
...
...
@@ -63,17 +63,21 @@ class t3MySQLdb(object):
         # opened connection
         #
-        if dictc:
-            return pymysql.connect(host=self.host,
-                                   user=self.user,
-                                   password=self.passwd,
-                                   db=self.db,
-                                   cursorclass=pymysql.cursors.DictCursor)
-        return pymysql.connect(host=self.host,
-                               user=self.user,
-                               password=self.passwd,
-                               db=self.db)
+        try:
+            if dictc:
+                return pymysql.connect(host=self.host,
+                                       user=self.user,
+                                       password=self.passwd,
+                                       db=self.db,
+                                       cursorclass=pymysql.cursors.DictCursor)
+            return pymysql.connect(host=self.host,
+                                   user=self.user,
+                                   password=self.passwd,
+                                   db=self.db)
+        except Exception as e:
+            logging.getLogger("t3MySQLdb.get_connection").error("Exception while connecting to the database " + self.db + " on the server " + self.host + " with user " + self.user + ":" + str(e))
+            raise e

     def execute(self, sql, commit=False, dictc=False, exc_msg=None):
         """
...
...
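The new try/except around pymysql.connect logs the failing database, host and user before re-raising. The same pattern as a standalone sketch (connection parameters are placeholders; note that a bare raise, unlike raise e, preserves the original traceback):

    import logging
    import pymysql

    def get_connection(host, user, passwd, db, dictc=False):
        """Open a pymysql connection, logging connection failures before re-raising."""
        try:
            if dictc:
                return pymysql.connect(host=host, user=user, password=passwd,
                                       db=db, cursorclass=pymysql.cursors.DictCursor)
            return pymysql.connect(host=host, user=user, password=passwd, db=db)
        except Exception as e:
            logging.getLogger("t3MySQLdb.get_connection").error(
                "Exception while connecting to the database %s on the server %s "
                "with user %s: %s", db, host, user, e)
            raise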
@@ -83,7 +87,9 @@ class t3MySQLdb(object):
         @param dictc: use a dictionary cursor to get dictionary results
         @param exc_msg: exception message, to be passed to the raised exception
         """
+        logging.getLogger("t3MySQLdb.execute").debug("Entering execute")
         connection = self.get_connection(dictc)
+        logging.getLogger("t3MySQLdb.execute").debug("after connection " + str(connection))
         try:
             id, rows = None, None
             with connection.cursor() as cursor:
...
...
@@ -93,8 +99,10 @@ class t3MySQLdb(object):
                 rows = cursor.fetchall()
             if commit:
                 connection.commit()
+            logging.getLogger("t3MySQLdb.execute").debug("before returning")
             return _QResult(rowcount=rowcount, rows=rows, lastrowid=id)
         except Exception as e:
+            logging.getLogger("t3MySQLdb.execute").error("Exception" + str(e))
             if exc_msg:
                 raise Exception(e, exc_msg)
             else:
                 raise e
         finally:
...
...
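_QResult is an ng6-internal result holder that is not shown in this diff; judging from the call site it behaves like a named tuple. A hedged sketch of the surrounding execute pattern under that assumption:

    from collections import namedtuple

    # Assumed shape, inferred from the call site above; not shown in this diff.
    _QResult = namedtuple("_QResult", ["rowcount", "rows", "lastrowid"])

    def execute(connection, sql, commit=False):
        """Run one statement and return (rowcount, rows, lastrowid)."""
        try:
            with connection.cursor() as cursor:
                rowcount = cursor.execute(sql)   # pymysql returns the affected row count
                rows = cursor.fetchall()
                lastrowid = cursor.lastrowid
            if commit:
                connection.commit()
            return _QResult(rowcount=rowcount, rows=rows, lastrowid=lastrowid)
        finally:
            connection.close()  # presumably what the finally: branch above does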
@@ -223,13 +231,17 @@ class t3MySQLdb(object):
         @param project_id : the project id to select
         @return: [name, description]
         """
-        logging.getLogger("t3MySQLdb.select_project").debug("Selecting project id=" + str(project_id))
-        sql = "SELECT name, description FROM tx_nG6_project WHERE tx_nG6_project.uid ='" + str(project_id) + "'"
-        logging.getLogger("t3MySQLdb.select_project").debug(sql)
-        qresult = self.execute(sql)
-        res = qresult.rows[0]
-        logging.getLogger("t3MySQLdb.select_project").debug("Returning [" + str([res[0], res[1]]) + "]")
-        return [res[0], res[1]]
+        try:
+            logging.getLogger("t3MySQLdb.select_project").debug("Selecting project id=" + str(project_id))
+            sql = "SELECT name, description FROM tx_nG6_project WHERE tx_nG6_project.uid ='" + str(project_id) + "'"
+            logging.getLogger("t3MySQLdb.select_project").debug(sql)
+            qresult = self.execute(sql)
+            res = qresult.rows[0]
+            logging.getLogger("t3MySQLdb.select_project").debug("Returning [" + str([res[0], res[1]]) + "]")
+            return [res[0], res[1]]
+        except Exception as e:
+            logging.getLogger("t3MySQLdb.select_project").error("Exception while getting the project : " + str(e))
+            raise e

     def select_project_from_name(self, name):
         """
...
...
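A side note on the queries in this file: they are built by string concatenation ("... uid ='" + str(project_id) + "'"), which is fragile against quoting and SQL injection. pymysql's own parameter binding avoids both; a sketch of the same query using %s placeholders (table and column names taken from the diff, the helper itself is hypothetical):

    def select_project(connection, project_id):
        """Same SELECT as above, but with driver-side parameter binding."""
        sql = "SELECT name, description FROM tx_nG6_project WHERE uid = %s"
        with connection.cursor() as cursor:
            cursor.execute(sql, (project_id,))  # pymysql escapes the value itself
            res = cursor.fetchone()
        return [res[0], res[1]]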
@@ -239,10 +251,11 @@ class t3MySQLdb(object):
         """
         logging.getLogger("t3MySQLdb.select_project_from_name").debug("Selecting project name=" + str(name))
         sql = "SELECT uid, description FROM tx_nG6_project WHERE tx_nG6_project.name ='" + str(name) + "'"
-        logging.getLogger("t3MySQLdb.select_project").debug(sql)
+        logging.getLogger("t3MySQLdb.select_project_from_name").debug(sql)
         qresult = self.execute(sql)
+        logging.getLogger("t3MySQLdb.select_project_from_name").debug("before getting qresult")
         res = qresult.rows[0]
-        logging.getLogger("t3MySQLdb.select_project").debug("Returning [" + str([res[0], res[1]]) + "]")
+        logging.getLogger("t3MySQLdb.select_project_from_name").debug("Returning [" + str([res[0], res[1]]) + "]")
         return [res[0], res[1]]

     def get_project_analysis_ids(self, project_id):
...
...
workflows/components/addrawfiles.py
...
...
@@ -56,7 +56,7 @@ def md5sum(md5_file_out, files_to_md5sum):
     stdout, stderr = p.communicate()
     logging.getLogger("AddRawFiles").debug("md5sum. finished")

-def add_stats_to_run(run_dump_path, file):
+def add_stats_to_run(run_dump_path, file):
     import pickle
     import logging
     total_nb_seq = 0
...
...
@@ -164,8 +164,8 @@ class AddRawFiles (Component):
         logging.getLogger("AddRawFiles").debug("process. Before self.add_stats_to_run(extract_stats_from_seq_file")
         logging.getLogger("AddRawFiles").debug("process. Dirname of files_to_save_stats[0] : " + os.path.dirname(files_to_save_stats[0]))
         #Add number of reads and total length in base for each seq file and add these data to the run in the database
-        self.add_python_execution(add_stats_to_run, cmd_format='{EXE} {ARG} {IN} > {OUT}', map=False,
-                                  outputs=self.stdout, inputs=files_to_save_stats[0], includes=self.files_to_save, arguments=[run_dump_path])
+        self.add_python_execution(add_stats_to_run, cmd_format='{EXE} {ARG} > {OUT}', map=False,
+                                  outputs=self.stdout, includes=files_to_save_stats, arguments=[run_dump_path, files_to_save_stats[0]])
         #archive the files in the work folder of the run to be rsynced at the end
         logging.getLogger("AddRawFiles").debug("process. Before copying/archiving files with compression = " + self.compression)
...
...
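This hunk is the fix the merge request title points at: the stats files previously entered add_python_execution through inputs=, so the read/base counting was wired up as a workflow input and ran too early; they now travel through includes=, with the file to process passed explicitly in arguments=. The counting itself (total_nb_seq and total bases per sequence file) sits outside this hunk; a minimal illustrative counter for a plain, uncompressed FASTQ file, assuming the standard 4-lines-per-record layout:

    def count_reads_and_bases(fastq_path):
        """Count reads and bases in a plain FASTQ file (4 lines per record assumed)."""
        total_nb_seq = 0
        total_nb_bases = 0
        with open(fastq_path) as handle:
            for i, line in enumerate(handle):
                if i % 4 == 1:  # the sequence line of each record
                    total_nb_seq += 1
                    total_nb_bases += len(line.strip())
        return total_nb_seq, total_nb_bases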
workflows/components/fastqc.py
...
...
@@ -17,7 +17,7 @@
 import os
 import re
 import logging
 from subprocess import Popen, PIPE
 from jflow.utils import get_argument_pattern
...
...
@@ -101,9 +101,11 @@ class FastQC (Analysis):
         return stats

     def post_process(self):
+        logging.getLogger("FastQC").debug("post_process, entering")
         results_files = []
         metrics = []
         for file in os.listdir(self.output_directory):
+            logging.getLogger("FastQC").debug("post_process, working on " + file)
             full_file_path = os.path.join(self.output_directory, file)
             if file.endswith(".zip"):
                 results_files.append(full_file_path)
...
...
@@ -176,10 +178,9 @@ class FastQC (Analysis):
         for metric in metrics:
             self._add_result_element("metrics", "metric", metric, metric)
         # Finaly create and add the archive to the analysis
-        print(results_files)
-        print("archive_name ", self.archive_name)
+        logging.getLogger("FastQC").debug("post_process, results_files = " + str(results_files))
+        logging.getLogger("FastQC").debug("post_process, archive_name = " + self.archive_name)
         self.archive_name = self.archive_name.replace("/", "-")
-        print("archive_name ", self.archive_name)
         self._create_and_archive(results_files, self.archive_name)

     def get_version(self):
...
...
workflows/components/fastqilluminafilter.py
...
...
@@ -49,20 +49,26 @@ class FastqIlluminaFilter (Analysis):
     def post_process(self):
+        logging.getLogger("FastqIlluminaFilter").debug("post_process entering")
         # Create dictionary : key = file name or prefix, value = files path
         files = {}
         if self.group_prefix:
+            logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is true")
             files = Utils.get_filepath_by_prefix(self.stdout, self.group_prefix)
         else:
+            logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is false")
             for file in self.stdout:
+                logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is false, work on " + file)
                 file_name = os.path.splitext(os.path.basename(file))[0]
                 files[file_name] = [file]
         # Merge analyses stat
         for sample_file in list(files.keys()):
+            logging.getLogger("FastqIlluminaFilter").debug("post_process, work on " + sample_file)
             tot_input = 0
             tot_output = 0
             for file in files[sample_file]:
+                logging.getLogger("FastqIlluminaFilter").debug("post_process, work on " + file)
                 [input, output] = self.__parse_stat_file(file)
                 tot_input += int(input)
                 tot_output += int(output)
...
...
@@ -78,6 +84,7 @@ class FastqIlluminaFilter (Analysis):
     def __parse_stat_file(self, stat_file):
+        logging.getLogger("FastqIlluminaFilter").debug("__parse_stat_file, entering")
         """
         Parse the stat file
         @param stat_file : the fastq_illumina_filter summary file
...
...
@@ -95,6 +102,7 @@ class FastqIlluminaFilter (Analysis):
         if output_reg:
             output = output_reg.group(1).replace(",", "")
+        logging.getLogger("FastqIlluminaFilter").debug("__parse_stat_file, returning")
         return [input, output]
...
...
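The regular expressions in __parse_stat_file are outside these hunks; only output_reg.group(1).replace(",", "") is visible, which strips the thousands separators from the matched count. A hedged sketch of parsing a fastq_illumina_filter-style summary, with the exact line format assumed for illustration:

    import re

    def parse_stat_file(stat_file):
        """Extract input/output read counts from a summary file (line format assumed)."""
        input_count, output_count = None, None
        with open(stat_file) as handle:
            for line in handle:
                input_reg = re.search(r"Input:\s+([\d,]+)\s+reads", line)
                if input_reg:
                    input_count = input_reg.group(1).replace(",", "")
                output_reg = re.search(r"Output:\s+([\d,]+)\s+reads", line)
                if output_reg:
                    output_count = output_reg.group(1).replace(",", "")
        return [input_count, output_count]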