Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
genotoul-bioinfo
D-GENIES
Commits
4c292dd6
Commit
4c292dd6
authored
Apr 06, 2018
by
Floreal Cabanettes
Browse files
Add cluster mode for plot alignment mode
parent
d84ad2c3
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/dgenies/bin/all_prepare.py
View file @
4c292dd6
...
...
@@ -8,7 +8,7 @@ from filter_contigs import Filter
from
index
import
index_file
def
index_fasta
(
name
,
filepath
,
out_dir
,
type_f
):
def
index_fasta
(
name
,
filepath
,
out_dir
,
type_f
,
dofilter
=
True
):
"""
Index and filter fasta
:param name: name of the specie
...
...
@@ -17,23 +17,25 @@ def index_fasta(name, filepath, out_dir, type_f):
:param type_f: type of fasta (query or target)
"""
uncompressed
=
None
if
filepath
.
endswith
(
".gz"
):
if
filepath
.
endswith
(
".gz"
)
and
dofilter
:
uncompressed
=
filepath
[:
-
3
]
index
=
os
.
path
.
join
(
out_dir
,
type_f
+
".idx"
)
index
=
os
.
path
.
join
(
out_dir
,
type_f
+
".idx"
)
success
,
nb_contigs
=
index_file
(
filepath
,
name
,
index
,
uncompressed
)
if
success
:
in_fasta
=
filepath
if
uncompressed
is
not
None
:
in_fasta
=
uncompressed
filtered_fasta
=
os
.
path
.
join
(
os
.
path
.
dirname
(
in_fasta
),
"filtered_"
+
os
.
path
.
basename
(
in_fasta
))
filter_f
=
Filter
(
fasta
=
in_fasta
,
index_file
=
index
,
type_f
=
type_f
,
min_filtered
=
nb_contigs
/
4
,
split
=
False
,
out_fasta
=
filtered_fasta
,
replace_fa
=
True
)
is_filtered
=
filter_f
.
filter
()
is_filtered
=
False
if
dofilter
:
in_fasta
=
filepath
if
uncompressed
is
not
None
:
in_fasta
=
uncompressed
filtered_fasta
=
os
.
path
.
join
(
os
.
path
.
dirname
(
in_fasta
),
"filtered_"
+
os
.
path
.
basename
(
in_fasta
))
filter_f
=
Filter
(
fasta
=
in_fasta
,
index_file
=
index
,
type_f
=
type_f
,
min_filtered
=
nb_contigs
/
4
,
split
=
False
,
out_fasta
=
filtered_fasta
,
replace_fa
=
True
)
is_filtered
=
filter_f
.
filter
()
if
uncompressed
is
not
None
:
if
is_filtered
:
os
.
remove
(
filepath
)
...
...
@@ -41,6 +43,7 @@ def index_fasta(name, filepath, out_dir, type_f):
save_file
.
write
(
uncompressed
)
else
:
os
.
remove
(
uncompressed
)
else
:
if
uncompressed
is
not
None
:
try
:
...
...
@@ -53,16 +56,21 @@ def index_fasta(name, filepath, out_dir, type_f):
parser
=
argparse
.
ArgumentParser
(
description
=
"Split huge contigs"
)
parser
.
add_argument
(
'-q'
,
'--query'
,
type
=
str
,
required
=
False
,
help
=
"Query fasta file"
)
parser
.
add_argument
(
'-u'
,
'--query-split'
,
type
=
str
,
required
=
False
,
help
=
"Query fasta file split"
)
parser
.
add_argument
(
'-t'
,
'--target'
,
type
=
str
,
required
=
Tru
e
,
help
=
"Target fasta file"
)
parser
.
add_argument
(
'-t'
,
'--target'
,
type
=
str
,
required
=
Fals
e
,
help
=
"Target fasta file"
)
parser
.
add_argument
(
'-n'
,
'--query-name'
,
type
=
str
,
required
=
False
,
help
=
"Query name"
)
parser
.
add_argument
(
'-m'
,
'--target-name'
,
type
=
str
,
required
=
Tru
e
,
help
=
"Target name"
)
parser
.
add_argument
(
'-m'
,
'--target-name'
,
type
=
str
,
required
=
Fals
e
,
help
=
"Target name"
)
parser
.
add_argument
(
'-s'
,
'--size'
,
type
=
int
,
required
=
False
,
default
=
10
,
help
=
"Max size of contigs (Mb) - for query split"
)
parser
.
add_argument
(
'-p'
,
'--preptime-file'
,
type
=
str
,
required
=
True
,
help
=
"File into save prep times"
)
parser
.
add_argument
(
'--split'
,
type
=
bool
,
const
=
True
,
nargs
=
"?"
,
required
=
False
,
default
=
False
,
help
=
"Split query"
)
parser
.
add_argument
(
'--index-only'
,
type
=
bool
,
const
=
True
,
nargs
=
"?"
,
required
=
False
,
default
=
False
,
help
=
"Index files only. No split, no filter."
)
args
=
parser
.
parse_args
()
if
args
.
index_only
and
args
.
split
:
raise
Exception
(
"--index-only and --split arguments are mutually exclusive"
)
out_dir
=
os
.
path
.
dirname
(
args
.
target
)
with
open
(
args
.
preptime_file
,
"w"
)
as
ptime
:
...
...
@@ -88,9 +96,13 @@ with open(args.preptime_file, "w") as ptime:
else
:
exit
(
1
)
else
:
index_fasta
(
name
=
args
.
query_name
,
filepath
=
args
.
query
,
out_dir
=
out_dir
,
type_f
=
"query"
)
print
(
"Indexing target..."
)
index_fasta
(
name
=
args
.
target_name
,
filepath
=
args
.
target
,
out_dir
=
out_dir
,
type_f
=
"target"
)
print
(
"Indexing query..."
)
index_fasta
(
name
=
args
.
query_name
,
filepath
=
args
.
query
,
out_dir
=
out_dir
,
type_f
=
"query"
,
dofilter
=
not
args
.
index_only
)
if
args
.
target
is
not
None
:
print
(
"Indexing target..."
)
index_fasta
(
name
=
args
.
target_name
,
filepath
=
args
.
target
,
out_dir
=
out_dir
,
type_f
=
"target"
,
dofilter
=
not
args
.
index_only
)
ptime
.
write
(
str
(
round
(
time
.
time
()))
+
"
\n
"
)
...
...
src/dgenies/lib/job_manager.py
View file @
4c292dd6
...
...
@@ -830,31 +830,7 @@ class JobManager:
self
.
set_job_status
(
"prepared"
)
self
.
run_job
(
"local"
)
def
prepare_dotplot_local
(
self
):
"""
Prepare data if alignment already done: just index the fasta (if index not given), then parse the alignment
file and sort it.
"""
self
.
set_job_status
(
"preparing"
)
# Prepare target index:
target_format
=
os
.
path
.
splitext
(
self
.
target
.
get_path
())[
1
][
1
:]
if
target_format
==
"idx"
:
shutil
.
move
(
self
.
target
.
get_path
(),
self
.
idx_t
)
os
.
remove
(
os
.
path
.
join
(
self
.
output_dir
,
".target"
))
else
:
index_file
(
self
.
target
.
get_path
(),
self
.
target
.
get_name
(),
self
.
idx_t
)
# Prepare query index:
if
self
.
query
is
not
None
:
query_format
=
os
.
path
.
splitext
(
self
.
query
.
get_path
())[
1
][
1
:]
if
query_format
==
"idx"
:
shutil
.
move
(
self
.
query
.
get_path
(),
self
.
idx_q
)
os
.
remove
(
os
.
path
.
join
(
self
.
output_dir
,
".query"
))
else
:
index_file
(
self
.
query
.
get_path
(),
self
.
query
.
get_name
(),
self
.
idx_q
)
else
:
shutil
.
copy
(
self
.
idx_t
,
self
.
idx_q
)
def
_end_of_prepare_dotplot
(
self
):
# Parse alignment file:
if
hasattr
(
parsers
,
self
.
aln_format
):
getattr
(
parsers
,
self
.
aln_format
)(
self
.
align
.
get_path
(),
self
.
paf_raw
)
...
...
@@ -881,6 +857,78 @@ class JobManager:
if
MODE
==
"webserver"
and
self
.
config
.
send_mail_status
:
self
.
send_mail_post
()
def
prepare_dotplot_cluster
(
self
,
batch_system_type
):
"""
Prepare data if alignment already done: just index the fasta (if index not given), then parse the alignment
:param batch_system_type: type of cluster
"""
args
=
[
self
.
config
.
cluster_prepare_script
,
"-p"
,
self
.
preptime_file
,
"--index-only"
]
has_index
=
False
target_format
=
os
.
path
.
splitext
(
self
.
target
.
get_path
())[
1
][
1
:]
if
target_format
==
"idx"
:
shutil
.
move
(
self
.
target
.
get_path
(),
self
.
idx_t
)
os
.
remove
(
os
.
path
.
join
(
self
.
output_dir
,
".target"
))
else
:
has_index
=
True
args
+=
[
"-t"
,
self
.
target
.
get_path
(),
"-m"
,
self
.
target
.
get_name
()]
if
self
.
query
is
not
None
:
query_format
=
os
.
path
.
splitext
(
self
.
query
.
get_path
())[
1
][
1
:]
if
query_format
==
"idx"
:
shutil
.
move
(
self
.
query
.
get_path
(),
self
.
idx_q
)
os
.
remove
(
os
.
path
.
join
(
self
.
output_dir
,
".query"
))
else
:
has_index
=
True
args
+=
[
"-q"
,
self
.
query
.
get_path
(),
"-n"
,
self
.
query
.
get_name
()]
success
=
True
if
has_index
:
success
=
self
.
launch_to_cluster
(
step
=
"prepare"
,
batch_system_type
=
batch_system_type
,
command
=
self
.
config
.
cluster_python_exec
,
args
=
args
,
log_out
=
self
.
logs
,
log_err
=
self
.
logs
)
if
success
:
if
self
.
query
is
None
:
shutil
.
copy
(
self
.
idx_t
,
self
.
idx_q
)
self
.
_end_of_prepare_dotplot
()
elif
MODE
==
"webserver"
and
self
.
config
.
send_mail_status
:
self
.
send_mail_post
()
def
prepare_dotplot_local
(
self
):
"""
Prepare data if alignment already done: just index the fasta (if index not given), then parse the alignment
file and sort it.
"""
self
.
set_job_status
(
"preparing"
)
# Prepare target index:
target_format
=
os
.
path
.
splitext
(
self
.
target
.
get_path
())[
1
][
1
:]
if
target_format
==
"idx"
:
shutil
.
move
(
self
.
target
.
get_path
(),
self
.
idx_t
)
os
.
remove
(
os
.
path
.
join
(
self
.
output_dir
,
".target"
))
else
:
index_file
(
self
.
target
.
get_path
(),
self
.
target
.
get_name
(),
self
.
idx_t
)
# Prepare query index:
if
self
.
query
is
not
None
:
query_format
=
os
.
path
.
splitext
(
self
.
query
.
get_path
())[
1
][
1
:]
if
query_format
==
"idx"
:
shutil
.
move
(
self
.
query
.
get_path
(),
self
.
idx_q
)
os
.
remove
(
os
.
path
.
join
(
self
.
output_dir
,
".query"
))
else
:
index_file
(
self
.
query
.
get_path
(),
self
.
query
.
get_name
(),
self
.
idx_q
)
else
:
shutil
.
copy
(
self
.
idx_t
,
self
.
idx_q
)
self
.
_end_of_prepare_dotplot
()
def
prepare_data
(
self
):
if
self
.
align
is
None
:
if
MODE
==
"webserver"
:
...
...
@@ -899,8 +947,7 @@ class JobManager:
if
job
.
batch_type
==
"local"
:
self
.
prepare_dotplot_local
()
else
:
print
(
"NOT IMPLEMENTED!"
)
# self.prepare_data_cluster(job.batch_type)
self
.
prepare_dotplot_cluster
(
job
.
batch_type
)
else
:
self
.
prepare_dotplot_local
()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment