Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python3
import os
import sys
import shutil
import argparse
from subprocess import run
def fail(message):
print(message, file=sys.stderr)
return 1
def success(message):
print(message)
return 0
def parse_arguments():
parser = argparse.ArgumentParser(
prog="genomestrip.py",
description="Launch genomestrip on a specific chromosome"
" ")
parser.add_argument("--bamlist", help="File listing bam files", type=str, required=True)
parser.add_argument("--reference", help="Reference fasta file", type=str, required=True)
parser.add_argument("--gendermap", help="Gendermap file", type=str, required=True)
parser.add_argument("--outdir", help="Output folder", type=str, required=True)
parser.add_argument("--outprefix", help="Output files prefix", type=str, required=True)
return parser.parse_args()
def launch(bamlist, reference, gendermap, outdir, outprefix):
reference_prefix = os.path.splitext(reference)[0]
run_dir = os.path.join(outdir, outprefix + "_run")
tmp_dir = os.path.join(outdir, outprefix + "_tmp")
sv_dir = os.environ["SV_DIR"]
os.environ["PATH"] = sv_dir + "/bwa:" + os.environ["PATH"]
old_libpath = ":" + os.environ["LD_LIBRARY_PATH"] if "LD_LIBRARY_PATH" in os.environ else ""
os.environ["LD_LIBRARY_PATH"] = sv_dir + "/bwa" + old_libpath
classpath = "{sv_dir}/lib/SVToolkit.jar:{sv_dir}/lib/gatk/GenomeAnalysisTK.jar:{sv_dir}/lib/gatk/Queue.jar". \
format(sv_dir=sv_dir)
log_dir = os.path.join(run_dir, "logs")
if os.path.exists(tmp_dir):
shutil.rmtree(tmp_dir)
if os.path.exists(log_dir):
shutil.rmtree(log_dir)
exit_code = run("java -cp {classpath} {mx} -jar {sv_dir}/lib/SVToolkit.jar"
.format(classpath=classpath, mx=mx, sv_dir=sv_dir), shell=True).returncode
if exit_code != 0:
return fail("An error has occurred. See above.")
command = (
"""LC_ALL=C java -cp {classpath} {mx} \
org.broadinstitute.gatk.queue.QCommandLine \
-S {sv_dir}/qscript/SVPreprocess.q \
-S {sv_dir}/qscript/SVQScript.q \
-gatk {sv_dir}/lib/gatk/GenomeAnalysisTK.jar \
-cp {classpath} \
-jobProject Capri \
-configFile {sv_dir}/conf/genstrip_parameters.txt \
-tempDir {tmp_dir} \
-R {reference_prefix}.fasta \
-I {bamlist} \
-runDirectory {run_dir} \
-md {run_dir}/metadata \
-jobLogDir {run_dir}/logs \
-disableGATKTraversal \
-bamFilesAreDisjoint true \
-computeGCProfiles true \
-computeReadCounts true \
-copyNumberMaskFile {reference_prefix}.gcmask.fasta \
-genderMaskBedFile {reference_prefix}.gendermask.bed \
-genomeMaskFile {reference_prefix}.svmask.fasta \
-ploidyMapFile {reference_prefix}.ploidymap.txt \
-readDepthMaskFile {reference_prefix}.rdmask.bed \
-genderMapFile {gendermap} \
-maxConcurrentRun 20 \
-run""".format(classpath=classpath, mx=mx, sv_dir=sv_dir, run_dir=run_dir, tmp_dir=tmp_dir,
reference_prefix=reference_prefix, bamlist=bamlist, gendermap=gendermap))
with_drmaa = "SV_PARALLEL_DRMAA" in os.environ and os.environ["SV_PARALLEL_DRMAA"] == "1"
if with_drmaa:
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
-gatkJobRunner Drmaa \
-jobQueue workq \
-jobNative \"${SV_PARALLEL_NATIVES}\" "
exit_code = run(command, shell=True).returncode
if exit_code != 0:
return fail("Run of genomestrip failed")
shutil.rmtree(tmp_dir)
final_metadata = os.path.join(outdir, "metadata")
if os.path.exists(final_metadata):
shutil.rmtree(final_metadata)
shutil.move(os.path.join(run_dir, "metadata"), final_metadata)
return success("Completed successfully")
def main():
args = parse_arguments()
# Check files
bamlist = args.bamlist
if not os.path.isfile(bamlist):
return fail("File %s does not exists or is not a file" % bamlist)
reference = args.reference
if not os.path.isfile(reference):
return fail("File %s does not exists or is not a file" % reference)
gendermap = args.gendermap
if not os.path.isfile(gendermap):
return fail("File %s does not exists or is not a file" % gendermap)
# Launch
return launch(**{k: v for k, v in vars(args).items() if k in launch.__code__.co_varnames})
if __name__ == "__main__":
sys.exit(main())