Skip to content
Snippets Groups Projects
gstrip_preprocess.py 4.28 KiB
Newer Older
#!/usr/bin/env python3

import os
import sys
import shutil
import argparse
from subprocess import run


def fail(message):
    print(message, file=sys.stderr)
    return 1


def success(message):
    print(message)
    return 0


def parse_arguments():
    parser = argparse.ArgumentParser(
        prog="genomestrip.py",
        description="Launch genomestrip on a specific chromosome"
                    "  ")
    parser.add_argument("--bamlist", help="File listing bam files", type=str, required=True)
    parser.add_argument("--reference", help="Reference fasta file", type=str, required=True)
    parser.add_argument("--gendermap", help="Gendermap file", type=str, required=True)
    parser.add_argument("--outdir", help="Output folder", type=str, required=True)
    parser.add_argument("--outprefix", help="Output files prefix", type=str, required=True)

    return parser.parse_args()


def launch(bamlist, reference, gendermap, outdir, outprefix):
    reference_prefix = os.path.splitext(reference)[0]
    run_dir = os.path.join(outdir, outprefix + "_run")
    tmp_dir = os.path.join(outdir, outprefix + "_tmp")
    sv_dir = os.environ["SV_DIR"]
    os.environ["PATH"] = sv_dir + "/bwa:" + os.environ["PATH"]
    old_libpath = ":" + os.environ["LD_LIBRARY_PATH"] if "LD_LIBRARY_PATH" in os.environ else ""
    os.environ["LD_LIBRARY_PATH"] = sv_dir + "/bwa" + old_libpath

    mx = "-Xmx16g"
    classpath = "{sv_dir}/lib/SVToolkit.jar:{sv_dir}/lib/gatk/GenomeAnalysisTK.jar:{sv_dir}/lib/gatk/Queue.jar". \
        format(sv_dir=sv_dir)

    log_dir = os.path.join(run_dir, "logs")

    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)

    os.makedirs(tmp_dir)
    os.makedirs(log_dir)
    exit_code = run("java -cp {classpath} {mx} -jar {sv_dir}/lib/SVToolkit.jar"
                    .format(classpath=classpath, mx=mx, sv_dir=sv_dir), shell=True).returncode
    if exit_code != 0:
        return fail("An error has occurred. See above.")
    command = (
        """LC_ALL=C java -cp {classpath} {mx} \
org.broadinstitute.gatk.queue.QCommandLine \
-S {sv_dir}/qscript/SVPreprocess.q \
-S {sv_dir}/qscript/SVQScript.q \
-gatk {sv_dir}/lib/gatk/GenomeAnalysisTK.jar \
-cp {classpath} \
-jobProject Capri \
-configFile {sv_dir}/conf/genstrip_parameters.txt \
-tempDir {tmp_dir} \
-R {reference_prefix}.fasta \
-I {bamlist} \
-runDirectory {run_dir} \
-md {run_dir}/metadata \
-jobLogDir {run_dir}/logs \
-disableGATKTraversal \
-bamFilesAreDisjoint true \
-computeGCProfiles  true \
-computeReadCounts  true \
-copyNumberMaskFile {reference_prefix}.gcmask.fasta \
-genderMaskBedFile  {reference_prefix}.gendermask.bed \
-genomeMaskFile     {reference_prefix}.svmask.fasta \
-ploidyMapFile      {reference_prefix}.ploidymap.txt \
-readDepthMaskFile  {reference_prefix}.rdmask.bed \
-genderMapFile      {gendermap} \
-maxConcurrentRun 20 \
-run""".format(classpath=classpath, mx=mx, sv_dir=sv_dir, run_dir=run_dir, tmp_dir=tmp_dir,
               reference_prefix=reference_prefix, bamlist=bamlist, gendermap=gendermap))

    with_drmaa = "SV_PARALLEL_DRMAA" in os.environ and os.environ["SV_PARALLEL_DRMAA"] == "1"

    if with_drmaa:
Floreal Cabanettes's avatar
Floreal Cabanettes committed
        command += " -jobRunner Drmaa \
-gatkJobRunner Drmaa \
-jobQueue workq \
-jobNative \"${SV_PARALLEL_NATIVES}\" "

    exit_code = run(command, shell=True).returncode

    if exit_code != 0:
        return fail("Run of genomestrip failed")

    shutil.rmtree(tmp_dir)

    final_metadata = os.path.join(outdir, "metadata")
    if os.path.exists(final_metadata):
        shutil.rmtree(final_metadata)
    shutil.move(os.path.join(run_dir, "metadata"), final_metadata)

    return success("Completed successfully")


def main():
    args = parse_arguments()

    # Check files
    bamlist = args.bamlist
    if not os.path.isfile(bamlist):
        return fail("File %s does not exists or is not a file" % bamlist)
    reference = args.reference
    if not os.path.isfile(reference):
        return fail("File %s does not exists or is not a file" % reference)
    gendermap = args.gendermap
    if not os.path.isfile(gendermap):
        return fail("File %s does not exists or is not a file" % gendermap)

    # Launch
    return launch(**{k: v for k, v in vars(args).items() if k in launch.__code__.co_varnames})


if __name__ == "__main__":
    sys.exit(main())