added parsing merging and genotyping

eceb3c6b · Thomas Faraut · 3c092b4a · eceb3c6b · eceb3c6b · eceb3c6b
Commit eceb3c6b authored 7 years ago by Thomas Faraut
--- a/snakecnv/Snakefile
+++ b/snakecnv/Snakefile
@@ -51,7 +51,7 @@ def valid_chromosome(chrom, regexp_chrom):
    p = re.compile(regexp_chrom)
    return p.match(chrom) is not None

-ROOTPATH = "/home/faraut/dynawork/CapriSV/pipeline/capricnv/snakewf/playground/cnvdetection"
+ROOTPATH = "/home/faraut/dynawork/CapriSV/pipeline/cnvpipelines/snakecnv"

 shell.prefix("export PATH=%s/bin:%s/soft/lumpy/bin:\"$PATH\";" % (ROOTPATH,ROOTPATH))

@@ -79,6 +79,30 @@ rule all:
               chrom=chromosomes,
               tool=tools)

+rule merging:
+    input:
+        expand("{tool}/{{chrom}}/{{svtype}}/{tool}_{{chrom}}_{{svtype}}_parsed.vcf.gz",
+               tool=tools)
+    output:
+        "merge/{chrom}/{svtype}/merge_{chrom}_{svtype}.vcf.gz"
+    params:
+        outdir = "merge",
+        reference = REFERENCE
+    script:
+        "bin/merge.py"
+
+rule parsing:
+    input:
+        unpack(getToolOuputFile),
+        reference = REFERENCE
+    output:
+        touch("{tool}/{chrom}/{svtype}/parsing.done"),
+        parseoutput = "{tool}/{chrom}/{svtype}/{tool}_{chrom}_{svtype}_parsed.vcf.gz"
+    script:
+        # Parse for the specific tool, the
+        # specified variants and the specified chrom
+        "bin/parse.py"
+
 rule extract:
    input:
        bam = "data/bams/{sample}.bam"
@@ -89,15 +113,5 @@ rule extract:
        " -r {wildcards.chrom}"
        " -o data/bams/{wildcards.chrom}"

-rule parsing:
-    input:
-        getToolOuputFile
-    output: touch("{tool}/{chrom}/{svtype}/parsing.done")
-    shell:
-        "echo {input}"
-    #  script:
-        # Parse for the specific tool, the
-        # specified variants and the specified chrom
-    #    rundetectionscript.py

 include: "tools/modules.snk"
--- a/snakecnv/bin/cnvpipeline.sh
+++ b/snakecnv/bin/cnvpipeline.sh
+#!/bin/bash
+
+# If you adapt this script for your own use, you will need to set these two variables based on your environment.
+# SV_DIR is the installation directory for SVToolkit - it must be an exported environment variable.
+# SV_TMPDIR is a directory for writing temp files, which may be large if you have a large data set.
+export SV_DIR=/home/faraut/save/Softwares/svtoolkit
+
+echo "---"
+echo "Running CNVPipeline.sh $1 $2 $3 $4 $5"
+echo "---"
+
+myname=`basename "$0"`
+
+function usage() {
+    echo "
+Program: geneomstrip cnvpipeline wrapper
+usage:   $myname  <bamlist> <genome> <metadata> <gendermap> <outdir> <outprefix> <intervallist>
+
+positional args:
+        bamlist       a file specifying the bamfiles
+        genome        the geome fasta file
+        metadata      the metada directory created by preprocess
+        gendermap     file containing the declared gender for each sample.
+        outdir        the output dir
+        outprefix     prefix of the output file
+        intervallist  Interval(s) or .list for which intervals to process (
+see 
+ http://software.broadinstitute.org/software/genomestrip/org_broadinstitute_sv_qscript_CNVDiscoveryPipeline.html
+for a complete description
+"
+}
+
+
+bamlist=$1
+genome=$2
+metadata=$3
+gendermap=$4
+outdir=$5
+outprefix=$6
+intervallist=$7
+
+if [[ -z ${bamlist} || -z ${genome} || -z ${metadata} || -z ${gendermap} ||  -z ${outdir} || -z ${outprefix} || -z ${intervallist}  ]]
+then
+   usage
+   echo "At least one argument is missing"
+   exit 1
+fi
+
+if [ ! -d "${metadata}" ] ; then
+  echo "Directory ${metadata} absent  ... exiting"
+  exit 1
+fi
+
+inputFile=$bamlist
+reference_prefix=${genome%.fasta}
+
+runDir=${outdir}/${outprefix}_run
+
+mkdir -p ${runDir}/logs || exit 1
+
+
+# For SVAltAlign, you must use the version of bwa compatible with Genome STRiP.
+export PATH=${SV_DIR}/bwa:${PATH}
+export LD_LIBRARY_PATH=${SV_DIR}/bwa:${LD_LIBRARY_PATH}
+export LD_LIBRARY_PATH=/SGE/ogs/lib/linux-x64:$LD_LIBRARY_PATH
+
+mx="-Xmx4g"
+classpath="${SV_DIR}/lib/SVToolkit.jar:${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar:${SV_DIR}/lib/gatk/Queue.jar"
+
+echo "-- Run CNVPipeline -- " 
+LC_ALL=C java -cp ${classpath} ${mx} \
+    org.broadinstitute.gatk.queue.QCommandLine \
+    -S ${SV_DIR}/qscript/discovery/cnv/CNVDiscoveryPipeline.q \
+    -S ${SV_DIR}/qscript/SVQScript.q \
+    -cp ${classpath} \
+    -gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \
+    -configFile ${SV_DIR}/conf/genstrip_parameters.txt \
+    --disableJobReport \
+    -jobProject Capri \
+    -jobRunner Drmaa \
+    -gatkJobRunner Drmaa \
+    -jobQueue workq \
+    -jobNative '-l mem=16G -l h_vmem=16G' \
+    -R ${reference_prefix}.fasta \
+    -genderMaskBedFile  ${reference_prefix}.gendermask.bed \
+    -genomeMaskFile     ${reference_prefix}.svmask.fasta \
+    -genderMapFile      ${gendermap} \
+    -ploidyMapFile      ${reference_prefix}.ploidymap.txt \
+    -md ${metadata} \
+    -runDirectory ${runDir} \
+    -jobLogDir ${runDir}/logs \
+    -I ${inputFile} \
+    -intervalList $intervallist \
+    -tilingWindowSize 5000 \
+    -tilingWindowOverlap 2500 \
+    -maximumReferenceGapLength 2500 \
+    -boundaryPrecision 200 \
+    -minimumRefinedLength 2500 \
+    -run \
+    || exit 1
+
+sites=${runDir}/results/gs_cnv.genotypes.vcf.gz
+genotypes=${outdir}/${outprefix}.vcf.gz
+
+# Run genotyping on the discovered sites.
+LC_ALL=C  java -Xmx4g -cp ${classpath} ${mx} \
+     org.broadinstitute.sv.apps.GenerateHaploidCNVGenotypes \
+     -R ${reference_prefix}.fasta \
+     -ploidyMapFile ${reference_prefix}.ploidymap.txt \
+     -genderMapFile ${gendermap}  \
+     -vcf ${sites} \
+     -O ${genotypes} \
+     -estimateAlleleFrequencies true \
+     -genotypeLikelihoodThreshold 0.001 || exit 1
+     
+echo "Script CNVPipeline completed successfully at "`date +%Y-%m-%d`
--- a/snakecnv/bin/extractregionreads.py
+++ b/snakecnv/bin/extractregionreads.py
@@ -86,7 +86,7 @@ def main(args):
                                     "wb",
                                     header=new_header
                                     ) as outf:
-                for read in inf.fetch(chrom, start, end):
+                for read in inf.fetch(contig=chrom, start=start, stop=end):
                    if read.mate_is_unmapped:
                        read.reference_id = 0
                        read.next_reference_id = 0

--- a/snakecnv/bin/gstrip_genotyper.sh
+++ b/snakecnv/bin/gstrip_genotyper.sh
+#!/bin/bash
+
+# If you adapt this script for your own use, you will need to set these two variables based on your environment.
+# SV_DIR is the installation directory for SVToolkit - it must be an exported environment variable.
+# SV_TMPDIR is a directory for writing temp files, which may be large if you have a large data set.
+export SV_DIR=/usr/local/bioinfo/src/genomeSTRIP/svtoolkit
+
+echo "---"
+echo "Running gstrip_genotyper.sh $1 $2 $3 $4 $5 $6 $7 $8 $9"
+echo "---"
+
+myname=`basename "$0"`
+
+function usage() {
+    echo "
+Program: SVGenotyper wrapper
+usage:   $myname  <bamlist> <chrom> <genome> <metadata> <rundir> <gendermap> <sites> genotypes> [<ncpus>]
+
+positional args:
+        bamlist    a file with the bamfiles
+        chrom      specific chromosome
+        genome     the genome fasta file (the genomestrip reference genome fasta file)
+        metadata   directory with genomestrip metada (generated by SVpreprocess)
+        rundir     directory for auwiliarry data for variant filtering
+        gendermap  the gendermap file for genomestrip
+        sites      vcf file with sv to genotype
+        genotypes  vcf file with genotypes (output file)
+       
+optional args:
+        cpus       the number of cpus [1]
+"
+}
+
+bamlist=$1
+chrom=$2
+genome=$3
+metadata=$4
+rundir=$5
+gendermap=$6
+sites=$7
+genotypes=$8
+ncpus=${9:-1}
+
+if [[ -z "$bamlist" || -z "$chrom" || -z "$genome" || -z "$metadata" || -z "$rundir" || -z "$gendermap" || -z "$sites" || -z "$genotypes" ]]
+then 
+   usage
+   echo "At least one argument is missing"
+   exit 1
+fi
+
+inputFile=$bamlist
+reference_prefix=${genome%.fasta}
+
+if [[ ${bamlist: -5} != ".list" ]]; then
+  echo "bamlist file must have the extension .list ... exiting"
+  exit
+fi
+
+# Now testing the existence of directories and files
+if [[ ! -d ${metadata}  ]]; then
+  echo "Directory metadata ${metadata} is missing ... exiting"
+  exit 1
+fi
+
+if [[ ! -s ${sites} ]]; then
+  echo "${sites} shoud be an existing non empty file ... exiting"
+  exit 1
+fi
+
+if [[ ! -s ${gendermap} ]]; then
+  echo "${gendermap} shoud be an existing non empty file ... exiting"
+  exit 1
+fi
+
+runDir=${rundir}
+SV_TMPDIR=${runDir}/tmp
+
+mkdir -p ${runDir}/logs || exit 1
+mkdir -p ${SV_TMPDIR}
+
+# For SVAltAlign, you must use the version of bwa compatible with Genome STRiP.
+export PATH=${SV_DIR}/bwa:${PATH}
+export LD_LIBRARY_PATH=${SV_DIR}/bwa:/SGE/ogs/lib/linux-x64:${LD_LIBRARY_PATH}
+
+mx="-Xmx4g"
+classpath="${SV_DIR}/lib/SVToolkit.jar:${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar:${SV_DIR}/lib/gatk/Queue.jar"
+
+# Run genotyping on the discovered sites.
+LC_ALL=C java -cp ${classpath} ${mx} \
+    org.broadinstitute.gatk.queue.QCommandLine \
+    -S ${SV_DIR}/qscript/SVGenotyper.q \
+    -S ${SV_DIR}/qscript/SVQScript.q \
+    -gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \
+    --disableJobReport \
+    -cp ${classpath} \
+    -jobProject Capri \
+    -jobRunner Drmaa \
+    -gatkJobRunner Drmaa \
+    -jobQueue workq \
+    -jobNative "-l mem=24G -l h_vmem=24G -pe parallel_smp $ncpus" \
+    -parallelJobs $ncpus \
+    -configFile ${SV_DIR}/conf/genstrip_parameters.txt \
+    -tempDir ${SV_TMPDIR} \
+    -R ${reference_prefix}.fasta \
+    -genderMaskBedFile  ${reference_prefix}.gendermask.bed \
+    -genomeMaskFile     ${reference_prefix}.svmask.fasta \
+    -genderMapFile      ${gendermap} \
+    -runDirectory ${runDir} \
+    -md ${metadata} \
+    -disableGATKTraversal \
+    -jobLogDir ${runDir}/logs \
+    -L ${chrom} \
+    -I ${inputFile} \
+    -vcf ${sites} \
+    -O ${genotypes} \
+    -run \
+    || exit 1
+
+#bgzip -f ${genotypes}
+#tabix -f -p vcf ${genotypes}.gz
+#mv ${genotypes} ${SV_TMPDIR}/patchgenomestripvcf
+#if [[ ${genotypes: -4} != ".gz" ]]; then
+#  bcftools view -O z ${SV_TMPDIR}/patchgenomestripvcf > ${genotypes}
+#else
+#  bcftools view ${SV_TMPDIR}/patchgenomestripvcf > ${genotypes}
+#fi
+
+rm -fr ${SV_TMPDIR}
+
+echo "Script SVGenotyping completed successfully at "`date +%c`
--- a/snakecnv/bin/merge.py
+++ b/snakecnv/bin/merge.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Usage:
+Snakemake script
+
+"""
+
+import os
+import tempfile
+import shutil
+from subprocess import call
+
+from svrunner_utils import addContigInfosTovcf
+
+
+def concatenate(vcf_files, outputfile, reference, mergedir, chrom, svtype):
+        tempdir = tempfile.mkdtemp(dir=mergedir)
+        vcfFileslist = tempdir + "/" + "vcffile.list"
+        with open(vcfFileslist, "w") as outfh:
+            for gfile in vcf_files:
+                vcf_sample_dropped_file = tempdir + "/" + os.path.basename(gfile)
+                cmd = "bcftools view --drop-genotypes " + gfile + \
+                    " |  vcf-sort -c | bgzip -c >" + vcf_sample_dropped_file
+                cmd += " && tabix -p vcf " + vcf_sample_dropped_file
+                call(cmd, shell=True)
+                outfh.write(vcf_sample_dropped_file + "\n")
+        temp_outputfile = tempdir + "/" + "merge.vcf"
+        cmd = "bcftools concat -f " + vcfFileslist + \
+              " -a | vcf-sort -c  > " + temp_outputfile
+        call(cmd, shell=True)
+
+        # drop format info from header
+        temp_outputfile_newheader = tempdir + "/" + "merge_newheader.vcf"
+        cmd = "bcftools view -h " + temp_outputfile + \
+            " | grep -v '^##FORMAT' > " + tempdir + "/newheader ;"
+        cmd += " bcftools reheader -h " + tempdir + "/newheader " + \
+            temp_outputfile + " > " + temp_outputfile_newheader
+        call(cmd, shell=True)
+
+        # Correct contig infos here using bcftools annotate
+        addContigInfosTovcf(temp_outputfile_newheader, outputfile, reference)
+        shutil.rmtree(tempdir)
+
+
+def merging(inputfiles, outputfile, reference, outdir, chrom, svtype):
+    print("\n".join(["\t".join(inputfiles), outputfile, outdir, chrom, svtype]))
+
+    concatenate(inputfiles, outputfile, outdir, reference,
+                chrom, svtype)
+
+merging(snakemake.input,
+        snakemake.output[0],
+        snakemake.params['outdir'],
+        snakemake.params['reference'],
+        snakemake.wildcards['svtype'],
+        snakemake.wildcards['svtype']
+        )
--- a/snakecnv/bin/parse.py
+++ b/snakecnv/bin/parse.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Usage:
+Snakemake script
+
+"""
+
+import os
+
+from pysam import Fastafile, tabix_index
+
+from svrunner_utils import eprint
+from svrunner_utils import get_contigs, sorting, eprint
+
+from svreader.genomestrip import GenomeSTRIPReader, GenomeSTRIPWriter
+from svreader.delly import DellyReader, DellyWriter
+from svreader.lumpy import LumpyReader, LumpyWriter
+from svreader.pindel import PindelReader, PindelWriter
+
+tool_to_reader = {"pindel": PindelReader, "delly": DellyReader,
+                  "lumpy": LumpyReader,  "genomestrip": GenomeSTRIPReader}
+tool_to_writer = {"pindel": PindelWriter, "delly": DellyWriter,
+                  "lumpy": LumpyWriter, "genomestrip": GenomeSTRIPWriter}
+
+
+def convert_svtool_to_vcf(reference, inputfile, outputfile,
+                          toolname, svtype,
+                          minlen=100, maxlen=5000000,
+                          max_overlap=0.4, doFilter=False,
+                          filterbed=None):
+
+    # The reference handle
+    reference_handle = Fastafile(reference) if reference else None
+    reference_contigs = get_contigs(reference)
+
+    SVReader = tool_to_reader[toolname](inputfile,
+                                        reference_handle=reference_handle,
+                                        svs_to_report=[svtype])
+    SVWriter = tool_to_writer[toolname](outputfile,
+                                        reference_contigs,
+                                        SVReader)
+
+    # The records
+    eprint("Now reading the records")
+    records = []
+    for record in SVReader:
+        records.append(record)
+    eprint("%d records" % (len(records)))
+
+    # Merging records (necessary for delly inversions)
+    records = SVReader.bnd_merge(svtype, records)
+
+    # Now filtering the records
+    if doFilter and 0:
+        eprint("Now filtering")
+        records = SVReader.filtering(records, max_overlap, minlen, maxlen,
+                                     filterbed)
+
+    # Sorting the vcf records
+    records = sorting(records, reference_contigs)
+
+    eprint("Now writing the records")
+    for record in records:
+        SVWriter.write(record)
+    SVWriter.close()
+    eprint("%d records" % (len(records)))
+
+    tabix_index(outputfile, force=True, preset='vcf')
+
+
+def parsing(reference, tooloutput, parseoutput, tool, svtype):
+    print("\n".join([reference, tooloutput, parseoutput, tool, svtype]))
+
+    convert_svtool_to_vcf(reference, tooloutput, parseoutput,
+                          tool, svtype)
+
+parsing(snakemake.input['reference'],
+        snakemake.input['tooloutput'],
+        snakemake.output['parseoutput'],
+        snakemake.wildcards['tool'],
+        snakemake.wildcards['svtype'])
--- a/snakecnv/bin/svtyper.sh
+++ b/snakecnv/bin/svtyper.sh
+#!/bin/bash
+
+# see http://redsymbol.net/articles/unofficial-bash-strict-mode/
+set -euo pipefail
+
+echo "---"
+echo "Running svtyper.sh $1 $2 $3"
+echo "Starting "`date +%Y-%m-%d`" at "`date +%H:%M:%S`
+echo "---"
+
+svtyper=svtyper
+
+root=/home/faraut/work/CapriSV/src/SVPipeline
+export PATH=$root/bin:$PATH
+PYTHONPATH=""
+
+myname=`basename "$0"`
+
+function usage() {
+    echo "
+Program: lumpy wrapper
+usage:   $myname  [options] <bamlist> <input> <outprefix> 
+
+positional args:
+        bamlist    a file with the bamfiles
+        input      the input vcf file (gzipped or not)
+        outprefix  prefix of the output file
+        ncpus      the number of cpus, optional [1]
+"
+}
+
+get_bams( ) {
+    bams=""
+    for bamfile in $@; do
+       bams=$bams$bamfile","
+    done
+    bams=${bams%?}
+    echo $bams
+}
+
+function join_by { 
+    local IFS="$1"; 
+    shift; 
+    echo "$*"; }
+
+bamlist=$1
+input=$2
+outprefix=$3
+ncpus=${4:-1}   # If parameter is unset or null, the expansion of word is substituted (see https://www.gnu.org/software/bash/manual/bashref.html#Shell-Parameter-Expansion)
+
+if [[ -z ${bamlist} || -z ${input} || -z ${outprefix} ]]
+then 
+   usage
+   echo "At least one argument is missing"
+   exit 1
+fi
+
+mapfile -t bamarray < ${bamlist}
+bams=`join_by ',' "${bamarray[@]}"`
+
+origdir=`pwd`
+tempdir=`mktemp -d -p .`
+cd $tempdir 
+echo "Working in $tempdir"
+
+# genotype the Structural variants (and parallelize the job)
+zless ${input} | head -n 10000 |  grep "^#" > header 
+zless ${input} | grep -v "^#"  |  split -l 100 - temp_
+for i in temp_*;
+do cat header $i >  $i.raw.vcf
+rm -f $i;done 
+ls *.raw.vcf | parallel --no-notice -P ${ncpus} ${svtyper} -i {} -B ${bams} '>' {.}.output
+rm -f lumpy_temp_*.raw.vcf
+vcf-concat *.raw.output | vcf-sort > ${outprefix}.vcf 
+
+bgzip -f ${outprefix}.vcf
+tabix -f -p vcf ${outprefix}.vcf.gz
+
+cd $origdir 
+rm -fr $tempdir 
+
+echo "Script lumpy.sh completed successfully "`date +%Y-%m-%d`" at "`date +%H:%M:%S`
+
--- a/snakecnv/lib/svreader/genomestrip.py
+++ b/snakecnv/lib/svreader/genomestrip.py
@@ -116,7 +116,7 @@ class GenomeSTRIPRecord(SVRecord):
        super(GenomeSTRIPRecord, self).__init__(genomestrip_name,
                                                record.chrom,
                                                record.pos,
-                                                record.info['END'],
+                                                record.stop,
                                                record.id,
                                                ref=record.ref,
                                                qual=record.qual,

--- a/snakecnv/lib/svreader/lumpy.py
+++ b/snakecnv/lib/svreader/lumpy.py
@@ -68,7 +68,7 @@ class LumpyRecord(SVRecord):
        super(LumpyRecord, self).__init__(lumpy_name,
                                          record.chrom,
                                          record.pos,
-                                          record.info['END'],
+                                          record.stop,
                                          record.id,
                                          ref=record.ref,
                                          qual=record.qual,
@@ -107,7 +107,7 @@ class LumpyRecord(SVRecord):
        else:
            return 0

-    def variantSample(self, sample):
+    def variantSampleOld(self, sample):
        if (max(self.sv[sample]["AS"]) > 0 or
            max(self.sv[sample]["AP"]) or
            max(self.sv[sample]["ASC"]) > 0):
@@ -115,6 +115,13 @@ class LumpyRecord(SVRecord):
        else:
            return False

+    def variantSample(self, sample):
+        if self.sv[sample]["SU"] > 0:
+            return True
+        else:
+            return False
+
+
    def variantSamples(self):
        variant_samples = []
        for sample in self.sv:

--- a/snakecnv/lib/svsnakemake_utils.py
+++ b/snakecnv/lib/svsnakemake_utils.py

+import os
 import sys

-from pysam import Fastafile, tabix_index
-from svrunner_utils import get_contigs, sorting, eprint
-
-from svreader.genomestrip import GenomeSTRIPReader, GenomeSTRIPWriter
-from svreader.delly import DellyReader, DellyWriter
-from svreader.lumpy import LumpyReader, LumpyWriter
-from svreader.pindel import PindelReader, PindelWriter
-
-
-tool_to_reader = {"pindel": PindelReader, "delly": DellyReader,
-                  "lumpy": LumpyReader,  "genomestrip": GenomeSTRIPReader}
-tool_to_writer = {"pindel": PindelWriter, "delly": DellyWriter,
-                  "lumpy": LumpyWriter, "genomestrip": GenomeSTRIPWriter}
-

 def get_tool_prefix(tool, chrom, svtype):
    common_prefix = "_".join([tool, chrom])
@@ -24,13 +11,13 @@ def get_tool_prefix(tool, chrom, svtype):
        tool_suffix = get_lumpy_suffix(svtype)
    elif tool == "delly":
        tool_suffix = get_delly_suffix(svtype)
-    else:
-        tool_suffix = svtype + "_" + ".vcf.gz"
-    return "_".join([common_prefix, tool_suffix])
+    elif tool == "genomestrip":
+        tool_suffix = get_genomestrip_suffix(svtype)
+    return common_prefix + tool_suffix


 def get_delly_suffix(svtype):
-    return svtype + "_" + ".bcf"
+    return "_" + svtype + ".bcf"


 def get_lumpy_suffix(svtype):
@@ -53,57 +40,11 @@ def get_pindel_suffix(svtype):
    if svtype not in SV_TYPE_TO_PINDEL:
        raise KeyError('Unsupported variant type')
    pindel_type = SV_TYPE_TO_PINDEL[svtype]
-    outfile = pindel_type + ".gz"
+    outfile = "_" + pindel_type + ".gz"
    return outfile


 def getToolOuputFile(wildcards):
    tool_prefix = get_tool_prefix(wildcards.tool,
                                  wildcards.chrom, wildcards.svtype)
-    return os.path.join(wildcards.tool, wildcards.chrom, tool_prefix)
-
-
-def convert_svtool_to_vcf(self, inputfile, outputfile,
-                          toolname, svtype, params,
-                          minlen=100, maxlen=5000000,
-                          max_overlap=0.4, doFilter=False):
-
-    reference = self._Param('reference')
-
-    # The reference handle
-    reference_handle = Fastafile(reference) if reference else None
-    reference_contigs = get_contigs(reference)
-
-    SVReader = tool_to_reader[toolname](inputfile,
-                                        reference_handle=reference_handle,
-                                        svs_to_report=[svtype])
-    SVWriter = tool_to_writer[toolname](outputfile,
-                                        reference_contigs,
-                                        SVReader)
-
-    # The records
-    eprint("Now reading the records")
-    records = []
-    for record in SVReader:
-        records.append(record)
-    eprint("%d records" % (len(records)))
-
-    # Merging records (necessary for delly inversions)
-    records = SVReader.bnd_merge(svtype, records)
-
-    # Now filtering the records
-    if doFilter:
-        eprint("Now filtering")
-        records = SVReader.filtering(records, max_overlap, minlen, maxlen,
-                                     params["filterBed"])
-
-    # Sorting the vcf records
-    records = sorting(records, reference_contigs)
-
-    eprint("Now writing the records")
-    for record in records:
-        SVWriter.write(record)
-    SVWriter.close()
-    eprint("%d records" % (len(records)))
-
-    tabix_index(outputfile, force=True, preset='vcf')
+    return {'tooloutput': os.path.join(wildcards.tool, wildcards.chrom, tool_prefix)}
--- a/snakecnv/soft/lumpy/bin/lumpyexpress.config
+++ b/snakecnv/soft/lumpy/bin/lumpyexpress.config
-LUMPY_HOME=/home/faraut/dynawork/CapriSV/pipeline/capricnv/snakewf/playground/cnvdetection/soft/lumpy
+LUMPY_HOME=/home/faraut/dynawork/CapriSV/pipeline/cnvpipelines/snakecnv/soft/lumpy/lumpy

 LUMPY=$LUMPY_HOME/bin/lumpy
 SAMBLASTER=samblaster

--- a/snakecnv/tools/cnvpipeline.snk
+++ b/snakecnv/tools/cnvpipeline.snk
+
+
+def intervallist(reference, chrom, output):
+    chrom_length = getChromLength(reference, chrom)
+    chrlistFh = open(output, "w")
+    chrlistFh.write(chrom + ":" + str(1) + "-" + str(chrom_length) + "\n")
+    chrlistFh.close()
+
+rule intervallist:
+    input:
+        reference = REFERENCE
+    output:
+        intervallist = "cnvpipeline/{chrom}/{chrom}.list"
+    run:
+        intervallist(input.reference, wildcards.chrom, output.intervallist)
+
+rule cnvpipeline:
+    input:
+        bams = expand("data/bams/{{chrom}}/{sample}.bam", sample=samples),
+        bamlist = "genomestrip/bamlist.list",
+        gendermap = "genomestrip/gendermap.txt",
+        metadatadone = "genomestrip/metadata/sample_gender.report.txt",
+        genome = REFERENCE
+    params:
+        metadata = "genomestrip/metadata"
+    output:
+        touch("genomestrip/{chrom}/done")
+    threads: 2
+    shell:
+        "genomestrip.sh {input.bamlist} {wildcards.chrom} {input.genome}"
+        " {params.metadata} {input.gendermap} genomestrip/{wildcards.chrom}"
+        " genomestrip_{wildcards.chrom}"
--- a/snakecnv/tools/delly.snk
+++ b/snakecnv/tools/delly.snk
@@ -5,18 +5,18 @@ rule delly:
        genome = REFERENCE
    output:
        touch("delly/{chrom}/done"),
-        expand("delly/{{chrom}}/cnv_{{chrom}}_{svtype}.bcf", svtype=['INV', 'DEL', 'DUP']),
-        expand("delly/{{chrom}}/cnv_{{chrom}}_{svtype}_raw.bcf", svtype=['INV', 'DEL', 'DUP'])
+        expand("delly/{{chrom}}/delly_{{chrom}}_{svtype}.bcf", svtype=['INV', 'DEL', 'DUP']),
+        expand("delly/{{chrom}}/delly_{{chrom}}_{svtype}_raw.bcf", svtype=['INV', 'DEL', 'DUP'])
    threads: 2
    shell:
        """
        for svtype in INV DEL DUP;
        do
        delly call -g {input.genome} -t ${{svtype}} \
-                   -o delly/{wildcards.chrom}/cnv_{wildcards.chrom}_${{svtype}}_raw.bcf \
+                   -o delly/{wildcards.chrom}/delly_{wildcards.chrom}_${{svtype}}_raw.bcf \
                   {input.bams}
        delly filter -f germline  -m 50 -g {input.genome} -t ${{svtype}} \
-                     -o delly/{wildcards.chrom}/cnv_{wildcards.chrom}_${{svtype}}.bcf \
-                     delly/{wildcards.chrom}/cnv_{wildcards.chrom}_${{svtype}}_raw.bcf
+                     -o delly/{wildcards.chrom}/delly_{wildcards.chrom}_${{svtype}}.bcf \
+                     delly/{wildcards.chrom}/delly_{wildcards.chrom}_${{svtype}}_raw.bcf
        done
        """
--- a/snakecnv/tools/genomestrip.snk
+++ b/snakecnv/tools/genomestrip.snk
@@ -40,12 +40,14 @@ rule genomestrip:
        bams = expand("data/bams/{{chrom}}/{sample}.bam", sample=samples),
        bamlist = "genomestrip/bamlist.list",
        gendermap = "genomestrip/gendermap.txt",
-        metadata = "genomestrip/metadata",
+        metadatadone = "genomestrip/metadata/sample_gender.report.txt",
        genome = REFERENCE
+    params:
+        metadata = "genomestrip/metadata"
    output:
        touch("genomestrip/{chrom}/done")
    threads: 2
    shell:
        "genomestrip.sh {input.bamlist} {wildcards.chrom} {input.genome}"
-        " {input.metadata} {input.gendermap} genomestrip/{wildcards.chrom}"
+        " {params.metadata} {input.gendermap} genomestrip/{wildcards.chrom}"
        " genomestrip_{wildcards.chrom}"
--- a/snakecnv/tools/genotyping.snk
+++ b/snakecnv/tools/genotyping.snk
+
+
+rule svtyping:
+    input:
+        sites = "merge/{chrom}/DEL/merge_{chrom}_DEL.vcf.gz",
+        bamlist = "genomestrip/bamlist.list",
+        genome = REFERENCE
+    output:
+        vcf = "genotypes/{chrom}/DEL/svtyper_{chrom}_DEL_genotypes.vcf.gz"
+    params:
+        outputprefix = "genotypes/{chrom}/DEL/svtyper_{chrom}_DEL_genotypes"
+    threads: 4
+    shell:
+        "svtyper.sh {input.bamlist} {input.sites} {params.outputprefix}"
+        " {threads}"
+
+
+rule genotyping:
+    input:
+        sites = "merge/{chrom}/DEL/merge_{chrom}_DEL.vcf.gz",
+        bamlist = "genomestrip/bamlist.list",
+        gendermap = "genomestrip/gendermap.txt",
+        metadatadone = "genomestrip/metadata/sample_gender.report.txt",
+        genome = REFERENCE
+    params:
+        metadata = "genomestrip/metadata"
+    output:
+        vcf = "genotypes/{chrom}/DEL/genomestrip_{chrom}_DEL_genotypes.vcf.gz"
+    threads: 2
+    shell:
+        "gstrip_genotyper.sh {input.bamlist} {wildcards.chrom} {input.genome}"
+        " {params.metadata} genomestrip/{wildcards.chrom} {input.gendermap} "
+        " {input.sites} {output.vcf}"
--- a/snakecnv/tools/lumpy.snk
+++ b/snakecnv/tools/lumpy.snk
@@ -16,6 +16,7 @@ rule exclude:
            printf "%s\t%s\t%s" {wildcards.chrom} "0" "1" > {output.excluded}
        fi
        """
+        
 rule lumpy:
    input:
        bams = expand("data/bams/{{chrom}}/{sample}.bam", sample=samples),

--- a/snakecnv/tools/modules.snk
+++ b/snakecnv/tools/modules.snk
@@ -4,3 +4,5 @@ include: "delly.snk"
 include: "lumpy.snk"
 include: "pindel.snk"
 include: "genomestrip.snk"
+include: "genotyping.snk"
+include: "cnvpipeline.snk"