Skip to content
Snippets Groups Projects
Commit 18c31896 authored by Thomas Faraut's avatar Thomas Faraut
Browse files

new filters for annotation

parent c1c226a5
No related branches found
No related tags found
No related merge requests found
......@@ -10,12 +10,26 @@ from svfilter import GenomeSTRIPLikefiltering
from svfilter import AnnotateSupport
def add_metadata(reader):
reader.addInfo("SOURCEID", 1, "String",
"The source sv identifier")
reader.addFilter("LOWSUPPORT", "total supp reads < 5 or supp samples < 2")
def setNewId(record, identifier):
record.record.info['SOURCEID'] = record.id
record.id = id
record.id = identifier
def FilteringBySupport(SVSet, minsupp_reads=5, min_supp_sampes=2):
for sv in SVSet:
info = sv.record.info
if (info["SUPP_READS"] < minsupp_reads or
info["NUM_SUPP_SAMPLES"] < min_supp_sampes):
sv.filter.add("LOWSUPPORT")
def annotate(inputfile, outputfile, genotyper, add_infos=True,
def annotate(inputfile, outputfile, genotyper, chrom, add_infos=True,
overlap_cutoff=0.5):
""" Filtering the candidate CNVs according to the following criteria
- non duplicate sites
......@@ -32,20 +46,22 @@ def annotate(inputfile, outputfile, genotyper, add_infos=True,
eprint(" Reading file %s" % (inputfile))
reader = VCFReader(inputfile, "merge")
# Adding metadata : INFO and FILTER
add_metadata(reader, add_infos)
add_metadata(reader)
num = 1
for record in reader:
setNewId(record)
ident = "_".join(["svpipeline_", chrom, record.svtype, str(num)])
setNewId(record, ident)
SVSet.append(record)
num += 1
eprint("Working with " + str(len(SVSet)) + " records")
# Support annotation
AnnotateSupport(SVSet)
# PASS and "." will be now marked PASS
UnifiedPassAnnotation(SVSet)
# Support annotation
AnnotateSupport(SVSet, reader)
# Redundancy annotation
GenomeSTRIPLikeRedundancyAnnotator(SVSet, reader, genotyper=genotyper)
......@@ -53,6 +69,9 @@ def annotate(inputfile, outputfile, genotyper, add_infos=True,
if reader.numSamples() > 0:
GenomeSTRIPLikefiltering(SVSet, reader)
# Now supporting info filtering
FilteringBySupport(SVSet)
VCFout = VCFWriter(outputfile, reader)
for record in sorted(SVSet, key=lambda k: k.start):
VCFout.write(record)
......
......@@ -505,7 +505,10 @@ class MergeBatches(Detection):
def get_all_outputs(self, wildcards):
outputs = ["Summarized_results.html"]
outputs += expand("filtered/{svtype}/svtyper_{chrom}_{svtype}_genotypes_filtered.vcf.gz",
#outputs += expand("filtered/{svtype}/svtyper_{chrom}_{svtype}_genotypes_filtered.vcf.gz",
# chrom=self.chromosomes,
# svtype=[x for x in self.varianttypes if x != "mCNV"])
outputs += expand("annotated/{svtype}/svtyper_{chrom}_{svtype}_final.vcf.gz",
chrom=self.chromosomes,
svtype=[x for x in self.varianttypes if x != "mCNV"])
return outputs
......
......@@ -185,7 +185,7 @@ def get_root_path():
rule summary:
input:
expand("filtered/{svtype}/svtyper_{chrom}_{svtype}_genotypes_filtered.vcf.gz",
expand("annotated/{svtype}/svtyper_{chrom}_{svtype}_final.vcf.gz",
chrom=cmd.chromosomes, svtype=variant_types)
params:
tpl = os.path.join(get_root_path(), "full.tpl"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment