Skip to content
Snippets Groups Projects
Commit 18c31896 authored by Thomas Faraut's avatar Thomas Faraut
Browse files

new filters for annotation

parent c1c226a5
No related branches found
No related tags found
No related merge requests found
...@@ -10,12 +10,26 @@ from svfilter import GenomeSTRIPLikefiltering ...@@ -10,12 +10,26 @@ from svfilter import GenomeSTRIPLikefiltering
from svfilter import AnnotateSupport from svfilter import AnnotateSupport
def add_metadata(reader):
reader.addInfo("SOURCEID", 1, "String",
"The source sv identifier")
reader.addFilter("LOWSUPPORT", "total supp reads < 5 or supp samples < 2")
def setNewId(record, identifier): def setNewId(record, identifier):
record.record.info['SOURCEID'] = record.id record.record.info['SOURCEID'] = record.id
record.id = id record.id = identifier
def FilteringBySupport(SVSet, minsupp_reads=5, min_supp_sampes=2):
for sv in SVSet:
info = sv.record.info
if (info["SUPP_READS"] < minsupp_reads or
info["NUM_SUPP_SAMPLES"] < min_supp_sampes):
sv.filter.add("LOWSUPPORT")
def annotate(inputfile, outputfile, genotyper, add_infos=True,
def annotate(inputfile, outputfile, genotyper, chrom, add_infos=True,
overlap_cutoff=0.5): overlap_cutoff=0.5):
""" Filtering the candidate CNVs according to the following criteria """ Filtering the candidate CNVs according to the following criteria
- non duplicate sites - non duplicate sites
...@@ -32,20 +46,22 @@ def annotate(inputfile, outputfile, genotyper, add_infos=True, ...@@ -32,20 +46,22 @@ def annotate(inputfile, outputfile, genotyper, add_infos=True,
eprint(" Reading file %s" % (inputfile)) eprint(" Reading file %s" % (inputfile))
reader = VCFReader(inputfile, "merge") reader = VCFReader(inputfile, "merge")
# Adding metadata : INFO and FILTER add_metadata(reader)
add_metadata(reader, add_infos)
num = 1
for record in reader: for record in reader:
setNewId(record) ident = "_".join(["svpipeline_", chrom, record.svtype, str(num)])
setNewId(record, ident)
SVSet.append(record) SVSet.append(record)
num += 1
eprint("Working with " + str(len(SVSet)) + " records") eprint("Working with " + str(len(SVSet)) + " records")
# Support annotation
AnnotateSupport(SVSet)
# PASS and "." will be now marked PASS # PASS and "." will be now marked PASS
UnifiedPassAnnotation(SVSet) UnifiedPassAnnotation(SVSet)
# Support annotation
AnnotateSupport(SVSet, reader)
# Redundancy annotation # Redundancy annotation
GenomeSTRIPLikeRedundancyAnnotator(SVSet, reader, genotyper=genotyper) GenomeSTRIPLikeRedundancyAnnotator(SVSet, reader, genotyper=genotyper)
...@@ -53,6 +69,9 @@ def annotate(inputfile, outputfile, genotyper, add_infos=True, ...@@ -53,6 +69,9 @@ def annotate(inputfile, outputfile, genotyper, add_infos=True,
if reader.numSamples() > 0: if reader.numSamples() > 0:
GenomeSTRIPLikefiltering(SVSet, reader) GenomeSTRIPLikefiltering(SVSet, reader)
# Now supporting info filtering
FilteringBySupport(SVSet)
VCFout = VCFWriter(outputfile, reader) VCFout = VCFWriter(outputfile, reader)
for record in sorted(SVSet, key=lambda k: k.start): for record in sorted(SVSet, key=lambda k: k.start):
VCFout.write(record) VCFout.write(record)
......
...@@ -505,7 +505,10 @@ class MergeBatches(Detection): ...@@ -505,7 +505,10 @@ class MergeBatches(Detection):
def get_all_outputs(self, wildcards): def get_all_outputs(self, wildcards):
outputs = ["Summarized_results.html"] outputs = ["Summarized_results.html"]
outputs += expand("filtered/{svtype}/svtyper_{chrom}_{svtype}_genotypes_filtered.vcf.gz", #outputs += expand("filtered/{svtype}/svtyper_{chrom}_{svtype}_genotypes_filtered.vcf.gz",
# chrom=self.chromosomes,
# svtype=[x for x in self.varianttypes if x != "mCNV"])
outputs += expand("annotated/{svtype}/svtyper_{chrom}_{svtype}_final.vcf.gz",
chrom=self.chromosomes, chrom=self.chromosomes,
svtype=[x for x in self.varianttypes if x != "mCNV"]) svtype=[x for x in self.varianttypes if x != "mCNV"])
return outputs return outputs
......
...@@ -185,7 +185,7 @@ def get_root_path(): ...@@ -185,7 +185,7 @@ def get_root_path():
rule summary: rule summary:
input: input:
expand("filtered/{svtype}/svtyper_{chrom}_{svtype}_genotypes_filtered.vcf.gz", expand("annotated/{svtype}/svtyper_{chrom}_{svtype}_final.vcf.gz",
chrom=cmd.chromosomes, svtype=variant_types) chrom=cmd.chromosomes, svtype=variant_types)
params: params:
tpl = os.path.join(get_root_path(), "full.tpl"), tpl = os.path.join(get_root_path(), "full.tpl"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment