Commit 1da0ee8f authored by Penom Nom's avatar Penom Nom
Browse files

bedify: add translocation for delly/pindel/breakdancer

Analyzebed: 
        improved versions / options / descriptions
        cleaned post-process
        Jvenn by type
        docstrings
        added "unique" function
Minor changes in the other files
parent 1d021861
......@@ -13,60 +13,73 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import re
import tempfile
from io import BytesIO
from subprocess import Popen,PIPE
from jflow.workflow import Workflow
from ng6.ng6workflow import NG6Workflow
#TODO reference -> force fasta
# NOTE(review): this chunk is a unified-diff view with the removed and added
# lines interleaved and their +/- markers stripped; duplicated statements below
# are the before/after pair of a single change. Source indentation was also
# lost in the capture -- do not run as-is.
class SVDetection (NG6Workflow):
# Workflow entry point: detect structural variations (SV) in whole genomes by
# running several callers (Pindel, CNVnator, Delly, Breakdancer), standardising
# their outputs to BED and feeding them to the Analyzebed analysis.
def get_description(self):
# Short description displayed by the ng6/jflow workflow browser.
return "This pipeline aims to detect structural variations in whole genomes."
def define_parameters(self, function="process"):
# Declare the user-facing workflow parameters.
self.add_input_file("reference_genome", "On which genome should the reads being aligned on", required=True) # , file_format="fasta"
# NOTE(review): "reference_genome" is registered twice above/below -- the
# removed vs added version of the same declaration in the diff.
self.add_parameter("chrom", "Studied chromosome (do not forget the chr prefix if there is one)", default="", type=str)
# NOTE(review): this commit drops "chrom" from the downstream components, so
# the declaration above appears to be a removed line -- TODO confirm.
#TODO reference -> force fasta
self.add_input_file("reference_genome", "On which genome should the reads being aligned on", required=True)
self.add_parameter("bin_size", "Bin size for cnvnator: Depends on the deep of coverage. X5:500, x10:250, x20-30:100, x100:30", default=100, type=int)
self.add_parameter("mean_insert", "mean insert size (PINDEL only)", type=int, required=True) #TODO use sample insert
self.add_parameter("min_sup", "Minimum number of read supporting a SV (3) [Not available for cnvnator]", type=int, default=3)
self.add_parameter("min_len", "Minimum length of a variant (50)", type=int, default=50)
self.add_parameter("max_len", "Maximum length of a variant (8 500 000)", type=int, default=8500000) #TODO: max calculed % chr size
#TODO in future: minmapq -> just after alignment
self.add_parameter("min_mapq", "min. paired-end mapping quality", type=int, default=30)
self.add_parameter("overlap_percent", "Minimum threshold of overlap percent between 2 SV", type=float, default=0.7)
self.add_parameter("li_range", "Range of localisation interval around breakpoints", type=int, default=1000)
#TODO: in future minmapq -> just after the alignment
#TODO: dynamic softwares choice ?
def process(self):
# parameters
# Human-readable HTML summary of the filter/merge parameters; forwarded to
# the Analyzebed analysis so it can be displayed next to the results.
options = "Filter options:<br/>\
- min size {0}<br/>\
- max size {1}<br/>\
- min supporting read : {2}<br/>\
- min mapping quality : {6}<br/>\
CNVnator options:<br/>\
- bin size {3}<br/>\
Merge parameters:<br/>\
- overlap percent threshold: {4}<br/>\
- localisation interval range: {5}<br/>\
The rest is default.".format(self.min_len, self.max_len, self.min_sup, self.bin_size, self.overlap_percent, self.li_range, self.min_mapq)
# One BAM and one name per sample, taken from the ng6 sample sheet.
input_bam = [spl.reads1[0] for spl in self.samples]
indiv_list = [spl.name for spl in self.samples]
results = []
# NOTE(review): "tata"/"toto" below are hard-coded debug fixtures (sample
# letters and absolute BAM paths on the author's machine); they must not
# ship in production code.
tata = ["H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W"]
toto = "/home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KH.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KI.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KJ.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KK.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KL.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KM.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KN.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KO.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KP.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KQ.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KR.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KS.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KT.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KU.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KV.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KW.rmdup.realn.bam"
# Only the Pindel branch is active; the other callers are commented out.
# delly = self.add_component("Delly", [input_bam, self.reference_genome])
# breakdancer = self.add_component("Breakdancer", [input_bam, self.reference_genome, self.chrom, self.min_sup, self.min_mapq, self.max_len])
pindel = self.add_component("Pindel", kwargs={"input_bam":input_bam,"reference_genome": self.reference_genome,
"output_file":"pindel.concat.out", "chrom":self.chrom, "mean_insert":self.mean_insert,
"min_sup":self.min_sup, "indiv_list":indiv_list})
# cnvnator = self.add_component("CnvNator", [input_bam, self.reference_genome, self.chrom, self.bin_size])
# #=====TEST PURPOSE=====
# NOTE(review): input_f is an ACTIVE assignment to hard-coded test files on
# the author's machine -- leftover debug code, should be removed.
input_f = ["/home/yguarr/DataTest/example_cnvnator_raw.txt","/home/yguarr/DataTest/example2_cnvnator_raw.txt","/home/yguarr/DataTest/example3_cnvnator_raw.txt"]
# indiv_list = ["SAMPLE1","SAMPLE2","SAMPLE3"]
# #=====TEST PURPOSE=====
# breakdancer = self.add_component("Breakdancer", [input_bam, self.reference_genome, self.min_sup, self.min_mapq, self.max_len])
# pindel = self.add_component("Pindel", kwargs={"input_bam":input_bam,"reference_genome": self.reference_genome,
# "output_file":"pindel.concat.out", "mean_insert":self.mean_insert,
# "min_sup":self.min_sup, "indiv_list":indiv_list})
# cnvnator = self.add_component("CnvNator", [input_bam, self.reference_genome, self.bin_size])
# Standardisation converts each caller's native output into a common BED.
# delly = self.add_component("Standardisation", [delly.output_file, "delly", "delly.bed", self.min_len, self.max_len, indiv_list])
pindel = self.add_component("Standardisation", [pindel.output_file, "pindel", "pindel.bed", self.min_len, self.max_len, indiv_list])
# pindel = self.add_component("Standardisation", [pindel.output_file, "pindel", "pindel.bed", self.min_len, self.max_len, indiv_list])
# breakdancer = self.add_component("Standardisation", [breakdancer.output_file, "breakdancer", "breakdancer.bed", self.min_len, self.max_len, indiv_list, input_bam])
cnvnator = self.add_component("Standardisation", [input_f, "cnvnator", "cnvnator.bed", self.min_len, self.max_len, indiv_list])
# cnvnator = self.add_component("Standardisation", [input_f, "cnvnator", "cnvnator.bed", self.min_len, self.max_len, indiv_list])
# #=====TEST PURPOSE=====
# input_f = ["/home/yguarr/DataTest/example_cnvnator_raw.txt","/home/yguarr/DataTest/example2_cnvnator_raw.txt","/home/yguarr/DataTest/example3_cnvnator_raw.txt"]
# indiv_list = ["SAMPLE1","SAMPLE2","SAMPLE3"]
# delly = self.add_component("Standardisation", ["/home/yguarr/DataTest/translocation/eg_tra_delly", "delly", "delly.bed", self.min_len, self.max_len, tata])
# pindel = self.add_component("Standardisation", ["/home/yguarr/DataTest/translocation/pindel_INT", "pindel_translocation", "pindel.bed", self.min_len, self.max_len, indiv_list])
# breakdancer = self.add_component("Standardisation", ["/home/yguarr/DataTest/translocation/bd_tra_multi_sample", "breakdancer", "breakdancer.bed", self.min_len, self.max_len, tata, toto])
# results=[delly.output_file, pindel.output_file, cnvnator.output_file, breakdancer.output_file]
# results=[delly.output_file, pindel.output_file, breakdancer.output_file]
results=[pindel.output_file,cnvnator.output_file]
# results = ["/home/yguarr/delly.bed", "/home/yguarr/breakdancer.bed", "/home/yguarr/cnvnator.bed", "/home/yguarr/pindel.bed"]
# results=[pindel.output_file,cnvnator.output_file]
# results = ["/home/yguarr/DataTest/sort_epi_dly_test.bed", "/home/yguarr/DataTest/sort_epi_bd_test.bed",
# "/home/yguarr/DataTest/sort_epi_cnv_test.bed", "/home/yguarr/DataTest/sort_epi_pdl_test.bed"]
# results = [cnvnator.output_file, pindel.output_file]
# NOTE(review): the active arg_list below matches the OLD Analyzebed
# signature (files, bin, sup, min, max, p, li, i_list); the commented pair
# underneath matches the new (files, p, li, i_list, options) signature
# introduced by this commit -- looks like removed/added diff lines. Confirm
# which one is current before running.
arg_list = [results, self.bin_size, self.min_sup, self.min_len, self.max_len, self.overlap_percent, self.li_range, ["pindel", "cnvnator"]]
self.add_component(component_name="Analyzebed", addto="project", args=arg_list)
# #=====TEST PURPOSE=====
# arg_list = [results, self.overlap_percent, self.li_range, ["dly", "BD","CNV","Pdl"], options]
# self.add_component(component_name="Analyzebed", addto="project", args=arg_list)
def post_process(self):
# No workflow-level post-processing; the Analyzebed component does it all.
pass
......@@ -21,80 +21,77 @@ from collections import Counter
import os.path
import numpy
import itertools
import subprocess
import re
class Analyzebed (Analysis):
# NOTE(review): the two "def" lines below are the removed/added pair of this
# commit's diff (markers stripped); the second signature is the current one.
# The old parameters bin/sup/min/max also shadow the builtins bin/min/max.
def define_parameters(self, files, bin, sup, min, max, p, li, i_list):
def define_parameters(self, files, p, li, i_list, options):
# Declare inputs, outputs and parameters of the Analyzebed analysis.
self.add_input_file_list( "input_files", "The files to be archived.", default=files, required=True )
# One symlink per input BED, created by process().
self.add_output_file_list( "output_links", "Link to the files.", pattern='link2_{basename}' , items=self.input_files )
# Single BED merging the SV calls of every software (see process()).
self.add_output_file( "software_merge", "TODO", filename="software_merge.bed")
# NOTE(review): the four parameters below reference bin/sup/min/max from the
# removed signature -- they appear to be the deleted lines of the diff.
self.add_parameter("bin_size", "Bin size for cnvnator", default=bin, type=int)
self.add_parameter("min_sup", "Minimum number of read supporting a SV (3) [Not available for cnvnator]", type=int, default=sup)
self.add_parameter("min_len", "Minimum length of a variant (50)", type=int, default=min)
self.add_parameter("max_len", "Maximum length of a variant (8 500 000)", type=int, default=max) #TODO: max calculed % chr size
self.add_parameter("min_mapq", "min. paired-end mapping quality", type=int, default=30)
self.add_parameter("overlap_percent", "Minimum threshold of overlap percent between 2 SV", type=float, default=p)
self.add_parameter("li_range", "Range of localisation interval around breakpoints", type=int, default=li)
# Pre-formatted HTML summary of the workflow parameters, shown as-is in the
# analysis options (see define_analysis()).
self.add_parameter("param_sum", "Summary of used parameters", type=str, default=options)
self.add_parameter_list("indiv_list", "String list of studied individuals name", default=i_list, type=str, required=True)
def get_version(self):
# NOTE(review): the hard-coded return below is the removed line of the diff;
# with both retained, everything after it is dead code. The current
# implementation asks each caller binary for its version on the command line.
return "0.2.5b5 / 0.3.2 / 0.6.7 / 1.4.5"
# Scrape each tool's version string from its help/usage output.
pindel_help = subprocess.Popen(["pindel","-h"], universal_newlines=True, stdout=subprocess.PIPE)
pindel_version = re.search("Pindel version ([0-9\.a-b]+),", pindel_help.stdout.read())
# NOTE(review): "[0-9\.a-b]" is the character range a-b (only 'a'/'b'
# letters match, e.g. "0.2.5b5") -- presumably intentional, but confirm.
cnvnator_help = subprocess.Popen(["cnvnator"], universal_newlines=True, stderr=subprocess.PIPE)
cnvnator_version = re.search("CNVnator v([0-9\.]+)", cnvnator_help.stderr.read())
delly_help = subprocess.Popen(["delly"], universal_newlines=True, stdout=subprocess.PIPE)
delly_version = re.search("Delly \(Version: ([0-9\.]+)\)", delly_help.stdout.read())
breakdancer_help = subprocess.Popen(["breakdancer-max"], universal_newlines=True, stderr=subprocess.PIPE)
breakdancer_version = re.search("breakdancer-max version ([0-9\.]+)", breakdancer_help.stderr.read())
# NOTE(review): if any regex fails to match, .groups() on None raises
# AttributeError; the Popen children are also never wait()ed on.
versions = "Pindel: {0} / CNVnator: {1} / Delly: {2} / Breakdancer: {3}".format(pindel_version.groups()[0],
cnvnator_version.groups()[0], delly_version.groups()[0], breakdancer_version.groups()[0])
return versions
def define_analysis(self):
# Set the metadata (name, description, options, software list) shown for
# this analysis in the ng6 interface.
# NOTE(review): diff residue -- the option-building block below and the
# "Analyzebed" name are the removed lines; the current version simply reuses
# the pre-formatted param_sum string and the "SV Detection" name.
self.options = []
if self.bin_size:
self.options.append("--bin-size {0}".format(self.bin_size))
if self.min_len:
self.options.append("--min-len {0}".format(self.min_len))
if self.max_len:
self.options.append("--max-len {0}".format(self.max_len))
if self.bin_size:
# NOTE(review): copy-paste bug in this (removed) branch -- it tests and
# formats bin_size for the --min-mapq option instead of min_mapq.
self.options.append("--min-mapq {0}".format(self.bin_size))
self.options = " ".join(self.options)
self.name = "Analyzebed"
self.options = self.param_sum
self.name = "SV Detection"
self.description = "A few stats on SV repartition"
self.software = "Pindel, CNVnator, Delly, Breakdancer"
def post_process(self):
groups = ["ALL"]
for bedfile in self.output_links:
#GET NAMES
filename = os.path.basename(bedfile)
sample = os.path.splitext(filename)[0]
soft = sample.split("_",1)[1] #soft name
sizes_by_type = {} #Dict containing sizes by sv types
sizes_list = [] # all sv sizes
for i, bedfile in enumerate(self.output_links):
sample = os.path.splitext(os.path.basename(bedfile))[0]
soft = self.indiv_list[i]
sizes_list = []
sizes_by_type = {}
n_list = []
n_by_type = {}
n_list = [] # for each sv, number of members
#PARSE
#PARSE FILE
with open(bedfile) as result_file:
for line in result_file:
sv = line.split()
#sizes
size = int(sv[2])-int(sv[1])
if sv[4] in sizes_by_type:
sizes_by_type[sv[4]].append(size)
else:
sizes_by_type[sv[4]] = [size]
sizes_list.append(size)
#members
sv_type = sv[4]
members = sv[7].split(";")
n = len(members)
if sv[4] in n_by_type:
n_by_type[sv[4]].append(n)
else:
n_by_type[sv[4]] = [n]
sizes_list.append(size)
n_list.append(n)
if sv_type in sizes_by_type:
sizes_by_type[sv_type].append(size)
n_by_type[sv_type].append(n)
else:
sizes_by_type[sv_type] = [size]
n_by_type[sv_type] = [n]
#GENERATE DB INFOS
sizes_by_type["ALL"] = sizes_list
n_by_type["ALL"] = n_list
for type in sizes_by_type:
if type not in groups:
groups.append(type)
sizes = numpy.array(sizes_by_type[type])
boxplot = self.boxplot(sizes)
accu = self.accumulation(n_by_type[type])
database = {
'boxplot':boxplot[0],
db_entry = {'boxplot':boxplot[0],
'outliers':boxplot[1],
'n_tag':accu[0],
'n_count':accu[1],
......@@ -106,33 +103,60 @@ class Analyzebed (Analysis):
'median':int(numpy.median(sizes))
}
self._add_result_element(sample, "soft", soft, type)
for key in database:
self._add_result_element(sample, key, database[key], type)
venn_table={}
for key in db_entry:
self._add_result_element(sample, key, db_entry[key], type)
#CREATE VENN TABLE
venn_table = {}
for key in self.powerset(self.indiv_list):
if key == ():
venn_table["all"] = 0
else:
venn_table[";".join(key)] = 0
venn_table_by_type = {}
with open(self.software_merge) as merge_list:
for line in merge_list:
used_software = []
for column in line.split()[6].split(";"):
svr = line.split()
svr_type = svr[4]
for column in svr[6].split(";"):
used_software.append("".join(column.split("-")[:-1]))
for key in self.powerset(used_software):
for key in self.powerset(self.unique(used_software)):
if key == ():
venn_table["all"] += 1
else:
venn_table[";".join(key)] += 1
db_venn = "".join([y for y in str(venn_table)[1:-1] if y not in ["\'", " "]])
print(db_venn)
self._add_result_element("software_merge", "venn_count", db_venn)
self._create_and_archive(self.output_links, "sv_list_by_soft.tar")
# self._create_and_archive(self.software_merge, "merge_sv_list.tar")
if svr_type not in venn_table_by_type:
for key in self.powerset(self.indiv_list):
if key == ():
venn_table_by_type[svr_type] = {"all":0}
else:
venn_table_by_type[svr_type][";".join(key)] = 0
for key in self.powerset(self.unique(used_software)):
if key == ():
venn_table_by_type[svr_type]["all"] += 1
else:
venn_table_by_type[svr_type][";".join(key)] += 1
venn_table_by_type['ALL'] = venn_table
#GENERATE DB INFOS
for type in venn_table_by_type:
v_table = venn_table_by_type[type]
db_venn = "".join([y for y in str(v_table)[1:-1] if y not in ["\'", " "]])
self._add_result_element("software_merge", "venn_count", db_venn, type)
result_files = [self.output_links, self.software_merge]
self._create_and_archive(self.output_links, None) #TODO pass result_files
def process(self):
link = ShellFunction(self.get_exec_path("ln") + " -s $1 $2", cmd_format='{EXE} {IN} {OUT}')
Map(link, inputs=self.input_files, outputs=self.output_links)
# MERGE
mrg = ShellFunction(self.get_exec_path("merge_sv.py") + \
" --output $1 \
--tag $2\
......@@ -144,35 +168,89 @@ class Analyzebed (Analysis):
mrg(outputs=self.software_merge, inputs=self.input_files, arguments=["soft_merge", self.overlap_percent, self.li_range, self.indiv_list])
def js_prepross(self, my_list):
    """
    Serialize *my_list* into one semicolon-separated string, to ease data
    transfer and comprehension for javascript via jflow & php.

    :Example: self.js_prepross(['rock', 'scissor', 'paper'])
    >>> "rock;scissor;paper"
    """
    return ";".join(str(item) for item in my_list)
def venn_diag(self, keys, venn_table):
    """
    Increment the Venn counters of *venn_table* for every subset of *keys*.

    The empty subset is counted under the "all" entry; any other subset is
    counted under its ";"-joined label. *venn_table* is mutated in place and
    must already contain every label.
    """
    for subset in self.powerset(keys):
        label = "all" if subset == () else ";".join(subset)
        venn_table[label] += 1
def accumulation(self, all_n):
"""
Count how many SV are present in at least 1..N individuals.

:param all_n: for each SV, the number of individuals supporting it.
:return: [n values joined by ";", cumulative counts joined by ";"]
:Example: self.accumulation([1,1,1,1,2,3,3])
>>> ["1;2;3", "7;3;2"]
"""
# NOTE(review): previous docstring example gave "4;1;2" (the per-n counts);
# the code actually returns the cumulative "at least n" counts "7;3;2".
n_count = []
all_n = Counter(all_n)
# NOTE(review): the two identical "for" lines below are the removed/added
# pair of the diff (the first comment was originally in French).
for i,n in enumerate(all_n): # improbable case where we would not have a value for some n --> bug
for i,n in enumerate(all_n): # Rare bug if one n is missing in 1..N --> bug in GUI
tmp = 0
# Cumulative count of SVs supported by at least i+1 individuals; Counter
# returns 0 for missing keys, so the sum itself cannot raise -- only the
# tag/count alignment breaks when the keys are not exactly 1..N.
for j in list(reversed(range(i+1, len(all_n)+1))):
tmp = tmp + all_n[j]
n_count.append(tmp)
# NOTE(review): the two n_tag lines below are the removed pair; the third is
# the current one, tagging with the actual Counter keys (insertion order,
# not sorted -- may misalign with the cumulative counts, which assume keys
# 1..N; TODO confirm).
n_tag = range(len(all_n)+1)[1:]
n_tag = self.js_prepross(n_tag)
n_tag = self.js_prepross(all_n.keys())
n_count = self.js_prepross(n_count)
return([n_tag, n_count])
def boxplot(self, sizes):
    """
    Give the values needed for drawing a boxplot of SV sizes.

    :param sizes: numpy array of SV sizes (boolean-mask indexing is used,
        so a plain Python list will not work).
    :return: ["lower whisker;lower quartile;median;upper quartile;upper whisker",
        number of outliers]
    :Example: self.boxplot(numpy.array([13799,12699,101,...]))
    >>> ["101;509;1099;5349;12299", 4]
    """
    # Boxplot five-number summary.
    median = numpy.median(sizes)
    upper_quartile = numpy.percentile(sizes, 75)
    lower_quartile = numpy.percentile(sizes, 25)
    iqr = upper_quartile - lower_quartile
    # Whiskers: most extreme observations within 1.5*IQR of the quartiles.
    upper_whisker = sizes[sizes <= upper_quartile + 1.5 * iqr].max()
    lower_whisker = sizes[sizes >= lower_quartile - 1.5 * iqr].min()
    # Same ";"-joined format as js_prepross(), inlined here.
    boxplot = ";".join(map(str, [lower_whisker, lower_quartile, median, upper_quartile, upper_whisker]))
    # Outliers lie beyond the whiskers on either side.
    # FIX: numpy.append returns a NEW array (it never mutates in place); the
    # original discarded its result, so low-side outliers were never counted.
    outliers = sizes[sizes > upper_whisker]
    outliers = numpy.append(outliers, sizes[sizes < lower_whisker])
    return([boxplot, len(outliers)])
def unique(self, seq, idfun=None):
    """
    Return the elements of *seq* with duplicates removed, keeping the first
    occurrence of each (order of appearance intact).

    unique([1,2,2,3,27,1,2,4]) --> [1,2,3,27,4]

    :param idfun: optional key function deciding which elements count as
        duplicates (identity by default).
    """
    # found on peterbe.com
    key = idfun if idfun is not None else (lambda x: x)
    seen = set()
    out = []
    for element in seq:
        marker = key(element)
        if marker not in seen:
            seen.add(marker)
            out.append(element)
    return out
def powerset(self, iterable):
    """
    Yield every possible sub-set of *iterable* as a tuple, from the empty
    tuple up to the full set.

    :Example: powerset("abc")
    >>> (''),('a'),('b'),('c'),('a','b'),('a','c'),('b','c'),('a','b','c')
    """
    elements = list(iterable)
    for size in range(len(elements) + 1):
        for combo in itertools.combinations(elements, size):
            yield combo
......@@ -117,6 +117,16 @@ name_list = sys.argv[8:]
elif soft == "delly":
pos_list = [0,1,2,6,6,6,6]
support_by_library = sv[10:]
elif soft == "pindel_translocation":
# pos_list = [0,2,4,6,8,10,12] # int_final
pos_list = [0,3,4,6,7,9,11] # int
call_id, chr1, start1, chr2, start2, seq, sup = [ sv[p] for p in pos_list ]
if len(seq)-2:
size = str(len(seq)-2)
else:
size = "?"
new_sv = [chr1, start1, chr2, start2, call_id, size, sup] # TODo add formate support
return new_sv
call_id, chromosome, start, end, sv_type, length, support_total = [ sv[p] for p in pos_list ]
if soft == "delly":
results = re.search("TYPE=([A-Z]+).*END=([0-9]+).*PE=([0-9]+)(?:.*?SR=|)([0-9]+)?", sv[8]) # field INFOS
......@@ -132,8 +142,23 @@ name_list = sys.argv[8:]
sv_type = "DEL"
elif sv_type in ["duplication", "DUP", "TDUP", "TD"]:
sv_type = "DUP"
elif sv_type in ["ITX", "CTX", "TRA"]:
elif sv_type in ["ITX", "CTX", "TRA"]: #TODO: report interK + append results
if soft =="delly":
results = re.search("CHR2=(\w+)(?:.*?CONSENSUS=|)([ATCG]+)?", sv[8])
try :
chr2, consensus = results.groups()
size = str(len(consensus))
except TypeError:
chr2 = results.groups()[0]
size="?"
else:
print("bad regex")
elif soft == "breakdancer":
chr2 = sv[4]
size = "?"
sv_type = "TRA"
new_sv = [chromosome, str(start), chr2, str(end), call_id, sv_type, size, str(support_total), support_line]
return new_sv
elif sv_type in ["I"]:
sv_type = "INS"
elif sv_type in ["INV"]:
......@@ -197,7 +222,10 @@ def main(SV_file_path, good_path, soft, tag, len_min, len_max, indiv_list, alt_n
iD = tag + "-" + str(c)
sv.insert(0, iD)
new_sv = parse_sv(sv, soft, indiv_list, alt_names)
if filter_out(new_sv, len_min, len_max):
if soft == "pindel_translocation":
new_sv = "\t".join(new_sv) + "\n"
good_output_file.write(new_sv)
elif new_sv[5] != "TRA" and filter_out(new_sv, len_min, len_max):
pass
else:
new_sv = "\t".join(new_sv) + "\n"
......
......@@ -43,7 +43,7 @@ def main(input_f, out, tag, len_min, len_max):
results = re.match("([a-z]+)\s(?:chr)?([0-9A-Z\.]+):([0-9]+)-([0-9]+)\s([0-9e\.\+]+)\s([0-9.]+)", line)
if results:
sv_type, chromosome, start, end, length, support = results.groups()
new_sv = [chromosome, start, end, call_id, sv_type, length, support]
new_sv = [chromosome, start, end, call_id, sv_type, length, "/", support]
if filter_out(new_sv, len_min, len_max):
pass
else:
......
......@@ -22,11 +22,9 @@ class Breakdancer (Component):
"""
:note negative distance not considered
"""
def define_parameters(self, input_bam, reference_genome, chrom, min_sup, min_mapq, max_len):
def define_parameters(self, input_bam, reference_genome, min_sup, min_mapq, max_len):
self.add_input_file_list("input_bam", "One bam file by individual/condition", default=input_bam, file_format="bam", required=True)
# self.add_output_file("breakdancer_cfg", "Breakdancer configuration file", filename="breakdancer.cfg")
self.add_output_file("output_file", "Breakdancer output file", filename="breakdancer.ctx")
self.add_parameter("chrom", "operate on a single chromosome", type=str, default=chrom)
self.add_parameter("min_sup", "Minimum number of read supporting a SV", type=int, default=min_sup)
self.add_parameter("max_len", "Maximum length of a variant (8 500 000)", type=int, default=max_len)
self.add_parameter("min_mapq", "min. paired-end mapping quality", type=int, default=min_mapq)
......@@ -36,9 +34,6 @@ class Breakdancer (Component):
cfg_file_path = self.get_temporary_file(".bd.cfg")
bam2cfg = ShellFunction(self.get_exec_path("bam2cfg.pl") + " -q $2 " + utils.get_argument_pattern(self.input_bam, 3)[0] + " > $1 ", cmd_format='{EXE} {OUT} {ARG} {IN}')
bam2cfg(outputs=cfg_file_path, arguments=1, inputs=self.input_bam) #TODO change 1
# mod chromosome parameter
if self.chrom != "":
self.chrom = " -o {0}".format(self.chrom)
# command
breakdancer = ShellFunction(self.get_exec_path("breakdancer-max") \
+ " -r {0} -m {1} -q {2} ".format(self.min_sup, self.max_len, self.min_mapq) \
......
......@@ -36,21 +36,14 @@ def cnvnator_func(output, ref, bin_size, bam): # Wrapper which handle complex in
class CnvNator (Component):
def define_parameters(self, input_bam, reference_genome, chrom, bin_size):
def define_parameters(self, input_bam, reference_genome, bin_size):
self.add_input_file_list("input_bam", "One bam file by individual/condition", default=input_bam, file_format="bam", required=True)
self.add_input_file("reference_genome", "On which genome should the reads being aligned on", default=reference_genome, file_format="fasta", required=True)
self.add_output_file_list("output_file", "CNV output file", pattern='{basename}_call', items=input_bam)
self.add_parameter("chrom", "Studied chromosome", default=chrom, type=str)
self.add_parameter("bin_size", "Bin size for cnvnator: Depends on the deep of coverage. X5:500, x10:250, x20-30:100, x100:30", default=bin_size, type=int)
def process(self):
#In case we want to study specific chromosomes
chr=""
if self.chrom != "":
option = self.reference_genome + " -chrom {0} ".format(self.chrom)
else:
option = self.reference_genome
#Real cnvnator run starts
cnvnator = PythonFunction(cnvnator_func, cmd_format="{EXE} {OUT} {ARG} {IN}")
for i,o in zip(self.input_bam, self.output_file):
cnvnator(outputs=o, arguments=[option, self.bin_size], inputs=i)
\ No newline at end of file
cnvnator(outputs=o, arguments=[self.bin_size], inputs=i)
\ No newline at end of file
......@@ -21,7 +21,6 @@ import os.path
class Delly (Component):
def define_parameters(self, input_bam, reference_genome, min_mapq=30):
#TODO chrom exclusion
self.add_input_file_list("input_bam", "One bam file by individual/condition", default=input_bam, file_format="bam", required=True)
self.add_input_file("reference_genome", "On which genome should the reads being aligned on", default=reference_genome, required=True)
self.add_output_file("output_file", "Delly output file", filename="delly.vcf")
......
......@@ -22,19 +22,16 @@ from jflow.component import Component
class Pindel (Component):
def define_parameters(self, input_bam, reference_genome, output_file, chrom, mean_insert, min_sup, indiv_list):
def define_parameters(self, input_bam, reference_genome, output_file, mean_insert, min_sup, indiv_list):
self.add_input_file_list("input_bam", "One bam file by individual/condition", default=input_bam, file_format="bam", required=True)
self.add_input_file("reference_genome", "Animal ref file", default=reference_genome, file_format="fasta", required=True)
self.add_output_file("output_file", "Pindel concatenated output files [D/SI/TD/INV]", filename="pindel.concat.out")
self.add_parameter("chrom", "operate on a single chromosome", type=str, default=chrom)
self.add_parameter("mean_insert", "mean insert size", type=int, default=mean_insert, required=True)
self.add_parameter("min_sup", "Minimum number of read supporting a SV (3) [Not available for cnvnator]", type=int, default=min_sup)
self.add_parameter_list("indiv_list", "String list of studied individuals name", default=indiv_list, type=str, required=True)
#TODO minmapq
def process(self):
if self.chrom == "":
self.chrom = "ALL"
# create configuration file
cfg_file_path = self.get_temporary_file(".pdl.cfg")
with open(cfg_file_path, 'w') as config_file:
......@@ -51,7 +48,6 @@ class Pindel (Component):
pindel = ShellFunction(self.get_exec_path("pindel") + \
" -f " + self.reference_genome + \
" -i $1 " + \
" -c " + self.chrom + \
" -o "+ output_prefix + \
" -k false" + \
" > /dev/null " ,
......
......@@ -35,7 +35,6 @@ class Standardisation (Component):
self.add_parameter_list("indiv_list", "String list of studied individuals name", default=indiv_list, type=str, required=True)
def process(self):
if self.mode != "cnvnator":
#FIX
breakdancer_fix = " ".join(self.input_bam)
......@@ -67,6 +66,7 @@ class Standardisation (Component):
std(inputs=self.input_file[i], outputs=tmp_out[i], arguments=self.indiv_list[i])
if len(self.input_file) > 1:
unsorted_out = self.get_temporary_file(".bed")
#MERGE_SV
mrg = ShellFunction(self.get_exec_path("merge_sv.py") + \
" --output $1 \
--tag $2\
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment