Commit 9891e3ba authored by Penom Nom's avatar Penom Nom
Browse files

Translocations

Hotfix
parent b698df3d
......@@ -52,12 +52,6 @@ class SVDetection (NG6Workflow):
input_bam = [spl.reads1[0] for spl in self.samples]
indiv_list = [spl.name for spl in self.samples]
#=====TEST PURPOSE=====
# results = []
# tata = ["H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W"]
# toto = "/home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KH.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KI.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KJ.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KK.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KL.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KM.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KN.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KO.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KP.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KQ.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KR.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KS.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KT.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KU.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KV.rmdup.realn.bam /home/yguarracino/work/Project_Epigrani.335/Run_Lot1_-_8_IND.5529/Analyse_GATK_preprocess.23378/B00G6KW.rmdup.realn.bam"
#=====TEST PURPOSE=====
delly = self.add_component("Delly", [input_bam, self.reference_genome])
breakdancer = self.add_component("Breakdancer", [input_bam, self.reference_genome, self.min_sup, self.min_mapq, self.max_len])
pindel = self.add_component("Pindel", kwargs={"input_bam":input_bam,"reference_genome": self.reference_genome,
......@@ -66,26 +60,18 @@ class SVDetection (NG6Workflow):
cnvnator = self.add_component("CnvNator", [input_bam, self.reference_genome, self.bin_size])
delly = self.add_component("Standardisation", [delly.output_file, "delly", "delly.bed", self.min_len, self.max_len, indiv_list])
pindel_tra = self.add_component("Standardisation", [pindel.output_file, "pindel_translocation", "pindel.bed", self.min_len, self.max_len, indiv_list])
pindel = self.add_component("Standardisation", [pindel.output_file, "pindel", "pindel.bed", self.min_len, self.max_len, indiv_list])
breakdancer = self.add_component("Standardisation", [breakdancer.output_file, "breakdancer", "breakdancer.bed", self.min_len, self.max_len, indiv_list, input_bam])
cnvnator = self.add_component("Standardisation", [cnvnator.output_file, "cnvnator", "cnvnator.bed", self.min_len, self.max_len, indiv_list])
# #=====TEST PURPOSE=====
# input_f = ["/home/yguarr/DataTest/example_cnvnator_raw.txt","/home/yguarr/DataTest/example2_cnvnator_raw.txt","/home/yguarr/DataTest/example3_cnvnator_raw.txt"]
# indiv_list = ["SAMPLE1","SAMPLE2","SAMPLE3"]
# delly = self.add_component("Standardisation", ["/home/yguarr/DataTest/chr29.delly", "delly", "delly.bed", self.min_len, self.max_len, tata])
# delly = self.add_component("Standardisation", ["/home/yguarr/DataTest/translocation/eg_tra_delly", "delly", "delly.bed", self.min_len, self.max_len, tata])
# pindel = self.add_component("Standardisation", ["/home/yguarr/DataTest/translocation/pindel_INT", "pindel_translocation", "pindel.bed", self.min_len, self.max_len, indiv_list])
# breakdancer = self.add_component("Standardisation", ["/home/yguarr/DataTest/translocation/bd_tra_multi_sample", "breakdancer", "breakdancer.bed", self.min_len, self.max_len, tata, toto])
# results=[delly.output_file, pindel.output_file, breakdancer.output_file]
# results=[pindel.output_file,cnvnator.output_file]
# results = ["/home/yguarr/DataTest/sort_epi_dly_test.bed", "/home/yguarr/DataTest/sort_epi_bd_test.bed",
# "/home/yguarr/DataTest/sort_epi_cnv_test.bed", "/home/yguarr/DataTest/sort_epi_pdl_test.bed"]
# results = [cnvnator.output_file, pindel.output_file]
# #=====TEST PURPOSE===== translocation_file
# #=====TEST PURPOSE=====
results=[delly.output_file, breakdancer.output_file, cnvnator.output_file, pindel.output_file]
arg_list = [results, self.overlap_percent, self.li_range, ["dly", "BD","CNV","Pdl"], options]
translocations = [delly.translocation_file, breakdancer.translocation_file, cnvnator.translocation_file, pindel_tra.translocation_file]
arg_list = [results, self.overlap_percent, self.li_range, ["dly", "BD","CNV","Pdl"], options, translocations]
self.add_component(component_name="Analyzebed", addto="project", args=arg_list)
def post_process(self):
......
......@@ -27,8 +27,9 @@ from types import NoneType
class Analyzebed (Analysis):
def define_parameters(self, files, p, li, i_list, options):
def define_parameters(self, files, p, li, i_list, options, translo):
self.add_input_file_list( "input_files", "The files to be archived.", default=files, required=True )
self.add_input_file_list( "tra_files", "translocations to be archived.", default=translo, required=True )
self.add_output_file_list( "output_links", "Link to the files.", pattern='link2_{basename}' , items=self.input_files )
self.add_output_file( "software_merge", "TODO", filename="software_merge.bed")
self.add_parameter("overlap_percent", "Minimum threshold of overlap percent between 2 SV", type=float, default=p)
......@@ -154,8 +155,8 @@ class Analyzebed (Analysis):
self._add_result_element("software_merge", "venn_count", db_venn, type)
#output files
result_files = [self.output_links, self.software_merge]
self._create_and_archive(self.output_links, None) #TODO pass result_files
result_files = [self.output_links, self.software_merge, self.tra_files]
self._create_and_archive(result_files, "None") #TODO pass result_files
def process(self):
#create links
......
......@@ -167,13 +167,13 @@ name_list = sys.argv[8:]
return new_sv
def filter_out(sv, size_min, size_max):
size = int(sv[5])
size = float(sv[5])
if size < int(size_min) or size > int(size_max):
return True
else:
return False
def main(SV_file_path, good_path, soft, tag, len_min, len_max, indiv_list, alt_names=[]):
def main(SV_file_path, good_path, translocation_file, soft, tag, len_min, len_max, indiv_list, alt_names=[]):
"""
Read a structural variant file generated from a set of individuals and
transform it in a bed-like format.
......@@ -215,21 +215,22 @@ def main(SV_file_path, good_path, soft, tag, len_min, len_max, indiv_list, alt_n
>>> main("/home/sangoku/kamehameha.breakdancer", "kame.bed", "fail.bed", "breakdancer", "DBZ", 50, 9000, ["Krilin","Freezer","Boo"])
"""
support_table = []
with open(SV_file_path, 'r') as SV_list, open(good_path,'w') as good_output_file :
with open(SV_file_path, 'r') as SV_list, open(good_path,'w') as good_output_file, open(translocation_file,'w') as tra_output_file :
if soft == "pindel_translocation":
good_output_file.write("#shouldBeEmpty")
for c,line in enumerate(SV_list):
if line[0] != "#":
sv = line.split()
iD = tag + "-" + str(c)
sv.insert(0, iD)
new_sv = parse_sv(sv, soft, indiv_list, alt_names)
if soft == "pindel_translocation":
new_sv = "\t".join(new_sv) + "\n"
good_sv = parse_sv(sv, soft, indiv_list, alt_names)
new_sv = "\t".join(good_sv) + "\n"
if soft == "pindel_translocation" or good_sv[5] == "TRA":
tra_output_file.write(new_sv)
elif filter_out(new_sv, len_min, len_max):
good_output_file.write(new_sv)
elif new_sv[5] != "TRA" and filter_out(new_sv, len_min, len_max):
pass
else:
new_sv = "\t".join(new_sv) + "\n"
good_output_file.write(new_sv)
pass
# ================================================================
......@@ -237,6 +238,7 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--input_file', help='outfile of pindel/delly/breakdancer to translate to a bedlike format')
parser.add_argument('--output_file', help='bedlike translation')
parser.add_argument('--translocation_file', help='bedlike translation')
parser.add_argument('--software', help='choose : pindel/breakdancer/delly')
parser.add_argument('--tag', help='tag of each structural variant')
parser.add_argument('--min_length', help='min length of a variant', type=int)
......@@ -245,6 +247,6 @@ if __name__ == "__main__":
parser.add_argument('--alt_names', nargs='*', help='list of bam files the input file was processed on (only for break dancer)')
args = parser.parse_args()
if args.alt_names:
main(args.input_file, args.output_file, args.software, args.tag, args.min_length, args.max_length, args.names, args.alt_names)
main(args.input_file, args.output_file, args.translocation_file, args.software, args.tag, args.min_length, args.max_length, args.names, args.alt_names)
else :
main(args.input_file, args.output_file, args.software, args.tag, args.min_length, args.max_length, args.names)
\ No newline at end of file
main(args.input_file, args.output_file, args.translocation_file, args.software, args.tag, args.min_length, args.max_length, args.names)
\ No newline at end of file
......@@ -78,6 +78,8 @@ def fuse_sv(tag, p, paths_files, li):
for individu_b in indiv_list[i+1:]:
# and each sv "b", in the same chromosome "k"
# look for a match betwheen "a" and "b"
if chromosome not in individu_b:
continue
matches = a.match_in_list(individu_b[chromosome], p, li)
for b in matches:
# if possible, add "b" to "a" 's groups
......
......@@ -32,13 +32,18 @@ class Breakdancer (Component):
def process(self):
# create configuration file
cfg_file_path = self.get_temporary_file(".bd.cfg")
tmp_normal = self.get_temporary_file(".bd")
tmp_translocation = self.get_temporary_file(".bd")
bam2cfg = ShellFunction(self.get_exec_path("bam2cfg.pl") + " -q $2 " + utils.get_argument_pattern(self.input_bam, 3)[0] + " > $1 ", cmd_format='{EXE} {OUT} {ARG} {IN}')
bam2cfg(outputs=cfg_file_path, arguments=1, inputs=self.input_bam) #TODO change 1
# command
# same as breakdancer but for translocation
breakdancer = ShellFunction(self.get_exec_path("breakdancer-max") \
+ " -r {0} -m {1} -q {2} ".format(self.min_sup, self.max_len, self.min_mapq) \
+ " $1 > $2;"\
+ " if [ ! -e $2 ]; then echo no_read_groups_found > $2; fi",
+ " if [ ! -e $2 ]; then echo #no_read_groups_found > $2; fi",
cmd_format='{EXE} {IN} {OUT}')
# launch
breakdancer(cfg_file_path, self.output_file)
\ No newline at end of file
breakdancer(cfg_file_path, tmp_normal)
breakdancer(" -t "+cfg_file_path, tmp_translocation) # interchrom translo°
concatenate_files = ShellFunction('cat $2 $3 > $1', cmd_format='{EXE} {OUT} {IN}')
concatenate_files(outputs = self.output_file, inputs=[tmp_normal, tmp_translocation])
\ No newline at end of file
......@@ -16,6 +16,7 @@
from weaver.function import ShellFunction
from jflow.component import Component
from jflow import utils as utils
import os.path
class Delly (Component):
......@@ -29,7 +30,9 @@ class Delly (Component):
def process(self):
delly = ShellFunction(self.get_exec_path("delly") \
+ " -g {0} -q {1}".format(self.reference_genome, self.min_mapq) \
+ " -o $1 $2 > /dev/null;" \
+ " -o $1 "\
+ utils.get_argument_pattern(self.input_bam, 2)[0]\
+ " > /dev/null;" \
+ " if [ ! -e $1 ]; then touch $1; fi",
cmd_format='{EXE} {OUT} {IN}')
delly(inputs=self.input_bam, outputs=self.output_file)
......@@ -28,6 +28,8 @@ class Standardisation (Component):
choices=["breakdancer", "cnvnator", "delly", "pindel"], default=mode, required=True)
self.add_output_file("output_file", "Input file after being filtered and translated in an uniform bed-like format",
filename="{0}.bed".format(self.mode))
self.add_output_file("translocation_file", "translocations after being translated in an uniform bed-like format",
filename="trans_{0}.bed".format(self.mode))
self.add_parameter("min_len", "Minimum length of a variant (50bp)", type=int, default=min_len)
self.add_parameter("max_len", "Maximum length of a variant (8 500 000bp)", type=int, default=max_len) #TODO chr %
self.add_parameter("overlap_percent", "Minimum threshold of overlap percent between 2 SV", type=float, default=p)
......@@ -46,13 +48,14 @@ class Standardisation (Component):
std = ShellFunction(self.get_exec_path("bedify.py") + "\
--input_file $1 \
--output_file $2 \
--names " + utils.get_argument_pattern(self.indiv_list, 3)[0] + "\
--translocation_file $3 \
--names " + utils.get_argument_pattern(self.indiv_list, 4)[0] + "\
--tag " + self.mode + "\
--software " + self.mode + "\
--min_length {0} --max_length {1} ".format(self.min_len, self.max_len) + \
breakdancer_fix,
cmd_format='{EXE} {IN} {OUT} {ARG}')
std(inputs=self.input_file, outputs=self.output_file, arguments=self.indiv_list) # Here, the mode is also used as a tag for the SVs (ID prefix)
std(inputs=self.input_file, outputs=[self.output_file, self.translocation_file], arguments=self.indiv_list) # Here, the mode is also used as a tag for the SVs (ID prefix)
else :
tmp_out = [file_path + ".bed" for file_path in self.input_file]
#CNV2BED
......@@ -62,6 +65,7 @@ class Standardisation (Component):
--tag $3 \
--min_length {0} --max_length {1}".format(self.min_len, self.max_len),
cmd_format='{EXE} {IN} {OUT} {ARG}')
sort = ShellFunction(self.get_exec_path("sort") + " -k1,1n -k2,2n $1 > $2", cmd_format='{EXE} {IN} {OUT}')
if len(self.input_file) > 1:
for i in range(len(self.input_file)):
std(inputs=self.input_file[i], outputs=tmp_out[i], arguments=self.indiv_list[i])
......@@ -75,8 +79,8 @@ class Standardisation (Component):
+ " --names" + utils.get_argument_pattern(self.indiv_list, 5)[0] \
+ " --files" + utils.get_argument_pattern(tmp_out, len(tmp_out)+5)[0],
cmd_format='{EXE} {OUT} {ARG} {IN}')
mrg(outputs=unsorted_out, inputs=tmp_out, arguments=[self.mode, self.overlap_percent, self.li_range, self.indiv_list])
sort = ShellFunction(self.get_exec_path("sort") + " -k1,1n -k2,2n $1 > $2", cmd_format='{EXE} {IN} {OUT}')
sort(unsorted_out, self.output_file)
mrg(outputs=unsorted_out, inputs=tmp_out, arguments=[self.mode, self.overlap_percent, self.li_range, self.indiv_list])
sort(unsorted_out, self.output_file)
else:
std(inputs=self.input_file, outputs=self.output_file, arguments=self.indiv_list)
std(inputs=self.input_file, outputs=tmp_out, arguments=self.indiv_list)
sort(tmp_out, self.output_file)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment