Skip to content
Snippets Groups Projects
Commit 919f9b97 authored by Clement Birbes
Browse files

V1.0 Everything working

parent 073666d5
No related branches found
No related tags found
No related merge requests found
......@@ -13,10 +13,12 @@ parser = argparse.ArgumentParser(description='Arrange assembly according to refe
parser.add_argument('--paf', help = "input paf file", required=True)
parser.add_argument('--queryIdx', help = "input query index", required=True)
parser.add_argument('--refIdx', help = "input reference index", required=True)
parser.add_argument('--inputFasta', help = 'input fasta file', required=True)
args = vars(parser.parse_args())
InputPaf = args['paf']
InputQIdx = args['queryIdx']
InputTIdx = args['refIdx']
inputFasta = args['inputFasta']
def uncompress(filename):
"""
......@@ -79,11 +81,10 @@ def build_query_as_reference():
"""
sorter = Sorter(InputPaf, "map.paf")
sorter.sort()
paf_file = os.path.join("map.paf")
idx1 = os.path.join(InputQIdx)
idx2 = os.path.join(InputTIdx)
paf = Paf(paf_file, idx1, idx2)
paf = Paf(paf_file, idx1, idx2, inputFasta)
return paf.build_query_chr_as_reference()
build_query_as_reference()
......@@ -7,72 +7,73 @@ import argparse
parser = argparse.ArgumentParser(description='Create fasta')
parser.add_argument('--Chr', help = "input Chr name", required=True)
parser.add_argument('--Ref', help = "input Ref name", required=True)
parser.add_argument('--assembly1', help = "input Assembly name", required=True)
parser.add_argument('--assembly2', help = "input Assembly name", required=True)
parser.add_argument('--assembly3', help = "input Assembly name", required=True)
parser.add_argument('--assembly4', help = "input Assembly name", required=True)
parser.add_argument('--assembly5', help = "input Assembly name", required=True)
args = vars(parser.parse_args())
InputChr = args['Chr']
InputRef = args['Ref']
path1=args['assembly1']
path2=args['assembly2']
path3=args['assembly3']
path4=args['assembly4']
path5=args['assembly5']
output = open(str(InputChr)+"File.fasta","w")
path1='as_reference_query.fasta.sorted'
path2='2as_reference_query.fasta.sorted'
path3='3as_reference_query.fasta.sorted'
path4='4as_reference_query.fasta.sorted'
path5='5as_reference_query.fasta.sorted'
find = False
for record in SeqIO.parse(InputRef, "fasta"):
if (str(record.id) == str(InputChr)):
output.write(">Reference_"+str(InputChr)+"\n"+str(record.seq)+"\n")
if os.path.exists(path1):
if (str(path1) != "NO_FILE1") and (os.path.exists(path1)):
for record in SeqIO.parse(path1, "fasta"):
if (str(record.id) == str(InputChr)):
output.write(">Assembly1_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">1_"+str(path1)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
find = True
break
if find = False:
output.write(">Assembly1_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if find == False:
output.write(">1_"+str(path1)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if os.path.exists(path2):
if (str(path2) != "NO_FILE2") and (os.path.exists(path2)):
for record in SeqIO.parse(path2, "fasta"):
if (str(record.id) == str(InputChr)):
output.write(">Assembly2_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">2_"+str(path2)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
find = True
break
if find = False:
output.write(">Assembly2_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if find == False:
output.write(">2_"+str(path2)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if os.path.exists(path3):
if (str(path3) != "NO_FILE3") and (os.path.exists(path3)):
for record in SeqIO.parse(path3, "fasta"):
if (str(record.id) == str(InputChr)):
output.write(">Assembly3_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">3_"+str(path3)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
find = True
break
if find = False:
output.write(">Assembly3_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if find == False:
output.write(">3_"+str(path3)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if os.path.exists(path4):
if (str(path4) != "NO_FILE4") and (os.path.exists(path4)):
for record in SeqIO.parse(path4, "fasta"):
if (str(record.id) == str(InputChr)):
output.write(">Assembly4_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">4_"+str(path4)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
find = True
break
if find = False:
output.write(">Assembly4_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if find == False:
output.write(">4_"+str(path4)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if os.path.exists(path5):
if (str(path5) != "NO_FILE5") and (os.path.exists(path5)):
for record in SeqIO.parse(path5, "fasta"):
if (str(record.id) == str(InputChr)):
output.write(">Assembly5_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">5_"+str(path5)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
find = True
break
if find = False:
output.write(">Assembly5_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if find == False:
output.write(">5_"+str(path5)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
#!/usr/bin/env python3
pafIn = open("map.paf", "r")
pafOut = open("mapOut.paf", "w")
pafIn = open("mapIn.paf", "r")
pafOut = open("map.paf", "w")
for lines in pafIn:
line = lines.split("\t")
......@@ -11,7 +11,7 @@ for lines in pafIn:
pafOut.write(lines)
elif (target[0] == "R"):
pafOut.write(line[5]+"\t"+line[6]+"\t"+line[7]+"\t"+line[8]+"\t"+line[4]+"\t"+line[0]+"\t"+line[1]+"\t"+line[2]+"\t"+line[3]+"\t"+line[9]+"\t"+line[10]+"\t"+line[11]+"\t"+line[12]+"\t"+line[13]+"\t"+line[14]+"\t"+line[15])
elif (int(target[8]) < int(query[8])):
elif (int(target[0]) < int(query[0])):
pafOut.write(line[5]+"\t"+line[6]+"\t"+line[7]+"\t"+line[8]+"\t"+line[4]+"\t"+line[0]+"\t"+line[1]+"\t"+line[2]+"\t"+line[3]+"\t"+line[9]+"\t"+line[10]+"\t"+line[11]+"\t"+line[12]+"\t"+line[13]+"\t"+line[14]+"\t"+line[15])
else:
pafOut.write(lines)
......
#!/usr/bin/env python3
import os
import shutil
from math import sqrt
from numpy import mean
from pathlib import Path
......@@ -17,7 +18,7 @@ class Paf:
limit_idy = [0.25, 0.5, 0.75]
max_nb_lines = 100000
def __init__(self, paf: str, idx_q: str, idx_t: str, auto_parse: bool=True):
def __init__(self, paf: str, idx_q: str, idx_t: str, inputFasta: str="False", auto_parse: bool=True):
"""
:param paf: PAF file path
......@@ -29,12 +30,12 @@ class Paf:
:param auto_parse: if True, parse PAF file at initialisation
:type auto_parse: bool
"""
print("paf.py init")
self.paf = paf
self.idx_q = idx_q
self.idx_t = idx_t
self.sorted = False
self.data_dir = os.path.dirname(paf)
self.fasta = inputFasta
if os.path.exists(os.path.join(self.data_dir, ".sorted")):
self.paf += ".sorted"
self.idx_q += ".sorted"
......@@ -58,7 +59,6 @@ class Paf:
self.t_abs_start = {}
if auto_parse:
self.parse_paf()
print("paf.py init end")
@staticmethod
......@@ -203,7 +203,6 @@ class Paf:
try:
with open(self.paf, "r") as paf_file:
print(paf_file)
nb_lines = 0
for line in paf_file:
nb_lines += 1
......@@ -427,10 +426,8 @@ class Paf:
self.parse_paf(False,True)
sorted_file = self.paf + ".sorted"
if not self.sorted: # Do the sort
print("Not self.sorted")
if not self.paf.endswith(".sorted") and not self.idx_q.endswith(".sorted") and \
(not os.path.exists(self.paf + ".sorted") or not os.path.exists(self.idx_q + ".sorted")):
print("if not bla bla bla")
gravity_contig , lines_on_block = self.compute_gravity_contigs()
# For each contig, find best block, and deduce gravity of contig:
......@@ -457,7 +454,6 @@ class Paf:
# Check if contig must be re-oriented:
if len(lines_on_selected_block) > 0:
if not self.is_contig_well_oriented(lines_on_selected_block, contig, max_chr):
print("reorient contigs")
reorient_contigs.append(contig)
# Sort contigs:
......@@ -472,7 +468,6 @@ class Paf:
# Re-orient contigs:
if len(reorient_contigs) > 0:
print("reorient_contigs")
self.reorient_contigs_in_paf(reorient_contigs)
else:
shutil.copyfile(self.paf, sorted_file)
......@@ -486,13 +481,11 @@ class Paf:
self.paf = sorted_file
else: # Undo the sort
print("self.sorted, undo the sort")
self.paf = self.paf.replace(".sorted", "")
self.idx_q = self.idx_q.replace(".sorted", "")
self.set_sorted(False)
# Re parse PAF file:
print("reparse")
self.parsed = False
self.parse_paf(False,True)
......@@ -589,16 +582,12 @@ class Paf:
"""
try:
self.sort()
#self.parse_paf(False, True)
print("test ici")
if not self.sorted:
print("yo1")
raise Exception("Contigs must be sorted to do that!")
query_fasta = "query.fasta"
query_fasta = self.fasta
if not os.path.isfile(query_fasta):
print("yo2")
raise Exception("Query fasta does not exists")
o_fasta = os.path.join(os.path.dirname(query_fasta), "as_reference_" + os.path.basename(query_fasta))
o_fasta = os.path.join(os.path.dirname(query_fasta), "As_ref_" + os.path.basename(query_fasta))
if o_fasta.endswith(".gz"):
o_fasta = o_fasta[:-3]
if not os.path.exists(o_fasta):
......@@ -632,12 +621,8 @@ class Paf:
query_f.close()
if uncompressed:
os.remove(query_fasta)
print("succes")
status = "success"
except Exception as e:
o_fasta = "_._"
print("fail")
print(Exception)
print(e)
status="fail"
return o_fasta
......@@ -13,9 +13,6 @@ parser.add_argument('--Assembly3', help = "assembly fasta file", required=True)
parser.add_argument('--Assembly4', help = "assembly fasta file", required=True)
parser.add_argument('--Assembly5', help = "assembly fasta file", required=True)
args = vars(parser.parse_args())
InputChr = args['Chr']
InputRef = args['Ref']
......@@ -32,12 +29,12 @@ for record in SeqIO.parse(InputRef, "fasta"):
output.write(">Reference_"+str(InputChr)+"\n"+str(record.seq)+"\n")
if str(path1) != 'NO_FILE1':
input = open("map.paf")
input = open("map1.paf")
searching = True
reverse = False
for line in input:
lines = line.strip("\n").split("\t")
if (lines[5] == str(InputChr) and searching):
if (str(InputChr) in lines[5] and searching):
Match = lines[0]
searching = False
if (int(lines[8])-int(line[7]) < 0):
......@@ -47,11 +44,11 @@ if str(path1) != 'NO_FILE1':
Match = 0
for record in SeqIO.parse(path1, "fasta"):
if ((str(record.id) == str(Match)) and reverse):
output.write(str(path1[0:5])+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
if ((str(record.id) == str(Match)) and not reverse):
output.write(str(path1[0:5])+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">1_"+str(path1)+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
elif ((str(record.id) == str(Match)) and not reverse):
output.write(">1_"+str(path1)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
if Match == 0:
output.write(str(path1[0:5])+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
output.write(">1_"+str(path1)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if str(path2) != 'NO_FILE2':
input = open("map2.paf")
......@@ -59,7 +56,7 @@ if str(path2) != 'NO_FILE2':
reverse = False
for line in input:
lines = line.strip("\n").split("\t")
if (lines[5] == str(InputChr) and searching):
if (str(InputChr) in lines[5] and searching):
Match = lines[0]
searching = False
if (int(lines[8])-int(line[7]) < 0):
......@@ -67,13 +64,13 @@ if str(path2) != 'NO_FILE2':
break
else:
Match = 0
for record in SeqIO.parse(path1, "fasta"):
for record in SeqIO.parse(path2, "fasta"):
if ((str(record.id) == str(Match)) and reverse):
output.write(str(path2[0:5])+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
if ((str(record.id) == str(Match)) and not reverse):
output.write(str(path2[0:5])+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">2_"+str(path2)+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
elif ((str(record.id) == str(Match)) and not reverse):
output.write(">2_"+str(path2)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
if Match == 0:
output.write(str(path2[0:5])+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
output.write(">2_"+str(path2)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if str(path3) != 'NO_FILE3':
input = open("map3.paf")
......@@ -81,7 +78,7 @@ if str(path3) != 'NO_FILE3':
reverse = False
for line in input:
lines = line.strip("\n").split("\t")
if (lines[5] == str(InputChr) and searching):
if (str(InputChr) in lines[5] and searching):
Match = lines[0]
searching = False
if (int(lines[8])-int(line[7]) < 0):
......@@ -89,13 +86,13 @@ if str(path3) != 'NO_FILE3':
break
else:
Match = 0
for record in SeqIO.parse(path1, "fasta"):
for record in SeqIO.parse(path3, "fasta"):
if ((str(record.id) == str(Match)) and reverse):
output.write(str(path3[0:5])+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
if ((str(record.id) == str(Match)) and not reverse):
output.write(str(path3[0:5])+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">3_"+str(path3)+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
elif ((str(record.id) == str(Match)) and not reverse):
output.write(">3_"+str(path3)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
if Match == 0:
output.write(str(path3[0:5])+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
output.write(">3_"+str(path3)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if str(path4) != 'NO_FILE4':
input = open("map4.paf")
......@@ -103,7 +100,7 @@ if str(path4) != 'NO_FILE4':
reverse = False
for line in input:
lines = line.strip("\n").split("\t")
if (lines[5] == str(InputChr) and searching):
if (str(InputChr) in lines[5] and searching):
Match = lines[0]
searching = False
if (int(lines[8])-int(line[7]) < 0):
......@@ -111,13 +108,13 @@ if str(path4) != 'NO_FILE4':
break
else:
Match = 0
for record in SeqIO.parse(path1, "fasta"):
for record in SeqIO.parse(path4, "fasta"):
if ((str(record.id) == str(Match)) and reverse):
output.write(str(path4[0:5])+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
if ((str(record.id) == str(Match)) and not reverse):
output.write(str(path4[0:5])+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">4_"+str(path4)+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
elif ((str(record.id) == str(Match)) and not reverse):
output.write(">4_"+str(path4)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
if Match == 0:
output.write(str(path4[0:5])+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
output.write(">4_"+str(path4)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
if str(path5) != 'NO_FILE5':
input = open("map5.paf")
......@@ -125,7 +122,7 @@ if str(path5) != 'NO_FILE5':
reverse = False
for line in input:
lines = line.strip("\n").split("\t")
if (lines[5] == str(InputChr) and searching):
if (str(InputChr) in lines[5] and searching):
Match = lines[0]
searching = False
if (int(lines[8])-int(line[7]) < 0):
......@@ -133,13 +130,13 @@ if str(path5) != 'NO_FILE5':
break
else:
Match = 0
for record in SeqIO.parse(path1, "fasta"):
for record in SeqIO.parse(path5, "fasta"):
if ((str(record.id) == str(Match)) and reverse):
output.write(str(path5[0:5])+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
if ((str(record.id) == str(Match)) and not reverse):
output.write(str(path5[0:5])+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
output.write(">5_"+str(path5)+"_"+str(InputChr)+"\n"+str(record.seq.reverse_complement())+"\n")
elif ((str(record.id) == str(Match)) and not reverse):
output.write(">5_"+str(path5)+"_"+str(InputChr)+"\n"+str(record.seq)+"\n")
if Match == 0:
output.write(str(path5[0:5])+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
output.write(">5_"+str(path5)+"_"+str(InputChr)+"\n"+str("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")+"\n")
input.close()
output.close()
......@@ -136,8 +136,8 @@ process index_files {
script:
"""
index.py -i ${reference} -n reference -o target.idx
index.py -i ${assembly} -n query -o query.idx
index.py -i ${reference} -n ${reference.baseName} -o target.idx
index.py -i ${assembly} -n ${assembly.baseName} -o query_${assembly.simpleName}.idx
"""
}
......@@ -150,13 +150,15 @@ process create_backup {
file map from AlignmentBackup_ch
output:
file 'file.tar' into tar_ch
file 'map.paf' into AlignmentCompare_ch
file '*.tar' into tar_ch
file 'map1.paf' into AlignmentCompare_ch
script:
"""
sort_paf.py -i ${map} -o map.paf
tar -hcvf file.tar ${query} ${target} map.paf
mv query_*.idx query.idx
tar -hcvf ${query.simpleName}.tar query.idx target.idx map.paf
mv map.paf map1.paf
"""
}
// Boucle2 : Obligatoire, aligne assembly/ref, index assembly/ref et fait backup
......@@ -189,8 +191,8 @@ if (params.assembly2){
script:
"""
index.py -i ${reference} -n reference -o target.idx
index.py -i ${assembly} -n query -o query.idx
index.py -i ${reference} -n ${reference.baseName} -o target.idx
index.py -i ${assembly} -n ${assembly.baseName} -o query_${assembly.simpleName}.idx
"""
}
......@@ -203,13 +205,14 @@ if (params.assembly2){
file map from Alignment2Backup_ch
output:
file 'file.tar' into tar2_ch
file '*.tar' into tar2_ch
file 'map2.paf' into Alignment2Compare_ch
script:
"""
sort_paf.py -i ${map} -o map.paf
tar -hcvf file.tar ${query} ${target} map.paf
mv query_*.idx query.idx
tar -hcvf ${query.simpleName}.tar query.idx target.idx map.paf
mv map.paf map2.paf
"""
}
......@@ -245,8 +248,8 @@ if (params.assembly3){
script:
"""
index.py -i ${reference} -n reference -o target.idx
index.py -i ${assembly} -n query -o query.idx
index.py -i ${reference} -n ${reference.baseName} -o target.idx
index.py -i ${assembly} -n ${assembly.baseName} -o query_${assembly.simpleName}.idx
"""
}
......@@ -259,13 +262,14 @@ if (params.assembly3){
file map from Alignment3Backup_ch
output:
file 'file.tar' into tar3_ch
file '*.tar' into tar3_ch
file 'map3.paf' into Alignment3Compare_ch
script:
"""
sort_paf.py -i ${map} -o map.paf
tar -hcvf file.tar ${query} ${target} map.paf
mv query_*.idx query.idx
tar -hcvf ${query.simpleName}.tar query.idx target.idx map.paf
mv map.paf map3.paf
"""
}
......@@ -301,8 +305,8 @@ if (params.assembly4){
script:
"""
index.py -i ${reference} -n reference -o target.idx
index.py -i ${assembly} -n query -o query.idx
index.py -i ${reference} -n ${reference.baseName} -o target.idx
index.py -i ${assembly} -n ${assembly.baseName} -o query_${assembly.simpleName}.idx
"""
}
......@@ -315,13 +319,14 @@ if (params.assembly4){
file map from Alignment4Backup_ch
output:
file 'file.tar' into tar4_ch
file '*.tar' into tar4_ch
file 'map4.paf' into Alignment4Compare_ch
script:
"""
sort_paf.py -i ${map} -o map.paf
tar -hcvf file.tar ${query} ${target} map.paf
mv query_*.idx query.idx
tar -hcvf ${query.simpleName}.tar query.idx target.idx map.paf
mv map.paf map4.paf
"""
}
......@@ -357,8 +362,8 @@ if (params.assembly5){
script:
"""
index.py -i ${reference} -n reference -o target.idx
index.py -i ${assembly} -n query -o query.idx
index.py -i ${reference} -n ${reference.baseName} -o target.idx
index.py -i ${assembly} -n ${assembly.baseName} -o query_${assembly.simpleName}.idx
"""
}
......@@ -371,13 +376,14 @@ if (params.assembly5){
file map from Alignment5Backup_ch
output:
file 'file.tar' into tar5_ch
file '*.tar' into tar5_ch
file 'map5.paf' into Alignment5Compare_ch
script:
"""
sort_paf.py -i ${map} -o map.paf
tar -hcvf file.tar ${query} ${target} map.paf
mv query_*.idx query.idx
tar -hcvf ${query.simpleName}.tar query.idx target.idx map.paf
mv map.paf map5.paf
"""
}
......@@ -392,14 +398,14 @@ if (params.arrange){
input:
set file(query), file(target) from IndexArrange_ch
file map from AlignmentArrange_ch
file "query.fasta" from AssemblyArrange_ch
file fasta from AssemblyArrange_ch
output:
file 'as_reference_query.fasta' into FinalAssembly_ch
file 'As_ref_*' into FinalAssembly_ch
script:
"""
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target}
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target} --inputFasta ${fasta}
"""
}
......@@ -411,15 +417,14 @@ if (params.arrange){
input:
set file(query), file(target) from Index2Arrange_ch
file map from Alignment2Arrange_ch
file "query.fasta" from Assembly2Arrange_ch
file fasta from Assembly2Arrange_ch
output:
file '2as_reference_query.fasta' into Final2Assembly_ch
file 'As_ref_*' into Final2Assembly_ch
script:
"""
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target}
mv as_reference_query.fasta 2as_reference_query.fasta
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target} --inputFasta ${fasta}
"""
}
}
......@@ -432,15 +437,14 @@ if (params.arrange){
input:
set file(query), file(target) from Index3Arrange_ch
file map from Alignment3Arrange_ch
file "query.fasta" from Assembly3Arrange_ch
file fasta from Assembly3Arrange_ch
output:
file '3as_reference_query.fasta' into Final3Assembly_ch
file 'As_ref_*' into Final3Assembly_ch
script:
"""
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target}
mv as_reference_query.fasta 3as_reference_query.fasta
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target} --inputFasta ${fasta}
"""
}
}
......@@ -453,15 +457,14 @@ if (params.arrange){
input:
set file(query), file(target) from Index4Arrange_ch
file map from Alignment4Arrange_ch
file "query.fasta" from Assembly4Arrange_ch
file fasta from Assembly4Arrange_ch
output:
file '4as_reference_query.fasta' into Final4Assembly_ch
file 'As_ref_*' into Final4Assembly_ch
script:
"""
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target}
mv as_reference_query.fasta 4as_reference_query.fasta
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target} --inputFasta ${fasta}
"""
}
}
......@@ -474,15 +477,14 @@ if (params.arrange){
input:
set file(query), file(target) from Index5Arrange_ch
file map from Alignment5Arrange_ch
file "query.fasta" from Assembly5Arrange_ch
file fasta from Assembly5Arrange_ch
output:
file '5as_reference_query.fasta' into Final5Assembly_ch
file 'As_ref_*' into Final5Assembly_ch
script:
"""
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target}
mv as_reference_query.fasta 5as_reference_query.fasta
Arrange.py --paf ${map} --queryIdx ${query} --refIdx ${target} --inputFasta ${fasta}
"""
}
}
......@@ -509,22 +511,16 @@ if (params.compare){
file '*.tar' into ComparedAssembly_ch
script:
def filter2 = assembly2.name != 'NO_FILE2' ? "--filter $assembly2" : ''
def filter3 = assembly3.name != 'NO_FILE3' ? "--filter $assembly3" : ''
def filter4 = assembly4.name != 'NO_FILE5' ? "--filter $assembly4" : ''
def filter5 = assembly5.name != 'NO_FILE5' ? "--filter $assembly5" : ''
"""
CreateChrFasta.py --Chr ${chrName} --Ref ${reference}
CreateChrFasta.py --Chr ${chrName} --Ref ${reference} --assembly1 ${assembly1} --assembly2 ${assembly2} --assembly3 ${assembly3} --assembly4 ${assembly4} --assembly5 ${assembly5}
minimap2 -t $task.cpus -X ${chrName}File.fasta ${chrName}File.fasta > alignment.paf
index.py -i ${chrName}File.fasta -n reference -o target.idx
cp target.idx query.idx
sort_paf.py -i alignment.paf -o map.paf
sort_paf.py -i alignment.paf -o mapIn.paf
InversePaf.py
mv mapOut.paf map.paf
tar -hcvf ${chrName}.tar target.idx query.idx map.paf
"""
}
......@@ -553,11 +549,6 @@ if (params.compare){
file '*.tar' into ComparedAssembly_ch
script:
def filter2 = assembly2.name != 'NO_FILE2' ? "--filter $assembly2" : 'NO'
def filter3 = assembly3.name != 'NO_FILE3' ? "--filter $assembly3" : 'NO'
def filter4 = assembly4.name != 'NO_FILE4' ? "--filter $assembly4" : 'NO'
def filter5 = assembly5.name != 'NO_FILE5' ? "--filter $assembly5" : 'NO'
"""
renameChr.py --Chr ${chrName} --Ref ${reference} --Assembly1 ${assembly1} --Assembly2 ${assembly2} --Assembly3 ${assembly3} --Assembly4 ${assembly4} --Assembly5 ${assembly5}
......@@ -566,9 +557,8 @@ if (params.compare){
index.py -i ${chrName}File.fasta -n reference -o target.idx
cp target.idx query.idx
sort_paf.py -i alignment.paf -o map.paf
sort_paf.py -i alignment.paf -o mapIn.paf
InversePaf.py
mv mapOut.paf map.paf
tar -hcvf ${chrName}.tar target.idx query.idx map.paf
"""
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment