Commit dc0d0bdf authored by Claire Kuchly
Browse files

modif output file

parent b4ac19f8
......@@ -31,20 +31,20 @@ import jflow.seqio as seqio
class Process_radtag (Analysis):
def recover_mate_discards (read1,read2, output_file):
# enregistrement des ID de squences 1
# enregistrement des ID de sequences 1
id_R1=[]
reader = seqio.SequenceReader(read1,fileformat="fastq")
for id, desc, seq, qual in reader :
id_R1.append(id)
# disctionnaire de squences 2 correspondant aux ID de lecures 1
# disctionnaire de sequences 2 correspondant aux ID de lecures 1
dic_R2={}
reader = seqio.SequenceReader(read2,fileformat="fastq")
for id, desc, seq, qual in reader :
if id in id_R1:
dic_R2[id]=desc+"\n"+seq+"\n+\n"+qual+"\n"
# ecriture du fichier fastq de lectures 2 quivalent aux lectures 1
# ecriture du fichier fastq de lectures 2 equivalent aux lectures 1
handle=open(output_file,"w")
string=""
i=0
......@@ -62,13 +62,13 @@ class Process_radtag (Analysis):
handle.close()
def recover_mate_ok (read1,read2,output_file):
# enregistrement des ID de squences 1
# enregistrement des ID de sequences 1
id_R1=[]
reader = seqio.SequenceReader(read1,fileformat="fastq")
for id, desc, seq, qual in reader :
id_R1.append(id[:-2])
# disctionnaire de squences 2 correspondant aux ID des lcures 1
# disctionnaire de sequences 2 correspondant aux ID des lcures 1
dic_R2={}
reader = seqio.SequenceReader(read2,fileformat="fastq")
for id, desc, seq, qual in reader :
......@@ -76,7 +76,7 @@ class Process_radtag (Analysis):
if convert_id in id_R1:
dic_R2[convert_id]=convert_id+"_2\n"+seq+"\n+\n"+qual+"\n"
# ecriture du fichier fastq de lectures 2 quivalent aux lectures 1
# ecriture du fichier fastq de lectures 2 equivalent aux lectures 1
handle=open(output_file,"w")
string=""
i=0
......@@ -128,10 +128,10 @@ Currently supported enzymes include:
self.max_length = max_length
self.archive_name = archive_name
self.prefixes = self.get_outputs('{basename_woext}', [read1_files, read2_files])
self.output_read_1 = OutputFileList(self.get_outputs('{basename_woext}.fq.gz', self.read1_files), Formats.FASTQ)
self.output_read_2 = OutputFileList(self.get_outputs('{basename_woext}.fq.gz', self.read2_files), Formats.FASTQ)
self.discard_read_1 = OutputFileList(self.get_outputs('{basename_woext}.fq.discard.gz', self.read1_files), Formats.FASTQ)
self.discard_read_2 = OutputFileList(self.get_outputs('{basename_woext}.fq.discard.gz', self.read2_files), Formats.FASTQ)
self.output_read_1 = OutputFileList(self.get_outputs('{basename}.gz', self.read1_files), Formats.FASTQ)
self.output_read_2 = OutputFileList(self.get_outputs('{basename}.gz', self.read2_files), Formats.FASTQ)
self.discard_read_1 = OutputFileList(self.get_outputs('{basename}.discard.gz', self.read1_files), Formats.FASTQ)
self.discard_read_2 = OutputFileList(self.get_outputs('{basename}.discard.gz', self.read2_files), Formats.FASTQ)
self.stderrs = OutputFileList(self.get_outputs('{basename_woext}.stderr', self.prefixes))
def define_analysis(self):
......@@ -209,23 +209,24 @@ Currently supported enzymes include:
# Tmp output
# Creates list for temporary uncompressed files
tmp_output_read_1 = self.get_outputs('{basename}', self.read1_files)
tmp_output_read_2 = self.get_outputs('{basename}', self.read2_files)
tmp_discard_read_1 = self.get_outputs('{basename}.discards', self.prefixes)
tmp_discard_read_2 = self.get_outputs('{basename}.discards', self.prefixes)
# Process radtags read1 files
for i in range(0, len(self.prefixes)):
process_radtag = ShellFunction(self.get_exec_path("process_radtags") + " -f $1 " + self.options + " -o " + self.output_directory + " 2> $2 ", cmd_format='{EXE} {IN} {OUT}')
process_radtag(inputs = [self.read1_files[i]], outputs = [self.stderrs[i]])
tmp_output_read_1 = os.path.join(self.output_directory, self.get_outputs('{basename}',self.read1_files)
tmp_output_read_2 = os.path.join(self.output_directory, self.get_outputs('{basename}',self.read2_files)
tmp_discard_read_1 = os.path.join(self.output_directory, self.get_outputs('{basename}.discard',self.read1_files)
tmp_discard_read_2 = os.path.join(self.output_directory, self.get_outputs('{basename}.discard',self.read2_files)
# Process radtags read1 files
for i in range(0, len(self.prefixes)):
process_radtag = ShellFunction(self.get_exec_path("process_radtags") + " -f $1 " + self.options + " -o " + self.output_directory + " 2> $2 ", cmd_format='{EXE} {IN} {OUT}')
process_radtag(inputs = [self.read1_files[i]], outputs = [tmp_output_read_1[i], tmp_discard_read_1[i], self.stderrs[i]])
# Recover_mate and recover_discard
recover_mate = PythonFunction(recover_mate_ok, cmd_format="{EXE} {IN} {OUT}")
recover_mate = Map(recover_mate, inputs = [self.tmp_output_read_1, self.input_read_2], outputs=[self.tmp_output_read_2, self.stderr])
recover_mate = PythonFunction(recover_mate_ok, cmd_format="{EXE} {IN} {OUT}")
recover_mate = Map(recover_mate, inputs = [tmp_output_read_1, self.input_read_2, tmp_output_read_2], outputs=[tmp_output_read_2])
recover_discard = PythonFunction(recover_mate_discard, cmd_format="{EXE} {IN} {OUT}")
recover_discard = Map(recover_discard, inputs = [self.tmp_discard_read_1, self.input_read_2], outputs=[self.tmp_discard_read_2, self.stderr])
recover_discard = PythonFunction(recover_mate_discard, cmd_format="{EXE} {IN} {OUT}")
recover_discard = Map(recover_discard, inputs = [tmp_discard_read_1, self.input_read_2, tmp_discard_read_2], outputs=[tmp_discard_read_2])
# Compress
# Compress
compress = ShellFunction("gzip $1 $2 $3", cmd_format='{EXE} {IN} {OUT}')
compress = MultiMap(compress, inputs = [tmp_output_read_1, tmp_output_read_2, tmp_discard_read_1, tmp_discard_read_2], outputs = [self.out , self.not_combined_read_1, self.not_combined_read_2])
compress = MultiMap(compress, inputs = [tmp_output_read_1, tmp_output_read_2, tmp_discard_read_1, tmp_discard_read_2], outputs = [self.output_read_1 , self.output_read_2, self.discard_read_1, self.discard_read_2])
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment