Commit 7814bd15 authored by Maria Bernard's avatar Maria Bernard

No commit message

No commit message
parent 0590d883
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
import numpy as np
from subprocess import Popen, PIPE
from ng6.analysis import Analysis
from weaver.function import PythonFunction
def wrap_clone_filter(exec_path, out_dir, input_read1, input_read2, output_read1, output_read2, stderr_path):
    """
    Run clone_filter on one pair of read files, log its stderr and gzip
    the filtered reads into the requested output paths.

    @param exec_path : path to the clone_filter executable
    @param out_dir : directory handed to clone_filter with -o
    @param input_read1 : path to the reads 1 file (declared as gzfastq)
    @param input_read2 : path to the reads 2 file (declared as gzfastq)
    @param output_read1 : path receiving the gzip-compressed filtered reads 1
    @param output_read2 : path receiving the gzip-compressed filtered reads 2
    @param stderr_path : path of the file receiving clone_filter's stderr
    """
    # Imports are kept inside the function, as in the original code;
    # presumably PythonFunction runs it outside this module - TODO confirm.
    import gzip
    import os
    import shutil
    from subprocess import Popen, PIPE

    # clone_filter ("-i" declares the input type, "-y" the output type)
    cmd = [exec_path, "-1", input_read1, "-2", input_read2, "-o", out_dir,
           "-i", "gzfastq", "-y", "fastq"]
    p = Popen(cmd, stderr=PIPE)
    stderr = p.communicate()[1]
    # write down the stderr; binary mode because communicate() hands back
    # raw bytes on Python 3 (and a plain str on Python 2)
    with open(stderr_path, "wb") as stdeh:
        stdeh.write(stderr)

    # rename and compress
    # NOTE(review): assumes clone_filter writes its results in out_dir as
    # "<input basename up to the first dot>.fil.fq_1" / ".fil.fq_2", i.e. the
    # '{basename_woext}.fil.fq_N' pattern this code used before - confirm
    # against the installed clone_filter version.
    pairs = (
        (input_read1, ".fil.fq_1", output_read1),
        (input_read2, ".fil.fq_2", output_read2),
    )
    for input_read, suffix, output_read in pairs:
        prefix = os.path.basename(input_read).split(".")[0]
        tmp_read = os.path.join(out_dir, prefix + suffix)
        # A missing temporary file (e.g. clone_filter failed) raises here,
        # so the failure is not silently ignored.
        with open(tmp_read, "rb") as src:
            with gzip.open(output_read, "wb") as dst:
                shutil.copyfileobj(src, dst)
        os.remove(tmp_read)
class CloneFilter(Analysis):
    """
    Analysis running clone_filter on paired fastq files and reporting the
    percentage of read pairs it discarded.
    """

    def define_parameters(self, read1_files, read2_files):
        """
        @param read1_files : paths to reads 1
        @param read2_files : paths to reads 2
        """
        self.add_input_file_list("read1_files", "paths to reads 1", default=read1_files, required=True)
        self.add_input_file_list("read2_files", "paths to reads 2", default=read2_files, required=True)
        if len(self.read1_files) != len(self.read2_files):
            raise Exception("[ERROR] : the number of files for read 1 and read 2 are not equal. Please check your inputs")
        self.add_output_file_list("out_read1_files", "uniq read1 files", pattern='{basename_woext}_cloneFil.fq', items=self.read1_files)
        self.add_output_file_list("out_read2_files", "uniq read2 files", pattern='{basename_woext}_cloneFil.fq', items=self.read2_files)
        # one log file per pair, named after the reads 1 file
        self.add_output_file_list("stderrs", "clone_filter_log files", pattern='{basename_woext}.stderrs', items=self.read1_files)

    def define_analysis(self):
        """
        Set the name, description and software of this analysis.
        """
        self.name = "clone_filter"
        self.description = "remove duplicate from paired fastq files "
        self.software = "clone_filter"

    def get_version(self):
        """
        @return : the second whitespace-separated token that
                  "clone_filter --version" prints on stderr
        """
        cmd = [self.get_exec_path("clone_filter"), "--version"]
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        return stderr.split()[1]

    def process(self):
        """
        Register one wrap_clone_filter call per pair of read files.
        """
        exec_path = self.get_exec_path("clone_filter")
        clone_filter = PythonFunction(wrap_clone_filter, cmd_format='{EXE} {ARG} {IN} {OUT}')
        jobs = zip(self.read1_files, self.read2_files,
                   self.out_read1_files, self.out_read2_files, self.stderrs)
        for read1, read2, out_read1, out_read2, stderr in jobs:
            # arguments, inputs and outputs are listed in the order of
            # wrap_clone_filter's parameters
            clone_filter(arguments=[exec_path, self.output_directory],
                         inputs=[read1, read2],
                         outputs=[out_read1, out_read2, stderr])

    def post_process(self):
        """
        Compute, for each pair of files, the percentage of read pairs
        discarded by clone_filter and record it as a result element.
        """
        for read1, stderr in zip(self.read1_files, self.stderrs):
            nb_before = self._count_fastq_reads(read1)
            nb_dup = self._parse_discarded_pairs(stderr)
            # an empty input file has no duplicates; avoids dividing by zero
            percent_dup = round(nb_dup * 100.0 / nb_before, 2) if nb_before else 0.0
            # NOTE(review): every pair is stored under the same
            # "clone_filter" / "percent_dup" key - confirm whether a
            # per-sample key is expected by _add_result_element.
            self._add_result_element("clone_filter", "percent_dup", percent_dup)

    @staticmethod
    def _count_fastq_reads(path):
        """
        @param path : path to a fastq file, gzip-compressed or not
        @return : the number of lines of the file divided by 4
        """
        import gzip
        # detect gzip from its magic number rather than from the file name
        with open(path, "rb") as probe:
            is_gzip = probe.read(2) == b"\x1f\x8b"
        handle = gzip.open(path, "rb") if is_gzip else open(path, "rb")
        try:
            # stream the file instead of loading every line in memory
            return sum(1 for _ in handle) // 4
        finally:
            handle.close()

    @staticmethod
    def _parse_discarded_pairs(stderr_path):
        """
        @param stderr_path : path to a clone_filter stderr log
        @return : the 12th whitespace-separated token of the last line of
                  the log, as an integer (presumably the number of
                  discarded pairs - TODO confirm against clone_filter output)
        """
        with open(stderr_path, "r") as std:
            last_line = std.readlines()[-1]
        return int(last_line.split()[11])
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment