Commit 9ab1765f authored by Penom Nom

No commit message
parent e5bf4dc3
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from subprocess import Popen, PIPE
from ng6.analysis import Analysis
from weaver.function import PythonFunction
from collections import Counter
import numpy as np
import re
def wrap_cstacks(exec_path, mismatch, batch_id, out_dir, inputs_tags_list, output_tags, output_alleles, output_snps, stderr_path):
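    """
    Build and run the cstacks command line.
    inputs_tags_list is a text file listing one ustacks *.tags.tsv path per line; each
    path is reduced to its sample prefix and passed to cstacks with -s. The cstacks
    stderr is captured to stderr_path so it can be parsed later by post_process().
    """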
    import os
    from subprocess import Popen, PIPE
    cmd = [exec_path, "-b", str(batch_id), "-n", str(mismatch), "-o", out_dir]
    with open(inputs_tags_list, 'r') as input_tags:
        for i in input_tags:
            # strip the ".tags.tsv" suffix to get the sample prefix expected by cstacks -s
            cmd.append("-s")
            cmd.append(os.path.abspath(i.strip()).split(".tags")[0])
p = Popen(cmd, stderr=PIPE)
stderr = p.communicate()[1]
    # write down the stderr
    with open(stderr_path, "w") as stdeh:
        stdeh.write(stderr)
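# Minimal usage sketch (hypothetical workflow code; assumes jflow's add_component API and
# an upstream "ustacks" component providing the input file lists):
#   cstacks = self.add_component("Cstacks", [ustacks.alleles_files, ustacks.snps_files,
#                                            ustacks.tags_files, samples_id, population_list])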
class Cstacks (Analysis):
def define_parameters(self, alleles_files, snps_files, tags_files, samples_id, population_list, batch_id=1, mismatch=1):
"""
        @param alleles_files : list of individual alleles files
        @param snps_files : list of individual snps files
        @param tags_files : list of individual clustering (tags) files
        @param samples_id : list of individual ids
        @param population_list : which sample belongs to which population, same order as samples_id
@param batch_id (-b) : batch id
@param mismatch (-n) : number of mismatches allowed between sample tags when generating the catalog. default 1
"""
self.add_input_file_list( "alleles_files", "list of individuals alleles files", default=alleles_files, required=True)
self.add_input_file_list( "snps_files", "list of individuals snps files", default=snps_files, required=True)
self.add_input_file_list( "tags_files", "list of individuals clustering files", default=tags_files, required=True)
        if len(self.tags_files) != len(self.alleles_files) or len(self.tags_files) != len(self.snps_files):
            raise Exception("[ERROR] : the numbers of ustacks tags, alleles and snps files are not equal. Please check the ustacks results")
self.add_parameter_list( "samples_id", "individuals id list", default=samples_id, required=False)
self.add_parameter_list( "population_list", "which sample belong to which population, same order as sample_id", default=population_list, required=False)
        if len(self.samples_id) != len(self.population_list):
            raise Exception("[ERROR] : samples_id and population_list do not have the same length. Please check your inputs")
self.add_parameter("batch_id", "batch_id", default=batch_id, type=int)
self.add_parameter("mismatch", "number of mismatches allowed between sample tags when generating the catalog.", default=mismatch, type=int)
self.add_output_file("catalog_tags", "catalog cluster files", filename="batch_"+`self.batch_id`+".catalog.tags.tsv")
self.add_output_file("catalog_alleles", "catalog alleles files", filename="batch_"+`self.batch_id`+".catalog.alleles.tsv")
self.add_output_file("catalog_snps", "catalog snps files", filename="batch_"+`self.batch_id`+".catalog.snps.tsv")
self.add_output_file("stderr", "cstacks log", filename="cstacks_batch_"+`self.batch_id`+".stderr")
def get_version(self):
cmd = [self.get_exec_path("cstacks"), "--version"]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
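        # cstacks prints its version on stderr; the second whitespace-separated field is the version number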
return stderr.split()[1]
def define_analysis(self):
self.name = "cstacks"
self.description = "Construction of a catalog of consensus locus among all individuals"
self.software = "cstacks"
self.options = " -n " + str(self.mismatch)
def process(self):
tmp_list_file = self.get_temporary_file()
with open(tmp_list_file, 'w') as tmp_file :
for tag in self.tags_files:
tmp_file.write(tag+"\n")
cstacks = PythonFunction(wrap_cstacks, cmd_format='{EXE} {ARG} {IN} {OUT}')
cstacks(inputs = [tmp_list_file], outputs = [self.catalog_tags, self.catalog_alleles, self.catalog_snps, self.stderr], \
arguments = [self.get_exec_path("cstacks"), self.mismatch, self.batch_id,self.output_directory], \
includes = [self.tags_files, self.alleles_files, self.snps_files])
def post_process(self):
# parse stderr
        samples_name, nb_locus = self.__parse_stderr(self.stderr)
        tot_locus = nb_locus[-1]
        tot_sample = len(samples_name)
        # ==> nb_locus could be plotted as f(sample index) = cumulative number of catalog loci
# parse catalog.tags.tsv
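        # locus categories: "singleton" = built from a single individual, "multi_sample" = shared
        # by several individuals, "half_sample" = shared by at least half of the individuals,
        # "all_sample" = shared by every individual (plus "multi_pop" when population info is given)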
locus_dict={"singleton":[], "multi_sample":[], "half_sample":[],"all_sample":[]}
if len(self.population_list) > 0:
self.__parse_tags(self.catalog_tags,tot_sample,locus_dict, self.samples_id, self.population_list)
else :
self.__parse_tags(self.catalog_tags,tot_sample,locus_dict)
# print "tot_locus,len(locus_dict[singleton]), len(locus_dict[multi_sample]), len(locus_dict[half_sample]), \
# len(locus_dict[all_sample])"
# print tot_locus,len(locus_dict["singleton"]), len(locus_dict["multi_sample"]), len(locus_dict["half_sample"]), \
# len(locus_dict["all_sample"])
self._add_result_element("cstacks_tags", "nb_locus", tot_locus)
self._add_result_element("cstacks_tags", "locus_singleton", len(locus_dict["singleton"]))
self._add_result_element("cstacks_tags", "locus_multi_sample", len(locus_dict["multi_sample"]))
if len(self.population_list) > 0:
self._add_result_element("cstacks_tags", "locus_multi_pop", locus_dict["multi_pop"])
self._add_result_element("cstacks_tags", "locus_half_sample", len(locus_dict["half_sample"]))
self._add_result_element("cstacks_tags", "locus_all_sample", len(locus_dict["all_sample"]))
# parse catalog.alleles.tsv
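        # snp_dict["nb_snp"]: variant counts per locus category ("clust_var" holds the number of
        # variant positions per polymorphic locus, the other entries are [nb_locus, nb_variants] pairs)
        # snp_dict["nb_hap"]: distribution of the number of haplotypes per polymorphic locus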
snp_dict={"nb_snp":{"clust_var":[],"singleton":[0,0], "multi_sample":[0,0], "half_sample":[0,0],"all_sample":[0,0]},\
"nb_hap":{"1_hap":0, "2_hap":0,"3_hap":0,"4_hap":0,"sup5_hap":0,"sup10_hap":0,"max_hap":0}}
self.__parse_alleles(self.catalog_alleles, locus_dict, snp_dict)
nb_clust_var=len(snp_dict["nb_snp"]["clust_var"])
tot_var = np.sum(snp_dict["nb_snp"]["clust_var"])
mean_var = np.mean(snp_dict["nb_snp"]["clust_var"])
min_var = np.min(snp_dict["nb_snp"]["clust_var"])
max_var = np.max(snp_dict["nb_snp"]["clust_var"])
# print "nb_clust_var, tot_var, mean_var, min_var, max_var, snp_dict[nb_snp][singleton], snp_dict[nb_snp][multi_sample],\
# snp_dict[nb_snp][half_sample], snp_dict[nb_snp][all_sample] "
# print nb_clust_var, tot_var, mean_var, min_var, max_var, snp_dict["nb_snp"]["singleton"], snp_dict["nb_snp"]["multi_sample"],\
# snp_dict["nb_snp"]["half_sample"], snp_dict["nb_snp"]["all_sample"]
self._add_result_element("cstacks_snps", "nb_locus_var", nb_clust_var)
self._add_result_element("cstacks_snps", "tot_var", tot_var)
self._add_result_element("cstacks_snps", "mean_var", mean_var)
self._add_result_element("cstacks_snps", "min_var", min_var)
self._add_result_element("cstacks_snps", "max_var", max_var)
self._add_result_element("cstacks_snps", "singleton_var", snp_dict["nb_snp"]["singleton"])
self._add_result_element("cstacks_snps", "multi_sample_var", snp_dict["nb_snp"]["multi_sample"])
self._add_result_element("cstacks_snps", "half_sample_var", snp_dict["nb_snp"]["half_sample"])
self._add_result_element("cstacks_snps", "all_sample_var", snp_dict["nb_snp"]["all_sample"])
        # ==> distribution curve: number of loci as a function of the number of variants per locus
# print "snp_dict[nb_hap][1_hap],snp_dict[nb_hap][2_hap], snp_dict[nb_hap][3_hap], snp_dict[nb_hap][4_hap], snp_dict[nb_hap][sup5_hap],\
# snp_dict[nb_hap][sup10_hap], snp_dict[nb_hap][max_hap]"
# print snp_dict["nb_hap"]["1_hap"], snp_dict["nb_hap"]["2_hap"], snp_dict["nb_hap"]["3_hap"], snp_dict["nb_hap"]["4_hap"], snp_dict["nb_hap"]["sup5_hap"],\
# snp_dict["nb_hap"]["sup10_hap"], snp_dict["nb_hap"]["max_hap"]
self._add_result_element("cstacks_alleles", "locus_var_1hap", snp_dict["nb_hap"]["1_hap"])
self._add_result_element("cstacks_alleles", "locus_var_2hap", snp_dict["nb_hap"]["2_hap"])
self._add_result_element("cstacks_alleles", "locus_var_3_hap", snp_dict["nb_hap"]["3_hap"])
self._add_result_element("cstacks_alleles", "locus_var_4_hap", snp_dict["nb_hap"]["4_hap"])
self._add_result_element("cstacks_alleles", "locus_var_sup5_hap", snp_dict["nb_hap"]["sup5_hap"])
self._add_result_element("cstacks_alleles", "locus_var_sup10_hap", snp_dict["nb_hap"]["sup10_hap"])
self._add_result_element("cstacks_alleles", "locus_var_max_hap", snp_dict["nb_hap"]["max_hap"])
def __parse_stderr(self, cstacks_stderr):
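        """
        Parse the cstacks log: collect the sample names that were loaded and the cumulative
        number of loci in the catalog after each sample is merged.
        """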
samples_name=[]
nb_locus=[0]
with open(cstacks_stderr, 'r') as stderr :
for l in stderr:
                sample_regex = re.search(r"Parsing (.*tags.tsv.*)", l)
                nb_locus_regex = re.search(r"(\d+) loci in the catalog.*", l)
if sample_regex:
s = os.path.basename(sample_regex.group(1)).replace(".tags.tsv"," ").split()[0]
samples_name.append(s)
if nb_locus_regex :
nb = nb_locus_regex.group(1)
nb_locus.append(int(nb))
return samples_name,nb_locus
def __parse_tags(self,cstacks_tags, tot_sample, locus_dict, sample_id=[], pop_list=[]):
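        """
        Parse the batch catalog.tags.tsv file and classify each catalog locus according to the
        number of individuals (and, when available, populations) it was assembled from.
        """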
dic_pop={}
if len(sample_id) != 0:
locus_dict["multi_pop"]=0
dic_pop=dict(zip(sample_id, pop_list))
with open(cstacks_tags,"r") as tags:
for line in tags:
locus_id=line.split()[2]
compo={"ind":[],"pop":[]}
tab=line.split()[7].split(",")
for clust in tab :
indiv=clust.split("_")[0]
                    # count the distinct individuals merged into the same catalog locus
if not indiv in compo['ind']:
compo['ind'].append(indiv)
                    # if population info is available, count the distinct populations merged into the same locus
if dic_pop :
pop=dic_pop[indiv]
if not pop in compo['pop']:
compo['pop'].append(pop)
                nb_ind = len(compo['ind'])
                nb_pop = len(compo['pop'])
                # the categories are mutually exclusive, so test from the most specific down
                if dic_pop and nb_pop > 1:
                    locus_dict["multi_pop"] += 1
                if nb_ind == 1:
                    locus_dict["singleton"].append(locus_id)
                elif nb_ind == tot_sample:
                    locus_dict["all_sample"].append(locus_id)
                elif nb_ind >= tot_sample/2:
                    locus_dict["half_sample"].append(locus_id)
                else:
                    locus_dict["multi_sample"].append(locus_id)
def __parse_alleles(self,cstacks_alleles,locus_dict,snp_dict):
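        """
        Parse the batch catalog.alleles.tsv file to count variant positions per locus category
        and to build the distribution of the number of haplotypes per polymorphic locus.
        """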
clust_hap=[]
with open(cstacks_alleles,"r") as alleles:
for line in alleles:
tab=line.split('\t')
if len(tab[3]) > 0:
locus_id=tab[2]
if not locus_id in clust_hap:
snp_dict["nb_snp"]["clust_var"].append(len(tab[3]))
if locus_id in locus_dict['singleton'] :
snp_dict["nb_snp"]["singleton"][0]+=1
snp_dict["nb_snp"]["singleton"][1]+=len(tab[3])
elif locus_id in locus_dict['multi_sample'] :
snp_dict["nb_snp"]["multi_sample"][0]+=1
snp_dict["nb_snp"]["multi_sample"][1]+=len(tab[3])
elif locus_id in locus_dict['half_sample'] :
snp_dict["nb_snp"]["half_sample"][0]+=1
snp_dict["nb_snp"]["half_sample"][1]+=len(tab[3])
elif locus_id in locus_dict['all_sample'] :
snp_dict["nb_snp"]["all_sample"][0]+=1
snp_dict["nb_snp"]["all_sample"][1]+=len(tab[3])
clust_hap.append(locus_id)
nb_hap = Counter(clust_hap)
        max_hap = 0
for c in nb_hap:
if nb_hap[c] == 1 :
snp_dict["nb_hap"]["1_hap"] += 1
elif nb_hap[c] == 2 :
snp_dict["nb_hap"]["2_hap"] += 1
elif nb_hap[c] == 3 :
snp_dict["nb_hap"]["3_hap"] += 1
elif nb_hap[c] == 4 :
snp_dict["nb_hap"]["4_hap"] += 1
elif nb_hap[c] >= 5 :
snp_dict["nb_hap"]["sup5_hap"] += 1
if nb_hap[c] >= 10 :
snp_dict["nb_hap"]["sup10_hap"] += 1
            if nb_hap[c] > max_hap:
                max_hap = nb_hap[c]
        snp_dict["nb_hap"]["max_hap"] = max_hap
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
import numpy as np
from subprocess import Popen, PIPE
from ng6.analysis import Analysis
from weaver.function import ShellFunction
class Fastuniq (Analysis):
def define_parameters(self, read1_files, read2_files):
"""
@param read1_files : paths to reads 1
@param read2_files : paths to reads 2
"""
self.add_input_file_list( "read1_files", "paths to reads 1", default=read1_files, required=True)
self.add_input_file_list( "read2_files", "paths to reads 2", default=read2_files, required=True)
if len(self.read1_files) != len(self.read2_files):
raise Exception("[ERROR] : the number of files for read 1 and read 2 are not equal. Please check your inputs")
self.add_output_file_list("out_read1_files", "uniq read1 files", pattern='{basename}', items=self.read1_files)
self.add_output_file_list("out_read2_files", "uniq read2 files", pattern='{basename}', items=self.read2_files)
def define_analysis(self):
self.name = "fastuniq"
self.description = "duplicate removed fastq paired files "
self.software = "fastuniq"
self.options = "-t q -c 0"
    def get_version(self):
        # hard-coded: the installed fastuniq release
        return "1.1"
def process(self):
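        # fastuniq reads its paired inputs from a list file (-i) containing the read1 and
        # read2 paths, one per line, and writes the de-duplicated pairs to -o / -p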
self.options = "-t q -c 0"
for idx, read1 in enumerate(self.read1_files):
read2=self.read2_files[idx]
tmp_list_file = self.get_temporary_file()
with open(tmp_list_file, 'w') as tmp_file :
tmp_file.write(read1+"\n"+read2+"\n")
fastuniq = ShellFunction(self.get_exec_path("fastuniq") + " -i $1 -t q -o $2 -p $3 -c 0 ", cmd_format='{EXE} {IN} {OUT}')
fastuniq(inputs = [tmp_list_file], outputs = [self.out_read1_files[idx],self.out_read2_files[idx]], includes=[self.read1_files[idx],self.read2_files[idx]])
def post_process(self):
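        # read counts are estimated as (number of fastq lines) / 4 on the read-1 files,
        # before and after de-duplication, to derive the percentage of duplicated pairs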
        # nb read pairs before de-duplication
        nb_before = []
        for read1 in self.read1_files:
            with open(read1, "r") as r1:
                nb_before.append(sum(1 for _ in r1) / 4)
        # nb read pairs after de-duplication
        nb_after = []
        for read1 in self.out_read1_files:
            with open(read1, "r") as r1:
                nb_after.append(sum(1 for _ in r1) / 4)
# percent dup
per_dup_list=[]
for i in range(0,len(nb_before)) :
percent_dup = round((nb_before[i]-nb_after[i])*100.0/nb_before[i],2)
per_dup_list.append(percent_dup)
self._add_result_element("fastuniq", "percent_dup", percent_dup)
# print "dup stat ",per_dup_list
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFileList, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
from ng6.analysis import Analysis
from ng6.utils import Utils
import jflow.seqio as seqio
list_enz=['apeKI', 'bamHI', 'claI', 'dpnII', 'eaeI', 'ecoRI','ecoT22I', 'hindIII', 'mluCI', 'mseI', 'mspI', 'ndeI','nheI', 'nlaIII',\
'notI', 'nsiI', 'pstI', 'sau3AI','sbfI', 'sexAI', 'sgrAI', 'sphI', 'taqI', 'xbaI']
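# restriction enzymes accepted by process_radtags for the -e / --renz_1 option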
class Process_radtag (Analysis):
def recover_mate_discards (read1,read2, output_file):
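        """
        Rebuild the read-2 fastq matching the read-1 records present in read1: read-2 records
        whose identifier is found among the read-1 identifiers are written to output_file,
        with the output buffered every 1000 records.
        """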
        # record the read-1 sequence IDs
id_R1=[]
reader = seqio.SequenceReader(read1,fileformat="fastq")
for id, desc, seq, qual in reader :
id_R1.append(id)
        # dictionary of read-2 records matching the read-1 IDs
dic_R2={}
reader = seqio.SequenceReader(read2,fileformat="fastq")
for id, desc, seq, qual in reader :
if id in id_R1:
dic_R2[id]=desc+"\n"+seq+"\n+\n"+qual+"\n"
        # write the read-2 fastq file matching the kept read-1 records
handle=open(output_file,"w")
string=""
i=0
for name in id_R1:
string=string+"@"+dic_R2[name]
i=i+4
if i==4000:
handle.write(string)
string=""
i=0
if string !="":
handle.write(string)
string=""
i=0
handle.close()
def recover_mate_ok (read1,read2,output_file):
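        """
        Same as recover_mate_discards but for the reads kept by process_radtags: the last two
        characters of each read-1 identifier are stripped and the read-2 identifiers are rebuilt
        from fields 4-7 of the ':'-separated Illumina name before matching.
        """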
        # record the read-1 sequence IDs (last two characters stripped)
id_R1=[]
reader = seqio.SequenceReader(read1,fileformat="fastq")
for id, desc, seq, qual in reader :
id_R1.append(id[:-2])
        # dictionary of read-2 records matching the read-1 IDs
dic_R2={}
reader = seqio.SequenceReader(read2,fileformat="fastq")
for id, desc, seq, qual in reader :
convert_id="_".join(id.split(":")[3:7])
if convert_id in id_R1:
dic_R2[convert_id]=convert_id+"_2\n"+seq+"\n+\n"+qual+"\n"
        # write the read-2 fastq file matching the kept read-1 records
handle=open(output_file,"w")
string=""
i=0
for name in id_R1:
string=string+"@"+dic_R2[name]
i=i+4
if i==4000:
handle.write(string)
string=""
i=0
if string !="":
handle.write(string)
string=""
i=0
handle.close()
def define_parameters(self, read1_files, read2_files, uncall_remove=True, discard_low_qual=True, rescue_radtag=False, max_length=None, \
quality_encode = 'phred33', keep_discard_read=True, window_size=0.15, limit_score=10, enzyme_name=None, archive_name=False):
"""
@param read1_files (-1): paths to reads 1
@param read2_files (-2): paths to reads 2
@param uncall_remove (-c): clean data, remove any read with an uncalled base.
@param discard_low_qual (-q): discard reads with low quality scores.
@param rescue_radtag (-r) : rescue barcodes and RAD-Tags.
@param max_length (-t) : truncate final read length to this value. (default none)
@param quality_encode (-E) : specify how quality scores are encoded, 'phred33' (Illumina 1.8+, Sanger, default) or 'phred64' (Illumina 1.3 - 1.5).
@param keep_discard_read (-D): capture discarded reads to a file.
@param window_size (-w) : set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15).
@param limit_score (-s) : set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10).
        @param enzyme_name (-e, --renz_1) : restriction enzyme used (cut site occurs on the single-end read)
@param archive_name : name for the output archive
"""
self.add_input_file_list( "read1_files", "paths to reads 1", default=read1_files, required=True, file_format = 'fastq')
self.add_input_file_list( "read2_files", "paths to reads 1", default=read2_files, required=True, file_format = 'fastq')
if len(read1_files) != len(read2_files):
raise Exception("[ERROR] : the number of files is not correct! (the number of files in read1_files and in read2_files must be the same)")
self.add_parameter("uncall_remove", "clean data, remove any read with an uncalled base.", default=uncall_remove)
self.add_parameter("discard_low_qual", "discard reads with low quality scores.", default=discard_low_qual)
self.add_parameter("rescue_radtag", "rescue barcodes and RAD-Tags.", default=rescue_radtag)
self.add_parameter("max_length", "truncate final read length to this value. (default none)", default=max_length)
self.add_parameter("quality_encode", "specify how quality scores are encoded, 'phred33' (Illumina 1.8+, Sanger, default) or 'phred64' (Illumina 1.3 - 1.5).", default=quality_encode)
self.add_parameter("keep_discard_read", "capture discarded reads to a file.", default=keep_discard_read)
self.add_parameter("window_size", "set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15).", default=window_size)
self.add_parameter("limit_score", "set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10).", default=limit_score)
self.add_parameter("enzyme_name", "provide the restriction enzyme used (cut site occurs on single-end read)", default=limit_score)
self.archive_name = archive_name
self.prefixes = self.get_outputs('{basename_woext}', [read1_files, read2_files])
self.output_read_1 = OutputFileList(self.get_outputs('{basename}.gz', self.read1_files), Formats.FASTQ)
self.output_read_2 = OutputFileList(self.get_outputs('{basename}.gz', self.read2_files), Formats.FASTQ)
self.discard_read_1 = OutputFileList(self.get_outputs('{basename}.discard.gz', self.read1_files), Formats.FASTQ)
self.discard_read_2 = OutputFileList(self.get_outputs('{basename}.discard.gz', self.read2_files), Formats.FASTQ)
self.stderrs = OutputFileList(self.get_outputs('{basename_woext}.stderr', self.prefixes))
def define_analysis(self):
self.name = "Process radtag"
self.description = "Correct fastq radtag"
self.software = "process_radtags"
self.options = ""
if self.enzyme_name:
self.options += " -e " + str(self.enzyme_name)
if self.limit_score:
self.options += " -s " + str(self.limit_score)
if self.quality_encode:
self.options += " -E " + str(self.quality_encode)
if self.rescue_radtag:
self.options += " -r "
if self.discard_low_qual:
self.options += " -q "
        if self.keep_discard_read:
self.options += " -D "
if self.max_length:
self.options += " -t " + str(self.max_length)
        if self.uncall_remove:
            self.options += " -c "
        if self.window_size:
            self.options += " -w " + str(self.window_size)
def post_process(self):
samples = {}
# Save files
for filepath in self.extended_frags:
self._save_file(filepath)
# Process metrics from the extended fragments
for filepath in self.extended_frags:
[nb_seq, sizes] = self._get_length_table(filepath)
x = []
y = []
for val in sizes.keys():
x.append(val)
x = sorted(x)
for i in x:
y.append(sizes[i])
sample_name = os.path.basename(filepath).split(".extendedFrags")[0]
            if sample_name not in samples:
samples[sample_name] = {}
samples[sample_name]["nb_extended"] = str(nb_seq)
samples[sample_name]["size_extended"] = str(",".join([str(v) for v in x]))
samples[sample_name]["nb_size_extended"] = str(",".join([str(v) for v in y]))
# Process metrics from the not combined reads 1
for filepath in self.not_combined_read_1:
[nb_seq, sizes] = self._get_length_table(filepath)
sample_name = os.path.basename(filepath).split(".notCombined_1")[0]
            if sample_name not in samples:
samples[sample_name] = {}
samples[sample_name]["nb_notcombined1"] = str(nb_seq)
# Process metrics from the not combined reads 2
for filepath in self.not_combined_read_2:
[nb_seq, sizes] = self._get_length_table(filepath)
sample_name = os.path.basename(filepath).split(".notCombined_2")[0]
            if sample_name not in samples:
samples[sample_name] = {}
samples[sample_name]["nb_notcombined2"] = str(nb_seq)
# Save metrics
for sample in samples:
self._add_result_element(sample, "nb_extended", samples[sample]["nb_extended"])
self._add_result_element(sample, "size_extended", samples[sample]["size_extended"])
self._add_result_element(sample, "nb_size_extended", samples[sample]["nb_size_extended"])
self._add_result_element(sample, "nb_notcombined1", samples[sample]["nb_notcombined1"])
self._add_result_element(sample, "nb_notcombined2", samples[sample]["nb_notcombined2"])
def get_version(self):
cmd = [self.get_exec_path("process_radtags"), "--version"]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
return stdout.split()[1]