<li class="parameter">Unknown indices with a number of fragments < {$params[0]*100}% of the number of fragments in the sample with the smallest population are merged into "All others".</li>
self.index_count_threshold=index_count_threshold#Unknown indices with a number of fragments < index_count_threshold*number_of_fragments_in_sample_with_littlest_population are merged in "All others".
def define_analysis(self):
    """Fill in the metadata attributes that describe this analysis.

    Sets ``name``, ``description``, ``software`` and ``options`` on the
    instance. ``options`` is the string form of
    ``self.index_count_threshold``, which must already be set.
    """
    # Constant descriptive fields, assigned left to right.
    self.name, self.description, self.software = (
        "DemultiplexStats",
        "Statistics about demultiplexing",
        "-",
    )
    # The only option recorded is the threshold, stored as text.
    threshold_text = str(self.index_count_threshold)
    self.options = threshold_text
def_count_indices(self,files):
indices_stat={}
forcurrent_fileinfiles:
reader=seqio.SequenceReader(current_file)
forid,desc,seq,qualitiesinreader:
match=re.search("^\d:[Y|N]:\d+:([ATGCN]+)",desc)
ifmatch:
index_seq=match.group(1)
ifnotindices_stat.has_key(index_seq):
indices_stat[index_seq]={}
indices_stat[index_seq]["number"]=0
indices_stat[index_seq]["passing_filter"]=0
indices_stat[index_seq]["number"]+=1
ifre.match("^\d:N:\d+:[ATGCN]+",desc):
indices_stat[index_seq]["passing_filter"]+=1
else:
raiseValueError,"The description '"+desc+"' of the sequence "+id+" is in an invalid format."