Commit 91945346 authored by Celine Noirot
Browse files

Sequences without adaptors weren't checked against the minimum length

parent 7b10a2ea
......@@ -35,7 +35,7 @@ def version_string ():
return "adaptorcleaner " + __version__
def mask_sequences (fafile, tab_adapt, len_adapt, options, log, minscore,minmatch):
def mask_sequences (fafile, len_adapt, options, log, minscore,minmatch , tab_adapt):
"""
Search and mask adaptors in seqs
@param seqs : table of seqs to filter
......@@ -75,11 +75,11 @@ def mask_sequences (fafile, tab_adapt, len_adapt, options, log, minscore,minmatc
fm = fwd_regex.match(line)
strand = ""
save = False
if rm != None: # If it's a primer2 matches
if rm != None: #reverse match
(score, percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, endSecondMatch, startSecondMatch)=(int(rm.group(2)), float(rm.group(3)), rm.group(4), int(rm.group(5)), int(rm.group(6)), rm.group(7), int(rm.group(8)), int(rm.group(9)))
save = True
strand = 'rvs'
elif fm != None: # If it's a primer1 matches
elif fm != None: #forward match
(score, percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(int(fm.group(2)), float(fm.group(3)), fm.group(4), int(fm.group(5)), int(fm.group(6)), fm.group(7), int(fm.group(8)), int(fm.group(9)))
save = True
strand = 'fwd'
......@@ -91,14 +91,13 @@ def mask_sequences (fafile, tab_adapt, len_adapt, options, log, minscore,minmatc
adaptators_found[primary_match].append([strand, secondary_match, startFirstMatch, endFirstMatch, startSecondMatch, endSecondMatch, score])
except:
adaptators_found[primary_match] = [[strand, secondary_match, startFirstMatch, endFirstMatch, startSecondMatch, endSecondMatch, score]]
log.write("%"+primary_match+"\t"+strand+"\t"+secondary_match+"\t"+str(startFirstMatch)+"\t"+str(endFirstMatch)+"\t"+str(startSecondMatch)+"\t"+str(endSecondMatch)+"\t"+str(score)+"\n")
log.write("%"+primary_match+"\t"+strand+"\t"+secondary_match+"\t"+str(startFirstMatch)+"\t"+str(endFirstMatch)+"\t"+str(startSecondMatch)+"\t"+str(endSecondMatch)+"\t"+str(score)+"\n")
log.write("###Number of sequences with adaptor "+ adapt_ids + " " +str(len(adaptators_found.keys())) +" \n")
# Clean up temp files
os.remove(os.path.join(options.output, os.path.basename(options.fasta) + "."+ adapt_ids + ".oligo"))
os.remove(os.path.join(options.output, os.path.basename(options.fasta)) + "."+ adapt_ids + ".cross_match.res")
#os.remove(os.path.join(options.output, os.path.basename(options.fasta) + "."+ adapt_ids + ".oligo"))
#os.remove(os.path.join(options.output, os.path.basename(options.fasta)) + "."+ adapt_ids + ".cross_match.res")
return(os.path.join(options.output, os.path.basename(options.fasta) + "."+ adapt_ids + ".screen.fasta"))
......@@ -278,7 +277,7 @@ if __name__ == '__main__':
os.system ("ln -s "+options.fasta+ " "+ screen_file)
for adapt_len in sorted(iadapt.keys(),reverse=True):
(minscore,minmatch)=adapt_parameters[adapt_len]
screen_file=mask_sequences(previous_file, iadapt[adapt_len],adapt_len, options,log,minscore,minmatch)
screen_file=mask_sequences(previous_file,adapt_len, options,log,minscore,minmatch, iadapt[adapt_len])
#clean output directory
if previous_file != start_file and os.path.exists(previous_file):
os.system ("rm "+previous_file )
......@@ -299,7 +298,7 @@ if __name__ == '__main__':
if record_dict.has_key(id) :
if (record_dict[id][2].find("X") != -1):
(x,y)=get_longest_subsequence(record_dict[id][2])
diff=y-x
diff=y-x
if ( diff <= 0 or diff < int(options.minlen) ) :
if (diff <= 0 ):
log.write("Discard "+id+" contains only adaptor\n" )
......@@ -310,11 +309,11 @@ if __name__ == '__main__':
new_record= [id, desc, record_dict[id][2][x-1:y-1], " ".join(qualv[x-1:y-1])]
new_fasta.append(new_record)
else :
if len(seq) >= int(options.minlen) :
if len(record_dict[id][2]) >= int(options.minlen) :
record_dict[id][3] = qual
new_fasta.append(record_dict[id])
else :
log.write("Discard "+id+" too short "+str(len(seq))+" < "+str(options.minlen)+"\n" )
log.write("Discard "+id+" too short "+str(len(record_dict[id][2]))+" < "+str(options.minlen)+"\n" )
output = os.path.join(options.output, os.path.splitext(os.path.basename(options.fasta))[0] + ".adaptorcleaner.fasta")
fasta = open(output, "w")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment