Commit a460e703 authored by Penom Nom's avatar Penom Nom
Browse files

Add Flash analyse.

parent 0755db9b
......@@ -254,7 +254,8 @@ class Utils(object):
if is_casava:
for file in file_list:
file_name_fields = os.path.basename(file).split(Utils.CASAVA_FILENAME_SEPARATOR)
basename_without_ext = os.path.basename(file).split(".")[0]
file_name_fields = basename_without_ext.split(Utils.CASAVA_FILENAME_SEPARATOR)
read_tag = file_name_fields[Utils.CASAVA_FILENAME['read']-1]
if read_tag == "R1":
read_1_list.append(file)
......
......@@ -17,8 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{extends file='BasicAnalyse.tpl'}
{block name=params_title} Parameters used {/block}
{block name=params_content}
{block name=params}
{assign var="params" value=" "|explode:$analyse.params}
{foreach from=$analyse_results key=sample item=sample_results}
{assign var="sample" value=$sample}
......@@ -44,6 +43,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{assign var="findex" value=$params|@array_keys:"-f"}
<li class="parameter">{$params[$findex[0]+1]}bp is the average fragment length.</li>
{/if}
{if in_array("-s", $params)}
{assign var="sindex" value=$params|@array_keys:"-s"}
<li class="parameter">The standard deviation of fragment lengths : {$params[$sindex[0]+1]}%.</li>
{/if}
{if in_array("-p", $params)}
{assign var="pindex" value=$params|@array_keys:"-p"}
<li class="parameter">{$params[$pindex[0]+1]} used as phred offset.</li>
......@@ -51,12 +54,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
</ul>
{/block}
{block name=results_title} Assembly results {/block}
{block name=results}
<div id="user_information_dialog" title=""></div>
<input type="hidden" id="analyse_name" value="{$analyse.name}"/>
<h3> Assembly results: </h3>
<div class="underline"> </div>
<br />
<table class="display analysis-result-table">
<thead>
<tr>
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.iotypes import OutputFileList, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
from ng6.analysis import Analysis
from ng6.utils import Utils
import ng6.seqio as seqio
class Flash (Analysis):
    """
    Analysis that runs the "flash" executable on paired fastq files to combine
    overlapping pairs, gzips the produced fastq files and stores per-sample
    metrics (number of extended fragments, their length distribution and the
    number of reads left uncombined).
    """

    def _get_length_table(self, input_file):
        """
        Count the sequences of a fastq file and tabulate their lengths.
          @param input_file : the fastq file path
          @return : [nb_seq, {size: nb, size:...}]
        """
        nb_seq = 0
        reader = seqio.SequenceReader(input_file)
        sizes = {}
        # 'seq_id' instead of 'id' so the builtin id() is not shadowed.
        for seq_id, desc, seq, qualities in reader:
            nb_seq += 1
            seq_length = len(seq)
            # dict.get replaces dict.has_key, which no longer exists in Python 3.
            sizes[seq_length] = sizes.get(seq_length, 0) + 1
        return [nb_seq, sizes]

    def define_parameters(self, read1_files, read2_files, mismatch_ratio=None, min_overlap=None, max_overlap=None, avg_reads_length=None, avg_fragment_length=None, standard_deviation=None, phred_offset=None, archive_name=None):
        """
        Declare the inputs, the options and the output files of the analysis.
          @param read1_files : the fastq files of the reads 1
          @param read2_files : the fastq files of the reads 2 (as many files as read1_files)
          @param mismatch_ratio : value given to flash with -x
          @param min_overlap : value given to flash with -m
          @param max_overlap : value given to flash with -M
          @param avg_reads_length : value given to flash with -r
          @param avg_fragment_length : value given to flash with -f
          @param standard_deviation : value given to flash with -s
          @param phred_offset : value given to flash with -p
          @param archive_name : stored on the instance; not used elsewhere in this class
          @raise Exception : when read1_files and read2_files do not have the same length
        """
        self.read1_files = InputFileList(read1_files, Formats.FASTQ)
        self.read2_files = InputFileList(read2_files, Formats.FASTQ)
        if len(read1_files) != len(read2_files):
            raise Exception("[ERROR] : the number of files is not correct! (the number of files in read1_files and in read2_files must be the same)")
        self.mismatch_ratio = mismatch_ratio
        self.min_overlap = min_overlap
        self.max_overlap = max_overlap
        self.avg_reads_length = avg_reads_length
        self.avg_fragment_length = avg_fragment_length
        self.standard_deviation = standard_deviation
        self.phred_offset = phred_offset
        self.archive_name = archive_name
        # One prefix per pair of input files; every output name is derived from it.
        self.prefixes = self.get_outputs('{basename_woext}', [read1_files, read2_files])
        self.extended_frags = OutputFileList(self.get_outputs('{basename_woext}.extendedFrags.fastq.gz', self.prefixes), Formats.FASTQ)
        self.not_combined_read_1 = OutputFileList(self.get_outputs('{basename_woext}.notCombined_1.fastq.gz', self.prefixes), Formats.FASTQ)
        self.not_combined_read_2 = OutputFileList(self.get_outputs('{basename_woext}.notCombined_2.fastq.gz', self.prefixes), Formats.FASTQ)
        self.stderrs = OutputFileList(self.get_outputs('{basename_woext}.stderr', self.prefixes))

    def define_analysis(self):
        """
        Set the name, the description and the software of the analysis and
        build the flash option string from the parameters that were provided.
        """
        self.name = "JoinPairs"
        self.description = "Combines the overlapping pairs."
        self.software = "flash"
        self.options = ""
        # The original code used the bare name 'mismatch_ratio' for -x, which
        # raised a NameError as soon as a mismatch ratio was provided; every
        # value is now read from the instance.
        option_values = (
            ("-x", self.mismatch_ratio),
            ("-m", self.min_overlap),
            ("-M", self.max_overlap),
            ("-r", self.avg_reads_length),
            ("-f", self.avg_fragment_length),
            ("-s", self.standard_deviation),
            ("-p", self.phred_offset),
        )
        for flag, value in option_values:
            # NOTE: a falsy value (None, 0, "") is treated as "not provided".
            if value:
                self.options += " " + flag + " " + str(value)

    def post_process(self):
        """
        Save the result files and store, for each sample, the number of
        extended fragments, their length distribution and the number of
        not combined reads 1 and 2.
        """
        samples = {}

        # Save files
        for filepath in self.extended_frags+self.not_combined_read_1+self.not_combined_read_2:
            self._save_file(filepath)

        # Process metrics from the extended fragments
        for filepath in self.extended_frags:
            [nb_seq, sizes] = self._get_length_table(filepath)
            # Lengths in ascending order, with the matching counts in the same order.
            lengths = sorted(sizes)
            counts = [sizes[length] for length in lengths]
            sample_name = os.path.basename(filepath).split(".extendedFrags")[0]
            sample_metrics = samples.setdefault(sample_name, {})
            sample_metrics["nb_extended"] = str(nb_seq)
            sample_metrics["size_extended"] = ",".join([str(v) for v in lengths])
            sample_metrics["nb_size_extended"] = ",".join([str(v) for v in counts])

        # Process metrics from the not combined reads 1
        for filepath in self.not_combined_read_1:
            nb_seq = self._get_length_table(filepath)[0]
            sample_name = os.path.basename(filepath).split(".notCombined_1")[0]
            samples.setdefault(sample_name, {})["nb_notcombined1"] = str(nb_seq)

        # Process metrics from the not combined reads 2
        for filepath in self.not_combined_read_2:
            nb_seq = self._get_length_table(filepath)[0]
            sample_name = os.path.basename(filepath).split(".notCombined_2")[0]
            samples.setdefault(sample_name, {})["nb_notcombined2"] = str(nb_seq)

        # Save metrics
        metric_names = ("nb_extended", "size_extended", "nb_size_extended", "nb_notcombined1", "nb_notcombined2")
        for sample in samples:
            for metric in metric_names:
                self._add_result_element(sample, metric, samples[sample][metric])

    def get_version(self):
        """
        @return : "-" (no version is retrieved from the software)
        """
        return "-"

    def process(self):
        """
        Declare one flash command per pair of input files, then the gzip
        compression of the fastq files flash produces.
        """
        # Creates list for temporary uncompressed files
        tmp_extended_frags = self.get_outputs('{basename_woext}.extendedFrags.fastq', self.prefixes)
        tmp_not_combined_read_1 = self.get_outputs('{basename_woext}.notCombined_1.fastq', self.prefixes)
        tmp_not_combined_read_2 = self.get_outputs('{basename_woext}.notCombined_2.fastq', self.prefixes)

        # Join pairs
        for i in range(len(self.prefixes)):
            # The command depends on the prefix, so a function is built per pair.
            # NOTE(review): $1/$2 presumably map to the two inputs and $3 to the
            # first output (the stderr file) - confirm against weaver's cmd_format.
            flash = ShellFunction(self.get_exec_path("flash") + " $1 $2 " + self.options + " -o " + os.path.basename(self.prefixes[i]) + " -d " + self.output_directory + " 2> $3", cmd_format='{EXE} {IN} {OUT}')
            flash(inputs = [self.read1_files[i], self.read2_files[i]], outputs = [self.stderrs[i], tmp_extended_frags[i], tmp_not_combined_read_1[i], tmp_not_combined_read_2[i]])

        # Compress files
        compress = ShellFunction("gzip $1 $2 $3", cmd_format='{EXE} {IN} {OUT}')
        compress = MultiMap(compress, inputs = [tmp_extended_frags, tmp_not_combined_read_1, tmp_not_combined_read_2], outputs = [self.extended_frags, self.not_combined_read_1, self.not_combined_read_2])
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment