Commit 13164c27 authored by Audrey Gibert's avatar Audrey Gibert
Browse files

Fixes methylseq pipeline for single-end reads

parents bf9d3c96 489a7811
{*
Copyright (C) 2009 INRA
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*}
{extends file='AnalysisTemplate.tpl'}
{block name=params}
{assign var="params" value=" "|explode:$analyse.params}
{foreach from=$analyse_results key=sample item=sample_results}
{assign var="sample" value=$sample}
{/foreach}>
{/block}
{block name=results_title} Reports {/block}
{block name=results}
{assign var="analyse_results_sorted" value=$analyse_results["ont_sample"]|@ksort}
{assign var="metrics" value=$analyse_results["metrics"]}
{assign var='barcode_headers' value=','|explode:$metrics['barcode'].headers|@ksort}
{assign var='barcode_headers_count' value=$barcode_headers|@count}
{assign var='barcode_name_sample' value=','|explode:$metrics['barcode'].names|@ksort}
{assign var='barcode_name_count' value=$barcode_name_sample|@count}
{assign var='ont_sample_count' value=$analyse_results_sorted|@count}
{assign var='ont_metrics_names' value=','|explode:$metrics['statsporechop'].headers|@ksort}
{assign var='ont_metrics_count' value=$ont_metrics_names|@count}
{*debug*}
<legend>Analyse results report - Sample name : {$descriptions.sample_1}</legend>
<br>
<div class="tx-nG6-pi1-help">
<img src="" alt="" class="img" />
<p>Help for extract files results :</p>
<span class="meta">
<ul>
<li><strong>Download archive</strong> :
Use the Downloads view tab to download the archive.
The archive was compressed by tar + gzip.
</li>
<li><strong>Decompress archive</strong> :
Use tar -xzf fast5archive.tar.gz
</li>
</ul>
</span>
</div>
{/block}
......@@ -53,71 +53,67 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
</tr>
<tr>
{assign var="th_id" value=2}
{foreach from=$ont_metrics_names key=k item=head}
{if $head == 'read_trim_start'}
<th class = "numeric-sort" id="th_id_{$th_id}">Read_trim_start</th>
{elseif $head == 'read_total_start'}
<th class = "numeric-sort" id="th_id_{$th_id}">Read_total_start</th>
{elseif $head == 'bp_removed_start'}
<th class = "numeric-sort" id="th_id_{$th_id}">Bp_removed_start</th>
{assign var="list_var" value=','|explode:"read_total_start,read_trim_start,bp_removed_start,read_trim_end,bp_removed_end"}
{foreach from=$list_var key=k item=head}
{if $head == 'read_total_start'}
<th class = "numeric-sort" id="th_id_{$th_id}">NB_read_total</th>
{elseif $head == 'read_trim_start'}
<th class = "numeric-sort" id="th_id_{$th_id}">NB_read_trim_5'</th>
{elseif $head == 'bp_removed_start'}
<th class = "numeric-sort" id="th_id_{$th_id}">NB_bases_trim_5'</th>
{elseif $head == 'read_trim_end'}
<th class = "numeric-sort" id="th_id_{$th_id}">Read_trim_end</th>
{elseif $head == 'read_total_end'}
<th class = "numeric-sort" id="th_id_{$th_id}">Read_total_end</th>
<th class = "numeric-sort" id="th_id_{$th_id}">NB_read_trim_3'</th>
{elseif $head == 'bp_removed_end'}
<th class = "numeric-sort" id="th_id_{$th_id}">Bp_removed_end</th>
{/if}
{$th_id = $th_id +1}
{/foreach}
</tr>
</thead>
<body>
{foreach from=$analyse_results_sorted key=sample item=sample_results}
<tr>
<td id='sample_{$i}_col_1' class="sample_name">{$sample}</td>
{$col_id = 2}
{foreach from=$ont_metrics_names key=k item=head}
{if $head == 'read_trim_start'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'read_total_start'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'bp_removed_start'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'read_trim_end'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'read_total_end'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'bp_removed_end'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{/if}
{$col_id = $col_id + 1}
{/foreach}
</tr>
{$i = $i + 1}
{/foreach}
</body>
</table>
<div class="tx-nG6-pi1-help">
<img src="" alt="" class="img" />
<p>Help for Trimmed ONT files results :</p>
<span class="meta">
<ul>
<li><strong>read_trim</strong> :
The number of reads trimmed for this sample.
</li>
<li><strong>read_total</strong> :
The total number of reads for this sample.
</li>
<li><strong>bp_removed</strong> :
The number of bases removed for this sample.
</li>
<li><strong>_start</strong> :
Previous metrics from the start of the read.
</li>
<li><strong>_end</strong> :
Previous metrics to the end of the read.
<th class = "numeric-sort" id="th_id_{$th_id}">NB_bases_trim_3'</th>
{/if}
{$th_id = $th_id +1}
{/foreach}
</tr>
</thead>
<body>
{foreach from=$analyse_results_sorted key=sample item=sample_results}
<tr>
<td id='sample_{$i}_col_1' class="sample_name">{$sample}</td>
{$col_id = 2}
{foreach from=$list_var key=k item=head}
{if $head == 'read_total_start'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'read_trim_start'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'bp_removed_start'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'read_trim_end'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'bp_removed_end'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{/if}
{$col_id = $col_id + 1}
{/foreach}
</tr>
{$i = $i + 1}
{/foreach}
</body>
</table>
<div class="tx-nG6-pi1-help">
<img src="" alt="" class="img" />
<p>Help for Trimmed ONT files results :</p>
<span class="meta">
<ul>
<li><strong>NB_read_total</strong> :
The total number of reads for this sample.
</li>
<li><strong>NB_read_trim_5'</strong> :
The number of reads trimmed because they have adapters in 5'.
</li>
<li><strong>NB_bases_trim_5'</strong> :
The number of bases removed because they are part of adapters in 5'.
</li>
<li><strong>NB_read_trim_3'</strong> :
The number of reads trimmed because they have adapters in 3'.
</li>
<li><strong>NB_bases_trim_3'</strong> :
The number of bases removed because they are part of adapters in 3'.
</li>
</ul>
</span>
</div>
......
......@@ -145,13 +145,13 @@ class SubsetAssignation (Analysis):
input_groups = Utils.get_filepath_by_prefix( self.sequence_files, self.group_prefixes )
sub_fasta_groups = Utils.get_filepath_by_prefix( self.sub_fasta_files, self.group_prefixes )
for prefix in self.group_prefixes:
self.add_python_function(extract_random_seq,cmd_format="{EXE} " +
self.add_python_execution(extract_random_seq,cmd_format="{EXE} " +
" ".join([str(self.extract_rate),str(self.min_nb_seq), str(self.max_nb_seq)]) + " {IN} {OUT}",
inputs = input_groups[prefix], outputs = sub_fasta_groups[prefix], map=False)
# Align on databank
self.add_shell_function(self.get_exec_path("blastn") + " -max_target_seqs " + str(self.max_target_seqs) + " -num_threads " + self.nb_threads +
" -use_index " + self.use_index + " -outfmt 7 -db " + self.databank + " -query $1 -out $2",
self.add_shell_execution(self.get_exec_path("blastn") + " -max_target_seqs " + str(self.max_target_seqs) + " -num_threads " + str(self.nb_threads) +
" -use_index " + str(self.use_index) + " -outfmt 7 -db " + str(self.databank) + " -query $1 -out $2",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.sub_fasta_files, outputs=self.blast_files)
# Create files groups
......@@ -163,11 +163,11 @@ class SubsetAssignation (Analysis):
for prefix in self.group_prefixes:
# Merge blast
[cmd_inputs_pattern, next_arg_number] = get_argument_pattern(blast_groups[prefix], 1)
self.add_shell_function('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}',
self.add_shell_execution('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}',
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs = blast_groups[prefix], outputs = blast_merges_groups[prefix])
# Create krona
self.add_shell_function(self.get_exec_path("ktImportBLAST") + " -i -b $1 -o $2 > $3",
self.add_shell_execution(self.get_exec_path("ktImportBLAST") + " -i -b $1 -o $2 > $3",
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs = blast_merges_groups[prefix], outputs = [html_groups[prefix], krona_groups[prefix]])
\ No newline at end of file
......@@ -42,12 +42,12 @@ class RemoveDuplicate (Analysis):
if self.get_memory() != None :
self.mem=self.get_memory()
self.add_output_file_list("flagstat_init", "Flagstat initialy", pattern='{basename_woext}.init_flagstat', items=self.bam)
self.add_output_file_list("flagstat_rmdup", "Flagstat result after rmdup", pattern='{basename_woext}.rmdup_flagstat', items=self.bam)
self.add_output_file_list("flagstat_finally", "Flagstat result after removing singleton", pattern='{basename_woext}.finally_flagstat', items=self.bam)
if self.is_paired:
self.add_output_file_list("flagstat_rmdup", "Flagstat result after rmdup", pattern='{basename_woext}.rmdup_flagstat', items=self.bam)
self.add_output_file_list("rmdup_stderr", "The error trace file", pattern='{basename_woext}_rmdup.stderr', items=self.bam)
self.add_output_file_list("output", "The bam with removed duplicates (and singleton if paired)", pattern='{basename_woext}_clean.bam', items=self.bam)
self.add_output_file_list("rmdup_stderr", "The error trace file", pattern='{basename_woext}_rmdup.stderr', items=self.bam)
self.add_output_file_list("rmsinglet_stderr","The error trace file", pattern='{basename_woext}_rmsinglet.stderr', items=self.bam)
def define_analysis(self):
......
......@@ -34,6 +34,7 @@ class OntQualityCheck (NG6Workflow):
self.add_parameter("fileformat", "The file format for porechop (default=fastq).", choices= [ "fastq", "fasta"], default = "fastq")
self.add_input_file( "summary_file", "Input summary basecalling file", default=None)
self.add_parameter("barcoded", "Barcoded run or not", choices= [ "yes", "no"], default = "no")
self.add_parameter("fast5dir", "path of the fast5 directory", default = None)
def process(self):
logging.getLogger("jflow").debug("Begin OntQualityCheck.process! test ont_qc")
......@@ -52,4 +53,6 @@ class OntQualityCheck (NG6Workflow):
#nanoplot = self.add_component("Nanoplot", [sample.name,self.get_all_reads(), self.nb_threads, True, "png", self.nanoplot_color,"nanoplot.tar.gz"])
ontstat = self.add_component("Run_stats", [self.summary_file, self.barcoded])
trim_porechop = self.add_component("Trim_porechop", [self.get_all_reads(), self.nb_threads, self.fileformat , "discard_middle"])
if self.fast5dir != None:
fast5archive = self.add_component("Fast5archive", [self.fast5dir, "fast5archive.tar"])
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import re, os
from subprocess import Popen, PIPE
import logging
import time
from ng6.analysis import Analysis
from ng6.utils import Utils
from jflow.utils import get_argument_pattern
def is_last_dir(a_dir):
    """
    Tell whether a directory is a leaf, i.e. contains no sub-directory.

    @param a_dir: path of the directory to inspect
    @return: True when no entry of a_dir is a directory (an empty directory
             or one holding only files), False otherwise
    """
    # any() stops at the first sub-directory found instead of testing
    # every remaining entry of the directory.
    return not any(
        os.path.isdir(os.path.join(a_dir, item))
        for item in os.listdir(a_dir)
    )
def get_last_subdirectories(a_dir):
    """
    List the deepest (leaf) sub-directories found below a directory.

    @param a_dir: path of the directory to explore (a trailing '/' is accepted)
    @return: list of leaf sub-directory paths, relative to a_dir, whose
             components are joined with '/'. The list is empty when a_dir
             contains no sub-directory. Entries are visited in sorted order
             so the result is reproducible from one call to the next.
    """
    list_dir = []
    # os.listdir returns entries in arbitrary order: sort them so that the
    # lists derived from this result are stable between runs.
    for subdir in sorted(os.listdir(a_dir)):
        new_dir = os.path.join(a_dir, subdir)
        if not os.path.isdir(new_dir):
            continue
        nested = get_last_subdirectories(new_dir)
        if nested:
            # new_dir has sub-directories: report its leaves prefixed by its name
            list_dir.extend(subdir + '/' + item for item in nested)
        else:
            # an empty result means new_dir holds no directory: it is a leaf
            list_dir.append(subdir)
    return list_dir
def get_immediate_subdirectories(a_dir):
    """
    Return the names of the directories located directly inside a_dir.

    @param a_dir: path of the directory to inspect
    @return: list of entry names (not full paths) that are directories,
             in the order given by os.listdir
    """
    subdir_names = []
    for entry in os.listdir(a_dir):
        if os.path.isdir(os.path.join(a_dir, entry)):
            subdir_names.append(entry)
    return subdir_names
def rreplace(s, old, new, occurrence):
    """
    Replace occurrences of a substring starting from the end of a string.

    @param s: the string to process
    @param old: the substring to replace
    @param new: the replacement substring
    @param occurrence: maximum number of replacements, counted from the right
    @return: s with at most `occurrence` right-most `old` replaced by `new`
    """
    # Splitting from the right then joining with the replacement swaps
    # only the right-most separators.
    return new.join(s.rsplit(old, occurrence))
class Fast5archive (Analysis):
    """
    This module archives the fast5 files: one tar file is built for each
    deepest sub-directory of the fast5 directory.
    """

    def define_parameters(self, fast5dir, archivename="fast5_archive.tar"):
        """
        Declare the parameters, inputs and outputs of the analysis.

        @param fast5dir: path of the directory holding the fast5 files
        @param archivename: name of the final archive
        """
        self.add_parameter("fast5dir", "Path of the fast5 directory", default=fast5dir, type='str')
        self.add_parameter("archive_name", "Name of the archive", default=archivename, type='str')
        # Leaf sub-directories, expressed relative to fast5dir
        list_subdir = get_last_subdirectories(self.fast5dir)
        # Flatten the relative paths to build one output file name per sub-directory.
        # NOTE(review): two distinct paths can flatten to the same name
        # ('a/b_c' and 'a_b/c' both give 'a_b_c') - confirm this cannot
        # happen with the fast5 directory layout.
        list_subdir_name = [item.replace('/', '_') for item in list_subdir]
        self.add_input_file_list("list_subdir", "list_subdir", default=list_subdir)
        self.add_output_file_list("stderrs", "stderrs", pattern='Fast5archive_subdir_{basename_woext}.stderr', items=list_subdir_name)
        self.add_output_file_list("subarchive", "subarchive", pattern=self.output_directory + '/' + self.archive_name + '_' + '{basename_woext}.tar', items=list_subdir_name)

    def define_analysis(self):
        """
        Set the name, description, software and options of the analysis.
        """
        self.name = "Fast5archive"
        self.description = "Archive the fast5 files generated by Minknow"
        self.software = "tar"
        self.options = "-cf"

    def post_process(self):
        """
        Collect the tar files of the output directory and add them to the
        analysis through a single archive named after archive_name.
        """
        logger = logging.getLogger("jflow")
        logger.debug("Begin Fast5archive.post_process! ont_qc")
        results_files = []
        # Sorted so the content of the final archive is ordered the same
        # way on every run (os.listdir order is arbitrary).
        for file_name in sorted(os.listdir(self.output_directory)):
            full_file_path = os.path.join(self.output_directory, file_name)
            logger.debug("Fast5archive.post_process : full_file_path " + full_file_path)
            # The sub-archives declared in define_parameters all end with '.tar'
            if file_name.endswith(".tar"):
                logger.debug("Fast5archive.post_process match .tar : full_file_path " + full_file_path)
                results_files.append(full_file_path)
        # Finally create and add the archive to the analysis
        self._create_and_archive(results_files, self.archive_name)
        logger.debug("End Fast5archive.post_process! ")

    def get_version(self):
        """
        Return the version string reported by tar.

        @return: the matched 'tar (GNU tar) X.Y' text; when the output does
                 not come from GNU tar, the first line of the output (or an
                 empty string if there is none) instead of failing
        """
        cmd = [self.get_exec_path("tar"), "--version"]
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        output = stdout.decode()
        # Raw string: '\(' and '\.' are regex escapes, not string escapes
        match = re.search(r'tar \(GNU tar\) ([0-9]*\.[0-9]*)', output)
        if match is not None:
            return match.group()
        lines = output.strip().splitlines()
        return lines[0] if lines else ""

    def process(self):
        """
        Add one tar execution per leaf sub-directory of fast5dir. Each
        execution writes its archive to the matching subarchive entry and
        its error stream to the matching stderrs entry.
        """
        # Local import: shlex is only needed to build the shell commands
        import shlex
        logger = logging.getLogger("jflow")
        logger.debug("Begin Fast5archive.process! self.fast5dir = " + self.fast5dir)
        # Same executable lookup as get_version, so the reported version
        # is the one of the binary really used.
        tar_cmd = self.get_exec_path("tar") + ' ' + self.options
        # Quote the paths: they are inserted as-is in a shell command line
        quoted_fast5dir = shlex.quote(str(self.fast5dir))
        for subdir, archive, stderr_file in zip(self.list_subdir, self.subarchive, self.stderrs):
            logger.debug("Fast5archive.process : subdir = " + subdir + ", archive = " + archive + ', stderr = ' + stderr_file)
            self.add_shell_execution(tar_cmd + ' $1 -C ' + quoted_fast5dir + ' ' + shlex.quote(str(subdir)) + ' 2> $2 ',
                                     cmd_format='{EXE} {OUT}', map=False,
                                     outputs=[archive, stderr_file])
        logger.debug("End Fast5archive.process! ")
......@@ -116,6 +116,10 @@ class Run_stats (Analysis):
sample = "ONT_sample"
group = "Stats"
self._add_result_element(sample, "sequencing_summary", self._save_file(self.sequencing_summary_file, sample + ".sequencing_summary.txt"), group)
#logging.getLogger("jflow").debug("Begin Nanoplot.post_process - sample "+file)
# stat file
statfile = os.path.join(self.output_directory,"plot_stats.txt")
......@@ -242,4 +246,3 @@ class Run_stats (Analysis):
# map=False, outputs = self.archive_name)
logging.getLogger("jflow").debug("End Run_stats.process! ")
print('END PROCESS TEST')
......@@ -29,7 +29,7 @@ class Trim_porechop (Analysis):
This module trim the reads from ONT data
"""
def define_parameters(self, fastq_files, nbthreads, formatfile, discard_middle, archivename="porechop_archive"):
def define_parameters(self, fastq_files, nbthreads, formatfile, discard_middle, archivename="porechop_archive.tar"):
self.add_input_file_list( "fastq_files", "fastq_files", default=fastq_files, required=True, file_format = 'fastq')
self.add_parameter("nbthreads", "number of threads to use (default=16)", default=16, type='int')
self.add_parameter("formatfile", "format of the input files", default="fastq", type='str')
......@@ -118,7 +118,8 @@ class Trim_porechop (Analysis):
self._add_result_element("ont_sample", "bp_removed_end", bp_removed_end,filename)
#Finally create and add the archive to the analysis
self._create_and_archive(results_files,self.archive_name)
#self._create_and_archive(results_files,self.archive_name)
self._archive_files(results_files, "gz")
logging.getLogger("jflow").debug("End Trimporechop.post_process! ")
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
--date
25/06/2018
--data-nature
DNA
--sequencer
ONT_MinION
--name
ont_qc_barcoded
--type
Unknown
--species
arabido
--description
"ont demo workflow, Minknow 1.10 and Albacore 2.0.2"
--project-name
test-dev
--summary-file
/work/ng6-test/claire/test_ont_qc/sequencing_summary_barcoded_test.txt
--barcoded
yes
--nb-threads
4
--fileformat
fastq
--fast5dir
/work/ng6-test/claire/test_ont_qc/fast5_dir
--sample
sample-name=ONT_sample
#read1=/MinION/20170308_arab1D_LSK108_MN19767/20170308_arab1D_LSK108_MN19767_albacorev1.2.4/20170308_arab1D_LSK108_MN19767_albacorev1.2.4.fastq
read1=/work/ng6-test/claire/test_ont_qc/test*.fastq
#read2=/work/ng6-test/claire/test_ont_qc/test2.fastq
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment