Commit c1b80fa2 authored by Romain Therville's avatar Romain Therville 🐭
Browse files

Merge branch 'master' into 'issue#127'

# Conflicts:
#   ui/nG6/pi6/class.tx_nG6_pi6.php
parents 4075314c 126a821e
...@@ -377,9 +377,10 @@ class Utils(object): ...@@ -377,9 +377,10 @@ class Utils(object):
if prefix == "data" or prefix == "run" : if prefix == "data" or prefix == "run" :
run = t3mysql.select_run_informations(id) run = t3mysql.select_run_informations(id)
source_dir = data_folder +'/'+ run['directory'] source_dir = data_folder +'/'+ run['directory']
project_name = "Project_%s.%s" % ( re.sub( r"[\s\/]", "_", run['project_name']), run['project_id'] ) project_name = "Project_%s.%s" % ( re.sub( r"[\s\/]", "_", run['project_name']), run['project_id'] )
run_name = "Run_%s.%s" %( run['name'].replace(' ', '_').replace('/', '_'), id )
#run_name = "Run_%s.%s" %( run['name'].replace(' ', '_').replace('/', '_'), id )
run_name = "Run_%s.%s" % ( re.sub( r"[^A-Za-z0-9]", "_", run['name']), id)
raw_data_dir = os.path.join( output_folder, project_name, run_name, "RawData" ) raw_data_dir = os.path.join( output_folder, project_name, run_name, "RawData" )
if source_dir not in src_directories : if source_dir not in src_directories :
...@@ -392,6 +393,7 @@ class Utils(object): ...@@ -392,6 +393,7 @@ class Utils(object):
analysis_dir = data_folder + '/' + analyse_values["directory"] analysis_dir = data_folder + '/' + analyse_values["directory"]
analysis_name = "Analyse_%s.%s"%( re.sub( r"[\s\/]", "_", analyse_values['name']), analyse_id ) analysis_name = "Analyse_%s.%s"%( re.sub( r"[\s\/]", "_", analyse_values['name']), analyse_id )
dest_analysis_dir = os.path.join( output_folder, project_name, run_name, analysis_name ) dest_analysis_dir = os.path.join( output_folder, project_name, run_name, analysis_name )
if analysis_dir not in src_directories : if analysis_dir not in src_directories :
src_directories.append(analysis_dir) src_directories.append(analysis_dir)
if dest_analysis_dir not in dest_directories : if dest_analysis_dir not in dest_directories :
...@@ -409,6 +411,7 @@ class Utils(object): ...@@ -409,6 +411,7 @@ class Utils(object):
# it's a run analysis # it's a run analysis
if analyse["run_id"]: if analyse["run_id"]:
run_name = "Run_%s.%s" % ( re.sub( r"[\s\/]", "_", analyse["run_name"]), analyse["run_id"]) run_name = "Run_%s.%s" % ( re.sub( r"[\s\/]", "_", analyse["run_name"]), analyse["run_id"])
analysis_name = "Analyse_%s.%s" % ( re.sub( r"[\s\/]", "_", analyse["name"]), id) analysis_name = "Analyse_%s.%s" % ( re.sub( r"[\s\/]", "_", analyse["name"]), id)
dest_analysis_dir = os.path.join( output_folder, project_name, run_name, analysis_name ) dest_analysis_dir = os.path.join( output_folder, project_name, run_name, analysis_name )
if dest_analysis_dir not in dest_directories : if dest_analysis_dir not in dest_directories :
...@@ -429,7 +432,6 @@ class Utils(object): ...@@ -429,7 +432,6 @@ class Utils(object):
if os.path.isdir(e) : if os.path.isdir(e) :
sources.append(e) sources.append(e)
destinations.append(dest_directories[i]) destinations.append(dest_directories[i])
return sources, destinations return sources, destinations
......
...@@ -41,7 +41,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. ...@@ -41,7 +41,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{elseif $split_param[0] == "READ_NAME_REGEX"} {elseif $split_param[0] == "READ_NAME_REGEX"}
<li class="parameter">Regular expression that can be used to extract three variables : tile/region, x coordinate and y coordinate ({$split_param[0]}={$split_param[1]}). These values are used to estimate the rate of optical duplication.</li> <li class="parameter">Regular expression that can be used to extract three variables : tile/region, x coordinate and y coordinate ({$split_param[0]}={$split_param[1]}). These values are used to estimate the rate of optical duplication.</li>
{elseif $split_param[0] == "OPTICAL_DUPLICATE_PIXEL_DISTANCE"} {elseif $split_param[0] == "OPTICAL_DUPLICATE_PIXEL_DISTANCE"}
<li class="parameter">The maximum offset between two duplicte clusters in order to consider them optical duplicates is {$split_param[1]} ({$split_param[0]}={$split_param[1]}).</li> <li class="parameter">The maximum offset between two duplicate clusters in order to consider them optical duplicates is {$split_param[1]} ({$split_param[0]}={$split_param[1]}).</li>
{/if} {/if}
{/if} {/if}
{/foreach} {/foreach}
...@@ -71,7 +71,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. ...@@ -71,7 +71,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<th class="numeric-sort" style="vertical-align:Middle"><center>Singletons</center></th> <th class="numeric-sort" style="vertical-align:Middle"><center>Singletons</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Mate mapped on a different chr</center></th> <th class="numeric-sort" style="vertical-align:Middle"><center>Mate mapped on a different chr</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Supplementary</center></th> <th class="numeric-sort" style="vertical-align:Middle"><center>Supplementary</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Duplicated</center></th> <th class="numeric-sort" style="vertical-align:Middle"><center>Nb read duplicated</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Read pair duplicates</center></th> <th class="numeric-sort" style="vertical-align:Middle"><center>Read pair duplicates</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Read pair optical duplicates</center></th> <th class="numeric-sort" style="vertical-align:Middle"><center>Read pair optical duplicates</center></th>
...@@ -122,7 +122,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. ...@@ -122,7 +122,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<td>-</td> <td>-</td>
<td>-</td> <td>-</td>
{else} {else}
<td>{($sample_results["default"].pairDuplicates*2 + $sample_results["default"].unpairDuplicates)|number_format:0:' ':' '}</td> <td>{($sample_results["default"].pairDuplicates*2 + $sample_results["default"].unpairDuplicates)|number_format:0:' ':' '} ({$sample_results["default"].percentDuplication}*100%)</td>
<td>{$sample_results["default"].pairDuplicates|number_format:0:' ':' '}</td> <td>{$sample_results["default"].pairDuplicates|number_format:0:' ':' '}</td>
<td>{$sample_results["default"].pairOpticalDuplicates|number_format:0:' ':' '}</td> <td>{$sample_results["default"].pairOpticalDuplicates|number_format:0:' ':' '}</td>
{/if} {/if}
......
...@@ -118,10 +118,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. ...@@ -118,10 +118,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{/foreach} {/foreach}
{if $nb_files == 0} {if $nb_files == 0}
<div class="alert alert-info"> <div class="alert alert-info">
Results folder not synchronized yet... {if $analyse.data_state=="purged"}
The data have been purged. (Retention limit : {$analyse.retention_date|date_format})
{else}
Results folder not synchronized yet...
{/if}
</div> </div>
{else} {else}
<ul> <ul>
<div class="alert alert-info" name="retention-info">
Retention date is {$analyse.retention_date|date_format}. After this deadline, these data will no longer be available. Only metadata and quality control results will remain in NG6.
</div>
{foreach $dir|scandir as $file} {foreach $dir|scandir as $file}
{assign var="link" value=(('fileadmin'|cat:'/'|cat:$analyse.directory)|cat:'/')|cat:$file} {assign var="link" value=(('fileadmin'|cat:'/'|cat:$analyse.directory)|cat:'/')|cat:$file}
{if $file != "." and $file != "" and $file != ".." and ($file|substr:-strlen(".png")) != ".png" and !is_dir($link)} {if $file != "." and $file != "" and $file != ".." and ($file|substr:-strlen(".png")) != ".png" and !is_dir($link)}
......
...@@ -122,7 +122,19 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. ...@@ -122,7 +122,19 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<div class="tab-pane fade" id="downloads"> <div class="tab-pane fade" id="downloads">
{$dir=$data_folder|cat:$runs[key($runs)].directory} {$dir=$data_folder|cat:$runs[key($runs)].directory}
{assign var="nb_files" value=0} {assign var="nb_files" value=0}
{foreach $dir|scandir as $file}
{if $file != "." and $file !="" and $file != ".." and ($file|substr:-strlen(".png")) != ".png"}
{$nb_files = $nb_files + 1}
{/if}
{/foreach}
<ul> <ul>
{if $nb_files >= 1}
<div class="alert alert-info" name="retention-info">
Retention date is {$runs[key($runs)].retention_date|date_format}. After this deadline, these data will no longer be available. Only metadata and quality control results will remain in NG6.
</div>
{/if}
{assign var="nb_files" value=0}
{foreach $dir|scandir as $file} {foreach $dir|scandir as $file}
{if $file != "." and $file !="" and $file != ".." and ($file|substr:-strlen(".png")) != ".png"} {if $file != "." and $file !="" and $file != ".." and ($file|substr:-strlen(".png")) != ".png"}
{$link=(('fileadmin'|cat:$runs[key($runs)].directory)|cat:'/')|cat:$file} {$link=(('fileadmin'|cat:$runs[key($runs)].directory)|cat:'/')|cat:$file}
...@@ -133,7 +145,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. ...@@ -133,7 +145,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
</ul> </ul>
{if $nb_files == 0} {if $nb_files == 0}
<div class="alert alert-info"> <div class="alert alert-info">
Results folder not synchronized yet... {if $runs[key($runs)].data_state=="purged"}
The data have been purged. (Retention limit : {$runs[key($runs)].retention_date|date_format})
{else}
Results folder not synchronized yet...
{/if}
</div> </div>
{/if} {/if}
</div> </div>
......
...@@ -109,6 +109,10 @@ class tx_nG6_pi6 extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin { ...@@ -109,6 +109,10 @@ class tx_nG6_pi6 extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin {
#retrieve project data #retrieve project data
$p=tx_nG6_db::select_a_project_retention_data_info($project_id, FALSE, TRUE); $p=tx_nG6_db::select_a_project_retention_data_info($project_id, FALSE, TRUE);
#retrieve discarded emails
$string_emails_to_discard = $GLOBALS['TSFE']->tmpl->setup["plugin."]["tx_nG6_pi6."]["email_to_discard"] ;
$array_emails_to_discard = explode(',',$string_emails_to_discard);
#build email list of managers #build email list of managers
$users_id=array(); $users_id=array();
$users_emails = array(); $users_emails = array();
...@@ -121,7 +125,7 @@ class tx_nG6_pi6 extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin { ...@@ -121,7 +125,7 @@ class tx_nG6_pi6 extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin {
$users_emails[$u["right_level_label"]][] = $u["email"]; $users_emails[$u["right_level_label"]][] = $u["email"];
} }
#Warn if mail is sent to several manager //Warn if mail is sent to several manager
$email_warn="" ; $email_warn="" ;
$purge_email_to=""; $purge_email_to="";
......
...@@ -83,6 +83,7 @@ $(function () { ...@@ -83,6 +83,7 @@ $(function () {
}; };
var _retrieve_data = function(workflow_id, callback, error_callback){ var _retrieve_data = function(workflow_id, callback, error_callback){
console.log("IN _retrieve_data, workflow_id=" + workflow_id);
$.ajax({ $.ajax({
url : $("#server_url").val() + '/get_workflow_outputs?workflow_id=' + workflow_id, url : $("#server_url").val() + '/get_workflow_outputs?workflow_id=' + workflow_id,
dataType : 'jsonp', dataType : 'jsonp',
...@@ -183,6 +184,14 @@ $(function () { ...@@ -183,6 +184,14 @@ $(function () {
} }
); );
} }
else if (statusdata.status == "failed") {
modal.$body.html([
'<div class="alert alert-danger">',
' <p>The workflow has failed, either the login or password is incorrect. </p> <p>You must provide the credentials you use to connect to the GenoToul bioinformatics infrastructure.</p>',
'</div>',
].join(''));
}
else { else {
$('#wfstatus').wfstatus('reload'); $('#wfstatus').wfstatus('reload');
button.disabled = false; button.disabled = false;
......
...@@ -43,7 +43,7 @@ class AlignmentStats (Analysis): ...@@ -43,7 +43,7 @@ class AlignmentStats (Analysis):
self.add_parameter("assume_sorted", "assume_sorted", default=assume_sorted, type=bool) self.add_parameter("assume_sorted", "assume_sorted", default=assume_sorted, type=bool)
self.add_parameter("validation_stringency", "validation_stringency", default=validation_stringency) self.add_parameter("validation_stringency", "validation_stringency", default=validation_stringency)
self.add_parameter("max_file_handles", "max_file_handles", default=max_file_handles, type=int) self.add_parameter("max_file_handles", "max_file_handles", default=max_file_handles, type=int)
self.add_parameter("sorting_collection_size_ratio", "sorting_collection_size_ratio", default=max_file_handles, type=float) self.add_parameter("sorting_collection_size_ratio", "sorting_collection_size_ratio", default=sorting_collection_size_ratio, type=float)
self.add_parameter("archive_name", "archive_name", default=archive_name) self.add_parameter("archive_name", "archive_name", default=archive_name)
self.memory = '4G' self.memory = '4G'
if self.get_memory() != None : if self.get_memory() != None :
...@@ -136,6 +136,7 @@ class AlignmentStats (Analysis): ...@@ -136,6 +136,7 @@ class AlignmentStats (Analysis):
self._add_result_element(sample, "pairDuplicates", dupl_info["Unknown Library"]["READ_PAIR_DUPLICATES"]) self._add_result_element(sample, "pairDuplicates", dupl_info["Unknown Library"]["READ_PAIR_DUPLICATES"])
self._add_result_element(sample, "unpairDuplicates", dupl_info["Unknown Library"]["UNPAIRED_READ_DUPLICATES"]) self._add_result_element(sample, "unpairDuplicates", dupl_info["Unknown Library"]["UNPAIRED_READ_DUPLICATES"])
self._add_result_element(sample, "pairOpticalDuplicates", dupl_info["Unknown Library"]["READ_PAIR_OPTICAL_DUPLICATES"]) self._add_result_element(sample, "pairOpticalDuplicates", dupl_info["Unknown Library"]["READ_PAIR_OPTICAL_DUPLICATES"])
self._add_result_element(sample, "percentDuplication", dupl_info["Unknown Library"]["PERCENT_DUPLICATION"])
# Finaly create and add the archive to the analyse # Finaly create and add the archive to the analyse
...@@ -156,7 +157,7 @@ class AlignmentStats (Analysis): ...@@ -156,7 +157,7 @@ class AlignmentStats (Analysis):
xmx="-Xmx"+self.memory.lower() xmx="-Xmx"+self.memory.lower()
if self.search_dupl: if self.search_dupl:
self.tmp_bam = self.get_outputs('{basename_woext}_noDupl.bam', self.bam_files) self.tmp_bam = self.get_outputs('{basename_woext}_noDupl.bam', self.bam_files)
self.add_shell_execution(self.get_exec_path("javaPICARD")+ " "+ xmx +"-jar " + self.get_exec_path("Picard") + " MarkDuplicates INPUT=$1 METRICS_FILE=$2 OUTPUT=$3" + self.duplication_options + " 2> $4", self.add_shell_execution(self.get_exec_path("javaPICARD")+ " "+ xmx +" -jar " + self.get_exec_path("Picard") + " MarkDuplicates INPUT=$1 METRICS_FILE=$2 OUTPUT=$3" + self.duplication_options + " 2> $4",
cmd_format='{EXE} {IN} {OUT}', map=True, cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.bam_files, outputs=[self.duplication_files, self.tmp_bam, self.dupl_stderrs]) inputs=self.bam_files, outputs=[self.duplication_files, self.tmp_bam, self.dupl_stderrs])
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
import os import os
import pickle import pickle
import logging
from jflow.component import Component from jflow.component import Component
from ng6.utils import Utils from ng6.utils import Utils
...@@ -27,6 +28,7 @@ def create_symbolik_link (ids_dump_path, output_list, user_script, username, pas ...@@ -27,6 +28,7 @@ def create_symbolik_link (ids_dump_path, output_list, user_script, username, pas
import os import os
import pickle import pickle
import subprocess import subprocess
import logging
from ng6.config_reader import NG6ConfigReader from ng6.config_reader import NG6ConfigReader
from ng6.utils import Utils, SSH from ng6.utils import Utils, SSH
...@@ -44,7 +46,6 @@ def create_symbolik_link (ids_dump_path, output_list, user_script, username, pas ...@@ -44,7 +46,6 @@ def create_symbolik_link (ids_dump_path, output_list, user_script, username, pas
fh.close() fh.close()
prefixed_ids = ";".join(prefixed_ids) prefixed_ids = ";".join(prefixed_ids)
src_directories, dest_directories = Utils.get_directories_structure_and_content(ng6_username, data_folder, output_folder, prefixed_ids) src_directories, dest_directories = Utils.get_directories_structure_and_content(ng6_username, data_folder, output_folder, prefixed_ids)
destinations = [] destinations = []
with open(user_script, "w") as us: with open(user_script, "w") as us:
......
...@@ -64,7 +64,7 @@ class IlluminaQualityCheck (CasavaNG6Workflow): ...@@ -64,7 +64,7 @@ class IlluminaQualityCheck (CasavaNG6Workflow):
bwa = self.add_component("BWA", [indexed_ref, filtered_read1_files, filtered_read2_files, sample_lane_prefixes, "mem", not self.delete_bam], parent = fastqilluminafilter) bwa = self.add_component("BWA", [indexed_ref, filtered_read1_files, filtered_read2_files, sample_lane_prefixes, "mem", not self.delete_bam], parent = fastqilluminafilter)
# make some statistic on the alignement # make some statistic on the alignement
alignmentstats = self.add_component("AlignmentStats", [bwa.bam_files, self.is_paired_end(), False], parent = bwa) alignmentstats = self.add_component("AlignmentStats", [bwa.bam_files, self.is_paired_end(), True], parent = bwa)
if self.is_paired_end(): if self.is_paired_end():
# process insert sizes # process insert sizes
......
...@@ -16,11 +16,18 @@ ...@@ -16,11 +16,18 @@
# #
import logging import logging
import os import os
from glob import glob
from subprocess import Popen, PIPE
from ng6.ng6workflow import NG6Workflow from ng6.ng6workflow import NG6Workflow
from ng6.utils import Utils from ng6.utils import Utils
from _codecs import encode
class OntQualityCheck (NG6Workflow): class OntQualityCheck (NG6Workflow):
def __init__(self, args={}, id=None, function= "process"):
NG6Workflow.__init__(self, args, id, function)
self.log_files = []
def get_name(self): def get_name(self):
return 'ont_qc' return 'ont_qc'
...@@ -31,8 +38,8 @@ class OntQualityCheck (NG6Workflow): ...@@ -31,8 +38,8 @@ class OntQualityCheck (NG6Workflow):
logging.getLogger("jflow").debug("Begin OntQualityCheck.define_parameters! ont_qc") logging.getLogger("jflow").debug("Begin OntQualityCheck.define_parameters! ont_qc")
self.add_parameter("compression", "How should the data be compressed once archived", choices= [ "none", "gz", "bz2"], default = "gz") self.add_parameter("compression", "How should the data be compressed once archived", choices= [ "none", "gz", "bz2"], default = "gz")
self.add_parameter("trimming", "use trimming with porechop or not",choices= [ "yes", "no"], default = "no") self.add_parameter("trimming", "use trimming with porechop or not",choices= [ "yes", "no"], default = "no")
self.add_input_file( "summary_file", "Input summary basecalling file", default=None) self.add_input_file("summary_file", "Input summary basecalling file", default=None)
self.add_parameter("barcoded", "Barcoded run or not", choices= [ "yes", "no"], default = "no") self.add_parameter("barcoded", "If barcoded run : correspondance file", default = None)
self.add_parameter("fast5dir", "path of the fast5 directory", default = None) self.add_parameter("fast5dir", "path of the fast5 directory", default = None)
def process(self): def process(self):
...@@ -43,13 +50,31 @@ class OntQualityCheck (NG6Workflow): ...@@ -43,13 +50,31 @@ class OntQualityCheck (NG6Workflow):
sample_names.append( sample.name ) sample_names.append( sample.name )
infiles.append(sample.reads1[0]) infiles.append(sample.reads1[0])
# add raw # add raw
print(self.get_all_reads()) logging.getLogger("jflow").debug("OntQualityCheck.process! get_all_reads : "+",".join(self.get_all_reads()))
print(sample_names) logging.getLogger("jflow").debug("OntQualityCheck.process! sample_name : "+str(sample_names))
print(self.summary_file) logging.getLogger("jflow").debug("OntQualityCheck.process! summary_file : "+str(self.summary_file))
### check for log file
# get current path
cmd = [self.get_exec_path("pwd")]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
exec_path = stdout.decode("utf-8").rsplit()[0]
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile pwd = " + str(exec_path))
# find .log files
for file in glob(exec_path+"/*.log"):
self.log_files.append(file)
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile self.log_files = " + ",".join(self.log_files))
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile exiting")
# add logs
if len(self.log_files) > 0 :
add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"])
addrawfiles = self.add_component("AddRawFiles", [self.runobj, self.get_all_reads(), self.compression]) addrawfiles = self.add_component("AddRawFiles", [self.runobj, self.get_all_reads(), self.compression])
#nanoplot = self.add_component("Nanoplot", [sample.name,self.get_all_reads(), self.nb_threads, True, "png", self.nanoplot_color,"nanoplot.tar.gz"]) ontstat = self.add_component("Run_stats", [self.summary_file, sample_names[0]])
ontstat = self.add_component("Run_stats", [self.summary_file, self.barcoded, sample_names[0]]) if (self.barcoded != None) or (self.barcoded != "no") :
demultiplexont = self.add_component("Demultiplex_ONT", [self.get_all_reads() , self.barcoded])
if self.trimming == "yes": if self.trimming == "yes":
trim_porechop = self.add_component("Trim_porechop", [self.get_all_reads() , "discard_middle"]) trim_porechop = self.add_component("Trim_porechop", [self.get_all_reads() , "discard_middle"])
if self.fast5dir != None: if self.fast5dir != None:
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import re, os
from subprocess import Popen, PIPE
import logging
import time
from ng6.analysis import Analysis
from ng6.utils import Utils
from jflow.utils import get_argument_pattern
class Demultiplex_ONT (Analysis):
    """
    This module demultiplexes the total fastq of a barcoded ONT run and produces stats

    Wraps the `qcat` demultiplexer: splits a run-level fastq into per-barcode
    fastq files, then (in post_process) registers the produced fastq/stdout
    files as analysis results and archives them.
    """

    def __init__(self, args={}, id=None, function= "process"):
        # Delegates straight to the ng6 Analysis base constructor.
        # NOTE(review): mutable default `args={}` is shared across calls — kept
        # as-is here (documentation-only pass), but worth fixing upstream.
        Analysis.__init__(self, args, id, function)

    def define_parameters(self, fastq_files, barcode_file, archivename="DemultiplexONT_archive.tar"):
        """
        Declare the component's inputs.

        @param fastq_files  : list of run-level fastq files to demultiplex
        @param barcode_file : name of the barcode correspondence file (required)
        @param archivename  : name of the result archive (default "DemultiplexONT_archive.tar")
        """
        self.add_input_file_list( "fastq_files", "fastq_files", default=fastq_files, required=True, file_format = 'fastq')
        self.add_parameter("barcode_file", "Name of the barcode file", default=barcode_file, required=True , file_format = 'str')
        self.add_parameter("archive_name", "Name of the archive", default=archivename, type='str')
        # run_name is derived from each input fastq's basename (without extension).
        self.add_parameter( "run_name", "The name of the run (from total fastq file)", pattern='{basename_woext}', items=self.fastq_files, file_format = "fastq")
        # NOTE(review): process() reads self.options, self.outpath and self.kit,
        # none of which are declared here — confirm the component is complete.

    def define_analysis(self):
        # Static metadata shown in the NG6 web interface for this analysis.
        self.name = "DemultiplexONT"
        self.description = "Demultiplexes the total fastq of a barcoded ONT run and produces stats"
        self.software = "Qcat"
        #if self.discard_middle == "discard_middle":
        #    self.options = "--discard_middle"

    def __parse_stat_file (self, stat_file):
        # NOTE(review): the message has no %s placeholder, so `stat_file` is
        # passed as an unused %-format argument and is never rendered (and the
        # logging module will report a formatting error) — confirm intent.
        logging.getLogger("jflow").debug("Begin DemultiplexONT.__parse_stat_file! file =",stat_file)
        # NOTE(review): the string below sits AFTER the first statement, so it
        # is a no-op expression, not a docstring; the method currently has no
        # implementation and implicitly returns None, yet post_process indexes
        # its result ([0]..[5]) — looks truncated/unfinished, verify.
        """
        Parse the stat file
        @param stat_file : the stdout porechop
        @return : {"read_trim_start" : read_trim_start, ...}
        """

    def post_process(self):
        """
        Collect the demultiplexing outputs from the component's output
        directory, expose per-file trimming statistics as result elements,
        and archive the result files.
        """
        logging.getLogger("jflow").debug("Begin DemultiplexONT.post_process! ont_qc")
        # Create dictionary : key = file name or prefix, value = files path
        results_files = []
        # add header of stats
        group = "statsporechop"
        self._add_result_element("metrics", "headers", ','.join(["read_trim_start", "read_total_start", "bp_removed_start", "read_trim_end", "read_total_end", "bp_removed_end"]), group)
        # NOTE(review): debugging print left in — consider routing through the
        # "jflow" logger like the rest of this class.
        print(os.listdir(self.output_directory))
        for file in os.listdir(self.output_directory):
            full_file_path = os.path.join(self.output_directory, file)
            # NOTE(review): log messages below say "Trimporechop" — apparently
            # copy-pasted from the Trim_porechop component; misleading in logs.
            logging.getLogger("jflow").debug("Trimporechop.post_process : full_file_path "+full_file_path)
            if file.endswith(".fastq"):
                # Demultiplexed per-barcode fastq: keep for the archive.
                logging.getLogger("jflow").debug("Trimporechop.post_process match .fastq : full_file_path "+full_file_path)
                results_files.append(full_file_path)
            elif file.endswith(".stdout"):
                # Captured tool stdout: parse it for per-file statistics.
                logging.getLogger("jflow").debug("Trimporechop.post_process match .stdout: full_file_path "+full_file_path)
                results_files.append(full_file_path)
                filename = os.path.basename(file).split(".stdout")[0]
                # NOTE(review): __parse_stat_file currently returns None (see
                # above), so the six indexed reads below would raise TypeError
                # at runtime — confirm the parser body was not lost in a merge.
                resultlist = self.__parse_stat_file(full_file_path)
                read_trim_start = resultlist[0]
                read_total_start = resultlist[1]
                bp_removed_start = resultlist[2]
                read_trim_end = resultlist[3]
                read_total_end = resultlist[4]
                bp_removed_end = resultlist[5]
                #add stats for each fastq file
                self._add_result_element("ont_sample", "read_trim_start", read_trim_start,filename)
                self._add_result_element("ont_sample", "read_total_start", read_total_start,filename)
                self._add_result_element("ont_sample", "bp_removed_start", bp_removed_start,filename)
                self._add_result_element("ont_sample", "read_trim_end", read_trim_end,filename)
                self._add_result_element("ont_sample", "read_total_end", read_total_end,filename)
                self._add_result_element("ont_sample", "bp_removed_end", bp_removed_end,filename)
        #Finaly create and add the archive to the analysis
        #self._create_and_archive(results_files,self.archive_name)
        self._archive_files(results_files, "gz")
        logging.getLogger("jflow").debug("End DemultiplexONT.post_process! ")

    def get_version(self):
        """
        Return the qcat version string (stdout of `qcat --version`, as bytes).
        Loads the cluster's Python module first so qcat is on PATH.
        """
        shell_script = "module load system/Python-3.6.3;" + self.get_exec_path("qcat") + " --version"
        logging.getLogger("jflow").debug("DemultiplexONT.get_version ! shell_script " + str(shell_script))
        cmd = ["sh","-c",shell_script]
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        logging.getLogger("jflow").debug("DemultiplexONT.get_version !" + str(stderr))
        return stdout

    def process(self):
        """
        Build and register the qcat shell command for each input fastq.
        """
        logging.getLogger("jflow").debug("Begin DemultiplexONT.process! ont_qc")
        # Create cmd
        # NOTE(review): `"${" + str() + "}"` evaluates to the literal "${}" —
        # the output redirection target is empty/invalid. Also self.options,
        # self.outpath and self.kit are never defined in this class. This
        # command cannot run as written — confirm against the intended design.
        self.add_shell_execution(self.get_exec_path("qcat") +" " + self.options + "-f $1 -b " + str(self.outpath) + " -k " + str(self.kit) + " > ${" + str() + "}",
            cmd_format='{EXE} {IN} {OUT}' ,
            map=False,
            inputs = self.fastq_files)
        #archive = self.output_directory + '/' + self.archive_name + '.tar.gz'
        #self.add_shell_execution('tar -czf $1 ' + self.output_directory + '/' + '*_trim.fastq ', cmd_format='{EXE} {OUT}', map=False, outputs = archive)
        # NOTE(review): closing log message says "Trimporechop" — copy-paste.
        logging.getLogger("jflow").debug("End Trimporechop.process! ")
...@@ -32,17 +32,13 @@ class Run_stats (Analysis): ...@@ -32,17 +32,13 @@ class Run_stats (Analysis):
This module make some statistic from ONT run with graphs This module make some statistic from ONT run with graphs
""" """
def define_parameters(self, sequencing_summary_file, barcoded=False, sample_name="plot", archive_name="RunStats_archive.tar.gz"): def define_parameters(self, sequencing_summary_file, sample_name="plot", archive_name="RunStats_archive.tar.gz"):
logging.getLogger("jflow").debug("Begin Run_stats parameters") logging.getLogger("jflow").debug("Begin Run_stats parameters")
self.add_input_file( "sequencing_summary_file", "Input sequencing summary file from Basecaller", default=sequencing_summary_file, file_format = "txt", required=True) self.add_input_file( "sequencing_summary_file", "Input sequencing summary file from Basecaller", default=sequencing_summary_file, file_format = "txt", required=True)
self.add_parameter("barcoded", "Indicate that barcodes are used for this run", default=barcoded, type='str')
self.add_parameter("sample_name", "Sample name for prefix", default=sample_name, type='str') self.add_parameter("sample_name", "Sample name for prefix", default=sample_name, type='str')
self.add_parameter("archive_name", "Archive name", default=archive_name) self.add_parameter("archive_name", "Archive name", default=archive_name)
self.add_output_file_list("stderr", "stderr ouput file",pattern='Run_stats.stderr', items = self.sequencing_summary_file) self.add_output_file_list("stderr", "stderr ouput file",pattern='Run_stats.stderr', items = self.sequencing_summary_file)
if self.barcoded == "yes":
self.add_output_file_list("stderr_barcoded", "stderr ouput barcoded file",pattern='Run_stats_barcoded.stderr', items = self.sequencing_summary_file)
def get_version(self): def get_version(self):
#cmd = [self.get_exec_path("Rscript")," /save/sbsuser/analyses_scripts/mmanno/graph_albacoresummary.R"] #cmd = [self.get_exec_path("Rscript")," /save/sbsuser/analyses_scripts/mmanno/graph_albacoresummary.R"]
...@@ -113,23 +109,6 @@ class Run_stats (Analysis): ...@@ -113,23 +109,6 @@ class Run_stats (Analysis):
#print(stats) #print(stats)
return stats return stats
def __parse_barcode_file (self, barcode_file):
"""
Parse the barcode file
@param barcode_file : the runstatsR barcode file
@return : {"" : "", ...}
"""
stats = {}