Commit ab314f5b authored by Maxime Manno's avatar Maxime Manno 🍜
Browse files

Merge branch 'nG6-Software_updates' into 'master'

Ng6 software updates

See merge request !5
parents e9a3fbda b60303e6
......@@ -15,6 +15,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Software update test
1. WHAT IS NG6
......
......@@ -103,7 +103,7 @@ user_base_directory = /work/
#fastx_reverse_complement = /usr/bin/fastx_reverse_complement
#STAR = /usr/bin/STAR
#fastuniq = /usr/local/bin/fastuniq
#
#javaPICARD = /tools/java/jre1.8.0_45/bin/java
# workflow specific
#
......
......@@ -607,6 +607,7 @@ class Component(object):
trace_fh.close()
def __write_element(self,fh, title, element):
logging.getLogger("jflow").debug("element = "+str(element))
to_write=''
if isinstance(element, list):
if len (element)> 0 :
......@@ -617,7 +618,7 @@ class Component(object):
else :
to_write+="\n".join(element)+"\n"
else :
to_write+= element+"\n"
to_write+= str(element)+"\n"
if to_write != "" :
fh.write(title+" :\n")
fh.write(to_write)
......@@ -527,9 +527,9 @@ class CasavaNG6Workflow(NG6Workflow):
except : pass
# contamination_search
if contam :
if self.contamination_databank: contam.extend(self.contamination_databank)
contamination_search = self.add_component("ContaminationSearch", [filtered_read1_files+filtered_read2_files, contam, reads_prefixes], parent = fastqilluminafilter)
#if contam :
# if self.contamination_databank: contam.extend(self.contamination_databank)
# contamination_search = self.add_component("ContaminationSearch", [filtered_read1_files+filtered_read2_files, contam, reads_prefixes], parent = fastqilluminafilter)
# make some statistics on raw file
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (self.group_prefix is not None), self.no_group, "fastqc.tar.gz"], parent = fastqilluminafilter)
......
{*
Copyright (C) 2009 INRA
This program is free software: you can redistribute it and/or modify
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
......
......@@ -70,7 +70,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<th class="numeric-sort" style="vertical-align:Middle"><center>With itself and mate mapped</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Singletons</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Mate mapped on a different chr</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Supplimentary</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Supplementary</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Duplicated</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Read pair duplicates</center></th>
<th class="numeric-sort" style="vertical-align:Middle"><center>Read pair optical duplicates</center></th>
......@@ -116,7 +116,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<td>{$sample_results["default"].matemapped|number_format:0:' ':' '}</td>
<td>{$singletons[0]|number_format:0:' ':' '} {$singletons[1]}</td>
<td>{$sample_results["default"].mapch1|number_format:0:' ':' '}</td>
<td>{$sample_results["default"].supplimentary|number_format:0:' ':' '}</td>
<td>{$sample_results["default"].supplementary|number_format:0:' ':' '}</td>
{if !isset($sample_results["default"]["pairOpticalDuplicates"]) }
<td>-</td>
<td>-</td>
......
......@@ -12,7 +12,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
along with this program. If not, see <http://www.gnu.org/licenses/>.
*}
{extends file='AnalysisTemplate.tpl'}
......@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{block name=params_content}
{assign var="params" value=" "|explode:$analyse.params}
<ul>
<li class="parameter">Unknown indices with a number of fragments < {$params[0]*100}% of the number of fragments in the sample with the littlest population are merged in "All others".</li>
<li class="parameter">Unknown indices with a number of fragments &lt; {$params[0]*100}% of the number of fragments in the sample with the smallest population are merged into "All others". In other words, each unknown index with a number of fragments &ge; {$params[0]*100}% of the number of fragments in the sample with the smallest population is displayed.</li>
</ul>
{/block}
......
......@@ -45,6 +45,9 @@ class AlignmentStats (Analysis):
self.add_parameter("max_file_handles", "max_file_handles", default=max_file_handles, type=int)
self.add_parameter("sorting_collection_size_ratio", "sorting_collection_size_ratio", default=max_file_handles, type=float)
self.add_parameter("archive_name", "archive_name", default=archive_name)
self.memory = '4G'
if self.get_memory() != None :
self.memory=self.get_memory()
self.add_output_file_list( "stat_files", "stat_files", pattern='{basename_woext}.stat', items=self.bam_files)
self.add_output_file_list( "cigar_stderrs", "cigar_stderrs", pattern='{basename_woext}.cigar_stderr', items=self.bam_files)
......@@ -102,7 +105,7 @@ class AlignmentStats (Analysis):
self._add_result_element(sample, "matemapped", str(summary_info_flagstat["matemapped"]))
self._add_result_element(sample, "singletons", str(summary_info_flagstat["singletons"][0]) + ' (' + str(summary_info_flagstat["singletons"][1]) + ')')
self._add_result_element(sample, "mapch1", str(summary_info_flagstat["mapch1"]))
self._add_result_element(sample, "supplimentary" , str(summary_info_flagstat["supplimentary"]))
self._add_result_element(sample, "supplementary" , str(summary_info_flagstat["supplementary"]))
all_csv_files = self.csv_files_r1
if self.csv_files_r2:
......@@ -150,9 +153,10 @@ class AlignmentStats (Analysis):
def process(self):
# Duplication stats
xmx="-Xmx"+self.memory.lower()
if self.search_dupl:
self.tmp_bam = self.get_outputs('{basename_woext}_noDupl.bam', self.bam_files)
self.add_shell_execution("java -Xmx4g -jar " + self.get_exec_path("MarkDuplicates") + " INPUT=$1 METRICS_FILE=$2 OUTPUT=$3" + self.duplication_options + " 2> $4",
self.add_shell_execution(self.get_exec_path("javaPICARD")+ xmx +"-jar " + self.get_exec_path("Picard") + " MarkDuplicates INPUT=$1 METRICS_FILE=$2 OUTPUT=$3" + self.duplication_options + " 2> $4",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.bam_files, outputs=[self.duplication_files, self.tmp_bam, self.dupl_stderrs])
......@@ -203,8 +207,8 @@ class AlignmentStats (Analysis):
singletons_regex = re.compile("(\d+) .*singletons \(([^:]*).*\)")
# mapch1 regexp
mapch1_regex = re.compile("(\d+) .*with mate mapped to a different chr")
# supplimentary regexp
supplimentary_regex = re.compile("(\d+).*supplimentary")
# supplementary regexp
supplementary_regex = re.compile("(\d+).*supplementary")
summary = {}
for line in open(flagstat_file, 'r').readlines():
......@@ -219,7 +223,7 @@ class AlignmentStats (Analysis):
ppr = properlypaired_regex.match(line)
sr = singletons_regex.match(line)
mc1r = mapch1_regex.match(line)
sur = supplimentary_regex.match(line)
sur = supplementary_regex.match(line)
if tr != None :
summary["total"] = tr.group(1)
if qcfr != None :
......@@ -246,7 +250,7 @@ class AlignmentStats (Analysis):
if mc1r != None :
summary["mapch1"] = mc1r.group(1)
if sur != None :
summary["supplimentary"] = sur.group(1)
summary["supplementary"] = sur.group(1)
return summary
......
......@@ -92,14 +92,14 @@ class BWA (Analysis):
if self.read2:
self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
" $1 $2 2>> $4 | " + self.get_exec_path("samtools") + " view -bS - | " +
self.get_exec_path("samtools") + " sort - $3 2>> $4; mv $3.bam $3;",
self.get_exec_path("samtools") + " sort - -o $3 2>> $4;",
cmd_format='{EXE} {IN} {OUT}' , map=True,
inputs=[self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
# Single-end
else:
self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
" $1 2>> $3 | " + self.get_exec_path("samtools") + " view -bS - | " +
self.get_exec_path("samtools") + " sort - $2 2>> $3 ; mv $2.bam $2;",
self.get_exec_path("samtools") + " sort - -o $2 2>> $3 ;",
cmd_format='{EXE} {IN} {OUT}' , map=True,
inputs=[self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
......@@ -118,7 +118,7 @@ class BWA (Analysis):
inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
self.add_shell_execution(self.get_exec_path("bwa") + " sampe " + self.reference_genome +
" $1 $2 $3 $4 2>> $6 | " + self.get_exec_path("samtools") + " view -bS - | " +
self.get_exec_path("samtools") + " sort - $5 2>> $6; mv $5.bam $5;",
self.get_exec_path("samtools") + " sort - -o $5 2>> $6;",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=[self.sai1, self.sai2, self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
# Single-end
......@@ -128,7 +128,7 @@ class BWA (Analysis):
inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
self.add_shell_execution(self.get_exec_path("bwa") + " samse " + self.reference_genome +
" $1 $2 2>> $4 | " + self.get_exec_path("samtools") + " view -bS - | " +
self.get_exec_path("samtools") + " sort - $3 2>> $4; mv $3.bam $3;",
self.get_exec_path("samtools") + " sort - -o $3 2>> $4;",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=[self.sai1, self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
......
......@@ -20,7 +20,19 @@ from subprocess import Popen, PIPE
from ng6.analysis import Analysis
def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_file, samtools_path, collectinsertsizemetrics_path, options_dump_path, memory):
# def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_file, samtools_path, collectinsertsizemetrics_path, options_dump_path, memory):
# """
# @param bam_file : path for bam
# @param pairs_count_file : path to the produced file with the number of reads pairs in bam
# @param metrics_file : path to the metrics file produced by collectinsertsizemetrics
# @param hist_file : path to the histogram produced by collectinsertsizemetrics
# @param log_file : path to the log produced by collectinsertsizemetrics
# @param samtools_path : path to the software samtools
# @param collectinsertsizemetrics_path : path to the software collectinsertsizemetrics
# @param picard_path : path to the software picard.jar
# @param options : options for the software collectinsertsizemetrics
# """
def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_file, samtools_path, picard_path, options_dump_path, memory):
"""
@param bam_file : path for bam
@param pairs_count_file : path to the produced file with the number of reads pairs in bam
......@@ -28,7 +40,7 @@ def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_fil
@param hist_file : path to the histogram produced by collectinsertsizemetrics
@param log_file : path to the log produced by collectinsertsizemetrics
@param samtools_path : path to the software samtools
@param collectinsertsizemetrics_path : path to the software collectinsertsizemetrics
@param picard_path : path to the software picard.jar
@param options : options for the software collectinsertsizemetrics
"""
from subprocess import Popen, PIPE
......@@ -45,7 +57,7 @@ def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_fil
if properly_paired_nb > 0 :
# Process inserts sizes metrics
command = Popen( ["-c", "java "+xmx+" -jar " + collectinsertsizemetrics_path + " " +options + " HISTOGRAM_FILE=" + hist_file + " INPUT=" + bam_file + " OUTPUT=" + metrics_file + " 2> " + log_file], shell=True, stdout=PIPE, stderr=PIPE )
command = Popen( ["-c", self.get_exec_path("javaPICARD")+" " +xmx+" -jar " + picard_path + " CollectInsertSizeMetrics " +options + " HISTOGRAM_FILE=" + hist_file + " INPUT=" + bam_file + " OUTPUT=" + metrics_file + " 2> " + log_file], shell=True, stdout=PIPE, stderr=PIPE )
stdout, stderr = command.communicate()
# Count nb pairs in bam file
command = Popen( ["-c", samtools_path + " view -F384 " + bam_file + " | wc -l"], shell=True, stdout=PIPE, stderr=PIPE) # First read in pair
......@@ -126,10 +138,10 @@ class InsertsSizes (Analysis):
self._create_and_archive(self.info_files, self.archive_name)
def get_version(self):
    """
    Return the version reported by Picard CollectInsertSizeMetrics.

    The tool is launched with the java binary configured as "javaPICARD" and
    the jar configured as "Picard". The version is read from the command's
    stderr stream.

    @return : the first whitespace-separated token written on stderr
    """
    # Printing the version needs very little heap, so a fixed value is used.
    # The previous code referenced a local `xmx` that was never defined in
    # this method, which raised a NameError as soon as it was called.
    xmx = "-Xmx1g"
    cmd = "{} {} -jar {} CollectInsertSizeMetrics --version".format(
        self.get_exec_path("javaPICARD"), xmx, self.get_exec_path("Picard"))
    # The command is a single string, hence shell=True.
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    _, stderr = p.communicate()
    # communicate() returns bytes; decode before splitting.
    return stderr.decode("utf-8").split()[0]
def process(self):
options_dump_path = self.get_temporary_file(".dump")
......@@ -140,11 +152,11 @@ class InsertsSizes (Analysis):
for i in range(len(self.bam_files)):
self.add_python_execution(inserts_metrics,cmd_format="{EXE} {IN} {OUT} {ARG}",
inputs=self.bam_files[i], outputs=[self.pairs_count_files[i], self.info_files[i], self.hist_files[i], self.log_files[i]],
arguments=[self.get_exec_path("samtools"), self.get_exec_path("CollectInsertSizeMetrics"), options_dump_path, self.memory])
arguments=[self.get_exec_path("samtools"), self.get_exec_path("Picard"), options_dump_path, self.memory])
def parse_pairs_count_file(self, input_file):
"""
@param input_file : the pairs count file path
@param input_file : the pairs count file path
@return : the number of properly paired
"""
count_file_fh = open(input_file, "r")
......
......@@ -47,13 +47,13 @@ class RnaSeqQualityCheck (CasavaNG6Workflow):
if not os.path.exists( self.reference_transcriptome + ".bwt" ):
bwaindex = self.add_component("BWAIndex", [self.reference_transcriptome])
indexed_ref = bwaindex.databank
# align reads against indexed genome
sample_lane_prefixes = None
if self.group_prefix != None :
sample_lane_prefixes = list((Utils.get_group_basenames(filtered_read1_files+filtered_read2_files, "lane")).keys())
bwa = self.add_component("BWA", [indexed_ref, filtered_read1_files, filtered_read2_files, sample_lane_prefixes, "mem", not self.delete_bam], parent = fastqilluminafilter)
# compute some statistics on the alignment
alignmentstats = self.add_component("AlignmentStats", [bwa.bam_files, self.is_paired_end()], parent = bwa, component_prefix="bwa")
......@@ -77,17 +77,19 @@ class RnaSeqQualityCheck (CasavaNG6Workflow):
concat_read2_files = filtered_read2_files
concat_read1_files = sorted(concat_read1_files)
concat_read2_files = sorted(concat_read2_files)
sample_lane_prefixes = None
if self.group_prefix != None :
sample_lane_prefixes = sorted(list((Utils.get_group_basenames(concat_read1_files+concat_read2_files, "lane")).keys()))
star = self.add_component("STAR", [reference_genome, index_dir, concat_read1_files, concat_read2_files,sample_lane_prefixes, not self.delete_bam, self.n_threads], parent = fastqilluminafilter)
# make some statistic on the alignment
alignmentstats = self.add_component("AlignmentStats", [star.output_bams, self.is_paired_end()], parent = star, component_prefix="star")
#Quality RNA Seq analysis
if self.annotation:
rseqc = self.add_component("RSeQC", [star.output_bams, self.annotation], parent = star)
# 02/08/2018 Audrey Gibert (was this a blunder?)
# Commented out because of a bug caused by the inner_distance.py script ("start must be smaller than end..."); it is annoying, but this analysis is never used anyway.
# if self.annotation:
# rseqc = self.add_component("RSeQC", [star.output_bams, self.annotation], parent = star)
......@@ -76,7 +76,7 @@ class STAR (Analysis):
# sort
self.add_shell_execution( self.get_exec_path("samtools") + ' view -Sb $1 | ' + self.get_exec_path("samtools") +
' sort -@ ' + str(self.n_threads) + ' -f - $2 ' , cmd_format='{EXE} {IN} {OUT}', map=True,
' sort -@ ' + str(self.n_threads) + ' - -o $2 ' , cmd_format='{EXE} {IN} {OUT}', map=True,
inputs = [self.output_sams_no_md], outputs = [self.output_sorted_bams])
#calmd and convert to bam
......
......@@ -12,7 +12,7 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
......@@ -35,7 +35,7 @@ class STARIndex (Component):
def process(self):
xmx="-Xmx" + self.memory.lower()
# normalize fasta
self.add_shell_execution( "java " + xmx + " -jar " + self.get_exec_path("NormalizeFasta.jar") + " I=$1 O=$2 ",
self.add_shell_execution( self.get_exec_path("javaPICARD")+" " + xmx + " -jar " + self.get_exec_path("Picard") + " NormalizeFasta I=$1 O=$2 ",
cmd_format="{EXE} {IN} {OUT}", map=False,
inputs = self.input_fasta, outputs = self.normalized_fasta_file)
......
......@@ -71,7 +71,7 @@ class RemoveDuplicate (Analysis):
self.add_shell_execution(self.get_exec_path("samtools") + " flagstat $1 > $2", cmd_format='{EXE} {IN} {OUT}',
inputs=[self.bam], outputs=[self.flagstat_init], map=True)
self.add_shell_execution(self.get_exec_path("samtools") + " sort -m "+self.mem+" -@"+str(self.cpu)+" $1 $2; mv $2.bam $2",
self.add_shell_execution(self.get_exec_path("samtools") + " sort -m "+self.mem+" -@"+str(self.cpu)+" $1 -o $2; mv $2.bam $2",
cmd_format='{EXE} {IN} {OUT}', inputs=[self.bam], outputs=[self.temp_sorted1], map=True)
if self.is_paired :
#samtools rmdup
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment