Commit 41bfe84b authored by Celine Noirot's avatar Celine Noirot
Browse files

Use get_cpu and get_memory to configure cpu and memory usage in component

parent b8242aa7
......@@ -39,6 +39,8 @@ class FastQC (Analysis):
self.add_parameter("no_group", "True disables grouping of bases for reads >50bp", default=no_group, type='bool')
self.add_parameter("archive_name", "Archive name", default=archive_name)
self.add_parameter("nb_threads", "Number of threads for fastqc", default=nb_threads)
if self.get_cpu() != None :
self.nb_threads=self.get_cpu()
items = self.input_files
if self.is_casava :
......@@ -197,7 +199,7 @@ class FastQC (Analysis):
# Create cmd
[cmd_inputs_pattern, next_arg_number] = get_argument_pattern(file_group, 1)
self.add_shell_execution(self.get_exec_path("fastqc") + ' -t ' + self.nb_threads + ' --extract --outdir ' + self.output_directory + ' '
self.add_shell_execution(self.get_exec_path("fastqc") + ' -t ' + str(self.nb_threads) + ' --extract --outdir ' + self.output_directory + ' '
+ self.options + ' ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '} 2> ${' + str(next_arg_number+1) + '}',
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs = file_group, outputs = [self.stdouts[output_pos], self.stderrs[output_pos]])
......
......@@ -67,7 +67,8 @@ class Flash (Analysis):
self.add_parameter("standard_deviation", "standard_deviation", default=standard_deviation, type=int)
self.add_parameter("phred_offset", "phred_offset", default=phred_offset, type=int)
self.add_parameter("archive_name", "archive_name", default=archive_name)
if self.get_cpu() != None :
self.nb_thread=self.get_cpu()
if len(self.read1_files) != len(self.read2_files):
raise Exception("[ERROR] : the number of files is not correct! (the number of files in read1_files and in read2_files must be the same)")
......
......@@ -20,7 +20,7 @@ from subprocess import Popen, PIPE
from ng6.analysis import Analysis
def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_file, samtools_path, collectinsertsizemetrics_path, options_dump_path):
def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_file, samtools_path, collectinsertsizemetrics_path, options_dump_path, memory):
"""
@param bam_file : path for bam
@param pairs_count_file : path to the produced file with the number of reads pairs in bam
......@@ -37,15 +37,15 @@ def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_fil
options_dump = open(options_dump_path, "rb")
options = pickle.load(options_dump)
options_dump.close()
xmx="-Xmx"+memory.lower()
# Count nb properly paired in bam file
command = Popen( ["-c", samtools_path + " view -f67 " + bam_file + "| wc -l"], shell=True, stdout=PIPE, stderr=PIPE)
properly_paired_nb, stderr = command.communicate()
properly_paired_nb = int(properly_paired_nb.decode().strip())
if properly_paired_nb > 0 :
# Process inserts sizes metrics
command = Popen( ["-c", "java -Xmx1g -jar " + collectinsertsizemetrics_path + " " +options + " HISTOGRAM_FILE=" + hist_file + " INPUT=" + bam_file + " OUTPUT=" + metrics_file + " 2> " + log_file], shell=True, stdout=PIPE, stderr=PIPE )
command = Popen( ["-c", "java "+xmx+" -jar " + collectinsertsizemetrics_path + " " +options + " HISTOGRAM_FILE=" + hist_file + " INPUT=" + bam_file + " OUTPUT=" + metrics_file + " 2> " + log_file], shell=True, stdout=PIPE, stderr=PIPE )
stdout, stderr = command.communicate()
# Count nb pairs in bam file
command = Popen( ["-c", samtools_path + " view -F384 " + bam_file + " | wc -l"], shell=True, stdout=PIPE, stderr=PIPE) # First read in pair
......@@ -84,7 +84,10 @@ class InsertsSizes (Analysis):
self.add_output_file_list( "hist_files", "hist_files", pattern='{basename_woext}.pdf', items=self.bam_files)
self.add_output_file_list( "log_files", "log_files", pattern='{basename_woext}.log', items=self.bam_files)
self.add_output_file_list( "pairs_count_files", "pairs_count_files", pattern='{basename_woext}.count', items=self.bam_files)
self.memory = '1G'
if self.get_memory() != None :
self.memory=self.get_memory()
def define_analysis(self):
self.name = "InsertsSizes"
self.description = "Insert size statistics"
......@@ -137,7 +140,7 @@ class InsertsSizes (Analysis):
for i in range(len(self.bam_files)):
self.add_python_execution(inserts_metrics,cmd_format="{EXE} {IN} {OUT} {ARG}",
inputs=self.bam_files[i], outputs=[self.pairs_count_files[i], self.info_files[i], self.hist_files[i], self.log_files[i]],
arguments=[self.get_exec_path("samtools"), self.get_exec_path("CollectInsertSizeMetrics"), options_dump_path])
arguments=[self.get_exec_path("samtools"), self.get_exec_path("CollectInsertSizeMetrics"), options_dump_path, self.memory])
def parse_pairs_count_file(self, input_file):
"""
......
......@@ -90,7 +90,9 @@ class SubsetAssignation (Analysis):
self.add_output_file_list( "sub_fasta_files", "sub fasta files", pattern='{basename_woext}_sub.fasta', items=self.sequence_files, file_format = 'fasta')
self.add_output_file_list( "blast_files", "sub fasta files", pattern='{basename_woext}.blast', items=self.sequence_files)
self.nb_threads=1
if self.get_cpu() != None :
self.nb_threads=self.get_cpu()
self.use_index = "true"
if not os.path.exists( self.databank + ".00.idx" ) :
self.use_index = "false"
......@@ -148,7 +150,8 @@ class SubsetAssignation (Analysis):
inputs = input_groups[prefix], outputs = sub_fasta_groups[prefix], map=False)
# Align on databank
self.add_shell_function(self.get_exec_path("blastn") + " -max_target_seqs " + str(self.max_target_seqs) + " -use_index " + self.use_index + " -outfmt 7 -db " + self.databank + " -query $1 -out $2",
self.add_shell_function(self.get_exec_path("blastn") + " -max_target_seqs " + str(self.max_target_seqs) + " -num_threads " + self.nb_threads +
" -use_index " + self.use_index + " -outfmt 7 -db " + self.databank + " -query $1 -out $2",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.sub_fasta_files, outputs=self.blast_files)
# Create files groups
......
......@@ -28,7 +28,8 @@ class STAR (Analysis):
self.add_input_file_list( "read2", "read2", default=read2)
self.add_parameter("keep_bam", "keep_bam", default=keep_bam, type=bool)
self.add_parameter("n_threads", "n_threads", default=n_threads, type='int')
if self.get_cpu() != None :
self.n_threads=self.get_cpu()
items = group_prefix if group_prefix != None else self.read1
self.add_output_file("fasta_file_link", "fasta_file_link", filename=os.path.basename(self.fasta_file))
......
......@@ -26,14 +26,21 @@ class STARIndex (Component):
self.add_output_file("index_directory", "index_directory", filename="index")
self.add_output_file("normalized_fasta_file", "Fasta file normalized using picard tools", filename=os.path.basename(input_fasta))
self.memory = '4G'
if self.get_memory() != None :
self.memory=self.get_memory()
self.cpu = "8"
if self.get_cpu() != None :
self.n_threads=self.get_cpu()
def process(self):
    """Build the STAR genome index.

    Two shell steps are chained:
      1. normalize the input fasta with picard NormalizeFasta;
      2. run STAR in genomeGenerate mode on the normalized fasta.
    Uses self.memory (e.g. '4G') for the Java heap and self.cpu for the
    STAR thread count, both set in the constructor.
    """
    # Java heap follows the component's configured memory (e.g. '4G' -> -Xmx4g).
    xmx = "-Xmx" + self.memory.lower()
    # normalize fasta
    self.add_shell_execution("java " + xmx + " -jar " + self.get_exec_path("NormalizeFasta.jar") + " I=$1 O=$2 ",
                             cmd_format="{EXE} {IN} {OUT}", map=False,
                             inputs=self.input_fasta, outputs=self.normalized_fasta_file)
    # index for star
    self.add_shell_execution("mkdir $2 ; " + self.get_exec_path("STAR") + " --runMode genomeGenerate --genomeFastaFiles $1 " +
                             "--genomeDir $2 --runThreadN " + str(self.cpu),
                             cmd_format='{EXE} {IN} {OUT}', map=False, inputs=self.normalized_fasta_file, outputs=self.index_directory)
......@@ -47,6 +47,9 @@ class Bismark (Analysis):
self.add_parameter("max_insert_size", "max_insert_size", default=max_insert_size, type="int")
self.add_parameter("nb_proc", "nb_proc", default=nb_proc, type="int")
self.add_parameter("description", "description", default=description)
if self.get_cpu() != None :
self.nb_proc=self.get_cpu()
self.source_file = self.reference_genome + "_source"
extention_bowtie=""
......@@ -92,7 +95,8 @@ class Bismark (Analysis):
self.args += " --non_directional"
if not self.bowtie1:
if self.nb_proc :
self.args += " --p "+ str(self.nb_proc)
# Two bowtie processes are launched in directional mode, so the allocated CPUs are divided between them
self.args += " --p "+ str(int(self.nb_proc/2))
self.args += " --bowtie2"
if not os.path.dirname(self.get_exec_path("bowtie2")) == "" :
self.args += " --path_to_bowtie " + os.path.dirname(self.get_exec_path("bowtie2"))
......
......@@ -37,6 +37,10 @@ class RemoveDuplicate (Analysis):
self.add_parameter("is_paired", "Does library is paired ? ", default=is_paired, type="bool")
self.add_parameter("mem", "Memory for samtools ", default=mem, type="string")
self.add_parameter("cpu", "Cpu for samtools ", default=cpu, type="int")
if self.get_cpu() != None :
self.cpu=self.get_cpu()
if self.get_memory() != None :
self.mem=self.get_memory()
self.add_output_file_list("flagstat_init", "Flagstat initialy", pattern='{basename_woext}.init_flagstat', items=self.bam)
self.add_output_file_list("flagstat_rmdup", "Flagstat result after rmdup", pattern='{basename_woext}.rmdup_flagstat', items=self.bam)
self.add_output_file_list("flagstat_finally", "Flagstat result after removing singleton", pattern='{basename_woext}.finally_flagstat', items=self.bam)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment