From f019a2822ae2a2126640efd20e5fa576ca9dc61d Mon Sep 17 00:00:00 2001 From: mariabernard <maria.bernard@inra.fr> Date: Thu, 28 Mar 2019 12:34:51 +0100 Subject: [PATCH] add cluster partition resources in IMAGE workflows --- Snakemake/IMAGE_calling/README.md | 2 +- Snakemake/IMAGE_calling/SLURM.sh | 4 +-- Snakemake/IMAGE_calling/resources_SLURM.yaml | 4 +++ Snakemake/IMAGE_vqsr/README.md | 2 +- Snakemake/IMAGE_vqsr/SLURM.sh | 23 +++++++++++++ Snakemake/IMAGE_vqsr/resources_SLURM.yaml | 1 + Snakemake/IMAGE_vqsr/test_SLURM.sh | 34 -------------------- 7 files changed, 32 insertions(+), 38 deletions(-) create mode 100644 Snakemake/IMAGE_vqsr/SLURM.sh delete mode 100644 Snakemake/IMAGE_vqsr/test_SLURM.sh diff --git a/Snakemake/IMAGE_calling/README.md b/Snakemake/IMAGE_calling/README.md index 9d1598f..c5b1dea 100644 --- a/Snakemake/IMAGE_calling/README.md +++ b/Snakemake/IMAGE_calling/README.md @@ -180,7 +180,7 @@ Absolut paths need to be precise in the config.yaml # Configure workflow and running workflow - Copy the config.yaml file into your working directory and update it according to the instructions inside the file. -- If necessary update resources_SLURM.yaml to define cpu and memory resources for each rule. The default section is the minimal resources per job. The other section take as name the rules name and define only resources that are different from the default section. +- If necessary update resources_SLURM.yaml to define cpu, memory and cluster partition resources for each rule. The default section is the minimal resources per job. The other section take as name the rules name and define only resources that are different from the default section. - launch snakemake command as presented in SLURM.sh to launch the workflow. Resources_SLURM.yaml and SLURM.sh are specific to SLURM cluster. Therefore, if you use another cluster, you need to create other files. However, you can use these files to inspire you. 
diff --git a/Snakemake/IMAGE_calling/SLURM.sh b/Snakemake/IMAGE_calling/SLURM.sh index 200ffaf..0fbffe4 100755 --- a/Snakemake/IMAGE_calling/SLURM.sh +++ b/Snakemake/IMAGE_calling/SLURM.sh @@ -17,8 +17,8 @@ snakemake -s $WORKFLOW_DIR/Snakefile --printshellcmds --jobs 200 \ --configfile config.yaml --dryrun # This is an example of the snakemake command line to launch on a SLURM cluster - # {cluster.cpu} and {cluster.mem} will be replace by value defined in the resources_SLURM.yaml file + # {cluster.cpu}, {cluster.mem} and {cluster.partition} will be replaced by values defined in the resources_SLURM.yaml file snakemake -s $WORKFLOW_DIR/Snakefile --printshellcmds --jobs 200 \ --configfile config.yaml \ --cluster-config $WORKFLOW_DIR/resources_SLURM.yaml \ - --cluster "sbatch --cpus-per-task={cluster.cpu} --mem-per-cpu={cluster.mem} --error=logs/%x.stderr --output=logs/%x.stdout " --latency-wait 30 + --cluster "sbatch -p {cluster.partition} --cpus-per-task={cluster.cpu} --mem-per-cpu={cluster.mem} --error=logs/%x.stderr --output=logs/%x.stdout " --latency-wait 30 diff --git a/Snakemake/IMAGE_calling/resources_SLURM.yaml b/Snakemake/IMAGE_calling/resources_SLURM.yaml index 4c279c6..1bf89ab 100644 --- a/Snakemake/IMAGE_calling/resources_SLURM.yaml +++ b/Snakemake/IMAGE_calling/resources_SLURM.yaml @@ -1,6 +1,7 @@ __default__ : mem : "7G" cpu : 1 + partition : "workq" fastqc : cpu : 8 @@ -38,12 +39,15 @@ AnalyzeCovariates: HaplotypeCaller : mem : "30G" + partition : "unlimitq" CombineGVCFs : mem : "60G" + partition : "unlimitq" GenotypeGVCFs : mem : "60G" + partition : "unlimitq" freebayes: mem : "30G" diff --git a/Snakemake/IMAGE_vqsr/README.md b/Snakemake/IMAGE_vqsr/README.md index 52c36e7..f06c00a 100644 --- a/Snakemake/IMAGE_vqsr/README.md +++ b/Snakemake/IMAGE_vqsr/README.md @@ -140,7 +140,7 @@ For the final step, the python script filter_multi_sample_vcf.py need PyVCF for ## 4. 
Configure and running workflow - Copy the config.yaml file into your working directory and update it according to the instructions inside the file. -- If necessary update resources_SLURM.yaml to define cpu and memory resources for each rule. The default section is the minimal resources per job. The other section take as name the rules name and define only resources that are different from the default section. +- If necessary update resources_SLURM.yaml to define cpu, memory and cluster partition resources for each rule. The default section is the minimal resources per job. The other section take as name the rules name and define only resources that are different from the default section. - launch snakemake command as presented in SLURM.sh to launch the workflow. Resources_SLURM.yaml and SLURM.sh are specific to SLURM cluster. Therefore, if you use another cluster, you need to create other files. However, you can use these files to inspire you. diff --git a/Snakemake/IMAGE_vqsr/SLURM.sh b/Snakemake/IMAGE_vqsr/SLURM.sh new file mode 100644 index 0000000..91ce334 --- /dev/null +++ b/Snakemake/IMAGE_vqsr/SLURM.sh @@ -0,0 +1,23 @@ +#!/bin/bash +############################################################################## +## launch workflow + +# if used on Genologin cluster (INRA Toulouse ) +# module load system/Python-3.6.3 +# and because we are not using module for all other dependencies, you need to add vcftool library in the PERL5LIB environment variable +# export PERL5LIB=/usr/local/bioinfo/src/VCFtools/vcftools-0.1.15/src/perl/:$PERL5LIB +WORKFLOW_DIR=?? 
+ +mkdir -p logs + +# This is an example of the snakemake command line to launch a dryrun +snakemake -s $WORKFLOW_DIR/Snakefile --printshellcmds --jobs 200 \ + --configfile config.yaml --dryrun + +# This is an example of the snakemake command line to launch on a SLURM cluster + # {cluster.cpu}, {cluster.mem} and {cluster.partition} will be replaced by values defined in the resources_SLURM.yaml file +snakemake -s $WORKFLOW_DIR/Snakefile --printshellcmds --jobs 200 \ + --configfile config.yaml \ + --cluster-config $WORKFLOW_DIR/resources_SLURM.yaml \ + --cluster "sbatch -p {cluster.partition} --cpus-per-task={cluster.cpu} --mem-per-cpu={cluster.mem} --error=logs/%x.stderr --output=logs/%x.stdout " --latency-wait 30 diff --git a/Snakemake/IMAGE_vqsr/resources_SLURM.yaml b/Snakemake/IMAGE_vqsr/resources_SLURM.yaml index 0bfc71c..deed11b 100644 --- a/Snakemake/IMAGE_vqsr/resources_SLURM.yaml +++ b/Snakemake/IMAGE_vqsr/resources_SLURM.yaml @@ -1,3 +1,4 @@ __default__: mem : "20G" cpu : 1 + partition : "workq" diff --git a/Snakemake/IMAGE_vqsr/test_SLURM.sh b/Snakemake/IMAGE_vqsr/test_SLURM.sh deleted file mode 100644 index 7fd9564..0000000 --- a/Snakemake/IMAGE_vqsr/test_SLURM.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - - -############### -# module load # -############### - -# if used on Genologin cluster (INRA Toulouse ) -# module load system/Python-3.6.3 -# and because we are not using module for all other dependencies, you need to add vcftool librairy in the PERL5LIB environment variable -# export PERL5LIB=/usr/local/bioinfo/src/VCFtools/vcftools-0.1.15/src/perl/:$PERL5LIB - - -############### -# dryrun # -############### - -#~ snakemake -s Snakefile -p --jobs 999 \ - #~ --configfile config.yaml \ - #~ --cluster-config resources_SLURM.yaml \ - #~ --cluster "sbatch --cpus-per-task={cluster.cpu} --mem={cluster.mem} --error=SLURM_logs/%x.stderr --output=SLURM_logs/%x.stdout " --latency-wait 30 --dryrun - -############### -# run # -############### - -# sbatch -J 
gatk_variantFiltration_SnakemakeTest_run1 -o SLURM_logs/%x.out -e SLURM_logs/%x.err test_SLURM.sh - -mkdir -p SLURM_logs/run1 - -snakemake -s Snakefile -p --jobs 999 \ - --configfile config.yaml \ - --cluster-config resources_SLURM.yaml \ - --cluster "sbatch --cpus-per-task={cluster.cpu} --mem={cluster.mem} --error=SLURM_logs/run1/%x.stderr --output=SLURM_logs/ruhn1/%x.stdout " --latency-wait 30 -- GitLab