Skip to content
Snippets Groups Projects
config_calling.yaml.example 2.5 KiB
Newer Older
# bin_dir is a directory containing the software binaries used in this pipeline:
# cutadapt 
# trimgalore (version 0.4.5)
# STAR (version 2.5.2b)
# picard.jar (version 2.1.1)
# samtools (version 1.3.1)
# rsem-prepare-reference (RSEM version 1.3.0)
# rsem-calculate-expression (RSEM version 1.3.0)
# GenomeAnalysisTK.jar (version 3.7)
# java (version 8)
# if bin_dir is not set, the binaries need to be available in the PATH

# data_dir contains fastq files described in sample_config file

# sample_config file is a tabular file describing each input fastq file
# sample_config file name will be used as prefix of the output files.
# header columns are : idx	name	forward_read	reverse_read	sequencer	read_length	oriented	phred_scale
# 	forward_read and reverse_read are fastq file names. These files need to be in the data_dir directory
# 	samples may be paired-end or single-end.
#       - If single-end, leave the reverse_read column empty
#       - If paired-end, file names need to end with _R1.fastq.gz or _R2.fastq.gz (or fq instead of fastq, and not necessarily compressed)
# 	sequencer needs to be chosen from the list : ["ILLUMINA","SLX","SOLEXA","SOLID","454","LS454","COMPLETE","PACBIO","IONTORRENT","CAPILLARY","HELICOS","UNKNOWN"]
# 	oriented is the forward_prob RSEM parameter, which indicates:
#       1 for a strand-specific protocol where all (upstream) reads are derived from the forward strand,
#       0 for a strand-specific protocol where all (upstream) reads are derived from the reverse strand,
#       0.5 for a non-strand-specific protocol.
# 	phred_scale indicates the phred score scale used to encode base quality: either 33 (Sanger and Illumina 1.8+) or 64 (Solexa, Illumina 1.3 up to, but excluding, 1.8)
# Path to the tabular sample description file documented above.
sample_config: data/population.tsv.example

# Genome reference, in FASTA format.
fasta_ref: data/reference.fa
# Genome reference annotation, in GTF format.
gtf_ref: data/reference.gtf
# Set of known variants used to recalibrate base qualities in the GATK
# preprocessing steps RealignerTargetCreator and BaseRecalibrator.
known_vcf: data/reference_known_var.vcf.gz
# Quality threshold used by trimgalore to trim low-quality bases.
trimming_quality: 15

# Computing resources file; also pass it to the snakemake --cluster-config
# option when executing on a cluster.
# That YAML file defines default resources (at least mem and cpu) in a
# __default__ section, plus rule-specific resources (either all of them or a
# single one) for any rule that differs from the default.
resources: resources_calling_SLURM.yaml