Newer
Older
# bin_dir is a directory containing the software binaries used in this pipeline:
# cutadapt
# trimgalore (version 0.4.5)
# STAR (version 2.5.2b)
# picard.jar (version 2.1.1)
# samtools (version 1.3.1 )
# rsem-prepare-reference ( RSEM version 1.3.0)
# rsem-calculate-expression ( RSEM version 1.3.0)
# GenomeAnalysisTK.jar (version 3.7)
# java (version 8)
# if bin_dir is not set, the binaries need to be available in the PATH
# data_dir contains fastq files described in sample_config file
# sample_config file is a tabular file describing each input fastq file
# sample_config file name will be used as prefix of the output files.
# header columns are : idx name forward_read reverse_read sequencer read_length oriented phred_scale
# forward_read and reverse_read are fastq file names. These files need to be in the data_dir directory
# sample may be paired end or single end.
# - If single end, leave an empty column in the reverse_read
# - If paired, file names need to end with _R1.fastq.gz or _R2.fastq.gz (fq may be used instead of fastq, and the files are not necessarily compressed)
# sequencer needs to be chosen from the list : ["ILLUMINA","SLX","SOLEXA","SOLID","454","LS454","COMPLETE","PACBIO","IONTORRENT","CAPILLARY","HELICOS","UNKNOWN"]
# oriented is the forward_prob RSEM parameter to indicate :
# 1 for a strand-specific protocol where all (upstream) reads are derived from the forward strand,
# 0 for a strand-specific protocol where all (upstream) reads are derived from the reverse strand,
# 0.5 for a non-strand-specific protocol.
# phred_scale indicates the Phred score scale used to encode base quality: either 33 (Sanger and Illumina 1.8+) or 64 (Solexa, Illumina 1.3 to 1.8 excluded)
sample_config : data/population.tsv.example
# fasta_ref file is the genome reference Fasta file
fasta_ref : data/reference.fa
# gtf_ref file is the genome reference GTF file
gtf_ref : data/reference.gtf
# known_vcf file is a set of known variants used to recalibrate base qualities in the GATK preprocessing steps RealignerTargetCreator and BaseRecalibrator
known_vcf : data/reference_known_var.vcf.gz
Maria Bernard
committed
# quality trimming threshold used in trimgalore to remove low quality bases.
trimming_quality : 15
# computing resources; also pass this file to the snakemake --cluster-config option when executing on a cluster
# this YAML file defines default resources (at least mem and cpu) in a __default__ section, plus rule-specific resources (all of them or a single one) for any rule that differs from the default
resources: resources_calling_SLURM.yaml