---
# bin_dir is the directory containing the software binaries used in this pipeline:
#   - cutadapt
#   - trimgalore (version 0.4.5)
#   - STAR (version 2.5.2b)
#   - picard.jar (version 2.1.1)
#   - samtools (version 1.3.1)
#   - rsem-prepare-reference (RSEM version 1.3.0)
#   - rsem-calculate-expression (RSEM version 1.3.0)
#   - GenomeAnalysisTK.jar (version 3.7)
#   - java (version 8)
# If bin_dir is not set, these binaries need to be available in the PATH.
bin_dir: <ABS_PATH>/bin
# data_dir is the directory containing the fastq files described in the sample_config file
data_dir: <ABS_PATH>/data
# sample_config is a tabular file describing each input fastq file.
# The sample_config file name will be used to aggregate samples in the final VCF.
# forward_read and reverse_read are fastq file names. These files need to be in the data_dir directory.
# A sample may be paired-end or single-end:
#   - If single-end, leave the reverse_read column empty.
#   - If paired-end, file names need to end with _R1.fastq.gz or _R2.fastq.gz (or fq instead of fastq, and not necessarily compressed).
# sequencer needs to be chosen from the list: ["ILLUMINA","SLX","SOLEXA","SOLID","454","LS454","COMPLETE","PACBIO","IONTORRENT","CAPILLARY","HELICOS","UNKNOWN"]
# oriented is the forward_prob RSEM parameter, which indicates:
#   1 for a strand-specific protocol where all (upstream) reads are derived from the forward strand,
#   0 for a strand-specific protocol where all (upstream) reads are derived from the reverse strand,
#   0.5 for a non-strand-specific protocol. (Default: off)
# phred_scale indicates the phred score scale used to encode base quality: either 33 (Sanger and Illumina 1.8+) or 64 (Solexa, Illumina 1.3 to 1.8 excluded)
# The header columns are: idx name forward_read reverse_read sequencer read_length oriented phred_scale
sample_config: population.tsv.example
# fasta_ref is the genome reference FASTA file
fasta_ref: <ABS_PATH>/reference.fa
# gtf_ref is the genome reference GTF file
gtf_ref: <ABS_PATH>/reference.gtf
# known_vcf is a set of known variants used to recalibrate base qualities in the GATK preprocessing steps RealignerTargetCreator and BaseRecalibrator
# NOTE(review): "refence" in the example file name below looks like a typo for "reference" — confirm against the actual file name before changing it
known_vcf: <ABS_PATH>/refence_known_var.vcf
# Computing resources file; also pass it to the --cluster-config snakemake option when executing on a cluster.
# This yaml file defines default resources (mem and cpu at least) in a __default__ section, plus rule-specific resources (all of them or a single one) for any particular rule that differs from the default.
resources: <ABS_PATH>/resources.yaml