# Configuration file for the pipeline (YAML).
# bin_dir is a directory containing the software binaries used in this pipeline:
# trimgalore (version 0.6.5, with cutadapt version 2.1)
# tabix and bgzip (version 0.2.5)
# STAR (version 2.6.0c)
# GenomeAnalysisTK.jar (version 4.1.2.0)
# samtools (version 1.9)
# phASER (version downloaded 23-03-2020) with:
# python2.7 and the associated SciPy and NumPy libraries
# bgzip (version 0.2.5, cf. tabix)
# bcftools (version 1.9)
# If bin_dir is not set, the binaries need to be available in the PATH.
bin_dir: bin

# fasta_ref: masked genome reference FASTA file. It needs to be indexed with
# samtools faidx and picard CreateSequenceDictionary.
fasta_ref: data/reference.fa

# gtf_ref: genome reference GTF file.
gtf_ref: data/reference.gtf

# vcf: input VCF file(s) to analyse (including SNP-only files).
# The VCF files need to be indexed with tabix.
vcf: data/variants.vcf.gz

# sample_config: tabular file describing the input fastq file(s) of each
# sample included in the VCF file above.
# Header columns are:
# idx name forward_read reverse_read sequencer read_length phred_scale group
#
# forward_read and reverse_read are fastq file names. These files need to be
# located in the data_dir directory.
# A sample may be paired-end or single-end:
# - if single-end, leave the reverse_read column empty;
# - if paired-end, file names need to end with _R1.fastq.gz or _R2.fastq.gz
#   (or fq instead of fastq, and not necessarily compressed).
# sequencer needs to be chosen from the list:
# ["ILLUMINA","SLX","SOLEXA","SOLID","454","LS454","COMPLETE","PACBIO","IONTORRENT","CAPILLARY","HELICOS","UNKNOWN"]
# phred_scale indicates the Phred score scale used to encode base quality:
# either 33 (Sanger and Illumina 1.8+) or 64 (Solexa, Illumina 1.3 to 1.8 excluded).
sample_config: data/population.tsv.example

# data_dir: directory containing the fastq files described in sample_config.
data_dir: data

# SNP_filter: filter on variant type if not already done (true or false).
SNP_filter: true

# depth: minimum calling depth.
depth: 10

# GTpopCR_th: minimum fraction (between 0 and 1) of samples with a known
# genotype.
GTpopCR_th: 0.50

# DPgt05rPopCR_th: minimum fraction (between 0 and 1) of samples with a known
# genotype and DP > 5.
DPgt05rPopCR_th: 0.20

# baseQuality: minimum base quality.
baseQuality: 20

# mappingQuality: minimum mapping quality.
mappingQuality: 10

# phASER option.
# id_separator: separator used when generating unique IDs. It must not be
# found in the reference contig names and cannot include ':'. Default: "_".
# NOTE(review): the value below is an empty string, not the documented "_"
# default - confirm that this is intended.
id_separator: ''

# resources: computing resources file; it is also given to the snakemake
# --cluster-config option when the pipeline is executed on a cluster.
# This YAML file defines the default resources (at least mem and cpu) in a
# __default__ section, plus rule-specific resources (all of them, or only the
# ones that differ from the default) for particular rules.
resources: resources_SLURM.yaml