---
# Configuration of the pipeline: tool location, input data, reference files,
# trimming threshold and computing resources.

# bin_dir is a directory containing the software binaries used in this
# pipeline:
#   - cutadapt
#   - trimgalore (version 0.4.5)
#   - STAR (version 2.5.2b)
#   - picard.jar (version 2.1.1)
#   - samtools (version 1.3.1)
#   - rsem-prepare-reference (RSEM version 1.3.0)
#   - rsem-calculate-expression (RSEM version 1.3.0)
#   - GenomeAnalysisTK.jar (version 3.7)
#   - java (version 8)
# If bin_dir is not used, the binaries need to be available in the PATH.
bin_dir: ./

# data_dir contains the fastq files described in the sample_config file.
data_dir: data

# sample_config is a tabular file describing each input fastq file.
# The sample_config file name will be used as prefix of the output files.
# Header columns are:
#   idx name forward_read reverse_read sequencer read_length oriented
#   phred_scale
# forward_read and reverse_read are fastq file names. These files need to be
# in the data_dir directory.
# A sample may be paired end or single end:
#   - if single end, leave an empty column in reverse_read;
#   - if paired, file names must end with _R1.fastq.gz or _R2.fastq.gz
#     (or fq instead of fastq, and not necessarily compressed).
# sequencer needs to be chosen in the list:
#   ["ILLUMINA", "SLX", "SOLEXA", "SOLID", "454", "LS454", "COMPLETE",
#    "PACBIO", "IONTORRENT", "CAPILLARY", "HELICOS", "UNKNOWN"]
# oriented is the forward_prob RSEM parameter:
#   - 1 for a strand-specific protocol where all (upstream) reads are derived
#     from the forward strand,
#   - 0 for a strand-specific protocol where all (upstream) reads are derived
#     from the reverse strand,
#   - 0.5 for a non-strand-specific protocol.
# phred_scale indicates the phred score scale used to code base quality:
# either 33 (Sanger and Illumina 1.8+) or 64 (Solexa, Illumina 1.3 to 1.8
# excluded).
sample_config: data/population.tsv.example

# fasta_ref is the genome reference Fasta file.
fasta_ref: data/reference.fa

# gtf_ref is the genome reference GTF file.
gtf_ref: data/reference.gtf

# known_vcf is the set of known variants used to recalibrate base qualities
# in the GATK preprocessing steps RealignerTargetCreator and BaseRecalibrator.
known_vcf: data/reference_known_var.vcf.gz

# Quality trimming threshold used in trimgalore to remove low quality bases.
trimming_quality: 15

# Computing resources; this file is also given to the --cluster-config
# snakemake option if executed on a cluster.
# This yaml file defines default resources (mem and cpu at least) in a
# __default__ section, and specific resources (either all of them or a single
# one) for a particular rule when they differ from the default.
resources: resources_calling_SLURM.yaml