---
##### QUERY related files/parameters (refseqv2.1)
# GFF3 annotation of the query genome
annotationQuery: 'data/IWGSC_refseqv2.1_annotation_200916_HC.gff3'
# feature type used for anchoring on the target genome
featureType: 'gene'
# FASTA of the query (used to check the sequences after the coordinates are calculated on the target genome)
# NOTE(review): 'pesudo' looks like a typo for 'pseudo' -- confirm against the actual file name on disk before renaming
queryFasta: 'data/CS_pesudo_v2.1.fa'
# BLAST database of all mRNAs, used to rescue genes whose transfer failed with the targeted approach
blastdb: 'data/IWGSC_refseqv2.1_annotation_200916_HC_mrna.fasta'
# Lists of all chromosome ids.
# TODO: needs to be updated in another version, with one array for the query
# and one array for the target genome assembly.
# NOTE(review): the two lists below differ in order (grouped by subgenome vs.
# grouped by chromosome number) and in length (refChrom also contains 'ChrUn');
# confirm that no consumer pairs them by position.
chromosomes: ['1A', '2A', '3A', '4A', '5A', '6A', '7A',
              '1B', '2B', '3B', '4B', '5B', '6B', '7B',
              '1D', '2D', '3D', '4D', '5D', '6D', '7D']
refChrom: ['Chr1A', 'Chr1B', 'Chr1D', 'Chr2A', 'Chr2B', 'Chr2D',
           'Chr3A', 'Chr3B', 'Chr3D', 'Chr4A', 'Chr4B', 'Chr4D',
           'Chr5A', 'Chr5B', 'Chr5D', 'Chr6A', 'Chr6B', 'Chr6D',
           'Chr7A', 'Chr7B', 'Chr7D', 'ChrUn']
##### Transfer mode
# transfer all isoforms ('all') or only the '.1' isoform ('first')
transferType: 'first'

##### TARGET related files/parameters
# FASTA of the target genome assembly
targetFasta: 'data/Triticum_aestivum_arinalrfor.PGSBv2.1.dna.toplevel.fa'
# GMAP index name of the target genome (value passed to the GMAP -d option)
targetGmapIndex: 'ensembl_Triticum_aestivum_arinalrfor_2023-2-17'
# path to the directory holding the GMAP index (value passed to the GMAP -D option)
# NOTE(review): absolute path under a user's home directory -- must be adapted on other machines
targetGmapIndexPath: '/home/herimbert/gdec/shared/triticum_aestivum/arinalrfor/current/gmapdb/all/'

##### ISBP/markers related config and parameters
# BAM file of markers/ISBPs mapped on the target genome
# NOTE(review): absolute path under a user's home directory -- must be adapted on other machines
isbpBam: '/home/masirvent/wheat10plus-pangenome/data/mappingISBP/session2/arina/arina_CS_ISBP.bam'
# BED file of marker/ISBP coordinates on the query genome (REFSEQ v2.1)
isbpBed: 'data/Tae.Chinese_Spring.refSeqv2.1.ISBPs.bed'
# minimum mapping quality of markers on the target genome
mapq: 30
# maximum number of mismatches allowed per ISBP/marker
mismatches: 2
##### Output
# NOTE(review): presumably the output directory and the prefix of the final
# output files -- confirm against the rules that read these keys
results: 'results'
finalPrefix: 'arina_magatt'
# this file contains two columns: the first is the chromosome name as it appears in the genome.fasta of the new reference,
# and the second is the chromosome name as it will appear in the new gene names
chromMapID: '/home/masirvent/wheat10plus-pangenome/data/liftoff/arina/chromstab.txt'

##### Nomenclature for final gene IDs
# used in rule renameGeneIds (rules/geneAnchoring.smk)
gff_prefix: 'TraesAR'
gff_version: '01G'
gff_source: 'MAGATT-IWGSCCSv2.1'