diff --git a/test_data/config.yaml b/test_data/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7107ffa7c6dbe2889c9169406dc812f0a578099 --- /dev/null +++ b/test_data/config.yaml @@ -0,0 +1,51 @@ +##### QUERY related files/parameters (refseqv2.1) +# GFF annotation to transfer +annotationQuery: 'test_data/reference.gff3' +# feature type used for anchoring on target genome +featureType: 'gene' +# FASTA of the query (used to check the sequences after the coordinates are calculated on the target genome) +queryFasta: 'test_data/reference.fa' +# blastdb of all mRNAs, used to rescue genes which have failed in the transfer using the targeted approach +blastdb: 'test_data/reference_mrna.fa' +# map of all chromosome ids --> NEEDS TO BE UPDATED in another version WITH ONE ARRAY FOR THE QUERY AND ONE ARRAY FOR THE TARGET GENOME ASSEMBLY +chromosomes: ['1A', '2A', '3A', '4A', '5A', '6A', '7A', '1B', '2B', '3B', '4B', '5B', '6B', '7B', '1D', '2D', '3D', '4D', '5D', '6D', '7D'] +refChrom: ['chr1A', 'chr1B', 'chr1D', 'chr2A', 'chr2B', 'chr2D', 'chr3A', 'chr3B', 'chr3D', 'chr4A', 'chr4B', 'chr4D', 'chr5A', 'chr5B', 'chr5D', 'chr6A', 'chr6B', 'chr6D', 'chr7A', 'chr7B', 'chr7D', 'chrUn'] + +##### Transfer mode +# transfer all isoforms (all) or only the '.1' (first) +transferType: 'first' + +##### TARGET related files/parameters +# FASTA of the target genome +targetFasta: 'test_data/target.fa' +#GMAP index of the genome for -d option +targetGmapIndex: 'target.fa.gmapidx' +#GMAP index: path to the gmapindex directory, for -D option +targetGmapIndexPath: './test_data/' +#BWA index prefix +targetBwaIdx: 'test_data/target.fa' + +##### ISBP/markers related config and parameters +# BED file of coordinates on the query genome (REFSEQ v2.1) +isbpBed: 'test_data/isbps.bed' +# BWA threads for mapping +bwaThreads: 16 +# FLAG : F flag for samtools +flag_F: 3844 +# minimum mapping quality of markers on the target genome +mapq: 30 +# max mismatches per ISBP/marker 
+mismatches: 2 + +##### OUTPUT directory +results: 'test_data/results' +finalPrefix: 'MAGATT_TEST' +# this file contains two columns: the first is the chromosome name as it appears in the genome.fasta of the new reference, +# and the second the chromosome name as it will appear in the new gene Names +chromMapID: 'test_data/chrMapping.txt' + +##### Nomenclature for final gene IDs +# used in rule renameGeneIds (rules/geneAnchoring.smk) +gff_prefix: 'TraesCSTest' +gff_version: '01G' +gff_source: 'MAGATT-TEST' diff --git a/test_data/magatt_reference_test.tar.gz b/test_data/magatt_reference_test.tar.gz index 18ebf9a1a4e00d00f27b081d5fed57e2c1013488..e65a8f1348990d31eaa257a86868b673257d43be 100644 --- a/test_data/magatt_reference_test.tar.gz +++ b/test_data/magatt_reference_test.tar.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27dc409407d5fad4c2a4bbdef713c5467493258089c1d8d055f0439a00c30a0b -size 692841038 +oid sha256:9acea98b44354e82c715c561ebb490a666715c1b2609cf7ae7c25e61d3eb7dea +size 701455738