From 21fe79681c1491dd6838b6b152dc935580bc9eb2 Mon Sep 17 00:00:00 2001 From: mariabernard <maria.bernard@jouy.inra.fr> Date: Wed, 19 Jun 2019 12:57:24 +0200 Subject: [PATCH] 1000RNASEQ chicken ASE: correct wildcard constraint, rmdup command line and add fasta index as input in SplitCigar --- Snakemake/1000RNASeq_chicken/ASE/rules/STAR.smk | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Snakemake/1000RNASeq_chicken/ASE/rules/STAR.smk b/Snakemake/1000RNASeq_chicken/ASE/rules/STAR.smk index 4ffd456..315f8d7 100644 --- a/Snakemake/1000RNASeq_chicken/ASE/rules/STAR.smk +++ b/Snakemake/1000RNASeq_chicken/ASE/rules/STAR.smk @@ -8,7 +8,7 @@ Align RNASeq reads on masked genome ''' wildcard_constraints: - properlyPaired_rmDup = '_pp_|_' + properlyPaired = '_pp_|_' STAR_INDEX_FILE=['genomeParameters.txt', 'chrName.txt', 'chrLength.txt', 'chrStart.txt', 'chrNameLength.txt', 'exonGeTrInfo.tab', 'geneInfo.tab', 'transcriptInfo.tab', 'exonInfo.tab', 'sjdbList.fromGTF.out.tab', 'sjdbInfo.txt', 'sjdbList.out.tab', 'Genome', 'SA', 'SAindex'] @@ -157,18 +157,20 @@ def get_properly_paired_bam(wildcards): return "Results/STAR_Aln_2/" + wildcards.sample + "_rg_genomic.bam" +# NOTE: Picard MarkDuplicates uses the READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options as the primary methods to identify and differentiate duplicate types. The RmDuplicates rule below passes READ_NAME_REGEX=null to skip optical duplicate detection, as is appropriate e.g. 
for RNA-seq or other data where duplicate sets are extremely large and estimating library complexity is not an aim. + rule RmDuplicates: input: bam = get_properly_paired_bam output: - bam = "Results/STAR_Aln_2/{sample}_rg_genomic{properlyPaired}.bam", + bam = "Results/STAR_Aln_2/{sample}_rg_genomic{properlyPaired}rmdup.bam", metrics = "Results/STAR_Aln_2/{sample}_rg_genomic{properlyPaired}.metrics.txt", params: mem=str(int(config["rmdup"]["mem"].replace("G","")) -4 )+"G" if int(config["rmdup"]["mem"].replace("G","")) -4 > 1 else config["rmdup"]["mem"] , jar=find_jar("picard.jar") shell: """ - java -Xmx{params.mem} -jar {params.jar} MarkDuplicates READ_NAME_REGEX=null I={input.bam} O={output.bam} REMOVE_DUPLICATES=true CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M={output.metrics} + java -Xmx{params.mem} -jar {params.jar} MarkDuplicates CREATE_INDEX=false READ_NAME_REGEX=null I={input.bam} O={output.bam} REMOVE_DUPLICATES=true VALIDATION_STRINGENCY=SILENT M={output.metrics} """ rule remove_multimap: @@ -184,9 +186,11 @@ rule remove_multimap: rule SplitNCigarReads: input: fasta = 'Results/genomeMasked/' + os.path.splitext(os.path.basename(config['fasta_ref']))[0] + '_masked.fa', + idx = 'Results/genomeMasked/' + os.path.splitext(os.path.basename(config['fasta_ref']))[0] + '_masked.fa.fai', + dict = 'Results/genomeMasked/' + os.path.splitext(os.path.basename(config['fasta_ref']))[0] + '_masked.dict', bam = "Results/STAR_Aln_2/{sample}_rg_genomic{properlyPaired}rmdup_uniq.bam" output: - bam = "Results/STAR_Aln_2/{sample}_rg_genomic{properlyPairedrmdup_uniq_split.bam" + bam = "Results/STAR_Aln_2/{sample}_rg_genomic{properlyPaired}rmdup_uniq_split.bam" params: mem=config["SplitNCigarReads"]["mem"], jar=find_jar("GenomeAnalysisTK.jar") -- GitLab