Skip to content
Snippets Groups Projects
Commit a8c0bf77 authored by Maria Bernard's avatar Maria Bernard
Browse files

Remove rule bgzip and compress the VCF in the previous rule; add onsuccess code to...

Remove rule bgzip and compress the VCF in the previous rule; add onsuccess code to remove additional temporary files
parent bcf69dc9
No related branches found
No related tags found
No related merge requests found
......@@ -7,7 +7,7 @@ import collections
### system options
# Recover software memory and cpus resources
with open(config["resources"]) as yml:
config.update(yaml.load(yml))
config.update(yaml.load(yml, Loader=yaml.SafeLoader))
if not "__default__" in config or not "cpu" in config["__default__"] or not "mem" in config["__default__"] :
raise Exception("resources config file need to define at least default resources cpu mem (minimum 1G) in a __default__ section")
......@@ -56,7 +56,7 @@ trainingSets["VQSR_trainingSet_untrusted_uniquely_called_variants"]="-resource:u
if config["vqsr_resources"] is not None :
for Set in config['vqsr_resources'] :
if not Set['file'].endswith(".gz") or not os.path.exists(Set['file']+".tbi"):
raise Exception("your external resources" + os.path.basename(Set['file']) + " is not gziped and or indexed with tabix\n")
raise Exception("your external resources " + Set['file'] + " is not gziped and or indexed with tabix\n")
prefix = os.path.splitext(os.path.splitext(Set['file'])[0])[0] if Set['file'].endswith('gz') else os.path.splitext(Set['file'])[0]
trainingSets[os.path.basename(prefix)] = "-resource:" + Set['name'] + ",known=" + Set['known'] + ",training=" + Set['training'] + ",truth=" + Set['truth']+ ",prior=" + Set['prior']
......@@ -84,16 +84,28 @@ if gatk_prefix.endswith('vcf') :
gatk_prefix = os.path.splitext(gatk_prefix)[0]
final_outputs.append("results/VQSR/" + gatk_prefix +"_SNP_filtered.vcf.gz")
final_outputs.append("results/VQSR/" + gatk_prefix +"_INDEL_filtered.vcf.gz")
final_outputs.append("results/genoFilter/" + gatk_prefix +"vqsr_SNP_genFiltered.vcf.gz")
final_outputs.append("results/genoFilter/" + gatk_prefix +"vqsr_INDEL_genFiltered.vcf.gz")
final_outputs.append("results/genoFilter/" + gatk_prefix +"_vqsr_SNP_genFiltered.vcf.gz")
final_outputs.append("results/genoFilter/" + gatk_prefix +"_vqsr_INDEL_genFiltered.vcf.gz")
#~ print(final_outputs)
rule all:
input:
final_outputs,
expand("{file}.md5", file=final_outputs)
# onsuccess:
# if
# print("remove remained temporary file)
# expand("{file}.md5", file=final_outputs)
onsuccess:
if not "--notemp" in sys.argv :
temp_list = list()
print("remove remained temporary file :\n")
# splited VCF files, except for uniquely called training set
for prefix in trainingSets:
if prefix != 'VQSR_trainingSet_trusted_3callers_hardFiltered_variants' and prefix != 'VQSR_trainingSet_untrusted_uniquely_called_variants':
temp_list.append("results/split_SNP_INDEL/" + prefix + "_SNP.vcf.gz")
temp_list.append("results/split_SNP_INDEL/" + prefix + "_INDEL.vcf.gz")
temp_list.append('results/split_SNP_INDEL/common_SNP.vcf.gz')
temp_list.append('results/split_SNP_INDEL/common_INDEL.vcf.gz')
for f in temp_list:
print(f)
os.remove(f)
\ No newline at end of file
......@@ -17,17 +17,6 @@ rule md5sum:
shell:
"{config[bin][md5sum]} {input} > {output}"
rule bgzip:
input:
"{file}"
output:
"{file}.gz"
shell:
"{config[bin][bgzip]} -c {input} > {output}"
rule tabix:
......
......@@ -12,7 +12,7 @@ rule genoFilter:
sample_cov = config["sample_mean_cov"],
vcf = "results/VQSR/{GATK_input_prefix}_{var}_filtered.vcf.gz"
output:
vcf = temp("results/VQSR/{GATK_input_prefix}_vqsr_{var}_genFiltered.vcf.gz")
vcf = temp("results/genoFilter/{GATK_input_prefix}_vqsr_{var}_genFiltered.vcf.gz")
shell:
"""
filter_multi_sample_vcf.py --vcf_file {input.vcf} --bam_coverage {input.sample_cov} --output_vcf {output.vcf}
......
......@@ -22,12 +22,14 @@ rule uniq :
input :
get_ordered_input_uniq
output :
vcf = temp("results/intersect/uniq_{caller}_variants.vcf")
vcf = temp("results/intersect/uniq_{caller}_variants.vcf"),
gzip = temp("results/intersect/uniq_{caller}_variants.vcf.gz")
log:
"results/intersect/uniq_{caller}_variants.log"
shell:
"""
{config[bin][vcf-isec]} -f -c {input}| cut -f 1-8 > ` echo {output.vcf} | sed 's/.gz//'` 2> {log}
{config[bin][vcf-isec]} -f -c {input}| cut -f 1-8 > {output.vcf} 2> {log}
{config[bin][bgzip]} -c {output.vcf} > {output.gzip}
"""
rule concat_uniq:
......@@ -51,8 +53,12 @@ rule common_variant:
config["Freebayes_variants"],
config["Mpileup_variants"]
output:
vcf = temp("results/intersect/common_variants.vcf")
vcf = temp("results/intersect/common_variants.vcf"),
gzip = temp("results/intersect/common_variants.vcf.gz")
log:
"results/intersect/common_variants.log"
shell:
"{config[bin][vcf-isec]} -f -n +3 {input} > ` echo {output.vcf} | sed 's/.gz//'` 2> {log}"
"""
{config[bin][vcf-isec]} -f -n +3 {input} > {output.vcf} 2> {log}
{config[bin][bgzip]} -c {output.vcf} > {output.gzip}
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment