Commit d63977fe authored by MARTIN Pierre's avatar MARTIN Pierre
Browse files

new dev-test sub branch for functional tests

parent c4b53c13
Pipeline #36004 passed with stages
in 16 minutes and 5 seconds
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Usage
## python functional_tests.py -step [metagwgs step to test] -exp_dir [expected results directory] -obs_dir [observed results directory] [optional: --script [Nextflow launch script]]
try:
import argparse
import sys
import re
import os
import os.path
import subprocess
import filecmp
except ImportError as error:
print(error)
exit(1)
# Lists of output files to compare
# Read-cleaning logs to compare, generated per sample ("a" then "c").
# Each entry is (path relative to the 01_clean_qc directory, test type);
# cutadapt logs use "cut_diff" so that their header is ignored.
_01_1_cleaned_reads = [
    (path_template.format(sample), test_type)
    for sample in ("a", "c")
    for path_template, test_type in (
        ("01_1_cleaned_reads/logs/{}.no_filter.flagstat", "diff"),
        ("01_1_cleaned_reads/logs/{}_cutadapt.log", "cut_diff"),
        ("01_1_cleaned_reads/logs/{}_sickle.log", "diff"),
        ("01_1_cleaned_reads/logs/host_filter_flagstat/{}.host_filter.flagstat", "diff"),
    )
]
# FastQC reports (HTML page and zip archive) for the cleaned and raw reads of
# samples "a" and "c"; each one only has to exist and be non-empty.
# Every R1/R2 report is listed exactly once: the cleaned R2 zip archives used
# to be missing because the R1 zip entries were duplicated in their place.
_01_2_qc = [
    ("01_2_qc/fastqc_cleaned/cleaned_a/cleaned_a_R1_fastqc.html", "not_empty"),
    ("01_2_qc/fastqc_cleaned/cleaned_a/cleaned_a_R1_fastqc.zip", "not_empty"),
    ("01_2_qc/fastqc_cleaned/cleaned_a/cleaned_a_R2_fastqc.html", "not_empty"),
    ("01_2_qc/fastqc_cleaned/cleaned_a/cleaned_a_R2_fastqc.zip", "not_empty"),
    ("01_2_qc/fastqc_cleaned/cleaned_c/cleaned_c_R1_fastqc.html", "not_empty"),
    ("01_2_qc/fastqc_cleaned/cleaned_c/cleaned_c_R1_fastqc.zip", "not_empty"),
    ("01_2_qc/fastqc_cleaned/cleaned_c/cleaned_c_R2_fastqc.html", "not_empty"),
    ("01_2_qc/fastqc_cleaned/cleaned_c/cleaned_c_R2_fastqc.zip", "not_empty"),
    ("01_2_qc/fastqc_raw/a/a_R1_fastqc.html", "not_empty"),
    ("01_2_qc/fastqc_raw/a/a_R1_fastqc.zip", "not_empty"),
    ("01_2_qc/fastqc_raw/a/a_R2_fastqc.html", "not_empty"),
    ("01_2_qc/fastqc_raw/a/a_R2_fastqc.zip", "not_empty"),
    ("01_2_qc/fastqc_raw/c/c_R1_fastqc.html", "not_empty"),
    ("01_2_qc/fastqc_raw/c/c_R1_fastqc.zip", "not_empty"),
    ("01_2_qc/fastqc_raw/c/c_R2_fastqc.html", "not_empty"),
    ("01_2_qc/fastqc_raw/c/c_R2_fastqc.zip", "not_empty"),
]
# Taxonomic affiliation outputs: one Krona page per sample (must be
# non-empty) followed by one abundance table per taxonomic rank (must be
# identical to the expected table).
_01_3_taxonomic_affiliation_reads = [
    ("01_3_taxonomic_affiliation_reads/{}.krona.html".format(sample), "not_empty")
    for sample in ("a", "c")
] + [
    ("01_3_taxonomic_affiliation_reads/taxo_affi_reads_{}.tsv".format(rank), "diff")
    for rank in ("class", "family", "genus", "order", "phylum", "species")
]
# List of steps:
# Accepted values for -step: the fully named first step, then the
# two-digit placeholders "02" to "08".
steps_list = ["01_clean_qc"] + ["{:02d}".format(number) for number in range(2, 9)]
# Arguments parsing (3 required / 1 optional)
## Usage example:
#
# python functional_tests.py \
# -step 01_clean_qc \
# -exp_dir ../../functional_tests/expected \
# -obs_dir ../../functional_tests/observed/results \
# --script ../../functional_tests/test.sh
def parse_arguments():
    """Parse the command line of the functional-test script.

    ``-step``, ``-exp_dir`` and ``-obs_dir`` are mandatory and are now
    enforced by argparse, so a missing one is reported with a usage message
    instead of failing later when the directory is turned into a path.
    ``--script`` stays optional.

    Returns:
        argparse.Namespace with the attributes ``step``, ``exp_dir``,
        ``obs_dir`` and ``script`` (``None`` when not given).

    Raises:
        SystemExit: with status 1 when the script is called without any
            argument (the usage is printed on stderr), or with argparse's
            own status when a required argument is missing or unknown.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-step", type=str, required=True,
        help="(at least one required) step(s) of metagwgs you wish to perform a test on (multiple steps can be tested at once)",
    )
    parser.add_argument(
        "-exp_dir", type=str, required=True,
        help="(required) expected logs dir containing logs from a healthy metagwgs workflow",
    )
    parser.add_argument(
        "-obs_dir", type=str, required=True,
        help="(required) observed logs dir containing logs from the metagwgs workflow you wish to test",
    )
    parser.add_argument(
        "--script", type=str,
        help="(optional) script file containing metagwgs Nextflow launch command ",
    )

    # A bare invocation only prints the short usage line rather than an error.
    if len(sys.argv) == 1:
        parser.print_usage(sys.stderr)
        sys.exit(1)

    return parser.parse_args()
# Launch nextflow from script if given by user
def launch_nextflow(script):
    """Submit the user-provided launch script with ``sbatch``.

    The script is submitted from its own directory so that relative paths
    inside it resolve as they would when it is launched by hand.

    Args:
        script: path (absolute or relative) to the script file to submit.

    Returns:
        None. Problems are reported on stderr; no exception is raised when
        ``sbatch`` cannot be started or exits with a non-zero status.
    """
    script_file = os.path.abspath(script)
    script_dir = os.path.dirname(script_file)
    print("Launching test run with the provided file:\n\n{}\n".format(script_file))

    # The command is an argument list, so it must NOT be combined with
    # shell=True: on POSIX the shell would then run only "sbatch" and receive
    # the script path as its own $0, i.e. the script was never submitted.
    try:
        process = subprocess.Popen(
            ["sbatch", script_file],
            cwd=script_dir,
            stdout=subprocess.PIPE,
        )
    except OSError as error:
        sys.stderr.write("Could not run sbatch: {}\n".format(error))
        return

    output, _ = process.communicate()
    if output:
        print(output.decode("utf-8", errors="replace"))
    if process.returncode != 0:
        sys.stderr.write("sbatch exited with status {}\n".format(process.returncode))

    # NOTE(review): sbatch usually returns once the job is queued, not when it
    # has finished -- confirm the results exist before they are compared.
    print("Test run completed")
# Do file comparisons for 01_clean_qc step and write output
def _log_and_print(log, text):
    """Write *text* to the open log file and echo it on stdout."""
    log.write(text)
    print(text)


def _cutadapt_diff(expected_path, observed_path):
    """Diff two cutadapt logs from their sixth line onwards.

    The first five lines (the cutadapt header) are skipped with
    ``tail -n+6``. The paths are handed to bash as positional parameters
    instead of being pasted into the command string, so spaces or shell
    metacharacters in a path cannot alter the command.

    Returns:
        (identical, diff_text): ``identical`` is True only when ``diff``
        succeeded and printed nothing; ``diff_text`` is its decoded output.
    """
    command = 'diff <(tail -n+6 "$1") <(tail -n+6 "$2")'
    process = subprocess.Popen(
        ["/bin/bash", "-c", command, "bash", expected_path, observed_path],
        stdout=subprocess.PIPE,
    )
    diff_out, _ = process.communicate()
    diff_text = diff_out.decode("utf-8", errors="replace")
    return process.returncode == 0 and diff_text == "", diff_text


def check_01_clean_qc(files_list, exp_dir=None, obs_dir=None,
                      log_path="ft_01_clean_qc.log"):
    """Compare the 01_clean_qc outputs of an observed run with the expected ones.

    Every entry of *files_list* is tested according to its test type:

    * ``"diff"``      -- both files must have identical content;
    * ``"cut_diff"``  -- cutadapt logs, compared without their header;
    * ``"not_empty"`` -- the observed file must have a non-zero size.

    A per-file report and a final summary are written to *log_path* and
    printed on stdout.

    Args:
        files_list: iterable of ``(relative_path, test_type)`` tuples, the
            paths being relative to the ``01_clean_qc`` directory.
        exp_dir: directory holding the expected results; defaults to
            ``args.exp_dir`` (the module-level namespace set by ``main``).
        obs_dir: directory holding the observed results; defaults to
            ``args.obs_dir``.
        log_path: file the report is written to.

    Raises:
        SystemExit: when an expected or observed file is missing, or when a
            test type is unknown.
    """
    if exp_dir is None:
        exp_dir = args.exp_dir
    if obs_dir is None:
        obs_dir = args.obs_dir

    expected_prefix = "{}/01_clean_qc/".format(os.path.abspath(exp_dir))
    observed_prefix = "{}/01_clean_qc/".format(os.path.abspath(obs_dir))

    max_cnt = len(files_list)
    true_cnt = 0
    false_cnt = 0

    # The context manager closes the log even when sys.exit() aborts the run.
    with open(log_path, "w+") as log:
        log.write("Expected directory: {}\nObserved directory: {}\n".format(expected_prefix, observed_prefix))
        print("\nLaunching 01_clean_qc...\n")

        for file_path, file_usage in files_list:
            expected_path = "{}{}".format(expected_prefix, file_path)
            observed_path = "{}{}".format(observed_prefix, file_path)

            header = (
                "\n"
                "------------------------------------------------------------------------------\n"
                "Path: {}\n"
                "Test type: {}\n"
            ).format(file_path, file_usage)
            _log_and_print(log, header)

            # Both files must exist whatever the test type is.
            if not os.path.exists(expected_path):
                sys.exit(
                    "\n{} doesn't exist\n\nPlease check expected/ directory for any missing file\n".format(expected_path)
                )
            if not os.path.exists(observed_path):
                sys.exit(
                    "\n{} doesn't exist\n\nPlease check observed/ directory for any missing file\n".format(observed_path)
                )

            if file_usage == "diff":
                # shallow=False forces a content comparison; the default
                # trusts matching size/mtime and could hide a difference.
                passed = filecmp.cmp(expected_path, observed_path, shallow=False)
                result = str(passed)
            elif file_usage == "cut_diff":
                passed, diff_text = _cutadapt_diff(expected_path, observed_path)
                result = "True" if passed else "False : {}".format(diff_text)
            elif file_usage == "not_empty":
                passed = os.path.getsize(observed_path) > 0
                result = str(passed)
            else:
                sys.exit("\n{} is not valid\n".format(file_usage))

            if passed:
                true_cnt += 1
            else:
                false_cnt += 1
            _log_and_print(log, "Test result: {}\n".format(result))

        # An empty file list yields 0% instead of a division by zero.
        true_perc = (float(true_cnt) / float(max_cnt) * 100) if max_cnt else 0.0
        false_perc = 100 - true_perc if false_cnt != 0 else 0

        summary = (
            "\n"
            "=========================================\n"
            "-----------------------------------------\n"
            "Testing the 01_clean_qc step of metagWGS:\n"
            "Total: {}\n"
            "Passed: {} ({}%)\n"
            "Missed: {} ({}%)\n"
            "-----------------------------------------\n"
            "=========================================\n"
        ).format(max_cnt, true_cnt, true_perc, false_cnt, false_perc)
        _log_and_print(log, summary)
# __main__
def main():
    """Run the functional test selected on the command line.

    Parses the arguments, optionally launches the workflow through the
    provided script, then runs the file checks of the requested step.
    Exits with an error message when the step is not in ``steps_list``.
    """
    # The parsed namespace is published module-wide because
    # check_01_clean_qc reads it as a global.
    global args
    args = parse_arguments()
    requested_step = args.step

    if requested_step not in steps_list:
        sys.exit("-step doesn't exist, please chose from: {}".format(steps_list))

    if args.script:
        launch_nextflow(args.script)

    # Only the first step has checks implemented so far.
    if requested_step != "01_clean_qc":
        print("Step {}, nothing to do yet".format(requested_step))
        return

    files_to_check = _01_1_cleaned_reads + _01_2_qc + _01_3_taxonomic_affiliation_reads
    check_01_clean_qc(files_to_check)


if __name__ == "__main__":
    main()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment