def from_entry_or_env(values, key):
    """Look up ``key``, letting the process environment take precedence.

    Args:
        values: Mapping holding the configured fallback values (here the
            parsed ``settings.json``).
        key: Name of the setting; also the name of the environment variable
            that may override it.

    Returns:
        ``os.environ[key]`` when that variable is set, else ``values[key]``.

    Raises:
        KeyError: If ``key`` is neither in the environment nor in ``values``.
    """
    # Environment values are always strings, so ``None`` can only mean "unset".
    override = os.environ.get(key)
    if override is None:
        return values[key]
    return override
+ + +# Example resource configuration +# default-resources: +# - runtime=100 +# - mem_mb=6000 +# - disk_mb=1000000 +# # set-threads: map rule names to threads +# set-threads: +# - single_core_rule=1 +# - multi_core_rule=10 +# # set-resources: map rule names to resources in general +# set-resources: +# - high_memory_rule:mem_mb=12000 +# - long_running_rule:runtime=1200 diff --git a/cluster_profile/slurm/settings.json b/cluster_profile/slurm/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..b3d0e2b8dbb6ddc5a617556e5f67a1c535552571 --- /dev/null +++ b/cluster_profile/slurm/settings.json @@ -0,0 +1,5 @@ +{ + "SBATCH_DEFAULTS": "cpus-per-task=1 partition=gdec,smp time=02:00:00 mem=4G job-name=magatt_{rule}", + "CLUSTER_NAME": "", + "CLUSTER_CONFIG": "" +} diff --git a/cluster_profile/slurm/slurm-jobscript.sh b/cluster_profile/slurm/slurm-jobscript.sh new file mode 100755 index 0000000000000000000000000000000000000000..391741ef8824f4b691752e68651f097395d17f70 --- /dev/null +++ b/cluster_profile/slurm/slurm-jobscript.sh @@ -0,0 +1,3 @@ +#!/bin/bash +# properties = {properties} +{exec_job} diff --git a/cluster_profile/slurm/slurm-sidecar.py b/cluster_profile/slurm/slurm-sidecar.py new file mode 100755 index 0000000000000000000000000000000000000000..e79f5da237cb260dfcbd099c165ca8ddf57984e0 --- /dev/null +++ b/cluster_profile/slurm/slurm-sidecar.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 +"""Run a Snakemake v7+ sidecar process for Slurm + +This sidecar process will poll ``squeue --user [user] --format='%i,%T'`` +every 60 seconds by default (use environment variable +``SNAKEMAKE_SLURM_SQUEUE_WAIT`` for adjusting this). + +Note that you have to adjust the value to fit to your ``MinJobAge`` Slurm +configuration. Jobs remain at least ``MinJobAge`` seconds known to the +Slurm controller (default of 300 seconds). 
If you query ``squeue`` every +60 seconds then this is plenty and you will observe all relevant job status +states as they are relevant for Snakemake. + +If the environment variable ``SNAKEMAKE_CLUSTER_SIDECAR_VARS`` is set then +the ``slurm-status.py`` of the slurm profile will attempt to query this +sidecar process via HTTP. As the sidecar process does not update its +cache in real-time, setting ``SNAKEMAKE_SLURM_SQUEUE_WAIT`` too large might +lead to Snakemake missing the "done" job state. The defaults of +``SNAKEMAKE_SLURM_SQUEUE_WAIT=60`` and Slurm's ``MinJobAge=300`` work well +together and you will see all relevant job statuses. + +If the sidecar is queried for a job ID that it has not seen yet then it will +perform a query to ``sacct`` so that it works well with Snakemake's "resume +external job" feature. The ``slurm-submit.py`` script of the Snakemake profile +will register all jobs via POST with this sidecar. +""" + +import http.server +import json +import logging +import os +import subprocess +import sys +import signal +import time +import threading +import uuid + +from CookieCutter import CookieCutter + + +#: Enables debug messages for slurm sidecar. +DEBUG = bool(int(os.environ.get("SNAKEMAKE_SLURM_DEBUG", "0"))) +#: Enables HTTP request logging in sidecar. +LOG_REQUESTS = bool(int(os.environ.get("SNAKEMAKE_SLURM_LOG_REQUESTS", "0"))) +#: Command to call when calling squeue +SQUEUE_CMD = os.environ.get("SNAKEMAKE_SLURM_SQUEUE_CMD", "squeue") +#: Number of seconds to wait between ``squeue`` calls.
class PollSqueueThread(threading.Thread):
    """Thread that polls ``squeue`` until stopped by ``stop()``.

    The constructor performs one blocking ``squeue`` call, which must succeed,
    so ``states`` is populated before the thread is started.  Afterwards
    ``_work`` refreshes ``states`` whenever more than ``squeue_wait`` seconds
    have passed since the previous call.  Job IDs that are not in ``states``
    are looked up once through ``sacct`` (see ``get_state``).
    """

    def __init__(
        self,
        squeue_wait,
        squeue_cmd,
        squeue_timeout=2,
        sleep_time=0.01,
        max_tries=3,
        *args,
        **kwargs
    ):
        super().__init__(target=self._work, *args, **kwargs)
        #: Time to wait between squeue calls.
        self.squeue_wait = squeue_wait
        #: Command to call squeue with.
        self.squeue_cmd = squeue_cmd
        #: Whether or not the thread should stop.
        self.stopped = threading.Event()
        #: Previous call to ``squeue``
        self.prev_call = 0.0
        #: Time to sleep between iterations in seconds.  Thread can only be
        #: terminated after this interval when waiting.
        self.sleep_time = sleep_time
        #: Maximal running time to accept for call to ``squeue``.
        self.squeue_timeout = squeue_timeout
        #: Maximal number of tries if call to ``squeue`` fails.
        self.max_tries = max_tries
        #: Dict mapping the job id to the job state string.
        self.states = {}
        #: Make at least one call to squeue, must not fail.
        logger.debug("initializing trhead")
        self._call_squeue(allow_failure=False)
        self.prev_call = time.time()

    def _work(self):
        """Execute the thread's action: re-poll ``squeue`` until stopped."""
        while not self.stopped.is_set():
            now = time.time()
            if now - self.prev_call > self.squeue_wait:
                self._call_squeue()
                self.prev_call = now
            time.sleep(self.sleep_time)

    def get_state(self, jobid):
        """Return the job state for the given jobid.

        Unknown IDs trigger a ``sacct`` lookup whose result is cached.  If
        that lookup fails, ``"__not_seen_yet__"`` is returned.  A job that was
        registered but not yet reported by ``squeue`` yields ``None``.
        """
        jobid = str(jobid)
        if jobid not in self.states:
            try:
                self.states[jobid] = self._get_state_sacct(jobid)
            except Exception:
                # Deliberately best-effort, but no longer a bare ``except`` so
                # KeyboardInterrupt / SystemExit are not swallowed.
                logger.debug("sacct lookup for %s failed", jobid, exc_info=True)
                return "__not_seen_yet__"
        return self.states.get(jobid, "__not_seen_yet__")

    def register_job(self, jobid):
        """Register job with the given ID (state ``None`` until first seen)."""
        # Keys are always strings, matching ``get_state`` and ``_parse_output``.
        self.states.setdefault(str(jobid), None)

    def _get_state_sacct(self, jobid):
        """Implement retrieving state via sacct for resuming jobs.

        Tries up to ``max_tries`` times and raises ``RuntimeError`` if no
        attempt produced a parseable state for ``jobid``.
        """
        cluster = CookieCutter.get_cluster_option()
        cmd = ["sacct", "-P", "-b", "-j", jobid, "-n"]
        if cluster:
            cmd.append(cluster)
        for try_num in range(1, self.max_tries + 1):
            try:
                logger.debug("Calling %s (try %d)", cmd, try_num)
                output = subprocess.check_output(cmd, timeout=self.squeue_timeout, text=True)
            except subprocess.TimeoutExpired:
                logger.warning("Call to %s timed out (try %d of %d)", cmd, try_num, self.max_tries)
                continue
            except subprocess.CalledProcessError:
                logger.warning("Call to %s failed (try %d of %d)", cmd, try_num, self.max_tries)
                continue
            try:
                parsed = {x.split("|")[0]: x.split("|")[1] for x in output.strip().split("\n")}
                logger.debug("Returning state of %s as %s", jobid, parsed[jobid])
                return parsed[jobid]
            except (IndexError, KeyError):
                # IndexError: a line without "|"; KeyError: no row for jobid.
                logger.warning("Could not parse %s (try %d of %d)", repr(output), try_num, self.max_tries)
            if try_num < self.max_tries:
                # Back off a little longer after each failed parse.
                secs = try_num / 2.0
                logger.info("Sleeping %f seconds", secs)
                time.sleep(secs)
        raise RuntimeError("Problem with call to %s" % cmd)

    def stop(self):
        """Flag thread to stop execution"""
        logger.debug("stopping thread")
        self.stopped.set()

    def _call_squeue(self, allow_failure=True):
        """Run the call to ``squeue`` and merge its output into ``states``.

        With ``allow_failure=False`` the first timeout or non-zero exit is
        re-raised; otherwise up to ``max_tries`` attempts are made and the
        round is silently abandoned if all of them fail.
        """
        cluster = CookieCutter.get_cluster_option()
        cmd = [
            self.squeue_cmd,
            "--user={}".format(os.environ.get("USER")),
            "--format=%i,%T",
            "--state=all",
        ]
        if cluster:
            cmd.append(cluster)
        for try_num in range(1, self.max_tries + 1):
            try:
                logger.debug("Calling %s (try %d)", cmd, try_num)
                output = subprocess.check_output(cmd, timeout=self.squeue_timeout, text=True)
            except subprocess.TimeoutExpired:
                if not allow_failure:
                    raise
                logger.debug("Call to %s timed out (try %d of %d)", cmd, try_num, self.max_tries)
            except subprocess.CalledProcessError:
                if not allow_failure:
                    raise
                logger.debug("Call to %s failed (try %d of %d)", cmd, try_num, self.max_tries)
            else:
                # Success on any attempt, including the last one, is parsed.
                logger.debug("Output is:\n---\n%s\n---", output)
                logger.debug("parsing output")
                self._parse_output(output)
                return
        logger.debug("Giving up for this round")

    def _parse_output(self, output):
        """Parse output of ``squeue`` call.

        Everything before the first ``JOBID`` header line is skipped.  Lines
        after it that do not have at least ``id,state`` are ignored, as are
        repeated header lines.
        """
        header = None
        for line in output.splitlines():
            line = line.strip()
            arr = line.split(",")
            if header is None:
                if line.startswith("JOBID"):
                    header = arr
                continue  # skip leader (and the header itself)
            if len(arr) < 2 or line.startswith("JOBID"):
                continue  # blank / malformed line or repeated header
            logger.debug("Updating state of %s to %s", arr[0], arr[1])
            self.states[arr[0]] = arr[1]
class JobStateHttpHandler(http.server.BaseHTTPRequestHandler):
    """HTTP handler for ``/job/status/${jobid}/`` GET and ``/job/register/${jobid}/`` POST requests.

    Both endpoints require an ``Authorization: Bearer <secret>`` header that
    matches ``self.server.http_secret``.
    """

    @staticmethod
    def _job_id_from_path(path, prefix):
        """Return the job ID that follows ``prefix`` in ``path``, or ``None``.

        Trailing slashes are dropped.  If the remainder consists of four or
        more ``%20``-separated tokens, the fourth token is used as the ID.
        ``None`` means the path does not start with ``prefix`` or carries no
        ID at all.
        """
        if not path.startswith(prefix):
            return None
        job_id = path[len(prefix):].rstrip("/")
        tokens = job_id.split("%20")
        if len(tokens) > 3:
            job_id = tokens[3]
        return job_id or None

    def _is_authorized(self):
        """Return whether the request carries the expected bearer token."""
        auth_required = "Bearer %s" % self.server.http_secret
        auth_header = self.headers.get("Authorization")
        logger.debug(
            "Authorization header is %s, required: %s", repr(auth_header), repr(auth_required)
        )
        return auth_header == auth_required

    def _reject(self, code):
        """Send a body-less response with the given status code."""
        self.send_response(code)
        self.end_headers()

    def do_GET(self):
        """Only to ``/job/status/${job_id}/?``

        Responds 400 for other paths, 403 for a bad token, 404 while the job
        has no known state, and otherwise 200 with ``{"status": ...}``.
        """
        logger.debug("--- BEGIN GET")
        job_id = self._job_id_from_path(self.path, "/job/status/")
        if job_id is None:
            self._reject(400)
            return
        if not self._is_authorized():
            self._reject(403)
            return
        logger.debug("Querying for job ID %s", repr(job_id))
        status = self.server.poll_thread.get_state(job_id)
        logger.debug("Status: %s", status)
        if not status:
            self._reject(404)
        else:
            self.send_response(200)
            self.send_header("Content-type", "application/json")
            self.end_headers()
            output = json.dumps({"status": status})
            logger.debug("Sending %s", repr(output))
            self.wfile.write(output.encode("utf-8"))
        logger.debug("--- END GET")

    def do_POST(self):
        """Handle POSTs (only to ``/job/register/${job_id}/?``)

        Responds 400 for other paths, 403 for a bad token, and 200 once the
        job has been registered with the poll thread.
        """
        logger.debug("--- BEGIN POST")
        # The ID is sliced with the *register* prefix so it is stored under
        # the same key that a later status query will use.
        job_id = self._job_id_from_path(self.path, "/job/register/")
        if job_id is None:
            self._reject(400)
            return
        if not self._is_authorized():
            self._reject(403)
            return
        self.server.poll_thread.register_job(job_id)
        self.send_response(200)
        self.end_headers()
        logger.debug("--- END POST")

    def log_request(self, *args, **kwargs):
        """Log the request only when ``LOG_REQUESTS`` is enabled."""
        if LOG_REQUESTS:
            super().log_request(*args, **kwargs)
def get_status_direct(jobid):
    """Get status directly from sacct/scontrol.

    Each attempt first asks ``sacct``; if that fails or has no row for
    ``jobid`` it asks ``scontrol``.  Up to ``STATUS_ATTEMPTS`` attempts are
    made, one second apart.  When the last attempt fails, ``failed`` is
    printed and the process exits with status 0.

    Args:
        jobid: Slurm job ID as a string.

    Returns:
        The Slurm state string for the job (possibly empty).
    """
    cluster = CookieCutter.get_cluster_option()
    for i in range(STATUS_ATTEMPTS):
        try:
            sacct_res = sp.check_output(shlex.split(f"sacct {cluster} -P -b -j {jobid} -n"))
            res = {x.split("|")[0]: x.split("|")[1] for x in sacct_res.decode().strip().split("\n")}
            return res[jobid] or ""
        except sp.CalledProcessError as e:
            logger.error("sacct process error")
            logger.error(e)
        except (IndexError, KeyError) as e:
            # IndexError: unparseable line; KeyError: no row for this job.
            logger.error(e)
        # Try getting job with scontrol instead in case sacct is misconfigured
        try:
            sctrl_res = sp.check_output(shlex.split(f"scontrol {cluster} -o show job {jobid}"))
            m = re.search(r"JobState=(\w+)", sctrl_res.decode())
            if m is not None:
                return m.group(1)
            logger.error("scontrol output contains no JobState field")
        except sp.CalledProcessError as e:
            logger.error("scontrol process error")
            logger.error(e)
        if i >= STATUS_ATTEMPTS - 1:
            print("failed")
            sys.exit(0)
        time.sleep(1)

    # Only reachable when STATUS_ATTEMPTS is not positive.
    return ""
def register_with_sidecar(jobid):
    """Tell the sidecar about a freshly submitted job, if one is configured.

    Does nothing when ``SNAKEMAKE_CLUSTER_SIDECAR_VARS`` is unset or empty.
    Registration is best-effort: by the time this runs the job has already
    been submitted, so a sidecar that is down or slow must not stop the
    caller from reporting the job ID.  Request failures are logged as a
    warning and otherwise ignored.

    Args:
        jobid: Slurm job ID returned by ``sbatch``.
    """
    if not SIDECAR_VARS:
        return
    sidecar_vars = json.loads(SIDECAR_VARS)
    url = "http://localhost:%d/job/register/%s" % (sidecar_vars["server_port"], jobid)
    logger.debug("POST to %s", url)
    headers = {"Authorization": "Bearer %s" % sidecar_vars["server_secret"]}
    try:
        # Bounded wait so a hung sidecar cannot stall job submission.
        requests.post(url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as exc:
        logger.warning(
            "slurm-submit.py: could not register job %s with sidecar: %s", jobid, exc
        )
+sbatch_options.update(slurm_utils.parse_sbatch_defaults(SBATCH_DEFAULTS)) +sbatch_options.update(slurm_utils.parse_sbatch_defaults(CLUSTER)) + +# 2) cluster_config defaults +sbatch_options.update(cluster_config["__default__"]) + +# 3) Convert resources (no unit conversion!) and threads +sbatch_options.update(slurm_utils.convert_job_properties(job_properties, RESOURCE_MAPPING)) + +# 4) cluster_config for particular rule +sbatch_options.update(cluster_config.get(job_properties.get("rule"), {})) + +# 5) cluster_config options +sbatch_options.update(job_properties.get("cluster", {})) + +# convert human-friendly time - leaves slurm format time as is +if "time" in sbatch_options: + duration = str(sbatch_options["time"]) + sbatch_options["time"] = str(slurm_utils.Time(duration)) + +# 6) Format pattern in snakemake style +sbatch_options = slurm_utils.format_values(sbatch_options, job_properties) + +# 7) create output and error filenames and paths +joblog = slurm_utils.JobLog(job_properties) +log = "" +if "output" not in sbatch_options and CookieCutter.get_cluster_logpath(): + outlog = joblog.outlog + log = outlog + sbatch_options["output"] = outlog + +if "error" not in sbatch_options and CookieCutter.get_cluster_logpath(): + errlog = joblog.errlog + log = errlog + sbatch_options["error"] = errlog + +# ensure sbatch output dirs exist +for o in ("output", "error"): + slurm_utils.ensure_dirs_exist(sbatch_options[o]) if o in sbatch_options else None + +# 9) Set slurm job name +if "job-name" not in sbatch_options and "job_name" not in sbatch_options: + sbatch_options["job-name"] = joblog.jobname + +# submit job and echo id back to Snakemake (must be the only stdout) +jobid = slurm_utils.submit_job(jobscript, **sbatch_options) +logger.debug("Registering %s with sidecar...", jobid) +register_with_sidecar(jobid) +logger.debug("... 
done registering with sidecar") +print(jobid) diff --git a/cluster_profile/slurm/slurm_utils.py b/cluster_profile/slurm/slurm_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c42015490a7c09f81877186cbed6420ec70e4405 --- /dev/null +++ b/cluster_profile/slurm/slurm_utils.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python3 +import argparse +import math +import os +import re +import subprocess as sp +import sys +from datetime import timedelta +from os.path import dirname +from time import time as unix_time +from typing import Union +from uuid import uuid4 +import shlex +from io import StringIO + +from CookieCutter import CookieCutter +from snakemake import io +from snakemake.exceptions import WorkflowError +from snakemake.io import Wildcards +from snakemake.logging import logger +from snakemake.utils import AlwaysQuotedFormatter +from snakemake.utils import QuotedFormatter +from snakemake.utils import SequenceFormatter + + +def _convert_units_to_mb(memory): + """If memory is specified with SI unit, convert to MB""" + if isinstance(memory, int) or isinstance(memory, float): + return int(memory) + siunits = {"K": 1e-3, "M": 1, "G": 1e3, "T": 1e6} + regex = re.compile(r"(\d+)({})$".format("|".join(siunits.keys()))) + m = regex.match(memory) + if m is None: + logger.error( + (f"unsupported memory specification '{memory}';" " allowed suffixes: [K|M|G|T]") + ) + sys.exit(1) + factor = siunits[m.group(2)] + return int(int(m.group(1)) * factor) + + +def parse_jobscript(): + """Minimal CLI to require/only accept single positional argument.""" + p = argparse.ArgumentParser(description="SLURM snakemake submit script") + p.add_argument("jobscript", help="Snakemake jobscript with job properties.") + return p.parse_args().jobscript + + +def parse_sbatch_defaults(parsed): + """Unpack SBATCH_DEFAULTS.""" + d = shlex.split(parsed) if type(parsed) == str else parsed + args = {} + for keyval in [a.split("=") for a in d]: + k = keyval[0].strip().strip("-") + v = 
def parse_sbatch_defaults(parsed):
    """Unpack SBATCH_DEFAULTS into an option dict.

    Args:
        parsed: Either one shell-style string such as
            ``"partition=gdec time=02:00:00"`` or an iterable of
            ``key=value`` / ``key`` items.

    Returns:
        Dict mapping each option name (surrounding whitespace and dashes
        removed) to its value.  Options given without ``=`` map to ``None``.
        Only the first ``=`` separates key from value, so values may contain
        ``=`` themselves (e.g. ``export=ALL,FOO=bar``).
    """
    items = shlex.split(parsed) if isinstance(parsed, str) else parsed
    args = {}
    for item in items:
        key, sep, value = item.partition("=")
        args[key.strip().strip("-")] = value.strip() if sep else None
    return args
" + "Also note that braces not used for variable access " + "have to be escaped by repeating them " + ) + + +# adapted from Job.format_wildcards in snakemake.jobs +def format_wildcards(string, job_properties): + """Format a string with variables from the job.""" + + class Job(object): + def __init__(self, job_properties): + for key in job_properties: + setattr(self, key, job_properties[key]) + + job = Job(job_properties) + if "params" in job_properties: + job._format_params = Wildcards(fromdict=job_properties["params"]) + else: + job._format_params = None + if "wildcards" in job_properties: + job._format_wildcards = Wildcards(fromdict=job_properties["wildcards"]) + else: + job._format_wildcards = None + _variables = dict() + _variables.update(dict(params=job._format_params, wildcards=job._format_wildcards)) + if hasattr(job, "rule"): + _variables.update(dict(rule=job.rule)) + try: + return format(string, **_variables) + except NameError as ex: + raise WorkflowError("NameError with group job {}: {}".format(job.jobid, str(ex))) + except IndexError as ex: + raise WorkflowError("IndexError with group job {}: {}".format(job.jobid, str(ex))) + + +# adapted from ClusterExecutor.cluster_params function in snakemake.executor +def format_values(dictionary, job_properties): + formatted = dictionary.copy() + for key, value in list(formatted.items()): + if key == "mem": + value = str(_convert_units_to_mb(value)) + if isinstance(value, str): + try: + formatted[key] = format_wildcards(value, job_properties) + except NameError as e: + msg = "Failed to format cluster config " "entry for job {}.".format( + job_properties["rule"] + ) + raise WorkflowError(msg, e) + return formatted + + +def convert_job_properties(job_properties, resource_mapping=None): + options = {} + if resource_mapping is None: + resource_mapping = {} + resources = job_properties.get("resources", {}) + for k, v in resource_mapping.items(): + options.update({k: resources[i] for i in v if i in resources}) + + if 
"threads" in job_properties: + options["cpus-per-task"] = job_properties["threads"] + + slurm_opts = resources.get("slurm", "") + if not isinstance(slurm_opts, str): + raise ValueError( + "The `slurm` argument to resources must be a space-separated string" + ) + + for opt in slurm_opts.split(): + kv = opt.split("=", maxsplit=1) + k = kv[0] + v = None if len(kv) == 1 else kv[1] + options[k.lstrip("-").replace("_", "-")] = v + + return options + + +def ensure_dirs_exist(path): + """Ensure output folder for Slurm log files exist.""" + di = dirname(path) + if di == "": + return + if not os.path.exists(di): + os.makedirs(di, exist_ok=True) + return + + +def format_sbatch_options(**sbatch_options): + """Format sbatch options""" + options = [] + for k, v in sbatch_options.items(): + val = "" + if v is not None: + val = f"={v}" + options.append(f"--{k}{val}") + return options + + +def submit_job(jobscript, **sbatch_options): + """Submit jobscript and return jobid.""" + options = format_sbatch_options(**sbatch_options) + try: + cmd = ["sbatch"] + ["--parsable"] + options + [jobscript] + res = sp.check_output(cmd) + except sp.CalledProcessError as e: + raise e + # Get jobid + res = res.decode() + try: + jobid = re.search(r"(\d+)", res).group(1) + except Exception as e: + raise e + return jobid + + +timeformats = [ + re.compile(r"^(?P<days>\d+)-(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)$"), + re.compile(r"^(?P<days>\d+)-(?P<hours>\d+):(?P<minutes>\d+)$"), + re.compile(r"^(?P<days>\d+)-(?P<hours>\d+)$"), + re.compile(r"^(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)$"), + re.compile(r"^(?P<minutes>\d+):(?P<seconds>\d+)$"), + re.compile(r"^(?P<minutes>\d+)$"), +] + + +def time_to_minutes(time): + """Convert time string to minutes. + + According to slurm: + + Acceptable time formats include "minutes", "minutes:seconds", + "hours:minutes:seconds", "days-hours", "days-hours:minutes" + and "days-hours:minutes:seconds". 
class InvalidTimeUnitError(Exception):
    """Raised when a duration string uses a unit letter that is not supported."""


class Time:
    """Duration that renders itself in Slurm's ``hours:minutes:seconds`` form.

    A duration such as ``"1h30m"`` is parsed into a ``timedelta``.  A string
    without any ``<number><letter>`` component is kept verbatim and returned
    unchanged by ``str()``.
    """

    # Unit sizes, all expressed in nanoseconds.
    _nanosecond_size = 1
    _microsecond_size = _nanosecond_size * 1000
    _millisecond_size = _microsecond_size * 1000
    _second_size = _millisecond_size * 1000
    _minute_size = _second_size * 60
    _hour_size = _minute_size * 60
    _day_size = _hour_size * 24
    _week_size = _day_size * 7
    units = {
        "s": _second_size,
        "m": _minute_size,
        "h": _hour_size,
        "d": _day_size,
        "w": _week_size,
    }
    # A (possibly fractional) number immediately followed by one unit letter.
    pattern = re.compile(r"(?P<val>\d+(\.\d*)?|\.\d+)(?P<unit>[a-zA-Z])")

    def __init__(self, duration: str):
        self.duration = Time._from_str(duration)

    def __str__(self) -> str:
        return Time._timedelta_to_slurm(self.duration)

    def __repr__(self):
        return str(self)

    @staticmethod
    def _timedelta_to_slurm(delta: Union[timedelta, str]) -> str:
        """Render ``delta`` for Slurm; strings pass through untouched.

        Raises:
            ValueError: If ``delta`` is neither a ``timedelta`` nor a ``str``.
        """
        if isinstance(delta, str):
            return delta
        if not isinstance(delta, timedelta):
            raise ValueError("Time is in an unknown format '{}'".format(delta))
        whole_minutes, seconds = divmod(delta.seconds, 60)
        hours, minutes = divmod(whole_minutes, 60)
        # Days are folded into the hour count rather than shown separately.
        hours += delta.days * 24
        return "{hours}:{minutes:02d}:{seconds:02d}".format(
            hours=hours, minutes=minutes, seconds=seconds
        )

    @staticmethod
    def _from_str(duration: str) -> Union[timedelta, str]:
        """Parse a duration string to a datetime.timedelta.

        Returns ``duration`` itself when it contains no component matching
        ``Time.pattern``.

        Raises:
            InvalidTimeUnitError: If a component uses a letter that is not a
                key of ``Time.units`` (unit letters are case-insensitive).
        """
        nanoseconds = 0
        seen_component = False
        for component in Time.pattern.finditer(duration):
            seen_component = True
            unit = component.group("unit").lower()
            scale = Time.units.get(unit)
            if scale is None:
                raise InvalidTimeUnitError(
                    "Unknown unit '{}' in time {}".format(unit, duration)
                )
            nanoseconds += float(component.group("val")) * scale

        if not seen_component:
            return duration

        return timedelta(microseconds=nanoseconds / Time._microsecond_size)
unit = m.group("unit").lower() + if unit not in Time.units: + raise InvalidTimeUnitError( + "Unknown unit '{}' in time {}".format(unit, duration) + ) + + total += float(value) * Time.units[unit] + + if n_matches == 0: + return duration + + microseconds = total / Time._microsecond_size + return timedelta(microseconds=microseconds) + + +class JobLog: + def __init__(self, job_props: dict): + self.job_properties = job_props + self.uid = str(uuid4()) + + @property + def wildcards(self) -> dict: + return self.job_properties.get("wildcards", dict()) + + @property + def wildcards_str(self) -> str: + return ( + ".".join("{}={}".format(k, v) for k, v in self.wildcards.items()) + or "unique" + ) + + @property + def rule_name(self) -> str: + if not self.is_group_jobtype: + return self.job_properties.get("rule", "nameless_rule") + return self.groupid + + @property + def groupid(self) -> str: + return self.job_properties.get("groupid", "group") + + @property + def is_group_jobtype(self) -> bool: + return self.job_properties.get("type", "") == "group" + + @property + def short_uid(self) -> str: + return self.uid.split("-")[0] + + def pattern_replace(self, s: str) -> str: + """ + %r - rule name. If group job, will use the group ID instead + %i - snakemake job ID + %w - wildcards. 
e.g., wildcards A and B will be concatenated as 'A=<val>.B=<val>' + %U - a random universally unique identifier + %S - shortened version od %U + %T - Unix time, aka seconds since epoch (rounded to an integer) + """ + replacement = { + "%r": self.rule_name, + "%i": self.jobid, + "%w": self.wildcards_str, + "%U": self.uid, + "%T": str(int(unix_time())), + "%S": self.short_uid, + } + for old, new in replacement.items(): + s = s.replace(old, new) + + return s + + @property + def jobname(self) -> str: + jobname_pattern = CookieCutter.get_cluster_jobname() + if not jobname_pattern: + return "" + + return self.pattern_replace(jobname_pattern) + + @property + def jobid(self) -> str: + """The snakemake jobid""" + if self.is_group_jobtype: + return self.job_properties.get("jobid", "").split("-")[0] + return str(self.job_properties.get("jobid")) + + @property + def logpath(self) -> str: + logpath_pattern = CookieCutter.get_cluster_logpath() + if not logpath_pattern: + return "" + + return self.pattern_replace(logpath_pattern) + + @property + def outlog(self) -> str: + return self.logpath + ".out" + + @property + def errlog(self) -> str: + return self.logpath + ".err" diff --git a/report/dag.dot b/report/dag.dot index e293147e26e6a90bb2709be222dae135d5867568..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/report/dag.dot +++ b/report/dag.dot @@ -1,1083 +0,0 @@ -digraph snakemake_dag { - graph[bgcolor=white, margin=0]; - node[shape=box, style=rounded, fontname=sans, fontsize=10, penwidth=2]; - edge[penwidth=2, color=grey]; - 0[label = "all", color = "0.39 0.6 0.85", style="rounded"]; - 1[label = "renameGeneIds", color = "0.16 0.6 0.85", style="rounded"]; - 2[label = "concatAllChromResults", color = "0.11 0.6 0.85", style="rounded"]; - 3[label = "generateFastaSequencesHC", color = "0.23 0.6 0.85", style="rounded"]; - 4[label = "validateCdsHC", color = "0.25 0.6 0.85", style="rounded"]; - 5[label = "generateFastaSequencesLC", color = "0.28 0.6 0.85", style="rounded"]; - 
6[label = "validateCdsLC", color = "0.48 0.6 0.85", style="rounded"]; - 7[label = "concatAnchoringSummary", color = "0.18 0.6 0.85", style="rounded"]; - 8[label = "concatblatSummary", color = "0.44 0.6 0.85", style="rounded"]; - 9[label = "createDiagrams", color = "0.07 0.6 0.85", style="rounded,dashed"]; - 10[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 11[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 12[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 13[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 14[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 15[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 16[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 17[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 18[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 19[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 20[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 21[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 22[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 23[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 24[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 25[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 26[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 27[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 28[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 29[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 30[label = "mergeFinalGff3", color = "0.09 0.6 0.85", style="rounded"]; - 31[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 32[label = "saveGmapWG", color = "0.41 0.6 0.85", 
style="rounded"]; - 33[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 34[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 35[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 36[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 37[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 38[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 39[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 40[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 41[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 42[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 43[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 44[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 45[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 46[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 47[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 48[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 49[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 50[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 51[label = "saveGmapWG", color = "0.41 0.6 0.85", style="rounded"]; - 52[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 53[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 54[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 55[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 56[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 57[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 58[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 59[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 60[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 
61[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 62[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 63[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 64[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 65[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 66[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 67[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 68[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 69[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 70[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 71[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 72[label = "checkMissing", color = "0.46 0.6 0.85", style="rounded"]; - 73[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 74[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 75[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 76[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 77[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 78[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 79[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 80[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 81[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 82[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 83[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 84[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 85[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 86[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 87[label = 
"mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 88[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 89[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 90[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 91[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 92[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 93[label = "mapHomologousRegions", color = "0.62 0.6 0.85", style="rounded"]; - 94[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 95[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 96[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 97[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 98[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 99[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 100[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 101[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 102[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 103[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 104[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 105[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 106[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 107[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 108[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 109[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 110[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 111[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 112[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 113[label = 
"recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 114[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 115[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 116[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 117[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 118[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 119[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 120[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 121[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 122[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 123[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 124[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 125[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 126[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 127[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 128[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 129[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 130[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 131[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 132[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 133[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 134[label = "gtCleanBlatGff", color = "0.55 0.6 0.85", style="rounded"]; - 135[label = "recalcGmapRescue", color = "0.57 0.6 0.85", style="rounded"]; - 136[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 137[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 138[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 139[label = "gmapRescue", color = "0.05 0.6 
0.85", style="rounded"]; - 140[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 141[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 142[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 143[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 144[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 145[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 146[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 147[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 148[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 149[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 150[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 151[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 152[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 153[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 154[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 155[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 156[label = "gmapRescue", color = "0.05 0.6 0.85", style="rounded"]; - 157[label = "splitGffPerChrom\nchrom: 1A", color = "0.21 0.6 0.85", style="rounded"]; - 158[label = "splitGffPerChrom\nchrom: 2A", color = "0.21 0.6 0.85", style="rounded"]; - 159[label = "splitGffPerChrom\nchrom: 3A", color = "0.21 0.6 0.85", style="rounded"]; - 160[label = "splitGffPerChrom\nchrom: 4A", color = "0.21 0.6 0.85", style="rounded"]; - 161[label = "splitGffPerChrom\nchrom: 5A", color = "0.21 0.6 0.85", style="rounded"]; - 162[label = "splitGffPerChrom\nchrom: 6A", color = "0.21 0.6 0.85", style="rounded"]; - 163[label = "splitGffPerChrom\nchrom: 7A", color = "0.21 0.6 0.85", style="rounded"]; - 164[label = "splitGffPerChrom\nchrom: 1B", color = "0.21 0.6 0.85", style="rounded"]; - 165[label = "splitGffPerChrom\nchrom: 2B", color = "0.21 0.6 
0.85", style="rounded"]; - 166[label = "splitGffPerChrom\nchrom: 3B", color = "0.21 0.6 0.85", style="rounded"]; - 167[label = "splitGffPerChrom\nchrom: 4B", color = "0.21 0.6 0.85", style="rounded"]; - 168[label = "splitGffPerChrom\nchrom: 5B", color = "0.21 0.6 0.85", style="rounded"]; - 169[label = "splitGffPerChrom\nchrom: 6B", color = "0.21 0.6 0.85", style="rounded"]; - 170[label = "splitGffPerChrom\nchrom: 7B", color = "0.21 0.6 0.85", style="rounded"]; - 171[label = "splitGffPerChrom\nchrom: 1D", color = "0.21 0.6 0.85", style="rounded"]; - 172[label = "splitGffPerChrom\nchrom: 2D", color = "0.21 0.6 0.85", style="rounded"]; - 173[label = "splitGffPerChrom\nchrom: 3D", color = "0.21 0.6 0.85", style="rounded"]; - 174[label = "splitGffPerChrom\nchrom: 4D", color = "0.21 0.6 0.85", style="rounded"]; - 175[label = "splitGffPerChrom\nchrom: 5D", color = "0.21 0.6 0.85", style="rounded"]; - 176[label = "splitGffPerChrom\nchrom: 6D", color = "0.21 0.6 0.85", style="rounded"]; - 177[label = "splitGffPerChrom\nchrom: 7D", color = "0.21 0.6 0.85", style="rounded"]; - 178[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 179[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 180[label = "indexQuery", color = "0.64 0.6 0.85", style="rounded,dashed"]; - 181[label = "indexTarget", color = "0.30 0.6 0.85", style="rounded,dashed"]; - 182[label = "bam2bed\nchrom: 1A", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 183[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 184[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 185[label = "bam2bed\nchrom: 2A", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 186[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 187[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 188[label = "bam2bed\nchrom: 3A", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 189[label = "upstreamClosest", color = 
"0.32 0.6 0.85", style="rounded"]; - 190[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 191[label = "bam2bed\nchrom: 4A", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 192[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 193[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 194[label = "bam2bed\nchrom: 5A", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 195[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 196[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 197[label = "bam2bed\nchrom: 6A", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 198[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 199[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 200[label = "bam2bed\nchrom: 7A", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 201[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 202[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 203[label = "bam2bed\nchrom: 1B", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 204[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 205[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 206[label = "bam2bed\nchrom: 2B", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 207[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 208[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 209[label = "bam2bed\nchrom: 3B", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 210[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 211[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 212[label = "bam2bed\nchrom: 4B", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 213[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 214[label = "downstreamClosest", color = 
"0.37 0.6 0.85", style="rounded"]; - 215[label = "bam2bed\nchrom: 5B", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 216[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 217[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 218[label = "bam2bed\nchrom: 6B", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 219[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 220[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 221[label = "bam2bed\nchrom: 7B", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 222[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 223[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 224[label = "bam2bed\nchrom: 1D", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 225[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 226[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 227[label = "bam2bed\nchrom: 2D", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 228[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 229[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 230[label = "bam2bed\nchrom: 3D", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 231[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 232[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 233[label = "bam2bed\nchrom: 4D", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 234[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 235[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 236[label = "bam2bed\nchrom: 5D", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 237[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 238[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 239[label = "bam2bed\nchrom: 6D", color = 
"0.14 0.6 0.85", style="rounded,dashed"]; - 240[label = "upstreamClosest", color = "0.32 0.6 0.85", style="rounded"]; - 241[label = "downstreamClosest", color = "0.37 0.6 0.85", style="rounded"]; - 242[label = "bam2bed\nchrom: 7D", color = "0.14 0.6 0.85", style="rounded,dashed"]; - 243[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 244[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 245[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 246[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 247[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 248[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 249[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 250[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 251[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 252[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 253[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 254[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 255[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 256[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 257[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 258[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 259[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 260[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 261[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 262[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 263[label = "recalcBlatMapped", color = "0.51 0.6 0.85", style="rounded"]; - 264[label = "grepGffFeature", color = "0.02 0.6 0.85", style="rounded"]; - 265[label = 
"keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 266[label = "filterBam", color = "0.60 0.6 0.85", style="rounded,dashed"]; - 267[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 268[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 269[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 270[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 271[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 272[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 273[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 274[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 275[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 276[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 277[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 278[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 279[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 280[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 281[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 282[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 283[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 284[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 285[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 286[label = "keepMappedOnSameChrom", color = "0.00 0.6 0.85", style="rounded,dashed"]; - 287[label = "selectMappedISBP", color = "0.34 0.6 0.85", 
style="rounded,dashed"]; - 288[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 289[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 290[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 291[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 292[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 293[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 294[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 295[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 296[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 297[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 298[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 299[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 300[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 301[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 302[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 303[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 304[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 305[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 306[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 307[label = "selectMappedISBP", color = "0.34 0.6 0.85", style="rounded,dashed"]; - 1 -> 0 - 2 -> 0 - 3 -> 0 - 4 -> 0 - 5 -> 0 - 6 -> 0 - 7 -> 0 - 8 -> 0 - 9 -> 0 - 2 -> 1 - 10 -> 2 - 11 -> 2 - 12 -> 2 - 13 -> 2 - 14 -> 2 - 15 -> 2 - 16 -> 2 - 17 -> 2 - 18 -> 2 - 19 -> 2 - 20 -> 2 - 21 -> 2 - 22 -> 2 - 23 -> 2 - 24 -> 2 - 25 -> 2 - 26 -> 2 - 27 -> 2 - 28 -> 2 - 29 -> 2 - 30 
-> 2 - 31 -> 2 - 32 -> 2 - 33 -> 2 - 34 -> 2 - 35 -> 2 - 36 -> 2 - 37 -> 2 - 38 -> 2 - 39 -> 2 - 40 -> 2 - 41 -> 2 - 42 -> 2 - 43 -> 2 - 44 -> 2 - 45 -> 2 - 46 -> 2 - 47 -> 2 - 48 -> 2 - 49 -> 2 - 50 -> 2 - 51 -> 2 - 52 -> 2 - 53 -> 2 - 54 -> 2 - 55 -> 2 - 56 -> 2 - 57 -> 2 - 58 -> 2 - 59 -> 2 - 60 -> 2 - 61 -> 2 - 62 -> 2 - 63 -> 2 - 64 -> 2 - 65 -> 2 - 66 -> 2 - 67 -> 2 - 68 -> 2 - 69 -> 2 - 70 -> 2 - 71 -> 2 - 72 -> 2 - 73 -> 2 - 74 -> 2 - 75 -> 2 - 76 -> 2 - 77 -> 2 - 78 -> 2 - 79 -> 2 - 80 -> 2 - 81 -> 2 - 82 -> 2 - 83 -> 2 - 84 -> 2 - 85 -> 2 - 86 -> 2 - 87 -> 2 - 88 -> 2 - 89 -> 2 - 90 -> 2 - 91 -> 2 - 92 -> 2 - 93 -> 2 - 1 -> 3 - 3 -> 4 - 1 -> 5 - 5 -> 6 - 73 -> 7 - 74 -> 7 - 75 -> 7 - 76 -> 7 - 77 -> 7 - 78 -> 7 - 79 -> 7 - 80 -> 7 - 81 -> 7 - 82 -> 7 - 83 -> 7 - 84 -> 7 - 85 -> 7 - 86 -> 7 - 87 -> 7 - 88 -> 7 - 89 -> 7 - 90 -> 7 - 91 -> 7 - 92 -> 7 - 93 -> 7 - 73 -> 8 - 74 -> 8 - 75 -> 8 - 76 -> 8 - 77 -> 8 - 78 -> 8 - 79 -> 8 - 80 -> 8 - 81 -> 8 - 82 -> 8 - 83 -> 8 - 84 -> 8 - 85 -> 8 - 86 -> 8 - 87 -> 8 - 88 -> 8 - 89 -> 8 - 90 -> 8 - 91 -> 8 - 92 -> 8 - 93 -> 8 - 94 -> 10 - 95 -> 10 - 31 -> 10 - 73 -> 10 - 96 -> 11 - 97 -> 11 - 32 -> 11 - 74 -> 11 - 98 -> 12 - 99 -> 12 - 33 -> 12 - 75 -> 12 - 100 -> 13 - 101 -> 13 - 34 -> 13 - 76 -> 13 - 102 -> 14 - 103 -> 14 - 35 -> 14 - 77 -> 14 - 104 -> 15 - 105 -> 15 - 36 -> 15 - 78 -> 15 - 106 -> 16 - 107 -> 16 - 37 -> 16 - 79 -> 16 - 108 -> 17 - 109 -> 17 - 38 -> 17 - 80 -> 17 - 110 -> 18 - 111 -> 18 - 39 -> 18 - 81 -> 18 - 112 -> 19 - 113 -> 19 - 40 -> 19 - 82 -> 19 - 114 -> 20 - 115 -> 20 - 41 -> 20 - 83 -> 20 - 116 -> 21 - 117 -> 21 - 42 -> 21 - 84 -> 21 - 118 -> 22 - 119 -> 22 - 43 -> 22 - 85 -> 22 - 120 -> 23 - 121 -> 23 - 44 -> 23 - 86 -> 23 - 122 -> 24 - 123 -> 24 - 45 -> 24 - 87 -> 24 - 124 -> 25 - 125 -> 25 - 46 -> 25 - 88 -> 25 - 126 -> 26 - 127 -> 26 - 47 -> 26 - 89 -> 26 - 128 -> 27 - 129 -> 27 - 48 -> 27 - 90 -> 27 - 130 -> 28 - 131 -> 28 - 49 -> 28 - 91 -> 28 - 132 -> 29 - 133 -> 29 - 50 -> 29 - 92 
-> 29 - 134 -> 30 - 135 -> 30 - 51 -> 30 - 93 -> 30 - 136 -> 31 - 73 -> 31 - 137 -> 32 - 74 -> 32 - 138 -> 33 - 75 -> 33 - 139 -> 34 - 76 -> 34 - 140 -> 35 - 77 -> 35 - 141 -> 36 - 78 -> 36 - 142 -> 37 - 79 -> 37 - 143 -> 38 - 80 -> 38 - 144 -> 39 - 81 -> 39 - 145 -> 40 - 82 -> 40 - 146 -> 41 - 83 -> 41 - 147 -> 42 - 84 -> 42 - 148 -> 43 - 85 -> 43 - 149 -> 44 - 86 -> 44 - 150 -> 45 - 87 -> 45 - 151 -> 46 - 88 -> 46 - 152 -> 47 - 89 -> 47 - 153 -> 48 - 90 -> 48 - 154 -> 49 - 91 -> 49 - 155 -> 50 - 92 -> 50 - 156 -> 51 - 93 -> 51 - 10 -> 52 - 157 -> 52 - 11 -> 53 - 158 -> 53 - 12 -> 54 - 159 -> 54 - 13 -> 55 - 160 -> 55 - 14 -> 56 - 161 -> 56 - 15 -> 57 - 162 -> 57 - 16 -> 58 - 163 -> 58 - 17 -> 59 - 164 -> 59 - 18 -> 60 - 165 -> 60 - 19 -> 61 - 166 -> 61 - 20 -> 62 - 167 -> 62 - 21 -> 63 - 168 -> 63 - 22 -> 64 - 169 -> 64 - 23 -> 65 - 170 -> 65 - 24 -> 66 - 171 -> 66 - 25 -> 67 - 172 -> 67 - 26 -> 68 - 173 -> 68 - 27 -> 69 - 174 -> 69 - 28 -> 70 - 175 -> 70 - 29 -> 71 - 176 -> 71 - 30 -> 72 - 177 -> 72 - 178 -> 73 - 179 -> 73 - 180 -> 73 - 181 -> 73 - 182 -> 73 - 183 -> 74 - 184 -> 74 - 180 -> 74 - 181 -> 74 - 185 -> 74 - 186 -> 75 - 187 -> 75 - 180 -> 75 - 181 -> 75 - 188 -> 75 - 189 -> 76 - 190 -> 76 - 180 -> 76 - 181 -> 76 - 191 -> 76 - 192 -> 77 - 193 -> 77 - 180 -> 77 - 181 -> 77 - 194 -> 77 - 195 -> 78 - 196 -> 78 - 180 -> 78 - 181 -> 78 - 197 -> 78 - 198 -> 79 - 199 -> 79 - 180 -> 79 - 181 -> 79 - 200 -> 79 - 201 -> 80 - 202 -> 80 - 180 -> 80 - 181 -> 80 - 203 -> 80 - 204 -> 81 - 205 -> 81 - 180 -> 81 - 181 -> 81 - 206 -> 81 - 207 -> 82 - 208 -> 82 - 180 -> 82 - 181 -> 82 - 209 -> 82 - 210 -> 83 - 211 -> 83 - 180 -> 83 - 181 -> 83 - 212 -> 83 - 213 -> 84 - 214 -> 84 - 180 -> 84 - 181 -> 84 - 215 -> 84 - 216 -> 85 - 217 -> 85 - 180 -> 85 - 181 -> 85 - 218 -> 85 - 219 -> 86 - 220 -> 86 - 180 -> 86 - 181 -> 86 - 221 -> 86 - 222 -> 87 - 223 -> 87 - 180 -> 87 - 181 -> 87 - 224 -> 87 - 225 -> 88 - 226 -> 88 - 180 -> 88 - 181 -> 88 - 227 -> 88 - 228 -> 89 - 229 -> 
89 - 180 -> 89 - 181 -> 89 - 230 -> 89 - 231 -> 90 - 232 -> 90 - 180 -> 90 - 181 -> 90 - 233 -> 90 - 234 -> 91 - 235 -> 91 - 180 -> 91 - 181 -> 91 - 236 -> 91 - 237 -> 92 - 238 -> 92 - 180 -> 92 - 181 -> 92 - 239 -> 92 - 240 -> 93 - 241 -> 93 - 180 -> 93 - 181 -> 93 - 242 -> 93 - 243 -> 94 - 73 -> 94 - 136 -> 95 - 73 -> 95 - 244 -> 96 - 74 -> 96 - 137 -> 97 - 74 -> 97 - 245 -> 98 - 75 -> 98 - 138 -> 99 - 75 -> 99 - 246 -> 100 - 76 -> 100 - 139 -> 101 - 76 -> 101 - 247 -> 102 - 77 -> 102 - 140 -> 103 - 77 -> 103 - 248 -> 104 - 78 -> 104 - 141 -> 105 - 78 -> 105 - 249 -> 106 - 79 -> 106 - 142 -> 107 - 79 -> 107 - 250 -> 108 - 80 -> 108 - 143 -> 109 - 80 -> 109 - 251 -> 110 - 81 -> 110 - 144 -> 111 - 81 -> 111 - 252 -> 112 - 82 -> 112 - 145 -> 113 - 82 -> 113 - 253 -> 114 - 83 -> 114 - 146 -> 115 - 83 -> 115 - 254 -> 116 - 84 -> 116 - 147 -> 117 - 84 -> 117 - 255 -> 118 - 85 -> 118 - 148 -> 119 - 85 -> 119 - 256 -> 120 - 86 -> 120 - 149 -> 121 - 86 -> 121 - 257 -> 122 - 87 -> 122 - 150 -> 123 - 87 -> 123 - 258 -> 124 - 88 -> 124 - 151 -> 125 - 88 -> 125 - 259 -> 126 - 89 -> 126 - 152 -> 127 - 89 -> 127 - 260 -> 128 - 90 -> 128 - 153 -> 129 - 90 -> 129 - 261 -> 130 - 91 -> 130 - 154 -> 131 - 91 -> 131 - 262 -> 132 - 92 -> 132 - 155 -> 133 - 92 -> 133 - 263 -> 134 - 93 -> 134 - 156 -> 135 - 93 -> 135 - 73 -> 136 - 74 -> 137 - 75 -> 138 - 76 -> 139 - 77 -> 140 - 78 -> 141 - 79 -> 142 - 80 -> 143 - 81 -> 144 - 82 -> 145 - 83 -> 146 - 84 -> 147 - 85 -> 148 - 86 -> 149 - 87 -> 150 - 88 -> 151 - 89 -> 152 - 90 -> 153 - 91 -> 154 - 92 -> 155 - 93 -> 156 - 264 -> 157 - 264 -> 158 - 264 -> 159 - 264 -> 160 - 264 -> 161 - 264 -> 162 - 264 -> 163 - 264 -> 164 - 264 -> 165 - 264 -> 166 - 264 -> 167 - 264 -> 168 - 264 -> 169 - 264 -> 170 - 264 -> 171 - 264 -> 172 - 264 -> 173 - 264 -> 174 - 264 -> 175 - 264 -> 176 - 264 -> 177 - 157 -> 178 - 265 -> 178 - 157 -> 179 - 265 -> 179 - 266 -> 182 - 158 -> 183 - 267 -> 183 - 158 -> 184 - 267 -> 184 - 266 -> 185 - 159 -> 186 - 268 -> 186 - 
159 -> 187 - 268 -> 187 - 266 -> 188 - 160 -> 189 - 269 -> 189 - 160 -> 190 - 269 -> 190 - 266 -> 191 - 161 -> 192 - 270 -> 192 - 161 -> 193 - 270 -> 193 - 266 -> 194 - 162 -> 195 - 271 -> 195 - 162 -> 196 - 271 -> 196 - 266 -> 197 - 163 -> 198 - 272 -> 198 - 163 -> 199 - 272 -> 199 - 266 -> 200 - 164 -> 201 - 273 -> 201 - 164 -> 202 - 273 -> 202 - 266 -> 203 - 165 -> 204 - 274 -> 204 - 165 -> 205 - 274 -> 205 - 266 -> 206 - 166 -> 207 - 275 -> 207 - 166 -> 208 - 275 -> 208 - 266 -> 209 - 167 -> 210 - 276 -> 210 - 167 -> 211 - 276 -> 211 - 266 -> 212 - 168 -> 213 - 277 -> 213 - 168 -> 214 - 277 -> 214 - 266 -> 215 - 169 -> 216 - 278 -> 216 - 169 -> 217 - 278 -> 217 - 266 -> 218 - 170 -> 219 - 279 -> 219 - 170 -> 220 - 279 -> 220 - 266 -> 221 - 171 -> 222 - 280 -> 222 - 171 -> 223 - 280 -> 223 - 266 -> 224 - 172 -> 225 - 281 -> 225 - 172 -> 226 - 281 -> 226 - 266 -> 227 - 173 -> 228 - 282 -> 228 - 173 -> 229 - 282 -> 229 - 266 -> 230 - 174 -> 231 - 283 -> 231 - 174 -> 232 - 283 -> 232 - 266 -> 233 - 175 -> 234 - 284 -> 234 - 175 -> 235 - 284 -> 235 - 266 -> 236 - 176 -> 237 - 285 -> 237 - 176 -> 238 - 285 -> 238 - 266 -> 239 - 177 -> 240 - 286 -> 240 - 177 -> 241 - 286 -> 241 - 266 -> 242 - 73 -> 243 - 180 -> 243 - 181 -> 243 - 74 -> 244 - 180 -> 244 - 181 -> 244 - 75 -> 245 - 180 -> 245 - 181 -> 245 - 76 -> 246 - 180 -> 246 - 181 -> 246 - 77 -> 247 - 180 -> 247 - 181 -> 247 - 78 -> 248 - 180 -> 248 - 181 -> 248 - 79 -> 249 - 180 -> 249 - 181 -> 249 - 80 -> 250 - 180 -> 250 - 181 -> 250 - 81 -> 251 - 180 -> 251 - 181 -> 251 - 82 -> 252 - 180 -> 252 - 181 -> 252 - 83 -> 253 - 180 -> 253 - 181 -> 253 - 84 -> 254 - 180 -> 254 - 181 -> 254 - 85 -> 255 - 180 -> 255 - 181 -> 255 - 86 -> 256 - 180 -> 256 - 181 -> 256 - 87 -> 257 - 180 -> 257 - 181 -> 257 - 88 -> 258 - 180 -> 258 - 181 -> 258 - 89 -> 259 - 180 -> 259 - 181 -> 259 - 90 -> 260 - 180 -> 260 - 181 -> 260 - 91 -> 261 - 180 -> 261 - 181 -> 261 - 92 -> 262 - 180 -> 262 - 181 -> 262 - 93 -> 263 - 180 -> 263 - 181 
-> 263 - 287 -> 265 - 182 -> 265 - 288 -> 267 - 185 -> 267 - 289 -> 268 - 188 -> 268 - 290 -> 269 - 191 -> 269 - 291 -> 270 - 194 -> 270 - 292 -> 271 - 197 -> 271 - 293 -> 272 - 200 -> 272 - 294 -> 273 - 203 -> 273 - 295 -> 274 - 206 -> 274 - 296 -> 275 - 209 -> 275 - 297 -> 276 - 212 -> 276 - 298 -> 277 - 215 -> 277 - 299 -> 278 - 218 -> 278 - 300 -> 279 - 221 -> 279 - 301 -> 280 - 224 -> 280 - 302 -> 281 - 227 -> 281 - 303 -> 282 - 230 -> 282 - 304 -> 283 - 233 -> 283 - 305 -> 284 - 236 -> 284 - 306 -> 285 - 239 -> 285 - 307 -> 286 - 242 -> 286 - 182 -> 287 - 185 -> 288 - 188 -> 289 - 191 -> 290 - 194 -> 291 - 197 -> 292 - 200 -> 293 - 203 -> 294 - 206 -> 295 - 209 -> 296 - 212 -> 297 - 215 -> 298 - 218 -> 299 - 221 -> 300 - 224 -> 301 - 227 -> 302 - 230 -> 303 - 233 -> 304 - 236 -> 305 - 239 -> 306 - 242 -> 307 -} diff --git a/rules/preprocessGenomes.smk b/rules/preprocessGenomes.smk index a724887ad7a0560463c8822b0027594bbecdd047..1d9a5eae5ccdf99ec8936f9e526f1a8e3c109f02 100644 --- a/rules/preprocessGenomes.smk +++ b/rules/preprocessGenomes.smk @@ -29,12 +29,3 @@ rule indexTarget: input: config['targetFasta'] output: config['targetFasta']+'.fai' shell: "samtools faidx {input}" - -#rule gmapIndexTarget: -# message: " Create Gmap Index for rescue" -# conda: "magatt" -# input: config['targetFasta'] -# output: directory(config['results']+"/target_gmapindex") -# params: indexname="target_gmapindex", indexPath=config['results'] -# log: config['results']+"/target_gmapindex.log" -# shell: "gmap_build -D {params.indexPath} -d {params.indexname} {input} &> {log}" diff --git a/rules/preprocessISBP.smk b/rules/preprocessISBP.smk index 9815401703332ba065019856b3d23b18b61cd149..dc739d934f5b852bc4d9fe71ae04b2bbb8ae7196 100644 --- a/rules/preprocessISBP.smk +++ b/rules/preprocessISBP.smk @@ -34,19 +34,3 @@ rule bam2bed: params: '{chrom}' shell: "bamToBed -i {input} |fgrep -i {params}|cut -d ':' -f 1|sort -k1,1 -k2,2n 1> {output} 2> {log}" - -#rule dumpISBPsID: -# message: "Dump 
ISBPs IDs" -# conda: "magatt" -# input: config['results']+"/1.filteredISBPs.bed" -# output: config['results']+"/1.filteredISBPs.ids" -# shell: " cut -f 4 {input} > {output}" - -#rule splitISBP: -# message: "Split isbps per chromosome" -# conda: "magatt" -# input: config['results']+"/1.filteredISBPs.bed" -# output: config['results']+"/1.filteredISBPs/{chrom}/sorted.bed" -# log: config['results']+"/1.filteredISBPs/{chrom}/sorted.log" -# params: 'Chr{chrom}' -# shell: "fgrep -i {params} {input} 1> {output} 2> {log}" diff --git a/test_data/config.yaml b/test_data/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7107ffa7c6dbe2889c9169406dc812f0a578099 --- /dev/null +++ b/test_data/config.yaml @@ -0,0 +1,51 @@ +##### QUERY related files/parameters (refseqv2.1) +# GFF annotation to transfer +annotationQuery: 'test_data/reference.gff3' +# feature type used for anchoring on target genome +featureType: 'gene' +# FASTA of the query (used to check the sequences after the coordinates are calculated on the target genome) +queryFasta: 'test_data/reference.fa' +# blastdb of all mrnas. 
used to rescue genes which have failed in the transfer using the targeted approach +blastdb: 'test_data/reference_mrna.fa' +# map of all chromosome ids --> NEED TO BE UPDATED in another version WITH ONE ARRAY FOR THE QUERY AND ONE ARRAY FOR THE TARGET GENOME ASSEMBLY +chromosomes: ['1A', '2A', '3A', '4A', '5A', '6A', '7A', '1B', '2B', '3B', '4B', '5B', '6B', '7B', '1D', '2D', '3D', '4D', '5D', '6D', '7D'] +refChrom: ['chr1A', 'chr1B', 'chr1D', 'chr2A', 'chr2B', 'chr2D', 'chr3A', 'chr3B', 'chr3D', 'chr4A', 'chr4B', 'chr4D', 'chr5A', 'chr5B', 'chr5D', 'chr6A', 'chr6B', 'chr6D', 'chr7A', 'chr7B', 'chr7D', 'chrUn'] + +##### Transfer mode +# transfer all isoforms (all) or only the '.1' (first) +transferType: 'first' + +##### TARGET related files/parameters +# FASTA of the target genome +targetFasta: 'test_data/target.fa' +#GMAP index of the genome for -d option +targetGmapIndex: 'target.fa.gmapidx' +#GMAP index: path to the gmapindex directory, for -D option +targetGmapIndexPath: './test_data/' +#BWA index prefix +targetBwaIdx: 'test_data/target.fa' + +##### ISBP/markers related config and parameters +# BED file of coordinates on the query genome (REFSEQ v2.1) +isbpBed: 'test_data/isbps.bed' +# BWA threads for mapping +bwaThreads: 16 +# FLAG : F flag for samtools +flag_F: 3844 +# minimum mapping quality of markers on the target genome +mapq: 30 +# max mismatches per ISBP/marker +mismatches: 2 + +##### OUTPUT directory +results: 'test_data/results' +finalPrefix: 'MAGATT_TEST' +# this file contains two columns: the first is the chromosome name as it appears in the genome.fasta of the new reference, +# and the second the chromosome name as it will appear in the new gene Names +chromMapID: 'test_data/chrMapping.txt' + +##### Nomenclature for final gene IDs +# used in rule renameGeneIds (rules/geneAnchoring.smk) +gff_prefix: 'TraesCSTest' +gff_version: '01G' +gff_source: 'MAGATT-TEST' diff --git a/test_data/magatt_reference_test.tar.gz 
b/test_data/magatt_reference_test.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e65a8f1348990d31eaa257a86868b673257d43be --- /dev/null +++ b/test_data/magatt_reference_test.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9acea98b44354e82c715c561ebb490a666715c1b2609cf7ae7c25e61d3eb7dea +size 701455738