Commit 14b056db authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Initial commit

parents
#!/usr/bin/env python3
import os
import pysam
import sys
import yaml
import subprocess
from collections import OrderedDict
PRG_PATH = os.path.dirname(os.path.realpath(__file__))
class Miniannotator:
def __init__(self, reference, assembly, map=None):
self.conf = self._load_config()
self.reference = reference
self.assembly = assembly
self.map = map
@staticmethod
def _load_config():
"""
Load config file to a dictionary
:return: config dictionary
:rtype: dict
"""
with open(os.path.join(PRG_PATH, "miniannotator.yml"), "r") as cfg_f:
conf = yaml.safe_load(cfg_f)
return conf
def launch_minimap(self, map_f="map.bam"):
"""
Launch mapping with minimap2. Compress output SAM as BAM
:param map_f: output bam file path
:type map_f: str
:return: 0 if succeed, else error return code
:rtype: int
"""
print("Mapping assembly to reference genome...", flush=True)
minimap = [self.conf["minimap2"], "-ax", "splice", "-t", str(self.conf["threads"]), self.reference,
self.assembly]
samtools_sort = [self.conf["samtools"], "sort"]
samtools_bam = [self.conf["samtools"], "view", "-b", "-o", map_file]
p1 = subprocess.Popen(minimap, stdout=subprocess.PIPE)
p2 = subprocess.Popen(samtools_sort, stdin=p1.stdout, stdout=subprocess.PIPE)
p1.stdout.close()
p3 = subprocess.Popen(samtools_bam, stdin=p2.stdout, stdout=subprocess.PIPE)
p2.stdout.close()
output = p3.communicate()[0]
p3.wait()
rcode = p3.returncode
self.map = map_f
return rcode
def search_genes(self):
"""
Parse BAM file to search genes and exons positions
Query match position on the reference defines gene position
Splice reads define exons: introns are the "N" in the cigarline
Save also DEL/INS events in genes
:return:
"""
genes = OrderedDict()
exons = OrderedDict()
indels = OrderedDict()
align = pysam.AlignmentFile(self.map)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Annotate genome and fix reference genome")
parser.add_argument("-r", "--reference", help="Reference fasta file", required=True)
parser.add_argument("-a", "--assembly", help="De novo assembly fasta file", required=True)
parser.add_argument("-m", "--map", help="BAM map file build with minimap2 (if not given, will be computed now)",
required=False)
parser.add_argument("-o", "--output-dir", help="Output folder path", required=False, default=".")
args = parser.parse_args()
if not os.path.exists(args.output_dir):
os.makedirs(args.output_dir)
elif not os.path.isdir(args.output_dir):
print("Error: output folder %s exists but is not a folder" % args.output_dir, file=sys.stderr)
annotator = Miniannotator(args.reference, args.assembly)
# Map:
if args.map is None:
map_file = os.path.join(args.output_dir, "map.bam")
annotator.launch_minimap(map_file)
else:
print("Using map from %s..." % args.map)
map_file = args.map
minimap2: minimap2
samtools: samtools
threads: 4
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment