Refactoring of build_pop script

fa876d5b · Floreal Cabanettes · 8d6c7883 · fa876d5b
Commit fa876d5b authored 7 years ago by Floreal Cabanettes
--- a/build_pop.py
+++ b/build_pop.py
 #!/usr/bin/env python3

+import sys
 import os
 import random
 import argparse
@@ -8,192 +9,220 @@ import vcf
 from Bio import SeqIO
 import tempfile
 from lib.svrunner_utils import eprint
+from pysam import tabix_compress, tabix_index

-parser = argparse.ArgumentParser(description='Generate simulated populations with SV')
-parser.add_argument("--nb-inds", help="Number of individuals to generate", required=True, type=int)
-parser.add_argument("--reference", help="Reference genome", required=True)
-parser.add_argument("--sv-list", help="File containing the SVs", required=True)
-parser.add_argument("--coverage", help="Coverage of reads (default: 15)", default=15, type=int)
-parser.add_argument("--output-directory", help="Output directory (default: res)", default="res")
-parser.add_argument("--force-polymorphism", help="Force polymorphism for each SV", action='store_const', const=True,
-                    default=False)
-parser.add_argument("--haploid", help="Make a haploid genome, instead of diploid one", action="store_const", const=True,
-                    default=False)
-parser.add_argument("-l", "--read-len", help="Generate reads having a length of LEN", type=int, default=100)
-parser.add_argument("-m", "--insert-len-mean", help="Generate inserts (fragments) having an average length of LEN",
-                    type=int, default=300)
-parser.add_argument("-v", "--insert-len-sd", help="Set the standard deviation of the insert (fragment) length (%%)",
-                    type=int, default=30)

-args = parser.parse_args()
-
-nb_inds = args.nb_inds
-
-if nb_inds < 2:
-    raise Exception("nb-inds must be at least 2")
-
-reference = args.reference
-sv_list = args.sv_list
-output_dir = args.output_directory
-tmp_dir = tempfile.mkdtemp()
-haploid = args.haploid
-
-if not os.path.isfile(reference + ".fai"):
-    os.system("samtools faidx " + reference)
-
-
-#############
-# FUNCTIONS #
-#############
-
-
-def allele(frequency):
+def parse_args():
+    """
+    Parse script arguments
+    :return: argparse arguments object
+    """
+    parser = argparse.ArgumentParser(description='Generate simulated populations with SV')
+    parser.add_argument("--nb-inds", help="Number of individuals to generate", required=True, type=int)
+    parser.add_argument("--reference", help="Reference genome", required=True)
+    parser.add_argument("--sv-list", help="File containing the SVs", required=True)
+    parser.add_argument("--coverage", help="Coverage of reads (default: 15)", default=15, type=int)
+    parser.add_argument("--output-directory", help="Output directory (default: res)", default="res")
+    parser.add_argument("--force-polymorphism", help="Force polymorphism for each SV", action='store_const', const=True,
+                        default=False)
+    parser.add_argument("--haploid", help="Make a haploid genome, instead of diploid one", action="store_const", const=True,
+                        default=False)
+    parser.add_argument("-l", "--read-len", help="Generate reads having a length of LEN", type=int, default=100)
+    parser.add_argument("-m", "--insert-len-mean", help="Generate inserts (fragments) having an average length of LEN",
+                        type=int, default=300)
+    parser.add_argument("-v", "--insert-len-sd", help="Set the standard deviation of the insert (fragment) length (%%)",
+                        type=int, default=30)
+
+    args = parser.parse_args()
+    return args
+
+
+def _allele(frequency):
+    """
+    Get randomly an allele, given a frequency
+    :param frequency: frequency of the allele {float}
+    :return: allele randomly choosen {0 or 1}
+    """
    return 1 if random.uniform(0, 1) < frequency else 0


-def get_genotypes_for_inds():
+def _get_genotypes_for_inds(nb_inds, haploid, freq):
+    """
+    Get genotypes for each individual
+    :param nb_inds: number of individuals {int}
+    :param haploid: is the genome hamploid {bool}
+    :param freq: frequency of the allele {float}
+    :return: list of genotypes, vcf data {list}
+    """
    all_genotypes = []
    genotypes_row = []
    for i in range(1, nb_inds + 1):
        if not haploid:
-            genotype = str(allele(freq)) + "/" + str(allele(freq))
+            genotype = str(_allele(freq)) + "/" + str(_allele(freq))
        else:
-            genotype = str(allele(freq))
+            genotype = str(_allele(freq))
        genotype_data = vcf.model._Call(None, "INDIV_" + str(i), vcf.model.make_calldata_tuple("GT")(GT=genotype))
        genotypes_row.append(genotype_data)
        all_genotypes.append(genotype)
    return all_genotypes, genotypes_row


-def svsort(sv, chr):
+def _svsort(sv, chrm, genotypes_for_inds):
    """
    Function to sort regions
+    :param sv: the variant
+    :param chrm: chromosome {str}
+    :param genotypes_for_inds: dictionary of genotypes for each individual
    """
-    return int(genotypes_for_inds[chr][sv]["start"])
-
-
-prg_path = os.path.dirname(os.path.realpath(__file__))
-
-if not os.path.isdir(output_dir):
-    os.mkdir(output_dir)
+    return int(genotypes_for_inds[chrm][sv]["start"])


 ###########################
 # 1. Get random deletions #
 ###########################

-print("GENERATE SV DELs...\n")
+def get_random_deletions(sv_list, reference, tmp_dir, prg_path):
+    """
+    Get random deletions
+    :param sv_list: file describing deletions (in SVsim format) {str}
+    :param reference: reference genome fasta file {str}
+    :param tmp_dir: temporary directory {str}
+    :param prg_path: program path {str}
+    """
+
+    print("GENERATE SV DELs...\n")

-os.system(os.path.join(prg_path, "SVsim -W -i {0} -r {1} -o {2}{3}reference-sv".format(sv_list, reference, tmp_dir, os.path.sep)))
+    os.system(os.path.join(prg_path, "SVsim -W -i {0} -r {1} -o {2}{3}reference-sv".format(sv_list, reference, tmp_dir, os.path.sep)))


 ###################################
 # 2. Build BED files of deletions #
 ###################################

-try:
-    with open(os.path.join(tmp_dir, "reference-sv.bed"), "w") as bed:
-        try:
-            with open(os.path.join(tmp_dir, "reference-sv.bedpe"), "r") as bedpe:
-                for line in bedpe:
-                    freq = 0.2 if random.uniform(0,1) < 0.5 else 0.5
-                    parts = line.split("\t")
-                    bed.write("\t".join([parts[0], parts[2], parts[4], parts[6].replace("::", "_"), str(freq)]) + "\n")
-        except IOError:
-            eprint("ERROR: SVSim failed to generate variants.")
-            exit(1)
-except IOError:
-    eprint("ERROR: Unable to generate BED file \"{0}\".".format(os.path.join(tmp_dir, "reference-sv.bed")))
-    exit(1)
+def build_bed_deletions_file(bed_file, tmp_dir):
+    """
+    Build BED file of deletions
+    :param bed_file: bed full path name {str}
+    :param tmp_dir: temporary directory {str}
+    """
+    try:
+        with open(bed_file, "w") as bed:
+            try:
+                with open(os.path.join(tmp_dir, "reference-sv.bedpe"), "r") as bedpe:
+                    for line in bedpe:
+                        freq = 0.2 if random.uniform(0,1) < 0.5 else 0.5
+                        parts = line.split("\t")
+                        bed.write("\t".join([parts[0], parts[2], parts[4], parts[6].replace("::", "_"), str(freq)]) + "\n")
+            except IOError:
+                eprint("ERROR: SVSim failed to generate variants.")
+                exit(1)
+    except IOError:
+        eprint("ERROR: Unable to generate BED file \"{0}\".".format(os.path.join(tmp_dir, "reference-sv.bed")))
+        exit(1)
+

 ###############################################################################
 # 3. Build VCF files containing genotypes for the given number of individuals #
 ###############################################################################
 # VCF file is a result file, not used by this script

-print("GENERATE SUMMARY VCF FILE...\n")
-
-genotypes_for_inds = OrderedDict()
-# { chr: { id_indiv: {start: #start, end: #end, genotypes: [[0,1],[1,1],...], ...}, # ...}
-
-# Build VCF header:
-try:
-    with open(os.path.join(prg_path, "template.vcf"), "r") as template:
-        try:
-            with open(os.path.join(tmp_dir, "template.vcf"), "w") as my_template:
-                for line in template:
-                    if line[:6] == "#CHROM":
-                        line = line.replace("\n", "")
-                        for i in range(0, nb_inds):
-                            line += "\tINDIV_" + str(i+1)
-                        line += "\n"
-                    my_template.write(line)
-        except IOError:
-            eprint("ERROR: unable to create template file \"{0}\" in temp dir.".format(os.path.join(tmp_dir, "template.vcf")))
-            exit(1)
-except IOError:
-    eprint("ERROR: template file not found in program directory.")
-    exit(1)
-
-try:
-    with open(os.path.join(tmp_dir, "reference-sv.bed"), "r") as bed:
-        vcf_reader = vcf.Reader(filename=os.path.join(tmp_dir, 'template.vcf'))
-        output_vcf = os.path.join(output_dir, "genotypes.vcf")
-        vcf_writer = vcf.Writer(open(output_vcf, "w"), vcf_reader)
-        for line in bed:
-            parts = line.replace("\n", "").split("\t")
-            freq = float(parts[4])
-            if parts[0] not in genotypes_for_inds:
-                genotypes_for_inds[parts[0]] = {}
-            genotypes_for_inds[parts[0]][parts[3]] = {"start": int(parts[1]), "end": int(parts[2]), "genotypes": []}
-
-            # Get genotypes:
-            all_genotypes, genotypes = [], []
-            if args.force_polymorphism:
-                polymorph = False
-                while not polymorph:
-                    all_genotypes, genotypes = get_genotypes_for_inds()
-                    polymorph = len(set(all_genotypes)) > 1
-            else:
-                all_genotypes, genotypes = get_genotypes_for_inds()
-            genotypes_for_inds[parts[0]][parts[3]]["genotypes"] = [x.split("/") for x in all_genotypes]
-
-            info = {"END": int(parts[2]), "AF": freq}
-            vcf_record = vcf.model._Record(parts[0], int(parts[1]), parts[3], "N", [vcf.model._SV("DEL")], ".", ".", info, "GT", [0], genotypes)
-            vcf_writer.write_record(vcf_record)
-        vcf_writer.close()
-
-        # Bgzip + tabix:
-        os.system("bgzip -c " + output_vcf + " > " + output_vcf + ".gz")
-        os.unlink(output_vcf)
-        output_vcf += ".gz"
-    os.system("tabix -p vcf " + output_vcf)
-except IOError:
-    eprint("ERROR: Unable to load bed file \"{0}\": file not found.".format(os.path.join(tmp_dir, "reference-sv.bed")))
-    exit(1)
+def _build_vcf_header(vcf_file, prg_path, tmp_dir, nb_inds):
+    """
+    Build header of the VCF file
+    :param vcf_file: vcf file full path name {str}
+    :param prg_path: program path {str}
+    :param tmp_dir: temporary directory {str}
+    :param nb_inds: number of individuals {int}
+    """
+    try:
+        with open(os.path.join(prg_path, "template.vcf"), "r") as template:
+            try:
+                with open(vcf_file, "w") as my_template:
+                    for line in template:
+                        if line[:6] == "#CHROM":
+                            line = line.replace("\n", "")
+                            for i in range(0, nb_inds):
+                                line += "\tINDIV_" + str(i+1)
+                            line += "\n"
+                        my_template.write(line)
+            except IOError:
+                eprint("ERROR: unable to create template file \"{0}\" in temp dir.".format(os.path.join(tmp_dir, "template.vcf")))
+                exit(1)
+    except IOError:
+        eprint("ERROR: template file not found in program directory.")
+        exit(1)


+def build_genotypes_vcf_list(bed_file, output_vcf, haploid, force_polymorphism, nb_inds, tmp_dir, prg_path):
+    """
+    Build VCF file describing genotypes for each individual (and the associated python dictionary)
+    :param bed_file: bed file describing deletions {str}
+    :param output_vcf: output VCF file full path name {str}
+    :param haploid: is haploid {bool}
+    :param force_polymorphism: force polymorphism {bool}
+    :param output_dir: output directory {str}
+    :param nb_inds: number of individuals {int}
+    :param tmp_dir: temporary directory {str}
+    :param prg_path: program path {str}
+    :return: dictionary of genotypes for each variant for each dictionary {OrderedDict}
+    """
+    print("GENERATE SUMMARY VCF FILE...\n")
+    genotypes_for_inds = OrderedDict()
+    # { chr: { id_indiv: {start: #start, end: #end, genotypes: [[0,1],[1,1],...], ...}, # ...}
+
+    vcf_file = os.path.join(tmp_dir, "template.vcf")
+
+    # Build VCF header:
+    _build_vcf_header(vcf_file, prg_path, tmp_dir, nb_inds)
+
+    try:
+        with open(bed_file, "r") as bed:
+            vcf_reader = vcf.Reader(filename=vcf_file)
+            vcf_writer = vcf.Writer(open(output_vcf, "w"), vcf_reader)
+            for line in bed:
+                parts = line.replace("\n", "").split("\t")
+                freq = float(parts[4])
+                if parts[0] not in genotypes_for_inds:
+                    genotypes_for_inds[parts[0]] = {}
+                genotypes_for_inds[parts[0]][parts[3]] = {"start": int(parts[1]), "end": int(parts[2]), "genotypes": []}
+
+                # Get genotypes:
+                all_genotypes, genotypes = [], []
+                if force_polymorphism:
+                    polymorph = False
+                    while not polymorph:
+                        all_genotypes, genotypes = _get_genotypes_for_inds(nb_inds, haploid, freq)
+                        polymorph = len(set(all_genotypes)) > 1
+                else:
+                    all_genotypes, genotypes = _get_genotypes_for_inds(nb_inds, haploid, freq)
+                genotypes_for_inds[parts[0]][parts[3]]["genotypes"] = [x.split("/") for x in all_genotypes]
+
+                info = {"END": int(parts[2]), "AF": freq}
+                vcf_record = vcf.model._Record(parts[0], int(parts[1]), parts[3], "N", [vcf.model._SV("DEL")], ".", ".", info, "GT", [0], genotypes)
+                vcf_writer.write_record(vcf_record)
+            vcf_writer.close()
+
+            tabix_compress(output_vcf, output_vcf + ".gz", True)
+            tabix_index(output_vcf + ".gz", force=True, preset="vcf")
+    except IOError:
+        eprint("ERROR: Unable to load bed file \"{0}\": file not found.".format(bed_file))
+        exit(1)
+
+    return genotypes_for_inds
+

 ###############################################
 # Build fasta chromosomes for each individual #
 ###############################################

-print("BUILD FASTA GENOME FOR EACH INDIVIDUAL...\n")
-
-
-fasta_orig = SeqIO.index(reference, "fasta")
-
-for chr, svs_infos in genotypes_for_inds.items():
-
-    print("PROCESSING CHROMOSOME {0}...\n".format(chr))
-
-    svs = list(svs_infos.keys())
-    svs = sorted(svs, key=lambda x:svsort(x, chr))
-
-    fasta_orig_chr = fasta_orig[chr]
-    last_nt = len(fasta_orig_chr)-1
-
-    # Compute keeped genomic regions for each diploid chromosome:
+def _compute_keeped_genomic_regions(svs, svs_infos, haploid):
+    """
+    Get list of all regions keeped (not deleted)
+    :param svs: list of variants (deletions) {list}
+    :param svs_infos: infos of variants {dict}
+    :param haploid: is haploid {bool}
+    :return: regions for each individuals {OrderedDict}, the last position for each region {OrderedDict}
+    """
    regions = OrderedDict()
    current_region_pointer = OrderedDict()
    for svs_i in svs:
@@ -203,7 +232,7 @@ for chr, svs_infos in genotypes_for_inds.items():
                regions[i] = {}
                current_region_pointer[i] = {}
            for j in range(0, 2 if not haploid else 1):  # For each chromosome of the diploid genome, or the chromosome
-                                                        # of the haploid genome
+                # of the haploid genome
                if j not in regions[i]:
                    regions[i][j] = []
                    current_region_pointer[i][j] = "0"
@@ -211,18 +240,31 @@ for chr, svs_infos in genotypes_for_inds.items():
                    regions[i][j].append([current_region_pointer[i][j], svs_infos[svs_i]["start"]])
                    current_region_pointer[i][j] = svs_infos[svs_i]["end"]
            i += 1
+    return regions, current_region_pointer

-    # Build FASTA of each diploid/haploid chromosome:
+
+def _build_fastas(chrm, regions, current_region_pointer, output_dir, fasta_orig_chr, last_nt):
+    """
+    Build fasta files
+    :param chrm:
+    :param regions:
+    :param current_region_pointer:
+    :param output_dir:
+    :param fasta_orig_chr:
+    :param last_nt:
+    :return:
+    """
    for indiv, chrs_dips in regions.items():
        id_chrs = list(chrs_dips.keys())
        id_chrs = sorted(id_chrs)
        for id_chr in id_chrs:
-            chr_dip = chrs_dips[id_chr] # SVs for each diploid chromosome
-            logs_regions = [] # Store logs
+            chr_dip = chrs_dips[id_chr]  # SVs for each diploid chromosome
+            logs_regions = []  # Store logs

            # Build FASTA and store logs:
            try:
-                with open(os.path.join(output_dir, "INDIV_" + str(indiv+1) + "_chr_" + str(id_chr) + ".fasta"), "a") as output_handle:
+                with open(os.path.join(output_dir, "INDIV_" + str(indiv + 1) + "_chr_" + str(id_chr) + ".fasta"),
+                          "a") as output_handle:
                    fasta = ""
                    last_one = 0
                    for chr_region in chr_dip:
@@ -235,39 +277,103 @@ for chr, svs_infos in genotypes_for_inds.items():
                    SeqIO.write(fasta, output_handle, "fasta")
            except IOError:
                eprint("ERROR: unable to write \"{0}\" file.".
-                       format(os.path.join(output_dir, "INDIV_" + str(indiv+1) + "_chr_" + str(id_chr) + ".fasta")))
+                       format(os.path.join(output_dir, "INDIV_" + str(indiv + 1) + "_chr_" + str(id_chr) + ".fasta")))
                exit(1)

            # Write logs
            try:
-                with open(os.path.join(output_dir, "INDIV_" + str(indiv+1) + ".regions.log"), "a") as log_handle:
-                    log_handle.write(chr + "_" + str(id_chr) + "\t")
+                with open(os.path.join(output_dir, "INDIV_" + str(indiv + 1) + ".regions.log"), "a") as log_handle:
+                    log_handle.write(chrm + "_" + str(id_chr) + "\t")
                    log_handle.write("\n\t".join(logs_regions))
                    log_handle.write("\n")
            except IOError:
                eprint("ERROR: unable to write log file: \"{0}\"".
-                       format(os.path.join(output_dir, "INDIV_" + str(indiv+1) + ".regions.log")))
-
-print("GENERATE RANDOM READS FOR EACH INDIVIDUAL FROM GENOME...\n")
-
-# Generate reads for all individuals:
-if not haploid:
-    cmd = str("{4}pirs/pirs simulate -z -x {3} -d -B {4}pirs/Profiles/Base-Calling_Profiles/humNew.PE100.matrix.gz "
-              "-I {4}pirs/Profiles/InDel_Profiles/phixv2.InDel.matrix -l {5} -m {6} -v {7} "
-              "-G {4}pirs/Profiles/GC-depth_Profiles/humNew.gcdep_100.dat -o {0} {1} {2}")
-else:
-    cmd = str("{4}pirs/pirs simulate -z -x {3} -B {4}pirs/Profiles/Base-Calling_Profiles/humNew.PE100.matrix.gz "
-              "-I {4}pirs/Profiles/InDel_Profiles/phixv2.InDel.matrix -l {5} -m {6} -v {7} "
-              "-G {4}pirs/Profiles/GC-depth_Profiles/humNew.gcdep_100.dat -o {0} {1}")
-for i in range(1, nb_inds+1):
-    prefix = os.path.join(output_dir, "INDIV_" + str(i))
-    chr0 = prefix + "_chr_0.fasta"
-    chr1 = prefix + "_chr_1.fasta"
+                       format(os.path.join(output_dir, "INDIV_" + str(indiv + 1) + ".regions.log")))
+
+
+def build_fastas_chromosomes(reference, genotypes_for_inds, haploid, output_dir):
+    print("BUILD FASTA GENOME FOR EACH INDIVIDUAL...\n")
+    fasta_orig = SeqIO.index(reference, "fasta")
+
+    for chrm, svs_infos in genotypes_for_inds.items():
+
+        print("PROCESSING CHROMOSOME {0}...\n".format(chrm))
+
+        svs = list(svs_infos.keys())
+        svs = sorted(svs, key=lambda x:_svsort(x, chrm, genotypes_for_inds))
+
+        fasta_orig_chr = fasta_orig[chrm]
+        last_nt = len(fasta_orig_chr)-1
+
+        # Compute keeped genomic regions for each diploid chromosome:
+        regions, current_region_pointer = _compute_keeped_genomic_regions(svs, svs_infos, haploid)
+
+        # Build FASTA of each diploid/haploid chromosome:
+        _build_fastas(chrm, regions, current_region_pointer, output_dir, fasta_orig_chr, last_nt)
+
+
+def generate_samples_fastq(haploid, nb_inds, output_dir, coverage, read_len, insert_len_mean, insert_len_sd,
+                           prg_path):
+
+    print("GENERATE RANDOM READS FOR EACH INDIVIDUAL FROM GENOME...\n")
+
+    # Generate reads for all individuals:
    if not haploid:
-        os.system(cmd.format(prefix, chr0, chr1, args.coverage, prg_path + os.path.sep, args.read_len,
-                             args.insert_len_mean, args.insert_len_sd))
+        cmd = str("{4}pirs/pirs simulate -z -x {3} -d -B {4}pirs/Profiles/Base-Calling_Profiles/humNew.PE100.matrix.gz "
+                  "-I {4}pirs/Profiles/InDel_Profiles/phixv2.InDel.matrix -l {5} -m {6} -v {7} "
+                  "-G {4}pirs/Profiles/GC-depth_Profiles/humNew.gcdep_100.dat -o {0} {1} {2}")
    else:
-        os.system(cmd.format(prefix, chr0, "", args.coverage, prg_path + os.path.sep, args.read_len,
-                             args.insert_len_mean, args.insert_len_sd))
-
-print("DONE!\n")
+        cmd = str("{4}pirs/pirs simulate -z -x {3} -B {4}pirs/Profiles/Base-Calling_Profiles/humNew.PE100.matrix.gz "
+                  "-I {4}pirs/Profiles/InDel_Profiles/phixv2.InDel.matrix -l {5} -m {6} -v {7} "
+                  "-G {4}pirs/Profiles/GC-depth_Profiles/humNew.gcdep_100.dat -o {0} {1}")
+    for i in range(1, nb_inds+1):
+        prefix = os.path.join(output_dir, "INDIV_" + str(i))
+        chr0 = prefix + "_chr_0.fasta"
+        chr1 = prefix + "_chr_1.fasta"
+        if not haploid:
+            os.system(cmd.format(prefix, chr0, chr1, coverage, prg_path + os.path.sep, read_len,
+                                 insert_len_mean, insert_len_sd))
+        else:
+            os.system(cmd.format(prefix, chr0, "", coverage, prg_path + os.path.sep, read_len,
+                                 insert_len_mean, insert_len_sd))
+
+
+def main():
+    args = parse_args()
+    reference = args.reference
+    sv_list = args.sv_list
+    output_dir = args.output_directory
+    tmp_dir = tempfile.mkdtemp()
+    haploid = args.haploid
+    nb_inds = args.nb_inds
+
+    if nb_inds < 2:
+        raise Exception("nb-inds must be at least 2")
+
+    if not os.path.isfile(reference + ".fai"):
+        os.system("samtools faidx " + reference)
+
+    prg_path = os.path.dirname(os.path.realpath(__file__))
+
+    if not os.path.isdir(output_dir):
+        os.mkdir(output_dir)
+
+    ################
+    # Launch steps #
+    ################
+
+    get_random_deletions(sv_list, reference, tmp_dir, prg_path)
+    bed_file = os.path.join(tmp_dir, "reference-sv.bed")
+    build_bed_deletions_file(bed_file, tmp_dir)
+    output_vcf = os.path.join(output_dir, "genotypes.vcf")
+    genotypes_for_inds = build_genotypes_vcf_list(bed_file, output_vcf, haploid, args.force_polymorphism, nb_inds, tmp_dir, prg_path)
+    build_fastas_chromosomes(reference, genotypes_for_inds, haploid, output_dir)
+    generate_samples_fastq(haploid, nb_inds, output_dir, args.coverage, args.read_len, args.insert_len_mean,
+                           args.insert_len_sd, prg_path)
+    print("DONE!\n")
+
+if __name__ == '__main__':
+    try:
+        sys.exit(main())
+    except Exception as e:
+        print(e)
\ No newline at end of file