Skip to content
Snippets Groups Projects
Commit 6bf39874 authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Force polymorphism is now optional

parent 83928951
No related branches found
No related tags found
No related merge requests found
......@@ -61,6 +61,8 @@ If increment is not specified, it defaults to 1.
1 line by SV type, like above.
IMPORTANT: only deletion is implemented yet.
We use SVsim to generate positions of SVs. https://github.com/GregoryFaust/SVsim
......
......@@ -14,6 +14,8 @@ parser.add_argument("--sv-list", help="File containing the SVs", required=True)
parser.add_argument("--coverage", help="Coverage of reads (default: 15)", default=15, type=int)
parser.add_argument("--output-directory", help="Output directory (default: res)", default="res")
parser.add_argument("--tmp-directory", help="Temporary directory (default: tmp)", default="tmp")
parser.add_argument("--force-polymorphism", help="Force polymorphism for each SV", action='store_const', const=True,
default=False)
args = parser.parse_args()
......@@ -31,9 +33,33 @@ if not os.path.isfile(reference + ".fai"):
os.system("samtools faidx " + reference)
#############
# FUNCTIONS #
#############
def allele(frequency):
return 1 if random.uniform(0, 1) < frequency else 0
def get_genotypes_for_inds():
all_genotypes = []
genotypes_row = []
for i in range(1, nb_inds + 1):
genotype = str(allele(freq)) + "/" + str(allele(freq))
genotype_data = vcf.model._Call(None, "INDIV_" + str(i), vcf.model.make_calldata_tuple("GT")(GT=genotype))
genotypes_row.append(genotype_data)
all_genotypes.append(genotype)
return all_genotypes, genotypes_row
def svsort(sv, chr):
"""
Function to sort regions
"""
return int(genotypes_for_inds[chr][sv]["start"])
prg_path = os.path.dirname(os.path.realpath(__file__))
if not os.path.isdir(tmp_dir):
......@@ -93,22 +119,16 @@ with open(os.path.join(tmp_dir, "reference-sv.bed"), "r") as bed:
if parts[0] not in genotypes_for_inds:
genotypes_for_inds[parts[0]] = {}
genotypes_for_inds[parts[0]][parts[3]] = {"start": int(parts[1]), "end": int(parts[2]), "genotypes": []}
genotypes = []
polymorph = False
while not polymorph:
unique_genotypes = set()
all_genotypes = []
genotypes_tmp = []
for i in range(1, nb_inds+1):
genotype = str(allele(freq)) + "/" + str(allele(freq))
genotype_data = vcf.model._Call(None, "INDIV_" + str(i), vcf.model.make_calldata_tuple("GT")(GT=genotype))
genotypes_tmp.append(genotype_data)
unique_genotypes.add(genotype)
all_genotypes.append(genotype)
polymorph = len(unique_genotypes) > 1
if polymorph:
genotypes += genotypes_tmp
genotypes_for_inds[parts[0]][parts[3]]["genotypes"] = [x.split("/") for x in all_genotypes]
# Get genotypes:
if args.force_polymorphism:
polymorph = False
while not polymorph:
all_genotypes, genotypes = get_genotypes_for_inds()
polymorph = len(set(all_genotypes)) > 1
else:
all_genotypes, genotypes = get_genotypes_for_inds()
genotypes_for_inds[parts[0]][parts[3]]["genotypes"] = [x.split("/") for x in all_genotypes]
info = {"END": int(parts[2]), "AF": freq}
vcf_record = vcf.model._Record(parts[0], int(parts[1]), parts[3], "N", [vcf.model._SV("DEL")], ".", ".", info, "GT", [0], genotypes)
......@@ -123,13 +143,6 @@ with open(os.path.join(tmp_dir, "reference-sv.bed"), "r") as bed:
print("BUILD FASTA GENOME FOR EACH INDIVIDUAL...\n")
def svsort(sv, chr):
"""
Function to sort regions
"""
return int(genotypes_for_inds[chr][sv]["start"])
fasta_orig = SeqIO.index(reference, "fasta")
for chr, svs_infos in genotypes_for_inds.items():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment