Skip to content
Snippets Groups Projects
Commit 1f3261fb authored by Maria Bernard's avatar Maria Bernard
Browse files

1000RNASeq ASE : add gtf2bed script

parent 7347a093
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
def process(gtf_in, bed_out):
FH_out = open(bed_out, "w")
FH_in = open(gtf_in)
for line in FH_in:
if line.startswith('#'):
continue
line_list=line.strip().split("\t")
chr = line_list[0]
feature = line_list[2]
start = int(line_list[3])
stop = line_list[4]
if feature != "gene" and feature != "transcript":
continue
feature_id=""
for annot in line_list[8].split(";"):
annot = annot.strip()
if ( feature == "gene" and annot.startswith("gene_id") ) or (feature == "transcript" and annot.startswith("transcript_id") ):
feature_id = annot.split(" ")[1].replace('"','')
break
if feature_id == "":
raise Exceptionn("Could not find feature ID for feature "+ feature + ", start: "+str(start) + ", stop: " + stop + "\n")
FH_out.write(chr + "\t" + str(start-1) + "\t" + stop + "\t" + feature_id + "\n")
FH_out.close()
FH_in.close()
gtf_file = snakemake.input.gtf
bed = snakemake.output.bed
# convert gtf2bed by keeping only gene and transcript feature with their corresponding ID
process(gtf_file, bed)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment