Commit 81c792e2 authored by Aurelien Brionne's avatar Aurelien Brionne
Browse files

upgrade part1

parent 893bb6b4
......@@ -72,8 +72,12 @@ setMethod("build_genome_features",definition=function(txdb,features,parameters)
###################
# build ids cross reference
crossref<-data.table::data.table(
AnnotationDbi::select(txdb,columns=c("TXID","TXNAME"),
keytype="GENEID",keys=AnnotationDbi::keys(txdb))
AnnotationDbi::select(
txdb,
columns=c("TXID","TXNAME"),
keytype="GENEID",
keys=AnnotationDbi::keys(txdb)
)
)
###################
......@@ -387,58 +391,76 @@ setMethod("build_genome_features",definition=function(txdb,features,parameters)
# load annotation supplement
###################
###################
# read the gff
file=AnnotationDbi::metadata(txdb)$value[3]
if(!base::is.na(file)){
###################
# download
if(base::length(grep("ftp.+\\.gz$",file))>0){
# check file name
file=AnnotationDbi::metadata(txdb)$value[3]
if(!base::is.na(file)){
###################
# import the GFF
utils::download.file(file,quiet=T,destfile = "gff.gz")
# download
if(base::length(grep("ftp.+\\.gz$",file))>0){
##################
# uncompress
R.utils::gunzip("gff.gz")
###################
# output name
output=base::sub("^.+/","",file)
##################
# file to download
file="gff"
}
###################
# import the GFF
utils::download.file(file,quiet=T,destfile=output)
###################
# read the gff
#gff<-data.table::fread(file,fill=T,drop=base::c(2,6,8),verbose=F,showProgress=F,col.names=base::c("chr",
#"type","start","end","strand","att"))[data.table::like(type,"RNA|transcript") & !data.table::like(chr,"^#")]
##################
# uncompress
R.utils::gunzip(output)
gff<-data.table::fread(file,sep="",verbose=F,showProgress=F,header=F)[data.table::like(base::gsub("ID=.+$","",V1),"[[:space:]].+(RNA|transcript)"),]
##################
# file to download
file=output
}
###################
# parse it
gff<-gff[,{
tmp=data.table::data.table(base::t(base::strsplit(V1,"\t")[[1]]));
base::list(chr=tmp$V1,type=tmp$V3,start=tmp$V4,end=tmp$V5,strand=tmp$V7,att=tmp$V9)
},by=1:base::nrow(gff)][,base:=NULL]
###################
# remove files
if(file=="gff"){base::unlink("gff");base::unlink("gff.gz")}
###################
# read the gene file as GRanges
gene_table<-rtracklayer::import(file)
###################
# extract all GeneID
gff<-GenomeFeatures::GFF_att(gff,att.list = c("Name","GeneID","gene"))
###################
# convert to data.table
gene_table<-data.table::data.table(
BiocGenerics::as.data.frame(gene_table,,row.names = NULL)
)[ data.table::like(type,"RNA|transcript"),]
if(base::length(base::grep("gtf$",file))>0){
###################
# select columns from gtf
gene_table<-gene_table[,
.(seqnames,start,end,strand,type,gene_id,gene_name,gene_biotype,transcript_id)
]
}else{
###################
# select columns from gff
gene_table<-gene_table[,
.(seqnames,start,end,strand,type,Dbxref,gene,transcript_id)
]
###################
# extract gene_id
gene_table[,
`:=`(
gene_id=base::sub("^.+:","",Dbxref[[1]][1]),
Dbxref=NULL
),
by=1:base::nrow(gene_table)
]
}
###################
# extract all GeneID
gff<-base::unique(gff[,.(type,Name,GeneID,gene)])
}else{
###################
# else NA
gff=data.table::data.table(gff=NA)
gene_table=data.table::data.table(gene_table=NA)
}
###################
......@@ -456,6 +478,6 @@ setMethod("build_genome_features",definition=function(txdb,features,parameters)
intron=base::list(GRanges=introns,DT=introns_DT),
cds=base::list(GRanges=cds,DT=cds_DT),
downstream=base::list(GRanges=downstream,DT=downstream_DT),
gff=gff
gene_table=gene_table
)
})
......@@ -24,7 +24,7 @@ setClass("genome_features",
cds="list",
UTR3="list",
downstream="list",
gff="data.table"
gene_table="data.table"
)
)
......@@ -60,19 +60,19 @@ setMethod("show", "genome_features",function(object) {
###################
# extract gff
gff<-methods::slot(object,"gff")
gene_table<-methods::slot(object,"gene_table")
###################
# count genes
genes<-base::length(base::unique(gff$GeneID))
genes<-base::length(base::unique(gene_table$GeneID))
###################
# count
gff<-gff[,.N,by=type]
gene_table<-gene_table[,.N,by=type]
###################
# chaneg header
base::names(gff)<-base::c("feature","count")
base::names(gene_table)<-base::c("feature","count")
###################
# bind results
......@@ -80,7 +80,7 @@ setMethod("show", "genome_features",function(object) {
base::c(
base::list(data.table::data.table(feature="gene",count=genes)),
Data,
base::list(gff)
base::list(gene_table)
)
)
......
......@@ -219,11 +219,11 @@ setMethod("genome_features_overlaps",definition=function(peaks,features,figs_pat
# remove tag
annot[,tag:=NULL]
if(base::ncol(features@gff)>1){
if(base::ncol(features@gene_table)>1){
###################
# merge with overlapped features
annot<-merge(annot,features@gff,by.x="tx_name",by.y="Name",all.x=T,sort=F)
annot<-merge(annot,features@gene_table,by.x="tx_name",by.y="Name",all.x=T,sort=F)
}else{
###################
......
......@@ -29,21 +29,16 @@ setMethod("show", "overlapped_genome_features",function(object) {
# extract annotations
Data<-methods::slot(object,"annotation")
###################
# available columns
colnum=base::c("condition","feature_type","type","GeneID")
colnum= colnum[colnum%in%base::names(Data)]
###################
# count unique genes match by feature type
Data<-base::unique(Data[,.SD,.SDcols=colnum])
Data<-base::unique(Data[,.(condition,feature_type,type,gene_id)])
###################
# count
Data<-data.table::dcast(
Data,
condition~feature_type,
value.var="GeneID",
value.var="gene_id",
fun=length
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment