Commit 6e280a7d authored by Aurelien Brionne's avatar Aurelien Brionne
Browse files

GRanges objects structure upgrade

parent 9346c73f
Package: GenomeFeatures
Version: 1.2
Version: 1.3
Title: Multiple genomic features annotations
Author: Aurelien Brionne [aut, cre]
Maintainer: Aurelien Brionne <aurelien.brionne@inrae.fr>
......
......@@ -24,12 +24,14 @@ importFrom(GenomicRanges,pintersect)
importFrom(IRanges,IRanges)
importFrom(IRanges,findOverlaps)
importFrom(IRanges,width)
importFrom(S4Vectors,mcols)
importFrom(S4Vectors,queryHits)
importFrom(S4Vectors,subjectHits)
importFrom(UpSetR,upset)
importFrom(data.table,":=")
importFrom(data.table,.N)
importFrom(data.table,.SD)
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(data.table,dcast)
importFrom(data.table,fread)
......
#' @title build a convenient genomic features object.
#' @description build a convenient \code{\link{genome_features-class}} object needed for find overlaps beetween
#' selected features and genomic coordinates in a single step.
#' @importFrom AnnotationDbi select keys as.data.frame metadata
#' @importFrom GenomicFeatures promoters transcripts fiveUTRsByTranscript threeUTRsByTranscript exonsBy intronsByTranscript cdsBy
#' @importFrom GenomicRanges flank
#' @importFrom AnnotationDbi select keys as.data.frame metadata
#' @importFrom GenomicFeatures promoters transcripts fiveUTRsByTranscript threeUTRsByTranscript exonsBy intronsByTranscript cdsBy
#' @importFrom S4Vectors mcols
#' @importFrom GenomicRanges flank
#' @importFrom rtracklayer import
#' @importFrom BiocGenerics as.data.frame
#' @importFrom data.table data.table := .N like
#' @importFrom data.table data.table := .N like as.data.table
#' @param txdb a txdb database built with \pkg{GenomicFeatures} (see example).
#' @param features a \code{\link[base]{vector}} of features \code{\link[base]{character}} with by
#' default all the availables features (("promoter","UTR5","UTR3","exon1","exons","intron1","introns","cds", and "downstream").
......@@ -90,7 +91,7 @@ setMethod(
## promoters
promoters=NULL;promoters_DT=NULL
promoters=NULL;DT=NULL
if("promoter"%in%features){
......@@ -102,7 +103,7 @@ setMethod(
)
# convert to table
promoters_DT<-data.table(
DT<-data.table(
as.data.frame(
promoters,
row.names = NULL
......@@ -110,11 +111,11 @@ setMethod(
)[,c("tx_id","tx_name"),with=FALSE]
# convert to character
promoters_DT[,"tx_id":=as.character(promoters_DT$tx_id)]
DT[,"tx_id":=as.character(DT$tx_id)]
# merge with crossref
promoters_DT<-merge(
promoters_DT,
DT<-merge(
DT,
crossref,
by.x="tx_id",
by.y="TXID",
......@@ -123,17 +124,17 @@ setMethod(
)
# rename
names(promoters_DT)[1:2]<-c("group","feature")
names(DT)[1:2]<-c("group","feature")
# format feature
promoters_DT[,"tx_name":=feature]
DT[,"tx_name":=feature]
# format feature
promoters_DT[,
DT[,
`:=`(
feature=paste(
promoters_DT$GENEID,
promoters_DT$feature,
DT$GENEID,
DT$feature,
"promoter",
sep=":"
),
......@@ -141,11 +142,14 @@ setMethod(
TXNAME=NULL
)
]
# assign metadata
mcols(promoters)<-DT
}
## downstream
downstream=NULL;downstream_DT=NULL
downstream=NULL;DT=NULL
if("downstream"%in%features){
......@@ -158,7 +162,7 @@ setMethod(
)
# convert to table
downstream_DT<-data.table(
DT<-data.table(
as.data.frame(
downstream,
row.names = NULL
......@@ -166,11 +170,11 @@ setMethod(
)[,c("tx_id","tx_name"),with=FALSE]
# convert to character
downstream_DT[,"tx_id":=as.character(downstream_DT$tx_id)]
DT[,"tx_id":=as.character(DT$tx_id)]
# merge with crossref
downstream_DT<-merge(
downstream_DT,
DT<-merge(
DT,
crossref,
by.x="tx_id",
by.y="TXID",
......@@ -179,17 +183,17 @@ setMethod(
)
# rename
names(downstream_DT)[1:2]<-c("group","feature")
names(DT)[1:2]<-c("group","feature")
# format feature
downstream_DT[,"tx_name":=feature]
DT[,"tx_name":=feature]
# format feature
downstream_DT[,
DT[,
`:=`(
feature=paste(
downstream_DT$GENEID,
downstream_DT$feature,
DT$GENEID,
DT$feature,
"downstream",
sep=":"
),
......@@ -197,11 +201,14 @@ setMethod(
TXNAME=NULL
)
]
# assign metadata
mcols(downstream)<-DT
}
## 5' UTR
UTR5=NULL;UTR5_DT=NULL
UTR5=NULL;DT=NULL
if("UTR5"%in%features){
......@@ -209,7 +216,7 @@ setMethod(
UTR5<-fiveUTRsByTranscript(txdb,use.names=FALSE)
# convert to table
UTR5_DT<-data.table(
DT<-data.table(
as.data.frame(
UTR5,
row.names = NULL
......@@ -220,8 +227,8 @@ setMethod(
UTR5<-unlist(UTR5)
# merge with crossref
UTR5_DT<-merge(
UTR5_DT,
DT<-merge(
DT,
crossref,
by.x="group_name",
by.y="TXID",
......@@ -230,29 +237,33 @@ setMethod(
)
# format feature
UTR5_DT[,
DT[,
`:=`(
feature=paste(
UTR5_DT$GENEID,
DT$GENEID,
":",
UTR5_DT$TXNAME,
DT$TXNAME,
":5'UTR(exon",
UTR5_DT$exon_rank,
DT$exon_rank,
")",
sep=""
),
exon_rank=NULL,
GENEID=NULL,
tx_name=UTR5_DT$TXNAME,
tx_name=DT$TXNAME,
TXNAME=NULL,
group_name=NULL
)
]
# assign metadata
mcols(UTR5)<-DT
}
## 3' UTR
UTR3=NULL;UTR3_DT=NULL
UTR3=NULL;DT=NULL
if("UTR3"%in%features){
......@@ -260,7 +271,7 @@ setMethod(
UTR3<-threeUTRsByTranscript(txdb,use.names=FALSE)
# convert to table
UTR3_DT<-data.table(
DT<-data.table(
as.data.frame(
UTR3,
row.names = NULL
......@@ -271,8 +282,8 @@ setMethod(
UTR3<-unlist(UTR3)
# merge with crossref
UTR3_DT<-merge(
UTR3_DT,
DT<-merge(
DT,
crossref,
by.x="group_name",
by.y="TXID",
......@@ -281,30 +292,32 @@ setMethod(
)
# format feature
UTR3_DT[,
DT[,
`:=`(
feature=paste(
UTR3_DT$GENEID,
DT$GENEID,
":",
UTR3_DT$TXNAME,
DT$TXNAME,
":3'UTR(exon",
UTR3_DT$exon_rank,
DT$exon_rank,
")",
sep=""
),
exon_rank=NULL,
GENEID=NULL,
tx_name= UTR3_DT$TXNAME,
tx_name= DT$TXNAME,
TXNAME=NULL,
group_name=NULL
)
]
# assign metadata
mcols(UTR3)<-DT
}
## Exons
exons=NULL;exons_DT=NULL
exon1=NULL;exon1_DT=NULL
exons=NULL;exons1=NULL;DT=NULL
if("exons"%in%features | "exon1"%in%features){
......@@ -312,7 +325,7 @@ setMethod(
exons<-exonsBy(txdb,use.names=FALSE)
# convert to table
exons_DT<-data.table(
DT<-data.table(
as.data.frame(
exons,
row.names = NULL
......@@ -323,11 +336,11 @@ setMethod(
exons<-unlist(exons)
# count exons by transcripts
exons_nb<-exons_DT[!is.na(group_name),.N,by="group_name"]
exons_nb<-DT[!is.na(group_name),.N,by="group_name"]
# merge count to exons_DT
exons_DT<-merge(
exons_DT,
DT<-merge(
DT,
exons_nb,
by="group_name",
all.x=TRUE,
......@@ -335,8 +348,8 @@ setMethod(
)
# merge with crossref
exons_DT<-merge(
exons_DT,
DT<-merge(
DT,
crossref,
by.x="group_name",
by.y="TXID",
......@@ -345,44 +358,49 @@ setMethod(
)
# format feature
exons_DT[,
DT[,
`:=`(
feature=paste(
exons_DT$GENEID,
DT$GENEID,
":",
exons_DT$TXNAME,
DT$TXNAME,
":exon",
exons_DT$exon_rank,
DT$exon_rank,
"/",
exons_DT$N,
DT$N,
sep=""
),
exon_rank=NULL,
GENEID=NULL,
tx_name= exons_DT$TXNAME,
tx_name= DT$TXNAME,
TXNAME=NULL,
N=NULL,
group_name=NULL
)
]
# assign metadata
mcols(exons)<-DT
if("exon1"%in%features){
# locate intron 1
pos<-grepl("exon1/",exons_DT[,feature])
pos<-grepl("exon1/",DT[,feature])
# subset intron1
exon1_DT<-exons_DT[pos]
DT<-DT[pos]
# get intron1 GRanges
exon1<-exons[pos]
# assign metadata
mcols(exon1)<-DT
}
}
## Introns
introns=NULL;introns_DT=NULL
intron1=NULL;intron1_DT=NULL
introns=NULL;intron1=NULL;DT=NULL
if("introns"%in%features | "intron1"%in%features){
......@@ -390,7 +408,7 @@ setMethod(
introns<-intronsByTranscript(txdb,use.names=FALSE)
# convert to table
introns_DT<-data.table(
DT<-data.table(
as.data.frame(
introns,
row.names = NULL
......@@ -401,11 +419,11 @@ setMethod(
introns<-unlist(introns)
# count introns by transcripts
introns_nb<-introns_DT[!is.na(group_name),.N,by="group_name"]
introns_nb<-DT[!is.na(group_name),.N,by="group_name"]
# merge count to introns_DT
introns_DT<-merge(
introns_DT,
DT<-merge(
DT,
introns_nb,
by="group_name",
all.x=TRUE,
......@@ -413,14 +431,14 @@ setMethod(
)
# add intron_rank
introns_DT[,"intron_rank":=1:length(N),by="group_name"]
DT[,"intron_rank":=1:length(N),by="group_name"]
# reverse intron_rank for reverse strand
introns_DT["-","intron_rank":=rev(intron_rank),by="group_name",on="strand"]
DT["-","intron_rank":=rev(intron_rank),by="group_name",on="strand"]
# merge with crossref
introns_DT<-merge(
introns_DT,
DT<-merge(
DT,
crossref,
by.x="group_name",
by.y="TXID",
......@@ -429,45 +447,50 @@ setMethod(
)
# format feature
introns_DT[,
DT[,
`:=`(
feature=paste(
introns_DT$GENEID,
DT$GENEID,
":",
introns_DT$TXNAME,
DT$TXNAME,
":intron",
introns_DT$intron_rank,
DT$intron_rank,
"/",
introns_DT$N,
DT$N,
sep=""
),
intron_rank=NULL,
strand=NULL,
GENEID=NULL,
tx_name=introns_DT$TXNAME,
tx_name=DT$TXNAME,
TXNAME=NULL,
N=NULL,
group_name=NULL
)
]
# assign metadata
mcols(introns)<-DT
if("intron1"%in%features){
# locate intron 1
pos<-grepl("intron1/",introns_DT[,feature])
pos<-grepl("intron1/",DT[,feature])
# subset intron1
intron1_DT<-introns_DT[pos]
DT<-DT[pos]
# get intron1 GRanges
intron1<-introns[pos]
# assign metadata
mcols(intron1)<-DT
}
}
## CDS
cds=NULL;cds_DT=NULL
cds=NULL;DT=NULL
if("cds"%in%features){
......@@ -475,7 +498,7 @@ setMethod(
cds<-cdsBy(txdb,use.names=FALSE)
# convert to table
cds_DT<-data.table(
DT<-data.table(
as.data.frame(
cds,
row.names = NULL
......@@ -486,11 +509,11 @@ setMethod(
cds<-unlist(cds)
# count exons by transcripts
cds_nb<-cds_DT[!is.na(group_name),.N,by="group_name"]
cds_nb<-DT[!is.na(group_name),.N,by="group_name"]
# merge count to exons_DT
cds_DT<-merge(
cds_DT,
DT<-merge(
DT,
cds_nb,
by="group_name",
all.x=TRUE,
......@@ -498,8 +521,8 @@ setMethod(
)
# merge with crossref
cds_DT<-merge(
cds_DT,
DT<-merge(
DT,
crossref,
by.x="group_name",
by.y="TXID",
......@@ -508,26 +531,29 @@ setMethod(
)
# format feature
cds_DT[,
DT[,
`:=`(
feature=paste(
cds_DT$GENEID,
DT$GENEID,
":",
cds_DT$TXNAME,
DT$TXNAME,
":exon",
cds_DT$exon_rank,
DT$exon_rank,
"/",
cds_DT$N,
DT$N,
sep=""
),
exon_rank=NULL,
GENEID=NULL,
tx_name=cds_DT$TXNAME,
tx_name=DT$TXNAME,
TXNAME=NULL,
N=NULL,
group_name=NULL
)
]
# assign metadata
mcols(cds)<-DT
}
# load annotation complement
......@@ -607,15 +633,15 @@ setMethod(
new(
"genome_features",
metadata=list(data.table(metadata(txdb))),
promoter=list(GRanges=promoters,DT=promoters_DT),
UTR5=list(GRanges=UTR5,DT=UTR5_DT),
UTR3=list(GRanges=UTR3,DT=UTR3_DT),
exons=list(GRanges=exons,DT=exons_DT),
exon1=list(GRanges=exon1,DT=exon1_DT),
introns=list(GRanges=introns,DT=introns_DT),
intron1=list(GRanges=intron1,DT=intron1_DT),
cds=list(GRanges=cds,DT=cds_DT),
downstream=list(GRanges=downstream,DT=downstream_DT),
promoter=list(GRanges=promoters),
UTR5=list(GRanges=UTR5),
UTR3=list(GRanges=UTR3),
exons=list(GRanges=exons),
exon1=list(GRanges=exon1),
introns=list(GRanges=introns),
intron1=list(GRanges=intron1),
cds=list(GRanges=cds),
downstream=list(GRanges=downstream),
gene_table=gene_table
)
}
......
......@@ -3,11 +3,11 @@
#' @importFrom AnnotationDbi as.data.frame
#' @importFrom GenomicRanges pintersect
#' @importFrom IRanges findOverlaps width
#' @importFrom S4Vectors queryHits subjectHits
#' @importFrom S4Vectors queryHits subjectHits mcols
#' @importFrom plotly plot_ly layout
#' @importFrom grDevices png colorRampPalette
#' @importFrom UpSetR upset
#' @importFrom data.table data.table rbindlist .SD .N := melt setorder dcast
#' @importFrom data.table data.table rbindlist .SD .N := melt setorder dcast as.data.table
#' @param peaks a \code{\link[base]{list}} of \code{\link[GenomicRanges]{GRanges}} objects elements provided by \code{\link{readPeakFile}}.
#' @param features a \code{\link{genome_features-class}} object.
#' @param figs_path the path where print the \code{\link[UpSetR]{upset}} plots. ("./data/figs/", by default).
......@@ -83,7 +83,7 @@ setMethod(
subject=slot(features,y)$GRanges
# subject DT
subject_DT=slot(features,y)$DT
subject_DT=as.data.table(mcols(subject))
# find overlaps
hits=findOverlaps(query,subject,ignore.strand=ignore.strand)
......
......@@ -91,6 +91,7 @@ toplevel=GenomicFeatures::makeTxDbFromGFF(
Now, we build a convenient object with `GenomeFeatures::build_genome_features`, needed for find overlaps beetween all selected features and genomic coordinates targets into a single step (see below).
```{r features}
library(S4Vectors)
# build genome features
features<-build_genome_features(
toplevel,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment