Unverified Commit 53efacfd authored by Aurelien Brionne's avatar Aurelien Brionne Committed by GitHub
Browse files

first commit

parent 48c6ff63
Package: GenomeFeatures
Version: 0.99.0
Title: Multiple genomic features annotations
Authors@R: c(
person("Aurelien","Brionne", role=c("aut","cre"),
email="aurelien.brionne@inra.fr"))
Depends: R (>= 3.1.0)
Imports:
AnnotationDbi,
data.table,
GenomicFeatures,
GenomicRanges,
IRanges,
methods,
plotly,
UpSetR
Suggests:
BiocStyle,
knitr,
rmarkdown,
githubinstall
Description: The aim of this package is to provide a full genomic features annotation from genomic coordinates, without use an annotation priority.
VignetteBuilder: knitr
License: MIT
LazyData: TRUE
Author: Aurelien Brionne [aut, cre]
Maintainer: Aurelien Brionne <aurelien.brionne@inra.fr>
Repository: gitHub
RoxygenNote: 6.0.1
Collate:
'GFF_att.R'
'overlapped_genome_features.R'
'Plot.R'
'Table.R'
'genome_features.R'
'build_genome_features.R'
'genome_features_overlaps.R'
'readPeakFile.R'
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: knitr
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
YEAR: 2018
COPYRIGHT HOLDER: Institut National de la Recherche Agronomique (INRA)
# Generated by roxygen2: do not edit by hand
export(GFF_att)
export(Plot)
export(Table)
export(build_genome_features)
export(genome_features_overlaps)
export(readPeakFile)
import(data.table)
importFrom(AnnotationDbi,as.data.frame)
importFrom(AnnotationDbi,keys)
importFrom(AnnotationDbi,metadata)
importFrom(AnnotationDbi,select)
importFrom(BiocGenerics,as.data.frame)
importFrom(GenomicFeatures,cdsBy)
importFrom(GenomicFeatures,exonsBy)
importFrom(GenomicFeatures,fiveUTRsByTranscript)
importFrom(GenomicFeatures,intronsByTranscript)
importFrom(GenomicFeatures,promoters)
importFrom(GenomicFeatures,threeUTRsByTranscript)
importFrom(GenomicFeatures,transcripts)
importFrom(GenomicRanges,GRanges)
importFrom(GenomicRanges,flank)
importFrom(GenomicRanges,pintersect)
importFrom(IRanges,IRanges)
importFrom(IRanges,findOverlaps)
importFrom(IRanges,width)
importFrom(S4Vectors,queryHits)
importFrom(S4Vectors,subjectHits)
importFrom(UpSetR,upset)
importFrom(data.table,data.table)
importFrom(data.table,fread)
importFrom(grDevices,png)
importFrom(methods,new)
importFrom(methods,setClass)
importFrom(methods,setMethod)
importFrom(methods,slot)
importFrom(methods,slotNames)
importFrom(plotly,layout)
importFrom(plotly,plot_ly)
#' @title Extract attributes from gff.
#' @description Extract attributes from gff file \code{\link[data.table]{data.table}}.
#' @importFrom data.table data.table
#' @details This internal function is use by \code{\link{build_genome_features}} in order to extract attributes from gff
#' and return a \code{\link[data.table]{data.table}}.
#' @return a \code{\link[data.table]{data.table}}.
#' @keywords internal
#' @export
GFF_att<-function(...,att.list=c("GeneID","Name","gene")){
#################
# Data loading
Data<-data.table::data.table(...)
#################
# extract attributes in table
Dat=base::lapply(att.list,function(y){
#################
# substitute
if(y=="ID"){
#################
# particular case
tmp<-base::gsub("ID=","",Data$att)
}else{
#################
# remove annotation before
tmp<-base::gsub(base::paste(".*",y,"[:=]",sep=""),"",Data$att)
}
#################
# remove annotation after
base::gsub("[;,].+","",tmp)
})
#################
# create data.table
require(data.table)
Dat=base::do.call("data.table",Dat)
#################
# add header
names(Dat)<-base::gsub(":|=","",att.list)
#################
# results table
return(data.table::data.table(Data,Dat))
}
#' @title Plot overlapped_genome_features graphs.
#' @description Plot overlapped_genome_features graphs bar or line.
#' @param object an \code{\link{overlapped_genome_features-class}} object.
#' @param type a \code{\link[base]{character}} which coulde be "bar" or "line".
#' @details This method is a simple accessor to the stored \code{\link[plotly]{plot_ly}} bar or line plots from the \code{\link{overlapped_genome_features-class}} object.
#' @return a \code{\link[plotly]{plot_ly}} object.
#' @include overlapped_genome_features.R
#' @examples
#' ###################
#' # display bar plot
#' GenomeFeatures::Plot(features_overlaps,"bar")
#'
#' ###################
#' # display bar plot
#' GenomeFeatures::Plot(features_overlaps,"line")
#' @export
setGeneric(name="Plot",def=function(object,type){standardGeneric("Plot")})
setMethod("Plot",signature="overlapped_genome_features",definition=function(object,type) {
###################
# display graphs
switch(type,
bar=object@distribution$bar_plot,
line=object@distribution$lines_plot
)
})
#' @title extract the annotation table.
#' @description extract the annotation table from an \code{\link{overlapped_genome_features-class}} object.
#' @param object an \code{\link{overlapped_genome_features-class}} object.
#' @details This method is a simple accessor to the stored genomic coordinates annnotation \code{\link[data.table]{data.table}}
#' from an \code{\link{overlapped_genome_features-class}} object.
#' @return a \code{\link[data.table]{data.table}} object.
#' @include overlapped_genome_features.R
#' @examples
#' ###################
# # extract the annotation table
#' annot<-GenomeFeatures::Table(features_overlaps)
#' @export
setGeneric(name="Table",def=function(object){standardGeneric("Table")})
setMethod("Table",signature="overlapped_genome_features",definition=function(object){
###################
# display the table
object@annotation
})
#' @title build a convenient genomic features object.
#' @description build a convenient \code{\link{genome_features-class}} object needed for find overlaps beetween
#' selected features and genomic coordinates in a single step.
#' @importFrom AnnotationDbi select keys as.data.frame metadata
#' @importFrom GenomicFeatures promoters transcripts fiveUTRsByTranscript threeUTRsByTranscript exonsBy intronsByTranscript cdsBy
#' @importFrom GenomicRanges flank
#' @importFrom BiocGenerics as.data.frame
#' @importFrom methods new
#' @import data.table
#' @param txdb a txdb database built with \pkg{GenomicFeatures} (see example).
#' @param features a \code{\link[base]{vector}} of features \code{\link[base]{character}} with by
#' default all the availables features ("promoter", "UTR5", "UTR3", "exon", "intron", "cds", and "downstream").
#' @param parameters a \code{\link[base]{list}} of parameters to use in order to define promoter and
#' downstream ranges distance to TSS (Transcript Start Site) or TES (Transcript End Site).
#' \itemize{
#' \item{promoter_ranges: a \code{\link[base]{list}} with upstream and dowstream elements distance (in base) to TSS}.
#' \item{downstream_range: the dowstream distance (in base) to the TES}.
#' }
#' @details This method build a convenient \code{\link{genomic_features-class}} object, in order to be used for finding overlaps beetween
#' all selected features and genomic coordinates with the \code{\link{genome_features_overlaps}} method.
#' @return a \code{\link{genome_features-class}} object.
#' @include genome_features.R
#' @examples
#'
#' ###################
# # import the chicken genome GFF from NCBI ftp (galGal5 in this case)
#' utils::download.file("ftp://ftp.ncbi.nlm.nih.gov/genomes/Gallus_gallus/GFF/ref_Gallus_gallus-5.0_top_level.gff3.gz",
#' quiet=T,destfile = "galGal5_gff.gz")
#'
#' ##################
#' # uncompress the file
#' R.utils::gunzip("galGal5_gff.gz")
#'
#' ###################
#' # build the database
#' toplevel=GenomicFeatures::makeTxDbFromGFF(
#' file="galGal5_gff",
#' format="gff",
#' organism="Gallus gallus",
#' taxonomyId=9031,
#' dbxrefTag="ID"
#' )
#'
#' ###################
#' # build genome_features object from txdb
#' features<-GenomeFeatures::build_genome_features(
#' toplevel,
#' features=base::c("promoter","UTR5","UTR3","exon","intron","cds","downstream"),
#' parameters = base::list(
#' promoter_ranges=base::list(
#' upstream=3000,
#' downstream=500
#' ),
#' downstream_range=1000
#' )
#' )
#' @export
setGeneric(name="build_genome_features",def=function(txdb,features=base::c("promoter","UTR5","UTR3","exon","intron","cds","downstream"),
parameters=base::list(promoter_ranges=base::list(upstream=3000,downstream=500),downstream_range=1000)){standardGeneric("build_genome_features")})
setMethod("build_genome_features",definition=function(txdb,features=base::c("promoter","UTR5","UTR3","exon","intron","cds","downstream"),
parameters=base::list(promoter_ranges=base::list(upstream=3000,downstream=500),downstream_range=1000)) {
###################
# check txdb
###################
if(base::class(txdb)!="TxDb") base::stop("txdb must be a TxDb class object")
###################
# txdb ids crossref
###################
###################
# build ids cross reference
crossref<-data.table::data.table(
AnnotationDbi::select(txdb,columns=c("TXID","TXNAME"),
keytype="GENEID",keys=AnnotationDbi::keys(txdb))
)
###################
# convert TXID to character
crossref[,TXID:=base::as.character(TXID)]
###################
# promoters
###################
promoters=NULL;promoters_DT=NULL
if("promoter"%in%features){
###################
# get promoters
promoters <- GenomicFeatures::promoters(
txdb,
upstream=parameters$promoter_ranges$upstream,
downstream=parameters$promoter_ranges$upstream
)
###################
# convert to table
promoters_DT<-data.table::data.table(AnnotationDbi::as.data.frame(promoters))[,.(tx_id,tx_name)]
###################
# convert to character
promoters_DT[,tx_id:=base::as.character(tx_id)]
###################
# merge with crossref
promoters_DT<-merge(promoters_DT,crossref,by.x="tx_id",by.y="TXID",sort=F,all.x=T)
###################
# rename
base::names(promoters_DT)[1:2]<-base::c("group","feature")
###################
# format feature
promoters_DT[, tx_name:=feature]
###################
# format feature
promoters_DT[,`:=`(
feature=base::paste(GENEID,feature,"promoter",sep=":"),
GENEID=NULL,
TXNAME=NULL
)]
}
###################
# downstream
###################
downstream=NULL;downstream_DT=NULL
if("downstream"%in%features){
###################
# get downstream
downstream<- GenomicFeatures::transcripts(txdb)
downstream<-GenomicRanges::flank(downstream,parameters$downstream_range,start=F)
###################
# convert to table
downstream_DT<-data.table::data.table(AnnotationDbi::as.data.frame(downstream))[,.(tx_id,tx_name)]
###################
# convert to character
downstream_DT[,tx_id:=base::as.character(tx_id)]
###################
# merge with crossref
downstream_DT<-merge(downstream_DT,crossref,by.x="tx_id",by.y="TXID",sort=F,all.x=T)
###################
# rename
base::names(downstream_DT)[1:2]<-base::c("group","feature")
###################
# format feature
downstream_DT[,tx_name:=feature]
###################
# format feature
downstream_DT[,`:=`(
feature=base::paste(GENEID,feature,"downstream",sep=":"),
GENEID=NULL,
TXNAME=NULL
)]
}
###################
# 5' UTR
###################
UTR5=NULL;UTR5_DT=NULL
if("UTR5"%in%features){
###################
# get 5' UTR in GRangesList
UTR5<-GenomicFeatures::fiveUTRsByTranscript(txdb,use.names=F)
###################
# convert to table
UTR5_DT<-data.table::data.table(BiocGenerics::as.data.frame(UTR5))[,.(group_name,exon_rank)]
###################
# convert to GRanges
UTR5<-base::unlist(UTR5)
###################
# merge with crossref
UTR5_DT<-merge(UTR5_DT,crossref,by.x="group_name",by.y="TXID",sort=F,all.x=T)
###################
# format feature
UTR5_DT[,`:=`(
feature=base::paste(GENEID,":",TXNAME,":5'UTR(exon",exon_rank,")",sep=""),
exon_rank=NULL,
GENEID=NULL,
tx_name=TXNAME,
TXNAME=NULL,
group_name=NULL
)]
}
###################
# 3' UTR
###################
UTR3=NULL;UTR3_DT=NULL
if("UTR3"%in%features){
###################
# get 3' UTR
UTR3<-GenomicFeatures::threeUTRsByTranscript(txdb,use.names=F)
###################
# convert to table
UTR3_DT<-data.table::data.table(AnnotationDbi::as.data.frame(UTR3))[,.(group_name,exon_rank)]
###################
# convert to GRanges
UTR3<-base::unlist(UTR3)
###################
# merge with crossref
UTR3_DT<-merge(UTR3_DT,crossref,by.x="group_name",by.y="TXID",sort=F,all.x=T)
###################
# format feature
UTR3_DT[,`:=`(
feature=base::paste(GENEID,":",TXNAME,":3'UTR(exon",exon_rank,")",sep=""),
exon_rank=NULL,
GENEID=NULL,
tx_name=TXNAME,
TXNAME=NULL,
group_name=NULL
)]
}
###################
# Exons
###################
exons=NULL;exons_DT=NULL
if("exon"%in%features){
###################
# get exons
exons<-GenomicFeatures::exonsBy(txdb,use.names=F)
###################
# convert to table
exons_DT<-data.table::data.table(AnnotationDbi::as.data.frame(exons))[,.(group_name,exon_rank)]
###################
# convert to GRanges
exons<-base::unlist(exons)
###################
# count exons by transcripts
exons_nb<-exons_DT[!base::is.na(group_name),.N,by=group_name]
###################
# merge count to exons_DT
exons_DT<-merge(exons_DT,exons_nb,by="group_name",all.x=T,sort=F)
###################
# merge with crossref
exons_DT<-merge(exons_DT,crossref,by.x="group_name",by.y="TXID",sort=F,all.x=T)
###################
# format feature
exons_DT[,`:=`(
feature=base::paste(GENEID,":",TXNAME,":exon",exon_rank,"/",N,sep=""),
exon_rank=NULL,
GENEID=NULL,
tx_name=TXNAME,
TXNAME=NULL,
N=NULL,
group_name=NULL
)]
}
###################
# Introns
###################
introns=NULL;introns_DT=NULL
if("intron"%in%features){
###################
# get introns
introns<-GenomicFeatures::intronsByTranscript(txdb,use.names=F)
###################
# convert to table
introns_DT<-data.table::data.table(AnnotationDbi::as.data.frame(introns))[,.(group_name,strand)]
###################
# convert to GRanges
introns<-base::unlist(introns)
###################
# count introns by transcripts
introns_nb<-introns_DT[!base::is.na(group_name),.N,by=group_name]
###################
# merge count to introns_DT
introns_DT<-merge(introns_DT,introns_nb,by="group_name",all.x=T,sort=F)
###################
# add intron_rank
introns_DT[,intron_rank:=1:base::length(N),by=group_name]
###################
# reverse intron_rank for reverse strand
introns_DT[strand=="-",intron_rank:=base::rev(intron_rank),by=group_name]
###################
# merge with crossref
introns_DT<-merge(introns_DT,crossref,by.x="group_name",by.y="TXID",sort=F,all.x=T)
###################
# format feature
introns_DT[,`:=`(
feature=base::paste(GENEID,":",TXNAME,":intron",intron_rank,"/",N,sep=""),
intron_rank=NULL,
strand=NULL,
GENEID=NULL,
tx_name=TXNAME,
TXNAME=NULL,
N=NULL,
group_name=NULL
)]
}
###################
# CDS
###################
cds=NULL;cds_DT=NULL
if("cds"%in%features){
###################
# get exons
cds<-GenomicFeatures::cdsBy(txdb,use.names=F)
###################
# convert to table
cds_DT<-data.table::data.table(AnnotationDbi::as.data.frame(cds))[,.(group_name,exon_rank)]
###################
# convert to GRanges
cds<-base::unlist(cds)
###################
# count exons by transcripts
cds_nb<-cds_DT[!base::is.na(group_name),.N,by=group_name]
###################
# merge count to exons_DT
cds_DT<-merge(cds_DT,cds_nb,by="group_name",all.x=T,sort=F)
###################
# merge with crossref
cds_DT<-merge(cds_DT,crossref,by.x="group_name",by.y="TXID",sort=F,all.x=T)
###################
# format feature
cds_DT[,`:=`(
feature=base::paste(GENEID,":",TXNAME,":exon",exon_rank,"/",N,sep=""),
exon_rank=NULL,
GENEID=NULL,
tx_name=TXNAME,
TXNAME=NULL,
N=NULL,
group_name=NULL
)]
}
###################
# load annotation supplement
###################
###################
# read the gff
file=AnnotationDbi::metadata(txdb)$value[3]
###################
# download
if(base::length(grep("ftp.+\\.gz$",file))>0){
###################
# import the GFF
utils::download.file(file,quiet=T,destfile = "gff.gz")
##################
# uncompress
R.utils::gunzip("gff.gz")
##################
# file to download
file="gff"
}
###################
# read the gff
gff<-data.table::fread(file,fill=T,drop=base::c(2,6,8),verbose=F,showProgress=F,col.names=