Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Pour information : coupure de la forge ce matin entre 6h45 et 7h05 pour une mise à jour de sécurité
Open sidebar
Aurelien Brionne
GenomeFeatures
Commits
81c792e2
Commit
81c792e2
authored
Dec 21, 2018
by
Aurelien Brionne
Browse files
upgrade part1
parent
893bb6b4
Changes
4
Hide whitespace changes
Inline
Side-by-side
R/build_genome_features.R
View file @
81c792e2
...
...
@@ -72,8 +72,12 @@ setMethod("build_genome_features",definition=function(txdb,features,parameters)
###################
# build ids cross reference
crossref
<-
data.table
::
data.table
(
AnnotationDbi
::
select
(
txdb
,
columns
=
c
(
"TXID"
,
"TXNAME"
),
keytype
=
"GENEID"
,
keys
=
AnnotationDbi
::
keys
(
txdb
))
AnnotationDbi
::
select
(
txdb
,
columns
=
c
(
"TXID"
,
"TXNAME"
),
keytype
=
"GENEID"
,
keys
=
AnnotationDbi
::
keys
(
txdb
)
)
)
###################
...
...
@@ -387,58 +391,76 @@ setMethod("build_genome_features",definition=function(txdb,features,parameters)
# load annotation supplement
###################
###################
# read the gff
file
=
AnnotationDbi
::
metadata
(
txdb
)
$
value
[
3
]
if
(
!
base
::
is.na
(
file
)){
###################
# download
if
(
base
::
length
(
grep
(
"ftp.+\\.gz$"
,
file
))
>
0
){
# check file name
file
=
AnnotationDbi
::
metadata
(
txdb
)
$
value
[
3
]
if
(
!
base
::
is.na
(
file
)){
###################
#
import the GFF
utils
::
download.file
(
file
,
quiet
=
T
,
destfile
=
"gff.gz"
)
#
download
if
(
base
::
length
(
grep
(
"ftp.+\\.gz$"
,
file
))
>
0
){
##################
#
uncompress
R.utils
::
gunzip
(
"gff.gz"
)
#
##################
#
output name
output
=
base
::
sub
(
"^.+/"
,
""
,
file
)
##################
# file to download
file
=
"gff"
}
###################
# import the GFF
utils
::
download.file
(
file
,
quiet
=
T
,
destfile
=
output
)
###################
# read the gff
#gff<-data.table::fread(file,fill=T,drop=base::c(2,6,8),verbose=F,showProgress=F,col.names=base::c("chr",
#"type","start","end","strand","att"))[data.table::like(type,"RNA|transcript") & !data.table::like(chr,"^#")]
##################
# uncompress
R.utils
::
gunzip
(
output
)
gff
<-
data.table
::
fread
(
file
,
sep
=
""
,
verbose
=
F
,
showProgress
=
F
,
header
=
F
)[
data.table
::
like
(
base
::
gsub
(
"ID=.+$"
,
""
,
V1
),
"[[:space:]].+(RNA|transcript)"
),]
##################
# file to download
file
=
output
}
###################
# parse it
gff
<-
gff
[,{
tmp
=
data.table
::
data.table
(
base
::
t
(
base
::
strsplit
(
V1
,
"\t"
)[[
1
]]));
base
::
list
(
chr
=
tmp
$
V1
,
type
=
tmp
$
V3
,
start
=
tmp
$
V4
,
end
=
tmp
$
V5
,
strand
=
tmp
$
V7
,
att
=
tmp
$
V9
)
},
by
=
1
:
base
::
nrow
(
gff
)][,
base
:=
NULL
]
###################
# remove files
if
(
file
==
"gff"
){
base
::
unlink
(
"gff"
);
base
::
unlink
(
"gff.gz"
)}
###################
# read the gene file as GRanges
gene_table
<-
rtracklayer
::
import
(
file
)
###################
# extract all GeneID
gff
<-
GenomeFeatures
::
GFF_att
(
gff
,
att.list
=
c
(
"Name"
,
"GeneID"
,
"gene"
))
###################
# convert to data.table
gene_table
<-
data.table
::
data.table
(
BiocGenerics
::
as.data.frame
(
gene_table
,,
row.names
=
NULL
)
)[
data.table
::
like
(
type
,
"RNA|transcript"
),]
if
(
base
::
length
(
base
::
grep
(
"gtf$"
,
file
))
>
0
){
###################
# select columns from gtf
gene_table
<-
gene_table
[,
.
(
seqnames
,
start
,
end
,
strand
,
type
,
gene_id
,
gene_name
,
gene_biotype
,
transcript_id
)
]
}
else
{
###################
# select columns from gff
gene_table
<-
gene_table
[,
.
(
seqnames
,
start
,
end
,
strand
,
type
,
Dbxref
,
gene
,
transcript_id
)
]
###################
# extract gene_id
gene_table
[,
`:=`
(
gene_id
=
base
::
sub
(
"^.+:"
,
""
,
Dbxref
[[
1
]][
1
]),
Dbxref
=
NULL
),
by
=
1
:
base
::
nrow
(
gene_table
)
]
}
###################
# extract all GeneID
gff
<-
base
::
unique
(
gff
[,
.
(
type
,
Name
,
GeneID
,
gene
)])
}
else
{
###################
# else NA
g
ff
=
data.table
::
data.table
(
g
ff
=
NA
)
g
ene_table
=
data.table
::
data.table
(
g
ene_table
=
NA
)
}
###################
...
...
@@ -456,6 +478,6 @@ setMethod("build_genome_features",definition=function(txdb,features,parameters)
intron
=
base
::
list
(
GRanges
=
introns
,
DT
=
introns_DT
),
cds
=
base
::
list
(
GRanges
=
cds
,
DT
=
cds_DT
),
downstream
=
base
::
list
(
GRanges
=
downstream
,
DT
=
downstream_DT
),
g
ff
=
gff
g
ene_table
=
gene_table
)
})
R/genome_features.R
View file @
81c792e2
...
...
@@ -24,7 +24,7 @@ setClass("genome_features",
cds
=
"list"
,
UTR3
=
"list"
,
downstream
=
"list"
,
g
ff
=
"data.table"
g
ene_table
=
"data.table"
)
)
...
...
@@ -60,19 +60,19 @@ setMethod("show", "genome_features",function(object) {
###################
# extract gff
g
ff
<-
methods
::
slot
(
object
,
"g
ff
"
)
g
ene_table
<-
methods
::
slot
(
object
,
"g
ene_table
"
)
###################
# count genes
genes
<-
base
::
length
(
base
::
unique
(
g
ff
$
GeneID
))
genes
<-
base
::
length
(
base
::
unique
(
g
ene_table
$
GeneID
))
###################
# count
g
ff
<-
gff
[,
.N
,
by
=
type
]
g
ene_table
<-
gene_table
[,
.N
,
by
=
type
]
###################
# chaneg header
base
::
names
(
g
ff
)
<-
base
::
c
(
"feature"
,
"count"
)
base
::
names
(
g
ene_table
)
<-
base
::
c
(
"feature"
,
"count"
)
###################
# bind results
...
...
@@ -80,7 +80,7 @@ setMethod("show", "genome_features",function(object) {
base
::
c
(
base
::
list
(
data.table
::
data.table
(
feature
=
"gene"
,
count
=
genes
)),
Data
,
base
::
list
(
g
ff
)
base
::
list
(
g
ene_table
)
)
)
...
...
R/genome_features_overlaps.R
View file @
81c792e2
...
...
@@ -219,11 +219,11 @@ setMethod("genome_features_overlaps",definition=function(peaks,features,figs_pat
# remove tag
annot
[,
tag
:=
NULL
]
if
(
base
::
ncol
(
features
@
g
ff
)
>
1
){
if
(
base
::
ncol
(
features
@
g
ene_table
)
>
1
){
###################
# merge with overlapped features
annot
<-
merge
(
annot
,
features
@
g
ff
,
by.x
=
"tx_name"
,
by.y
=
"Name"
,
all.x
=
T
,
sort
=
F
)
annot
<-
merge
(
annot
,
features
@
g
ene_table
,
by.x
=
"tx_name"
,
by.y
=
"Name"
,
all.x
=
T
,
sort
=
F
)
}
else
{
###################
...
...
R/overlapped_genome_features.R
View file @
81c792e2
...
...
@@ -29,21 +29,16 @@ setMethod("show", "overlapped_genome_features",function(object) {
# extract annotations
Data
<-
methods
::
slot
(
object
,
"annotation"
)
###################
# available columns
colnum
=
base
::
c
(
"condition"
,
"feature_type"
,
"type"
,
"GeneID"
)
colnum
=
colnum
[
colnum
%in%
base
::
names
(
Data
)]
###################
# count unique genes match by feature type
Data
<-
base
::
unique
(
Data
[,
.
SD
,
.SDcols
=
colnum
])
Data
<-
base
::
unique
(
Data
[,
.
(
condition
,
feature_type
,
type
,
gene_id
)
])
###################
# count
Data
<-
data.table
::
dcast
(
Data
,
condition
~
feature_type
,
value.var
=
"
G
ene
ID
"
,
value.var
=
"
g
ene
_id
"
,
fun
=
length
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment