Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bios4biol
bioinfo-utils
Commits
e80d0f64
Commit
e80d0f64
authored
Dec 02, 2015
by
Celine Noirot
Browse files
Normalization and Expression differential.
parent
92fbc971
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
bin/DEG.R
0 → 100644
View file @
e80d0f64
This diff is collapsed.
Click to expand it.
bin/Normalization.R
0 → 100755
View file @
e80d0f64
################################ Normalization #################################
################################################################################
### ---------------------Packages
#common packages
packages
<-
c
(
"optparse"
,
# to read arguments from a command line
"ggplot2"
,
# nice plots
"reshape2"
)
# reshape grouped data
for
(
package
in
packages
){
# if package is installed locally, load
if
(
!
(
package
%in%
rownames
(
installed.packages
())))
{
install.packages
(
package
)
}
do.call
(
'library'
,
list
(
package
))
}
#Bioconductor package
if
(
!
(
"edgeR"
%in%
rownames
(
installed.packages
())))
{
source
(
"http://www.bioconductor.org/biocLite.R"
)
biocLite
(
"edgeR"
)
}
library
(
edgeR
)
# RNAseq data normalization
### ---------------------Parameters
#############
###### Parameters' initialization when the script is launched from command line
############
option_list
=
list
(
make_option
(
c
(
"-f"
,
"--file"
),
type
=
"character"
,
default
=
NULL
,
help
=
"tabulated raw count matrix with library name as header (e.g.
#gene_id lib1 lib2)"
,
metavar
=
"character"
),
make_option
(
c
(
"-o"
,
"--out"
),
type
=
"character"
,
default
=
NULL
,
help
=
"folder path where results are stored"
,
metavar
=
"character"
)
);
opt_parser
=
OptionParser
(
usage
=
"Imports data, converts them into a DGEList object
(edgeR), does all normalization methods and produces
diagnostic plots."
,
option_list
=
option_list
);
opt
=
parse_args
(
opt_parser
);
##############
## Parameters' initialization when the script is launched from R or Rstudio
#file: tabulated raw count matrix with library name as header
#(e.g. gene_id lib1 lib2)
#out: folder path where results are stored
##############
#opt=list()
#opt$file=XXX # for example "C:/My documents/raw_count.csv"
#opt$out=XXX # for example"C:/My documents/results"
#Directory and count table file name are needed to continue
if
(
is.null
(
opt
$
file
)
|
is.null
(
opt
$
out
))
{
print_help
(
opt_parser
)
stop
(
"count table file name or output directory missing.\n"
,
call.
=
FALSE
)
}
#Create output folder if it didn't exist
ifelse
(
!
dir.exists
(
opt
$
out
),
dir.create
(
opt
$
out
),
FALSE
)
#####################################################
### ---------------------Input data and preprocessing
#####################################################
#Raw count
raw_count_table
<-
read.table
(
normalizePath
(
opt
$
file
),
header
=
TRUE
,
row.names
=
1
,
comment.char
=
""
)
#DGEList object
dge
<-
DGEList
(
raw_count_table
,
remove.zeros
=
TRUE
)
#--Pseudo counts before normalization
PC_before
<-
dge
$
counts
PC_before_log
<-
log2
(
dge
$
counts
+
1
)
##############-Plots before normalization
PC_before_log_long
<-
melt
(
PC_before_log
)
colnames
(
PC_before_log_long
)
<-
c
(
"name"
,
"lib"
,
"log2CPM"
)
## boxplot
ggplot
(
PC_before_log_long
,
aes
(
x
=
lib
,
y
=
log2CPM
))
+
geom_boxplot
(
fill
=
'gray'
,
color
=
"black"
)
+
theme_bw
()
+
xlab
(
label
=
""
)
+
ylab
(
label
=
expression
(
log
[
2
]
*
"(counts + 1)"
))
+
ggtitle
(
"Boxplot on raw pseudo-counts"
)
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
"rawcount_boxplot.svg"
),
mustWork
=
FALSE
),
width
=
11
,
height
=
7
)
## MDS
MDS_PC
<-
data.frame
(
plotMDS
(
PC_before_log
)
$
cmdscale.out
)
ggplot
(
MDS_PC
,
aes
(
x
=
X1
,
y
=
X2
,
label
=
rownames
(
MDS_PC
)))
+
geom_point
(
size
=
4
)
+
geom_text
(
hjust
=
0.5
,
vjust
=
-0.5
)
+
theme_bw
()
+
xlab
(
label
=
"Dimension 1"
)
+
ylab
(
label
=
"Dimension 2"
)
+
ggtitle
(
"PCA plot on raw pseudo-counts"
)
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
"rawcount_MDS.svg"
),
mustWork
=
FALSE
),
width
=
11
,
height
=
7
)
## density
ggplot
(
PC_before_log_long
,
aes
(
x
=
log2CPM
,
colour
=
lib
))
+
geom_density
()
+
theme_bw
()
+
xlab
(
label
=
expression
(
log
[
2
]
*
"(counts + 1)"
))
+
ylab
(
label
=
"Density"
)
+
ggtitle
(
"Density plot on raw pseudo-counts"
)
+
guides
(
color
=
guide_legend
(
title
=
"Library"
))
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
"rawcount_density.svg"
),
mustWork
=
FALSE
),
width
=
8
,
height
=
6
)
#save data used for plots
write.table
(
PC_before_log_long
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
"raw_boxplot_density.csv"
),
mustWork
=
FALSE
),
sep
=
"\t"
,
row.names
=
TRUE
,
quote
=
FALSE
)
write.table
(
MDS_PC
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
"raw_MDS.csv"
),
mustWork
=
FALSE
),
sep
=
"\t"
,
row.names
=
TRUE
,
quote
=
FALSE
)
#######################################
### --------------------- Normalization
#######################################
normalization
<-
function
(
dge
,
method
)
{
#norm factor
dgeNorm
<-
calcNormFactors
(
dge
,
method
=
method
)
#Pseudo-counts
PC_after
<-
cpm
(
dgeNorm
)
PC_after_log
<-
log2
(
PC_after
+
1
)
#Normalized data
PC_after
<-
cbind
(
"#name"
=
rownames
(
PC_after
),
PC_after
)
sample_info
<-
data.frame
(
sample.name
=
rownames
(
dgeNorm
$
samples
),
dgeNorm
$
samples
[,
c
(
"lib.size"
,
"norm.factors"
)])
write.table
(
PC_after
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
method
,
"_pseudocounts.csv"
)),
mustWork
=
FALSE
),
sep
=
"\t"
,
row.names
=
FALSE
,
quote
=
FALSE
)
write.table
(
sample_info
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
method
,
"_info.txt"
)),
mustWork
=
FALSE
),
sep
=
"\t"
,
row.names
=
FALSE
,
quote
=
FALSE
)
##### Plots
PC_after_log_long
<-
melt
(
PC_after_log
)
colnames
(
PC_after_log_long
)
<-
c
(
"name"
,
"lib"
,
"log2CPM"
)
#title
tit
<-
method
if
(
method
==
"upperquartile"
)
{
tit
<-
"upper-quartile"
}
#boxplot
ggplot
(
PC_after_log_long
,
aes
(
x
=
lib
,
y
=
log2CPM
))
+
geom_boxplot
(
fill
=
'gray'
,
color
=
"black"
)
+
theme_bw
()
+
xlab
(
label
=
""
)
+
ylab
(
label
=
expression
(
log
[
2
]
*
"(counts + 1)"
))
+
ggtitle
(
paste0
(
"Boxplot on pseudo-counts after normalization (method = "
,
tit
,
")"
))
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
method
,
"_boxplot.svg"
)),
mustWork
=
FALSE
),
width
=
8
,
height
=
6
)
#MDS
MDS_PC
<-
data.frame
(
plotMDS
(
PC_after_log
)
$
cmdscale.out
)
ggplot
(
MDS_PC
,
aes
(
x
=
X1
,
y
=
X2
,
label
=
rownames
(
MDS_PC
)))
+
geom_point
(
size
=
4
)
+
geom_text
(
hjust
=
0.5
,
vjust
=
-0.5
)
+
theme_bw
()
+
xlab
(
label
=
"Dimension 1"
)
+
ylab
(
label
=
"Dimension 2"
)
+
ggtitle
(
paste0
(
"PCA plot on pseudo-counts after normalization (method = "
,
tit
,
")"
))
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
method
,
"_MDS.svg"
)),
mustWork
=
FALSE
),
width
=
11
,
height
=
7
)
#density
ggplot
(
PC_after_log_long
,
aes
(
x
=
log2CPM
,
colour
=
lib
))
+
geom_density
()
+
theme_bw
()
+
xlab
(
label
=
expression
(
log
[
2
]
*
"(counts + 1)"
))
+
ylab
(
label
=
"Density"
)
+
ggtitle
(
paste0
(
"Density plot on pseudo-counts after normalization (method = "
,
tit
,
")"
))
+
guides
(
color
=
guide_legend
(
title
=
"Library"
))
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
method
,
"_density.svg"
)),
mustWork
=
FALSE
),
width
=
8
,
height
=
6
)
#save data used for plots
write.table
(
PC_after_log_long
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
method
,
"_boxplot_density.csv"
)),
mustWork
=
FALSE
),
sep
=
"\t"
,
row.names
=
TRUE
,
quote
=
FALSE
)
write.table
(
MDS_PC
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
method
,
"_MDS.csv"
)),
mustWork
=
FALSE
),
sep
=
"\t"
,
row.names
=
TRUE
,
quote
=
FALSE
)
}
normalization
(
dge
,
"upperquartile"
)
normalization
(
dge
,
"RLE"
)
normalization
(
dge
,
"TMM"
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment