Skip to content
Snippets Groups Projects
Commit 66483579 authored by Samuel Mondy's avatar Samuel Mondy
Browse files

Add new file

parent 0a21f979
No related branches found
No related tags found
No related merge requests found
library("stats")
library("graphics")
library("grDevices utils")
library("datasets")
library("methods")
library("base")
library("gplots")
library("combinat")
library("ade4TkGUI")
library("adegraphics")
library("ade4")
library("tkrplot")
library("lattice")
library("gtools")
library("bitops")
library("MASS")
library("grid")
library("KernSmooth")
library("gdata")
library("sp")
library("latticeExtra")
library("RColorBrewer")
library("tools")
library("compiler")
library("caTools")
library("tcltk")
library("vegan")
library("ggplot2")
library("plyr")
Ten_max<-function(ref,index=20)
{
vref<-c()
for(i in 1:dim(ref)[2])
{
seuil<-sort(ref[,i],decreasing=T)[index]
temp<-which(ref[,i]>(seuil-1))
vref<-unique(c(vref,temp))
}
return(ref[vref,])
}
######################################################################
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 299)
#########################################
#########################################
#Create a color palette
#########################################
#########################################
#You have to provide a vector of color if you want add.alpha() to work. You can find an example below in the NMDS example.
add.alpha <- function(col, alpha=0.3){
if(missing(col))
stop("Please provide a vector of colours.")
apply(sapply(col, col2rgb)/255, 2,
function(x)
rgb(x[1], x[2], x[3], alpha=alpha))
}
#########################################
#Perform a chi-squared test
#########################################
#b must be a matrix containing the sums of the occurences of the OTUs for each variable with variables as columns
#you should use the aggregate() function as done afterwards in the script of the function ensemble(a,b,name,OTUnumber)
###chi square test with only 2 variables
chi_sam<-function(b,seuil=0.05)
#########################################
#########################################
{
t<-apply(b,1,sum)
selec<-which(t>0)
b<-b[selec,]
t<-apply(b,2,sum)
selec<-which(t>0)
b<-b[,selec]
y<-chisq.test(b)
print(y)
print(y$expected)
print(y$observed)
print (y$p.value)
if(y$p.value<seuil)
{
if(dim(b)[2]>2)
{
z<-combn(dim(b)[2],2)
for(i in 1:dim(z)[2])
{
r<-b[,c(z[1,i],z[2,i])]
s<-apply(r,1,sum)
selec<-which(s>0)
r<-r[selec,]
x<-chisq.test(r)
#print(paste(colnames(b)[z[1,i]],colnames(b)[z[2,i]],sep=" vs "))
#print(x$expected)
#print(x$observed)
if(x$p.value<(seuil/dim(z)[2]))
{
print(paste(colnames(b)[z[1,i]],colnames(b)[z[2,i]],sep=" vs "))
print(x$p.value)
}
}
print(paste("corrected p-value ",(seuil/dim(z)[2]),sep=""))
}
else
{
print (y$p.value)
}
}
}
###########################################################
###########################################################
#Here is a function which puts together the functions for heatmap, Between Group Analysis, boxplots and Kruskall-Wallis test, NMDS, PCA and allows you to perform analysis automatically and considering k factors, combinations of 2 of your k factors and combinations of 3 of your k factors.
#The functions Ten_max (Filter the most represented OTU) and the color palette have to be run separately.
#All of the functions are described before.
#a is your OTU dataset
#b is your correspondance table
#name is a generic name you want to add to your files, plots. For example, if you say name="Analysis", every time the word name is written in the script, it will be replaced by "Analysis".
#Don't forget the "" when writing the name
#OTUnumber is the OTUnumber defined above in the Ten_max() function.
ensemble2<-function(a,b,name,OTUnumber,seuil=0.05)
{
sum_a<-apply(a,2,sum)
if(length(which(sum_a==0))>0)
{
a<-a[,-which(sum_a==0)]
}
#heatmap
if(dim(a)[2]<OTUnumber)
{
aprime<-t(a)
}
else
{
aprime<-t(Ten_max(t(a),OTUnumber))
}
write.table(aprime, paste(Sys.Date(),name,"Ten_max",OTUnumber,".txt",sep="_"), row.names=T, sep="\t",quote=F)
logaprime <- log2(aprime)
logaprime[logaprime=="-Inf"] <- c(0)
bprime <- data.frame(unclass(b))
pdf(file=paste("heatmap",Sys.Date(),name,"OTUnumber",OTUnumber,".pdf",sep="_"),width=15,height=15)
heatmap.2(t(logaprime),
main = "Heatmap representant les occurrences par echantillon",
notecol="black",
labRow=rownames(t(logaprime)),
labCol=colnames(t(logaprime)),
density.info="none",
trace="none",
margins =c(12,9),
col=my_palette,
dendrogram="row",
Colv="NA")
dev.off()
pdf(file=paste("heatmap_cluster",Sys.Date(),name,".pdf",sep="_"),width=15,height=15)
heatmap.2(t(logaprime),
main = "Heatmap representant les occurrences par echantillon",
notecol="black",
labRow=rownames(t(logaprime)),
labCol=colnames(t(logaprime)),
density.info="none",
trace="none",
margins =c(12,9),
col=my_palette,
)
dev.off()
#acp
#pdf(file=paste("acp",Sys.Date(),name,".pdf",sep="_"),width=10,height=10)
datas <- dudi.pca(df = a, center = TRUE, scale = TRUE, scannf = FALSE, nf=15)
writeLines(as.character(datas$eig/sum(datas$eig)),paste("acp_eig",Sys.Date(),name,".pdf",sep="_"))
#s.label(dfxy = datas$li, xax = 1, yax = 2, labels = rownames(datas$li),
# facets = NULL, plot = TRUE, storeData = TRUE, add = FALSE,
# pos = -1)
#pca<-scatter(x = datas, xax = 1, yax = 2)
#dev.off()
v<-apply(aprime,1,sum)
w<-apply(a,1,sum)
pourseq<-mean(v/w)*100
con<-file(paste("Pourcentage de sequences representees", name, Sys.Date()), open="w")
cat(pourseq, file=con, name, sep="\t")
close(con)
k<- dim(b)[2]
########################################
#en fonction de 1 parametre
########################################
for(x in 1:k)
{
z=1
y=1
pdf(file=paste("boxplot",Sys.Date(),name,colnames(b)[x],y,".pdf",sep="_"),width=15,height=15)
par(mfrow=c(3,2), mar=c(7,4,6,4)+2)
for(m in 1:dim(aprime)[2])
{
condition <- length(which(!is.na(unique(bprime[,x]))==1))
if(condition[1]>1)
{
test <- kruskal.test(aprime[,m]~bprime[,x])
if(test$p.value<seuil)
{
if(z%%6==0)
{
boxplot(aprime[,m]~bprime[,x],main=paste(colnames(aprime)[m],"_kruskall_test=",formatC(test$p.value, format = "e", digits = 2),sep=""), cex.names=0.3,las=2)
dev.off()
y=y+1
z=1
pdf(file=paste("boxplot",Sys.Date(),name,colnames(b)[x],y,".pdf",sep="_"),width=30,height=15)
par(mfrow=c(3,2), mar=c(7,4,6,4)+2)
}
else
{
boxplot(aprime[,m]~bprime[,x],main=paste(colnames(aprime)[m],"_kruskall_test=",formatC(test$p.value, format = "e", digits = 2),sep=""), cex.names=0.3,las=2)
z <- z+1
}
}
}
else
{
print("kruskal not allowed")
}
}
dev.off()
c<-aggregate(a, list(a=b[,x]), FUN=sum)
write.table(c, paste(Sys.Date(),name,colnames(b)[x],".txt",sep="_"), row.names=F, sep="\t",quote=F)
tableau<-as.matrix(c[,-1])
print(dim(tableau))
print(rownames(tableau))
rownames(tableau)<-c[,1]
colnames(tableau)<-colnames(c)[-1]
print(dim(tableau))
print(rownames(tableau))
write.table(Ten_max(t(tableau),OTUnumber),paste(Sys.Date(),name,"Ten_max",colnames(b)[x],".txt",sep="_"), row.names=T, sep="\t",quote=F)
temp <- as.matrix(c[,-1])
d <- c()
for(i in 1:dim(c)[1]){d[i] <- paste(c[i,c(1)],collapse="_")}
rownames(temp) <- d
if(dim(t(temp))[2]>1)
{
chi_sam(t(temp),seuil)
pdf(file=paste("BGA",Sys.Date(),name,colnames(bprime)[x],".pdf",sep="_"),width=15,height=15)
{
bga1 <- bca(x = datas, fac = as.factor(bprime[,x]), scannf = FALSE, nf = 5)
s.class(datas$li,
fac = as.factor(bprime[,x]), # colorer par groupes
col = c("black","blue","red","darkgreen","orange","purple","grey","darkblue"))
}
dev.off()
#NMDS
pdf(file=paste("NMDS",Sys.Date(),name,colnames(b)[x],".pdf",sep="_"),width=15,height=15)
factor<-as.factor(b[,x])
essai<-metaMDS(a)
stressplot(essai)
data.scores <- as.data.frame(scores(essai))
col=add.alpha(rainbow(n=255))
NMDS.mean=aggregate(data.scores[,c("NMDS1", "NMDS2")], list(group = factor), mean)
graph<-ggplot(data=data.scores,aes(x = data.scores$NMDS1, y = data.scores$NMDS2, group = factor, color=factor, fill=factor)) +
geom_point(data=data.scores,aes(x=data.scores$NMDS1,y=data.scores$NMDS2, colour=factor)) +
annotate("text",x = NMDS.mean$NMDS1,y = NMDS.mean$NMDS2,label=NMDS.mean$group, size=9) +
stat_ellipse(geom="polygon", alpha=0.1, inherit.aes=TRUE) +
coord_equal() +
#erreur indice pour ggtitle
ggtitle(paste("NMDS", name,colnames(b)[x])) +
xlab(paste("NMDS1",datas$eig[1])) +
ylab(paste("NMDS2",datas$eig[2])) +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_text(size=18),
axis.title.y = element_text(size=18),
legend.title = element_text(size = 16),
legend.text = element_text(size = 16),
plot.title = element_text(size=21),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.background = element_blank())
print(graph)
dev.off()
}
}
#########################################################
#en fonction de 2 parametres
#########################################################
comb <- combn(1:k,2)
for(x in 1:dim(comb)[2])
{
temp1 <- comb[1,x]
temp2 <- comb[2,x]
print(paste(colnames(b)[temp1],colnames(b)[temp2],sep="_"))
{
grouping <- as.factor(paste(b[,temp1],b[,temp2], sep="_"))
z=1
y=1
pdf(file=paste("boxplot",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],y,".pdf",sep="_"),width=15,height=15)
par(mfrow=c(3,2), mar=c(7,4,6,4)+2)
for(m in 1:dim(aprime)[2])
{
condition <- length(which(!is.na(unique(grouping))==1))
if(condition[1]>1)
{
test<-kruskal.test(aprime[,m]~grouping)
if(test$p.value<seuil)
{
if(z%%6==0)
{
boxplot(aprime[,m]~grouping,main=paste(colnames(aprime)[m],"_kruskall_test=",formatC(test$p.value, format = "e", digits = 2),sep=""), cex.names=0.3,las=2)
dev.off()
y=y+1
z=1
pdf(file=paste("boxplot",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],y,".pdf",sep="_"),width=30,height=15)
par(mfrow=c(3,2), mar=c(7,4,6,4)+2)
}
else
{
boxplot(aprime[,m]~grouping,main=paste(colnames(aprime)[m],"_kruskall_test=",formatC(test$p.value, format = "e", digits = 2),sep=""), cex.names=0.3,las=2)
z <- z+1
}
}
}
}
dev.off()
pdf(file=paste("BGA",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],".pdf",sep="_"),width=30,height=15)
{
bga1 <- bca(x = datas, fac = grouping, scannf = FALSE, nf = 10)
s.class(datas$li,
fac = grouping,
col = c("black","blue","red","darkgreen","orange","purple","grey","darkblue"))
}
dev.off()
c<-aggregate(a, list(a=b[,temp1],b=b[,temp2]), FUN=sum)
write.table(c, paste(Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],".txt",sep="_"), row.names=F, sep="\t",quote=F)
temp3 <- c
c <- as.matrix(temp3[,-c(1:2)])
d <- c()
for(i in 1:dim(temp3)[1]){d[i] <- paste(temp3[i,c(1:2)],collapse="_")}
rownames(c) <- d
if(dim(t(c))[2]>1)
{
chi_sam(t(c),seuil)
#NMDS
pdf(file=paste("NMDS",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],".pdf",sep="_"),width=15,height=15)
grouping <- as.factor(paste(b[,temp1],b[,temp2], sep="_"))
col=add.alpha(rainbow(n=255))
NMDS.mean=aggregate(data.scores[,c("NMDS1", "NMDS2")], list(group = grouping), mean)
graph<-ggplot(data=data.scores,aes(x = data.scores$NMDS1, y = data.scores$NMDS2, group = grouping, color=grouping, fill=grouping)) +
geom_point(data=data.scores,aes(x=data.scores$NMDS1,y=data.scores$NMDS2, colour=grouping)) +
#annotate("text",x = NMDS.mean$NMDS1,y = NMDS.mean$NMDS2,label=NMDS.mean$group, size=7) +
stat_ellipse(geom="polygon", alpha=0.1, inherit.aes=TRUE) +
coord_equal() +
ggtitle(paste("NMDS", name,colnames(b)[temp1],colnames(b)[temp2])) +
xlab(paste("NMDS1",datas$eig[1])) +
ylab(paste("NMDS2",datas$eig[2])) +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_text(size=18),
axis.title.y = element_text(size=18),
legend.title = element_text(size = 16),
legend.text = element_text(size = 16),
plot.title = element_text(size=21),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.background = element_blank())
print(graph)
}
dev.off()
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment