Skip to content
Snippets Groups Projects
Commit 0a21f979 authored by Samuel Mondy's avatar Samuel Mondy
Browse files

Update R_amplicon_analysis

parent b05edeb8
No related branches found
No related tags found
No related merge requests found
......@@ -83,7 +83,7 @@ add.alpha <- function(col, alpha=0.3){
#b must be a matrix containing the sums of the occurences of the OTUs for each variable with variables as columns
#you should use the aggregate() function as done afterwards in the script of the function ensemble(a,b,name,threshold)
#you should use the aggregate() function as done afterwards in the script of the function ensemble(a,b,name,OTUnumber)
###chi square test with only 2 variables
......@@ -143,10 +143,11 @@ chi_sam<-function(b,seuil=0.05)
#b is your correspondance table
#name is a generic name you want to add to your files, plots. For example, if you say name="Analysis", every time the word name is written in the script, it will be replaced by "Analysis".
#Don't forget the "" when writing the name
#threshold is the threshold defined above in the Ten_max() function.
#OTUnumber is the number of kept taxa that will be used by the Ten_max() function.
#seuil = risque de première espèce du test de kruskal-vallis
ensemble<-function(a,b,name,threshold,seuil=0.05)
ensemble<-function(a,b,name,OTUnumber,seuil=0.05)
{
sum_a<-apply(a,2,sum)
if(length(which(sum_a==0))>0)
......@@ -154,21 +155,21 @@ ensemble<-function(a,b,name,threshold,seuil=0.05)
a<-a[,-which(sum_a==0)]
}
#heatmap
if(dim(a)[2]<threshold)
if(dim(a)[2]<OTUnumber)
{
aprime<-t(a)
}
else
{
aprime<-t(Ten_max(t(a),threshold))
aprime<-t(Ten_max(t(a),OTUnumber))
}
write.table(aprime, paste(Sys.Date(),name,"Ten_max",threshold,".txt",sep="_"), row.names=T, sep="\t",quote=F)
write.table(aprime, paste(Sys.Date(),name,"Ten_max",OTUnumber,".txt",sep="_"), row.names=T, sep="\t",quote=F)
logaprime <- log2(aprime)
logaprime[logaprime=="-Inf"] <- c(0)
bprime <- data.frame(unclass(b))
pdf(file=paste("heatmap",Sys.Date(),name,"threshold",threshold,".pdf",sep="_"),width=15,height=15)
pdf(file=paste("heatmap",Sys.Date(),name,"OTUnumber",OTUnumber,".pdf",sep="_"),width=15,height=15)
heatmap.2(t(logaprime),
main = "Heatmap representant les occurrences par echantillon",
notecol="black",
......@@ -196,7 +197,7 @@ ensemble<-function(a,b,name,threshold,seuil=0.05)
#acp
#pdf(file=paste("acp",Sys.Date(),name,".pdf",sep="_"),width=10,height=10)
datas <- dudi.pca(df = a, center = TRUE, scale = TRUE, scannf = FALSE, nf=15)
writeLines(as.character(datas$eig/sum(datas$eig)),paste("acp_eig",Sys.Date(),name,".pdf",sep="_"))
writeLines(as.character(datas$eig/sum(datas$eig)),paste("acp_eig",Sys.Date(),name,".txt",sep="_"))
#s.label(dfxy = datas$li, xax = 1, yax = 2, labels = rownames(datas$li),
# facets = NULL, plot = TRUE, storeData = TRUE, add = FALSE,
# pos = -1)
......@@ -267,7 +268,7 @@ ensemble<-function(a,b,name,threshold,seuil=0.05)
colnames(tableau)<-colnames(c)[-1]
print(dim(tableau))
print(rownames(tableau))
write.table(Ten_max(t(tableau),threshold),paste(Sys.Date(),name,"Ten_max",colnames(b)[x],".txt",sep="_"), row.names=T, sep="\t",quote=F)
write.table(Ten_max(t(tableau),OTUnumber),paste(Sys.Date(),name,"Ten_max",colnames(b)[x],".txt",sep="_"), row.names=T, sep="\t",quote=F)
temp <- as.matrix(c[,-1])
d <- c()
for(i in 1:dim(c)[1]){d[i] <- paste(c[i,c(1)],collapse="_")}
......@@ -323,200 +324,6 @@ ensemble<-function(a,b,name,threshold,seuil=0.05)
}
#########################################################
#en foncion de 2 parametres
#########################################################
comb <- combn(1:k,2)
for(x in 1:dim(comb)[2])
{
temp1 <- comb[1,x]
temp2 <- comb[2,x]
print(paste(colnames(b)[temp1],colnames(b)[temp2],sep="_"))
{
grouping <- as.factor(paste(b[,temp1],b[,temp2], sep="_"))
z=1
y=1
pdf(file=paste("boxplot",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],y,".pdf",sep="_"),width=15,height=15)
par(mfrow=c(3,2), mar=c(7,4,6,4)+2)
for(m in 1:dim(aprime)[2])
{
condition <- length(which(!is.na(unique(grouping))==1))
if(condition[1]>1)
{
test<-kruskal.test(aprime[,m]~grouping)
if(test$p.value<seuil)
{
if(z%%6==0)
{
boxplot(aprime[,m]~grouping,main=paste(colnames(aprime)[m],"_kruskall_test=",formatC(test$p.value, format = "e", digits = 2),sep=""), cex.names=0.3,las=2)
dev.off()
y=y+1
z=1
pdf(file=paste("boxplot",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],y,".pdf",sep="_"),width=30,height=15)
par(mfrow=c(3,2), mar=c(7,4,6,4)+2)
}
else
{
boxplot(aprime[,m]~grouping,main=paste(colnames(aprime)[m],"_kruskall_test=",formatC(test$p.value, format = "e", digits = 2),sep=""), cex.names=0.3,las=2)
z <- z+1
}
}
}
}
dev.off()
pdf(file=paste("BGA",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],".pdf",sep="_"),width=30,height=15)
{
bga1 <- bca(x = datas, fac = grouping, scannf = FALSE, nf = 10)
s.class(datas$li,
fac = grouping,
col = c("black","blue","red","darkgreen","orange","purple","grey","darkblue"))
}
dev.off()
c<-aggregate(a, list(a=b[,temp1],b=b[,temp2]), FUN=sum)
write.table(c, paste(Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],".txt",sep="_"), row.names=F, sep="\t",quote=F)
temp3 <- c
c <- as.matrix(temp3[,-c(1:2)])
d <- c()
for(i in 1:dim(temp3)[1]){d[i] <- paste(temp3[i,c(1:2)],collapse="_")}
rownames(c) <- d
if(dim(t(c))[2]>1)
{
chi_sam(t(c),seuil)
#NMDS
pdf(file=paste("NMDS",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],".pdf",sep="_"),width=15,height=15)
grouping <- as.factor(paste(b[,temp1],b[,temp2], sep="_"))
col=add.alpha(rainbow(n=255))
NMDS.mean=aggregate(data.scores[,c("NMDS1", "NMDS2")], list(group = grouping), mean)
graph<-ggplot(data=data.scores,aes(x = data.scores$NMDS1, y = data.scores$NMDS2, group = grouping, color=grouping, fill=grouping)) +
geom_point(data=data.scores,aes(x=data.scores$NMDS1,y=data.scores$NMDS2, colour=grouping)) +
#annotate("text",x = NMDS.mean$NMDS1,y = NMDS.mean$NMDS2,label=NMDS.mean$group, size=7) +
stat_ellipse(geom="polygon", alpha=0.1, inherit.aes=TRUE) +
coord_equal() +
ggtitle(paste("NMDS", name,colnames(b)[temp1],colnames(b)[temp2])) +
xlab(paste("NMDS1",datas$eig[1])) +
ylab(paste("NMDS2",datas$eig[2])) +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_text(size=18),
axis.title.y = element_text(size=18),
legend.title = element_text(size = 16),
legend.text = element_text(size = 16),
plot.title = element_text(size=21),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.background = element_blank())
print(graph)
}
dev.off()
}
}
#########################################################
#en foncion de 3 parametres
#########################################################
comb <- combn(1:k,3)
for(x in 1:dim(comb)[2])
{
temp1 <- comb[1,x]
temp2 <- comb[2,x]
temp3 <- comb[3,x]
grouping <- as.factor(paste(b[,temp1],b[,temp2],b[,temp3], sep="_"))
z=1
y=1
pdf(file=paste("boxplot",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],colnames(b)[temp3],y,".pdf",sep="_"),width=15,height=15)
par(mfrow=c(3,2), mar=c(7,4,6,4)+2)
for(m in 1:dim(aprime)[2])
{
condition <- length(which(!is.na(unique(bprime[,x]))==1))
if(condition[1]>1)
{
test <- kruskal.test(aprime[,m]~grouping)
if(test$p.value<seuil)
{
if(z%%6==0)
{
boxplot(aprime[,m]~grouping,main=paste(colnames(aprime)[m],"_kruskall_test=",formatC(test$p.value, format = "e", digits = 2),sep=""), cex.names=0.3,las=2)
dev.off()
y=y+1
z=1
pdf(file=paste("boxplot",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],colnames(b)[temp3],y,".pdf",sep="_"),width=30,height=15)
par(mfrow=c(3,2), mar=c(7,4,6,4)+2)
}
else
{
boxplot(aprime[,m]~grouping,main=paste(colnames(aprime)[m],"_kruskall_test=",formatC(test$p.value, format = "e", digits = 2),sep=""), cex.names=0.4, las=2)
z <- z+1
}
}
}
}
dev.off()
pdf(file=paste("BGA",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],colnames(b)[temp3],".pdf",sep="_"),width=30,height=15)
{
bga1 <- bca(x = datas, fac = grouping, scannf = FALSE, nf = 3)
s.class(datas$li,
fac = as.factor(grouping),
col = c("black","blue","red","darkgreen","orange","purple","grey","darkblue"))
}
dev.off()
c<-aggregate(a, list(a=b[,temp1],b=b[,temp2],c=b[,temp3]), FUN=sum)
write.table(c, paste(Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],colnames(b)[temp3],".txt",sep="_"), row.names=F, sep="\t",quote=F)
temp4 <- c
c <- as.matrix(temp4[,-c(1:3)])
d <- c()
for(i in 1:dim(temp4)[1]){d[i] <- paste(temp4[i,c(1:3)],collapse="_")}
rownames(c) <- d
if(dim(t(c))[2]>1)
{
chi_sam(t(c),seuil)
#NMDS
pdf(file=paste("NMDS",Sys.Date(),name,colnames(b)[temp1],colnames(b)[temp2],colnames(b)[temp3],".pdf",sep="_"),width=15,height=15)
grouping <- as.factor(paste(b[,temp1],b[,temp2],b[,temp3], sep="_"))
col=add.alpha(rainbow(n=255))
NMDS.mean=aggregate(data.scores[,c("NMDS1", "NMDS2")], list(group = grouping), mean)
graph<-ggplot(data=data.scores,aes(x = data.scores$NMDS1, y = data.scores$NMDS2, group = grouping, color=grouping, fill=grouping)) +
geom_point(data=data.scores,aes(x=data.scores$NMDS1,y=data.scores$NMDS2, colour=grouping)) +
#annotate("text",x = NMDS.mean$NMDS1,y = NMDS.mean$NMDS2,label=NMDS.mean$group, size=5) +
stat_ellipse(geom="polygon", alpha=0.1, inherit.aes=TRUE) +
coord_equal() +
ggtitle(paste("NMDS", name,colnames(b)[temp1],colnames(b)[temp2],colnames(b)[temp3])) +
xlab(paste("NMDS1",datas$eig[1])) +
ylab(paste("NMDS2",datas$eig[2])) +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_text(size=18),
axis.title.y = element_text(size=18),
legend.title = element_text(size = 16),
legend.text = element_text(size = 16),
plot.title = element_text(size=21),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.background = element_blank())
print(graph)
dev.off()
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment