aggregate_fun.R 12.5 KB
Newer Older
Etienne Rifa's avatar
Etienne Rifa committed
1
2
#' Aggregate
#'
Etienne Rifa's avatar
Etienne Rifa committed
3
#' Aggregate results of the tree differential analysis methods in one single file.
Etienne Rifa's avatar
Etienne Rifa committed
4
#'
Rifa Etienne's avatar
Rifa Etienne committed
5
#' @param data a phyloseq object (output from decontam or generate_phyloseq)
6
7
8
9
#' @param output Output directory
#' @param metacoder Path to the metacoder CSV file
#' @param deseq Path to deseq results folder
#' @param mgseq Path to metagenomeseq results folder
Rifa Etienne's avatar
Rifa Etienne committed
10
11
#' @param column1 Column name of factor to test (among sample_variables(data))
#' @param column2 Column name on which table were splitted
Etienne Rifa's avatar
Etienne Rifa committed
12
#' @param verbose Verbose level. (1: quiet, 2: print infos, 3: print infos + debug)
Rifa Etienne's avatar
Rifa Etienne committed
13
#' @param rank Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)
14
#' @param comp Comparison to test. Comma separated and comparisons are informed with a tilde (A~C,A~B,B~C). If empty, test all combination.
Sebastien Theil's avatar
Sebastien Theil committed
15
#' @param returnval Boolean for function to return values.
16
17
#'
#' @return Export final CSV files, barplot with top significant ASV and Venn Digramm.
Etienne Rifa's avatar
Etienne Rifa committed
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#'
#' @import phyloseq
#' @import VennDiagram
#' @import ggplot2
#'
#' @export


# Decontam Function

aggregate_fun <- function(data = data, metacoder = NULL, deseq = NULL, mgseq = NULL, output = "./aggregate_diff/",
                          column1 = NULL, column2 = NULL, verbose = 1, rank = "Species", comp = "", returnval = TRUE){

  invisible(flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger"))



  if(verbose == 3){
    invisible(flog.threshold(DEBUG))
Etienne Rifa's avatar
Etienne Rifa committed
37
38
  }
  if(verbose == 2){
Etienne Rifa's avatar
Etienne Rifa committed
39
40
41
    invisible(flog.threshold(INFO))
  }

Etienne Rifa's avatar
Etienne Rifa committed
42
43
44
45
  if(verbose == 1){
    invisible(flog.threshold(ERROR))
  }

Etienne Rifa's avatar
Etienne Rifa committed
46
47
48
49
  if(!dir.exists(output)){
    dir.create(output,recursive=T )
  }

Etienne Rifa's avatar
Etienne Rifa committed
50
51
  flog.info('Glom Tax.')
  data.glom <- tax_glom(data, taxrank=rank)
Etienne Rifa's avatar
Etienne Rifa committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

  if(comp == ''){
    fun <- paste('combinaisons <- combn(na.omit(unique(sample_data(data)$',column1,')),2)',sep='')
    eval(parse(text=fun))
  }else{
    comp_list <- unlist(strsplit(comp,","))
    combinaisons <- matrix(, nrow = 2, ncol = length(comp_list))
    for (i in 1:length(comp_list)){
      tmp <- unlist(strsplit(comp_list[i],"~"))
      combinaisons[1,i] <- tmp[1]
      combinaisons[2,i] <- tmp[2]
    }
  }

  #Metacoder table
  otable <- otu_table(data)
  ttax <- tax_table(data)
  ssample <- as.matrix(sample_data(data))
  sseq <- refseq(data)
  if(file.exists(paste(metacoder))){
    mcoderTab <- read.table(paste(metacoder), h=TRUE)
  }

Rifa Etienne's avatar
Rifa Etienne committed
75
  outF = list()
Etienne Rifa's avatar
Etienne Rifa committed
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
  TABfinal <- data.frame()
  for (col in (1:ncol(combinaisons))){
    flog.info(paste('Combinaison ',combinaisons[1,col], ' ' , combinaisons[2,col],sep=''))
    comp1 = paste(combinaisons[1,col], '_vs_' , combinaisons[2,col],sep='')
    flog.info('Deseq2.')
    if(file.exists(paste(deseq,"/signtab_",column1,"_",combinaisons[1,col],"_vs_",combinaisons[2,col],".csv",sep=""))){
      deseqT <- read.table(paste(deseq,"/signtab_",column1,"_",combinaisons[1,col],"_vs_",combinaisons[2,col],".csv",sep=""), h=TRUE,sep="\t")
    } else{
      flog.warn('File does not exists.')
      deseqT <- data.frame()
    }
    flog.info('MetagenomeSeq.')
    if(file.exists(paste(mgseq,"/signtab_",column1,"_",combinaisons[1,col],"_vs_",combinaisons[2,col],".csv",sep=""))){
      mgseqT <- read.table(paste(mgseq,"/signtab_",column1,"_",combinaisons[1,col],"_vs_",combinaisons[2,col],".csv",sep=""), h=TRUE,sep="\t")
    } else{
      if(file.exists(paste(mgseq,"/signtab_",column1,"_",combinaisons[2,col],"_vs_",combinaisons[1,col],".csv",sep=""))){
Etienne Rifa's avatar
Etienne Rifa committed
92
        mgseqT <- read.table(paste(mgseq,"/signtab_",column1,"_",combinaisons[2,col],"_vs_",combinaisons[1,col],".csv",sep=""), h=TRUE,sep="\t")
Etienne Rifa's avatar
Etienne Rifa committed
93
94
95
96
97
98
      }
      else{
        flog.warn('File does not exists.')
        mgseqT <- data.frame()
      }
    }
Sebastien Theil's avatar
Sebastien Theil committed
99
    flog.debug(pander(mgseqT, split.tables=2000))
Etienne Rifa's avatar
Etienne Rifa committed
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
    # print(head(mgseqT))
    flog.info('Metacoder.')
    if(file.exists(paste(metacoder))){
      mcoderT <- mcoderTab[mcoderTab$treatment_1 == as.character(combinaisons[1,col]) & mcoderTab$treatment_2 == as.character(combinaisons[2,col]),]
      if(nrow(mcoderT) == 0){
        flog.warn("Metacoder table empty...")
        mcoderT <- mcoderTab[mcoderTab$treatment_2 == as.character(combinaisons[1,col]) & mcoderTab$treatment_1 == as.character(combinaisons[2,col]),]
      }
      mcoderT$wilcox_p_value[is.na(mcoderT$wilcox_p_value)] = 1
      mcoderTsignif <- mcoderT[mcoderT$wilcox_p_value <= 0.05,]
    }else{
      flog.warn('File does not exists.')
      mcoderT <- data.frame()
      mcoderTsignif <- data.frame()
    }
    # print(mcoderTsignif)

    flog.info("Retrieving IDS of differential features...")
    if(length(deseqT) > 0){
      deseq_ids <- as.character(deseqT[deseqT$padj <= 0.05 & !is.na(deseqT$padj),1])
    }else{
      deseq_ids <- NULL
    }
    if(length(mgseqT) > 0){
      mgseq_ids <- as.character(mgseqT[,1])
    }else{
      mgseq_ids <- NULL
    }
    if(length(mcoderTsignif) > 0){
      mcoder_ids <- as.character(mcoderTsignif$otu_id)
    }else{
      mcoder_ids <- NULL
    }

    # Unique IDS
    flog.info('Filtering unique IDS...')
    ListAllOtu = unique(c(deseq_ids,mgseq_ids,mcoder_ids))

    # flog.info('Deseq2.')
    # print(deseq_ids)
    # flog.info('MetagenomeSeq.')
    # print(mgseq_ids)
    # flog.info('Metacoder.')
    # print(mcoder_ids)
    # flog.info('All.')
    # print(ListAllOtu)

    #Construction de la table
    flog.info('Building table...')
    col_comp = rep(comp1, length(ListAllOtu))
    if(!is.null(column2)){
      col_env = rep(unique(ssample[,column2]), length(ListAllOtu))
      TABf = cbind.data.frame(ListAllOtu,col_env,col_comp)
    } else {TABf = cbind.data.frame(ListAllOtu, col_comp)}

    TF = list(x1=deseq_ids, x2=mgseq_ids, x3=mcoder_ids)
    names(TF) <- c("DESeq", "metagenomeSeq", "metacoder")
    # print(TF)
    if(length(unlist(TF)) == 0){next}

    # Test si chaque ASV est diff dans les méthdes.
    for (j in 1:length(TF)){
      TABtest = TF[[j]]
      # TABtest=gsub("\\[|\\]", "", TF[[j]]) # cherche les ASVids

      TABtest_signif=rep(0, length(ListAllOtu))
      for (i in 1:length(ListAllOtu)) {
        featureI = ListAllOtu[i]
        res=grep( paste('^',featureI,'$', sep="" ) , TABtest)
        #print(res)
        if(length(res)>0){TABtest_signif[i]=length(res)
        #print(c(featureI, TABtest[res]))
        }
      }

      TABf=cbind.data.frame(TABf, TABtest_signif)
      names(TABf)[ncol(TABf)] = names(TF)[j]
    }

Etienne Rifa's avatar
Etienne Rifa committed
179
    TABfbak0 <- TABf
Etienne Rifa's avatar
Etienne Rifa committed
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194

    # add new columns, sumMethods, DeseqLFC, Mean Relative Abundance (TSS) condition 1 & 2
    row.names(deseqT) = deseqT[,1]
    if(nrow(deseqT)==0){
      TABf <- cbind(TABf, sumMethods = apply(TABf[3:5], 1, sum, na.rm=TRUE),
                    DESeqLFC = rep(NA, nrow(TABf)),
                    absDESeqLFC = rep(NA, nrow(TABf)))
    }else{
      TABf <- cbind( TABf, sumMethods = apply(TABf[3:5], 1, sum, na.rm=TRUE),
                     DESeqLFC = deseqT[as.character(TABf[,1]),"log2FoldChange"],
                     absDESeqLFC = abs(deseqT[as.character(TABf[,1]),"log2FoldChange"]) )
    }
    # clr = function(x){log(x+1) - rowMeans(log(x+1))}
    # otableNORM <- clr(otable)
    normf = function(x){ x/sum(x) }
Etienne Rifa's avatar
Etienne Rifa committed
195
    data.norm <- transform_sample_counts(data.glom, normf)
Etienne Rifa's avatar
Etienne Rifa committed
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
    otableNORM <- otu_table(data.norm)

    Gtab <- cbind(as.data.frame(ssample), t(otableNORM))
    MeanRelAbcond1=NULL
    for(i in TABf$ListAllOtu){
      tt=mean(Gtab[Gtab[,column1]==combinaisons[1,col],i], na.rm=TRUE)
      MeanRelAbcond1=c(MeanRelAbcond1,tt)
    }
    MeanRelAbcond2=NULL
    for(i in TABf$ListAllOtu){
      tt=mean(Gtab[Gtab[,column1]==combinaisons[2,col],i], na.rm=TRUE)
      MeanRelAbcond2=c(MeanRelAbcond2,tt)
    }
    TABfbak <- TABf <- cbind(TABf, MeanRelAbcond1, MeanRelAbcond2)

    #Adjust table
    TABf <- TABf[!is.na(TABf$DESeqLFC),]
    diff1=TABf$MeanRelAbcond1 - TABf$MeanRelAbcond2
    cbind( sign(diff1), diff1)
    TABf$DESeqLFC = abs(TABf$DESeqLFC)*sign(diff1)
    TABf$Condition = rep(NA, nrow(TABf))
    TABf[diff1>0, "Condition"] = as.character(combinaisons[1,col])
    TABf[diff1<0, "Condition"] = as.character(combinaisons[2,col])
    TABf$Condition = factor(TABf$Condition,
                            levels=c(as.character(combinaisons[1,col]),as.character(combinaisons[2,col])) )

    TABfinal <- rbind(TABfinal,TABf)

    # Barplot
    ## differentialy abundant features on 2 methods
    ## Top abs(LogFolchange)
    ## feature with relative abondance > 0.1% (0.001)
    TABbar = TABf[TABf$DESeq ==1 | TABf$metagenomeSeq ==1 |  TABf$sumMethods >=2, ]
    TABbar = TABbar[TABbar$MeanRelAbcond1>=0.001 | TABbar$MeanRelAbcond2>=0.001, ]
    TABbar = tail(TABbar[order(abs(TABbar$DESeqLFC)),],50)

    if(nrow(TABbar)){
Sebastien Theil's avatar
Sebastien Theil committed
233
      TABbar$tax = ttax[as.character(TABbar$ListAllOtu),rank]
Etienne Rifa's avatar
Etienne Rifa committed
234

235
      # png(paste(output,'/topDiffbarplot_',column1,'_',paste(combinaisons[,col],collapse="_vs_"),'.png',sep=''), width=20, height=20, units="cm", res=200)
236
      pbarplot <- p <-ggplot(data=TABbar, aes(x=reorder(tax, -abs(DESeqLFC)), y=DESeqLFC, fill=Condition ) ) +
237
        geom_bar(stat="identity", alpha = 1) + ggtitle(paste(combinaisons[,col],collapse="_vs_")) + labs(x='Features') +
Etienne Rifa's avatar
Etienne Rifa committed
238
        coord_flip() + theme_bw() +
239
240
241
242
243
244
245
246
247
248
249
250
        scale_y_continuous(minor_breaks = seq(-1E4 , 1E4, 1), breaks = seq(-1E4, 1E4, 5)) +
        theme(axis.text.x = element_text(angle = 45, hjust=1),
        axis.text=element_text(size=12),
        axis.title=element_text(size=16,face="bold"),
        strip.text.x = element_text(size = 18,face="bold"),
        title=element_text(size=16,face="bold"))
      # print(p)
      # dev.off()

      ggsave(paste(output,'/topDiffbarplot_',column1,'_',paste(combinaisons[,col],collapse="_vs_"),'.eps',sep='')
        , plot=pbarplot, height = 20, width = 20, units="cm", dpi = 500, device="eps")

Rifa Etienne's avatar
Rifa Etienne committed
251
252
253
254


      outF[[paste(combinaisons[,col],collapse="_vs_")]] = list(plot = p)

Etienne Rifa's avatar
Etienne Rifa committed
255
256
257
258
259
    }else{flog.info('No ASV to plot...')}

    # Krona ? Diversity of differentialy abundant features.

    ## Venn diag pour chaque comparaison.
260
261
262
263
264
265
266
267
268
269
270
271
272
    # flog.info('Plotting Venn diagrams...')
    # TF = Filter(length, TF)
    # venn.plot <- venn.diagram(TF, filename = NULL, col = "black",
    #                           fill = rainbow(length(TF)), alpha = 0.50,
    #                           cex = 2, cat.col = 1, , lty = "blank",
    #                           cat.cex = 2.5, cat.fontface = "bold",
    #                           margin = 0.07, main=paste(combinaisons[,col],collapse="_vs_"), main.cex=2.5,
    #                           fontfamily ="Arial",main.fontfamily="Arial",cat.fontfamily="Arial");
    # png(paste(output,'/venndiag_',column1,'_',paste(combinaisons[,col],collapse="_vs_"),'.png',sep=''), width=20, height=20, units="cm", res=200)
    # grid.draw(venn.plot)
    # dev.off()

    # grid.draw(venn.plot)
Etienne Rifa's avatar
Etienne Rifa committed
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
  }


  # print(TABfinal)
  if(length(TABfinal) > 0){
    flog.info('Building csv file...')


    TAX <- cbind(seqid = rownames(ttax[as.character(TABfinal$ListAllOtu),]),as.data.frame(ttax[as.character(TABfinal$ListAllOtu),]))

    sseq <- cbind(seqid = row.names(as.data.frame(sseq)), as.data.frame(sseq))
    SEQ <- sseq[as.character(TABfinal$ListAllOtu),]


    colnames(TABfinal)[1:2] = c("seqid", "Comparaison")
    TABfinal = cbind(TABfinal, TAX[,-1], sequence = SEQ[,-1])

    write.table(TABfinal, paste(output,"/aggregate_diff_",column1,'.csv', sep=""), row.names=FALSE, sep="\t", quote = FALSE)

292

Rifa Etienne's avatar
Rifa Etienne committed
293
294
295

    outF[["table"]] = TABfinal
    if(returnval){return(outF)}
Etienne Rifa's avatar
Etienne Rifa committed
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334

    flog.info('Done.')

    # flog.info('Heatmap of significant %s.', rank)
    # signif_data <- phyloseq::prune_taxa(as.character(TABfinal[,1]), data)
    # if(rank != 'ASV'){
    #   signif_data <- tax_glom(signif_data, rank)
    #   taxa_names(signif_data) <- as.character(tax_table(signif_data)[,rank])
    # }
    # clr = function(x){log(x+1) - rowMeans(log(x+1))}
    # otable <- otu_table(signif_data)
    # otableCLR <- clr(otable)
    # data.norm <- signif_data; otu_table(data.norm) <- otableCLR
    #
    # hgroup=hclust(dist(otable) , method="ward.D2")
    # # plot(hgroup)
    #
    # #Heatmap
    # dataF <- data.norm
    # adf = psmelt(dataF)
    # adf[,"OTU"] = factor(adf[,"OTU"], levels=hgroup$labels)
    #
    # p = ggplot(adf, aes(x = Sample, y = OTU, fill = Abundance)) +
    #     geom_raster() + facet_grid(as.formula(paste("~",column1)), scales = "free", space = "free") +
    #     scale_fill_distiller(direction=-1, palette='Spectral') +
    #     theme(axis.text.x = element_text(angle=90, hjust=1)) + ylab(rank) +
    #     labs(fill='CLR Normalized\nabundance')
    # png(paste(output,'/heatmap_signif_',rank,'.png',sep=''), width=30, height=20, units="cm", res=200)
    # plot(p)
    # invisible(dev.off())
    # #Corrplot?
    # data <- signif_data
    # save(data, file=paste(output,"/signif_phyloseq.rdata",sep=""))
    #

  }


}