Commit d061446a authored by mahendra-mariadassou
Browse files

add support for fasta files

parent f2d46c5e
......@@ -30,10 +30,12 @@ app_server <- function(input, output, session) {
# Read the biom file --> phyloseq
biomfile <- read_frogs_biom(input$biom$datapath)
# Read the optional fasta file --> read_fasta
fasta <- read_fasta(input$fasta$datapath)
# Read the tsv file --> readr
multihits <- read_multihits(input$tsv$datapath)
# Sanitize physeq, multi_hits and build dictionary for short OTU names
all <- sanitize_physeq_and_affi(biomfile, multihits)
all <- sanitize_physeq_and_affi(biomfile, multihits, fasta)
physeq <- all$physeq
dict <- all$otu_dictionary
affi <- all$affi
......
......@@ -19,6 +19,10 @@ app_ui <- function() {
"Upload Biom File",
accept = c("text/plain", ".biom")
),
fileInput("fasta",
"Optional: upload Fasta File",
accept = c("text/plain", ".fasta")
),
fileInput("tsv",
"Upload MultiHits TSV File",
accept = c("text/tab-separated-values", ".tsv", "text/csv")
......
......@@ -8,6 +8,16 @@ read_multihits <- function(multihits_file) {
readr::read_tsv(multihits_file)
}
## Read an optional fasta file into a tibble
#' Read a fasta file into a two-column tibble
#'
#' @param fasta_file Path to a fasta file, or `NULL` when no file was supplied.
#' @return `NULL` when `fasta_file` is `NULL`; otherwise a tibble with exactly
#'   two columns: `OTU` (sequence names) and `sequence` (sequences as character
#'   strings).
#' @importFrom Biostrings readDNAStringSet
read_fasta <- function(fasta_file) {
  if (is.null(fasta_file)) return(NULL)
  ## Bind the named character vector to a local instead of piping it into
  ## tibble(): when `.` is used only inside nested calls, magrittr also passes
  ## the lhs as first argument, which would add a spurious column named `.`
  sequences <- as.character(Biostrings::readDNAStringSet(fasta_file))
  otu_names <- names(sequences)
  ## names() is NULL for an unnamed vector, and tibble() silently drops NULL
  ## columns; keep an OTU column so downstream joins by "OTU" still work
  if (is.null(otu_names)) {
    otu_names <- rep(NA_character_, length(sequences))
  }
  tibble::tibble(
    OTU = otu_names,
    sequence = unname(sequences)
  )
}
## Functions to create short taxa names -------------------------------------
long_taxa_names <- function(taxa_names, max_size = 32) {
## any taxa name longer than 32 characters
......@@ -37,7 +47,7 @@ create_otu_dictionary <- function(physeq) {
otu_dictionary
}
sanitize_physeq_and_affi <- function(physeq, affi) {
sanitize_physeq_and_affi <- function(physeq, affi, fasta) {
## Create dictionary
otu_dictionary <- create_otu_dictionary(physeq)
old_to_new <- otu_dictionary %>% dplyr::select(-abundance) %>% tibble::deframe()
......@@ -52,6 +62,16 @@ sanitize_physeq_and_affi <- function(physeq, affi) {
## Remove previously curated taxa from affi
affi <- affi %>% dplyr::filter(OTU %in% ambiguous_taxa(physeq))
## Add sequences from fasta file to affiliation table
if (!is.null(fasta)) {
if (long_taxa_names(otu_dictionary$sequence)) {
warning("Sequences already present in the multihits files.\nFasta file not used.")
} else {
affi <- affi %>% dplyr::select(-sequence) %>% dplyr::left_join(fasta, by = "OTU")
}
}
list(
physeq = physeq,
affi = affi,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment