Commit ad0d3c46 authored by Facundo Muñoz

bugfix: read_network() fails to read integer columns

- More flexible input: Detect field and decimal separators.

- fixes #15
parent 54b0d5e3
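As a quick, illustrative sketch (assuming a mapMCDA build that includes this commit; column names and values below are made up for the example), these are the two kinds of input the fix targets, mirroring the new tests further down:

library(mapMCDA)

## Network with integer coordinate columns (the case behind #15)
td <- data.frame(from = c("A", "B"), to = c("B", "C"),
                 lon_orig = 1:2, lat_orig = 3:4,   # integer, not double
                 lon_dest = 2:3, lat_dest = 4:5)
tf <- tempfile(fileext = ".csv")
write.csv(td, tf, row.names = FALSE)
read_network(tf)   # now returns an igraph object

## Same network written with ";" field separator and "," decimal mark
tf2 <- tempfile(fileext = ".csv")
write.csv2(transform(td, lon_orig = lon_orig + 0.5), tf2, row.names = FALSE)
read_network(tf2)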
Package: mapMCDA
Title: Produce an epidemiological risk map by weighting multiple risk factors
Version: 0.3.6
Version: 0.3.7
Date: 2019-04-06
Authors@R: c(
person("Andrea", "Apolloni", email = "andrea.apolloni@cirad.fr", role = c("ctb"), comment = "Animal mobility algorithm"),
......@@ -39,7 +39,8 @@ Imports:
shiny,
shinydashboard,
sp,
stringr
stringr,
utils
Suggests:
devtools,
knitr,
......
......@@ -5,6 +5,7 @@ S3method(epidemic_threshold,igraph)
export(align_layers)
export(borders0)
export(compute_weights)
export(csv_sep)
export(distance_map)
export(distance_to_vector)
export(download_country_admin)
......@@ -44,5 +45,6 @@ importFrom(igraph,vertex.attributes)
importFrom(raster,intersect)
importFrom(rgeos,gUnaryUnion)
importFrom(stats,setNames)
importFrom(utils,count.fields)
importFrom(utils,read.csv)
importMethodsFrom(raster,rasterize)
#' Detection of separator in csv files
#'
#' Returns the separator that consistently separates the lines in the same
#' number of fields, or fails.
#'
#' Tested separators are: ",", " ", "\\t", "|", ";", ":"
#' @param x character. File name.
#'
#' @return Character representation of the field separator.
#' @export
#' @importFrom utils count.fields
#'
#' @examples
#' tf <- tempfile()
#' td <- data.frame(T = LETTERS[1:10], N = 1:10)
#' write.csv(td, tf, row.names = FALSE)
#' csv_sep(tf) # ","
#' write.csv2(td, tf, row.names = FALSE)
#' csv_sep(tf) # ";"
csv_sep <- function(x) {
header <- readLines(x, n = 1L)
common_seps <- c(",", " ", "\t", "\\|", ";", ":")
## Separators present in the header (first line)
seps_in_header <- common_seps[vapply(common_seps, grepl, TRUE, header)]
all_identical <- function(x) all(vapply(x[-1], identical, TRUE, x[1]))
## Whether all lines have the same number of fields, assuming each of the
## seps_in_header
nfields_sep <-
vapply(
seps_in_header,
function(s) all_identical(utils::count.fields(x, sep = s)),
TRUE
)
if (sum(nfields_sep) != 1)
stop("Cannot determine field separator of ", x)
## Unescape | if necessary
names(nfields_sep) <- gsub("\\\\", "", names(nfields_sep))
return(names(nfields_sep)[nfields_sep])
}
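The description above says csv_sep() "fails" when it cannot decide; concretely, if more than one candidate separator splits every line into a consistent number of fields, the function refuses to guess. A small illustrative sketch (not part of the package tests):

tf <- tempfile()
writeLines(c("a;b,c", "1;2,3"), tf)  # both "," and ";" split each line into two fields
try(csv_sep(tf))
#> Error in csv_sep(tf) : Cannot determine field separator of <tempfile path>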
......@@ -3,8 +3,10 @@
#' Read a csv file with a specific format (see Details) and interpret
#' it as directed network data with possibly weighted edges.
#'
#' The file must be plain text with comma-separated columns and varaible
#' The file must be plain text with comma-separated columns and variable
#' names in the first line.
#' Field separators other than the comma are also supported, notably the
#' semi-colon field separator combined with a comma decimal separator.
#' There must be either 6 or 7 columns in the same order and of the
#' same types as follows:
#' \itemize{
......@@ -37,9 +39,6 @@
#' read_network(tf)
read_network <- function(x) {
## Read file
dat <- read.csv(x, stringsAsFactors = FALSE)
## Message on input format
input_format <- c(
"Input data format:",
......@@ -56,28 +55,65 @@ must be respected:",
"lat_dest numeric (decimal degrees, WGS84)",
"volume Optional. directed flux in some consistent unit."
)
## Detect field separator
sep <- csv_sep(x)
## Number of fields
## take from first line since csv_sep() checks consistency
nc <- count.fields(x, sep = sep)[1]
## Check number of columns
if (ncol(dat) < 6 | ncol(dat) > 7) {
if (nc < 6 | nc > 7) {
stop("Expected 6 or 7 columns, observed ", ncol(dat),
message(paste(input_format, collapse = "\n")))
}
## Check type of columns
## transform any column of class 'integer' to 'numeric'
class(dat[vapply(dat, is.integer, T)]) <- "numeric"
col_classes <- sapply(dat, class)
exp_classes <- c(rep("character", 2), rep("numeric", length(col_classes)-2))
## Expected column types
exp_classes <-
c(rep("character", 2), rep("numeric", nc - 2))
if( any(idx <- col_classes != exp_classes) ) {
nms.idx <- paste(names(dat)[idx], collapse = ", ")
## Read file with either "." or "," decimal separator
dat <-
try(
read.csv(
x,
stringsAsFactors = FALSE,
colClasses = exp_classes,
sep = sep,
dec = "."
),
silent = TRUE
)
if (inherits(dat, "try-error"))
dat <- try(
read.csv(
x,
stringsAsFactors = FALSE,
colClasses = exp_classes,
sep = sep,
dec = ","
),
silent = TRUE
)
if (inherits(dat, "try-error")) {
## There has been an issue with some column type
## Read without expectations to identify problematic columns
dat <- read.csv(x, stringsAsFactors = FALSE, sep = sep)
nms <- paste(
names(dat[-(1:2)])[sapply(dat[-(1:2)], is.character)],
collapse = ", "
)
if (nchar(nms) < 1) stop("This should not happen")
stop(
"Unexpected type in column(s) ", nms.idx,
message(paste(input_format, collapse = "\n"))
"Unexpected type in column(s) ", nms, "\n",
paste(input_format, collapse = "\n")
)
}
## Check duplicated edges
if ( any(idx <- duplicated(dat[, 1:2])) ) {
......
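For reference, the decimal-separator fallback in the hunk above can be seen in isolation: with colClasses forcing the trailing columns to numeric, a first pass with dec = "." fails on decimal commas, and the retry with dec = "," succeeds (standalone sketch, not package code):

tf <- tempfile()
writeLines(c("id;value", "a;1,5", "b;2,25"), tf)
## dec = "." cannot parse "1,5" as a number, so this first attempt errors ...
try(read.csv(tf, sep = ";", dec = ".", colClasses = c("character", "numeric")))
## ... and the second attempt with dec = "," succeeds:
read.csv(tf, sep = ";", dec = ",", colClasses = c("character", "numeric"))
#>   id value
#> 1  a  1.50
#> 2  b  2.25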
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csv_sep.R
\name{csv_sep}
\alias{csv_sep}
\title{Detection of separator in csv files}
\usage{
csv_sep(x)
}
\arguments{
\item{x}{character. File name.}
}
\value{
Character representation of the field separator.
}
\description{
Returns the separator that consistently separates the lines in the same
number of fields, or fails.
}
\details{
Tested separators are: ",", " ", "\\t", "|", ";", ":"
}
\examples{
tf <- tempfile()
td <- data.frame(T = LETTERS[1:10], N = 1:10)
write.csv(td, tf, row.names = FALSE)
csv_sep(tf) # ","
write.csv2(td, tf, row.names = FALSE)
csv_sep(tf) # ";"
}
......@@ -17,8 +17,10 @@ Read a csv file with a specific format (see Details) and interpret
it as directed network data with possibly weighted edges.
}
\details{
The file must be plain text with comma-separated columns and varaible
The file must be plain text with comma-separated columns and variable
names in the first line.
Field separators other than the comma are also supported, notably the
semi-colon field separator combined with a comma decimal separator.
There must be either 6 or 7 columns in the same order and of the
same types as follows:
\itemize{
......
context("Detection of separator in csv files")
test_that("Detects various separators correctly", {
tf <- tempfile()
tf2 <- tempfile()
## Standard csv file with decimal point
td <- data.frame(T = LETTERS[1:10], N = runif(10))
write.csv(td, tf, row.names = FALSE)
file.copy(tf, tf2)
expect_identical(csv_sep(tf), ",")
## Separate with ";" and decimal comma
writeLines(gsub(",", ";", readLines(tf)), file(tf2))
writeLines(gsub("\\.", ",", readLines(tf2)), file(tf2))
expect_identical(csv_sep(tf2), ";")
close(file(tf2))
## Separate with ":"
writeLines(gsub(",", ":", readLines(tf)), file(tf2))
expect_identical(csv_sep(tf2), ":")
close(file(tf2))
## Separate with " "
writeLines(gsub(",", " ", readLines(tf)), file(tf2))
expect_identical(csv_sep(tf2), " ")
close(file(tf2))
## Separate with tab
writeLines(gsub(",", "\t", readLines(tf)), file(tf2))
expect_identical(csv_sep(tf2), "\t")
close(file(tf2))
## Separate with "|"
writeLines(gsub(",", "|", readLines(tf)), file(tf2))
expect_identical(csv_sep(tf2), "|")
close(file(tf2))
close(file(tf))
})
......@@ -47,7 +47,34 @@ test_that("load_dir() skips unknnown file formats", {
test_that("Interpret network data", {
nf <- system.file("testdata", "mobility.csv", package = "mapMCDA")
expect_error(x <- read_network(nf), NA)
x <- expect_error(read_network(nf), NA)
expect_s3_class(x, "igraph")
## Test data with integer and double numeric columns
td <- data.frame(
from = LETTERS[1:3],
to = LETTERS[2:4],
fx = 1:3 + .1, # double
fy = 1:3L, # int
tx = 2:4 + .1,
ty = c(2:3, 1),
hc = 10 * (1:3)
)
tf <- tempfile()
write.csv(td, tf, row.names = FALSE)
n1 <- expect_error(read_network(tf), NA)
expect_s3_class(n1, "igraph")
## Test data with ";" field separator and "," decimal separator
tf2 <- tempfile()
writeLines(gsub(",", ";", readLines(tf)), file(tf2))
writeLines(gsub("\\.", ",", readLines(tf2)), file(tf2))
n2 <- expect_error(read_network(tf2), NA)
expect_s3_class(n2, "igraph")
# all.equal(n1, n2)
close(file(tf))
close(file(tf2))
})