network.R 14.2 KB
Newer Older
1
2
3
#' Read and interpret network data
#' 
#' Read a csv file with a specific format (see Details) and intepret
4
#' it as network origin-destination data with possibly weighted edges.
5
#' 
6
#' The file must be plain text with comma-separated columns and variable
7
#' names in the first line.
8
9
#' Field separators other than comma are also supported. In particular,
#' semi-colon field separators and comma decimal separator.
10
11
12
13
14
15
16
17
18
19
20
21
22
#' There must be either 6 or 7 columns in the same order and of the
#' same types as follows:
#' \itemize{
#'   \item origin       character
#'   \item destination  character
#'   \item lon_orig     numeric (decimal degrees, WGS84)
#'   \item lat_orig     numeric (decimal degrees, WGS84)
#'   \item lon_dest     numeric (decimal degrees, WGS84)
#'   \item lat_dest     numeric (decimal degrees, WGS84)
#'   \item volume       Optional. directed flux in some consistent unit
#' }
#' 
#' Variable names can be different. If strings contain spaces they must
23
24
25
26
27
28
29
30
31
32
#' be quoted. For this reason, \strong{quotes cannot be part of node names}.
#' E.g. the name \emph{Abangminko'O} is invalid and will rise an error.
#' 
#' \strong{Each origin-destination must be specified only once}. If you
#' have multple animal shipments between the same origin and destination
#' spread in several lines, you need to aggregate them into a single line.
#' 
#' Make sure \strong{coordinates are consistent}. I.e. All nodes must
#' have the same coordinates in all the lines where it appears as
#' either source or destination. 
33
34
35
#' 
#' @param x character. Path of csv file
#'
36
#' @return Object of classes \code{geonetwork} and \code{igraph}.
37
38
#' @importFrom igraph graph_from_data_frame edge_attr_names
#'   edge.attributes edge.attributes<-
39
40
#' @importFrom stats setNames
#' @importFrom utils read.csv
41
#' @import geonetwork
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#' @export
#'
#' @examples
#'   d <- data.frame(from = "A", to = "B", fx = 0, fy = 0, tx = 1, ty = 1)
#'   tf <- tempfile()
#'   write.csv(d, tf, row.names = FALSE)
#'   read_network(tf)
read_network <- function(x) {
  
  ## Message on input format
  input_format <- c(
    "Input data format:",
    "Comma-separated plain text file with variable names in the first row.",
    "Decimal point symbol: dot.",
    "Variable names can be different, but the order and types
must be respected:",
    "",
    "origin       character",
    "destination  character",
    "lon_orig     numeric (decimal degrees, WGS84)",
    "lat_orig     numeric (decimal degrees, WGS84)",
    "lon_dest     numeric (decimal degrees, WGS84)",
    "lat_dest     numeric (decimal degrees, WGS84)",
    "volume       Optional. directed flux in some consistent unit."
  )
67
68
69
70
71
72
73
  
  ## Detect field separator
  sep = csv_sep(x)

  ## Number of fields
  ## take from first line since csv_sep() checks consistency
  nc <- count.fields(x, sep = sep)[1]
74
75

  ## Check number of columns
76
  if (nc < 6 | nc > 7) {
77
    stop("Expected 6 or 7 columns, observed ", nc,
78
79
80
81
         message(paste(input_format, collapse = "\n")))
    
  }
  
82
83
84
  ## Expected column types
  exp_classes <-
    c(rep("character", 2), rep("numeric", nc - 2))
85
  
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
  ## Read file with either "." or "," decimal separator
  dat <-
    try(
      read.csv(
        x,
        stringsAsFactors = FALSE,
        colClasses = exp_classes,
        sep = sep,
        dec = "."
      ),
      silent = TRUE
    )
  if (inherits(dat, "try-error"))
    dat <- try(
      read.csv(
        x,
        stringsAsFactors = FALSE,
        colClasses = exp_classes,
        sep = sep,
        dec = ","
      ),
      silent = TRUE
    )

  if (inherits(dat, "try-error"))  {
    ## There has been an issue with some column type
    ## Read without expectations to identify problematic columns
    dat <- read.csv(x, stringsAsFactors = FALSE, sep = sep)
    nms <- paste(
      names(dat[-(1:2)])[sapply(dat[-(1:2)], is.character)],
      collapse = ", "
    )
    if(length(nms)<1) stop("This should not happen")
    
120
    stop(
121
122
      "Unexpected type in column(s) ", nms, "\n",
      paste(input_format, collapse = "\n")
123
124
    )
  }
125
    
126
127
128
  
  ## Check duplicated edges
  if ( any(idx <- duplicated(dat[, 1:2])) ) {
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
    
    if (nc == 6L) {
      ## No volume. Just remove duplicated lines
      dat <- dat[!duplicated(dat),]
    } else {
      ## Aggregate volume with warning
      warning(
        "Aggregating volumes for multiple links.",
        "If this is not what you want, stop now and aggregate manually."
      )
      dat <- 
        aggregate(
          dat[,7],
          by = dat[, 1:6],
          FUN = "sum"
        )
    }
146
147
148
149
  }
  
  ## Check graph is directed
  ## At least one edge contains a reciprocal
150
151
152
153
154
  if( !any(duplicated(rbind(dat[, 1:2], setNames(dat[, 2:1], names(dat[, 1:2]))))) ) {
    warning(
      "All the links are unidirectional which seems implausible.\n",
      "Note that mapMCDA works only with directed networks.\n",
      "If you are sure that the network is directed, you can proceed."
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
    )
  }
  
  ## Links
  e_list <- dat[, -(3:6)]
  
  ## Coordinates of vertices
  v_coord <- unique(
    rbind(
      setNames(dat[, c(1, 3:4)], c("Node", "Lon", "Lat")),
      setNames(dat[, c(2, 5:6)], c("Node", "Lon", "Lat"))
    )
  )
  v_coord <- v_coord[order(v_coord$Node),]
  v_coord$Node <- as.factor(v_coord$Node)
  
171
172
  ## Check consistency of node coordinates
  ## Each node has consistent coordinates
173
  if (any((idx <- table(v_coord$Node)) > 1)) {
174
175
176
    idx.nodes <- paste(names(idx)[idx > 1], collapse = ", ")
    stop("Inconsistent coordinates.\n",
         "The file specifies different coordinates in different lines for node(s): ", idx.nodes)
177
  }
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
  ## Each coordinates represent a single node
  if ( any(idx <- duplicated(v_coord[, -1])) ) {
    
    ## Return the nodes in dat (first col) matching a given set of coords x.
    matching_coords <- function(x, dat) {
      vapply(seq_len(nrow(dat)), function(i) all(dat[i, -1] == x), TRUE)
    }
    
    iso_nodes <- 
      lapply(
        which(idx),
        function(x) as.character(v_coord[matching_coords(v_coord[x, -1], v_coord), 1])
      )
    
    stop("Detected different locations with the same coordinates.\n",
         "This can be due to typos in location names.\n",
         "Verify the coordinates of nodes ", 
         paste(lapply(iso_nodes, paste, collapse = ", "), collapse = " and "),
         ".")
  }
198
  
199
  ans <- geonetwork::geonetwork(e_list, v_coord, directed = TRUE)
200
  
201
202
203
204
205
206
207
  ## If there are edge attributes, it must be only one, and must be
  ## the weight.
  if((natt <- length(edge_attr_names(ans))) > 0) {
    stopifnot(identical(natt, 1L))
    names(edge.attributes(ans)) <- "weight"
  }

208
209
  return(ans)
}
210

211
212
# ' @import geonetwork
setOldClass("geonetwork")
213

214

215
#' Rasterize a geographic network
216
#' 
217
#' Copmute a raster with the importance of the nearest network node.
218
#' 
219
220
#' This assumes that the network object has node attributes "Lon" "Lat"
#' in the WGS84 reference system.
221
#' 
222
223
224
225
#' If the graph is weighted, the weighted importance will be computed.
#' If you want the unweighted importance, remove the weight attribute
#' from the graph with \code{igraph::delete_edge_attr(x, "weight")}.
#' 
226
227
228
229
#' The importance of each node is computed as the relative
#' contribution to the Potential for Transmision \eqn{R_0} of the
#' graph. I.e., \eqn{R_{0k}/R_0}. See \code{\link{epidemic_threshold}}.
#' 
230
#' @param x a geographic network (a geonetwork object with node attributes
231
232
233
234
235
236
237
238
239
240
241
#'   "Lon" and "Lat" in the WGS84 reference system)
#' @param y a Raster* or a SpatialPolygons* object.
#' @param field character. The attribute of the network nodes to be
#'   assigned to the voronoi polygon associated with each node. By
#'   default it computes the \emph{importance} of each node in the
#'   network. But you can manually compute any other measure of
#'   interest, and pass it as a vertex attribute (see
#'   \code{?igraph::`vertex_attr<-`}).
#' @param ... Other arguments passed on to
#'   \code{\link[raster]{rasterize}}.
#' @importMethodsFrom raster rasterize
242
#' @importFrom igraph vertex.attributes
243
#' @importFrom stats setNames
244
#' @export
245
246
#' @name rasterize_geonetwork
#' @rdname rasterize_geonetwork
247
#' @aliases rasterize,geonetwork,Raster-method
248
setMethod(
249
  rasterize, c("geonetwork", "Raster"),
250
251
252
253
254
255
256
257
258
259
260
261
262
  {
    function(
      x, y, field = "importance", ...) {
      
      etx <- epidemic_threshold(x, beta = 1)
      
      ## If the graph is weighted, use weights
      if (is.null(epiR0 <- etx$weighted)) {
        epiR0 <- etx$unweighted
      }
      
      sna <- attr(epiR0, "sna")

263
264
265
266
267
268
269
270
      if (isTRUE(all.equal(unname(epiR0["R0"]), 0))) {
        ## In this case, all individual contributions are equally 0
        stopifnot(all(vapply(sna$R0k, all.equal, TRUE, 0)))
        relative_contributions <- rep(1/length(sna$R0k), length(sna$R0k))
      } else {
        relative_contributions <- sna$R0k / epiR0["R0"]
      }
      
271
      node_importance <- setNames(
272
        data.frame(sna[, 1], 100 * relative_contributions),
273
274
275
        c("name", "importance")
      )
      
276
277
278
279
280
281
282
      nodes <- cbind(
        as.data.frame(igraph::vertex.attributes(x)),
        setNames(
          data.frame(sf::st_coordinates(attr(x, "geom_node"))),
          c("Lon", "Lat")
        )
      )
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
      
      nodes <- merge(nodes, node_importance, by = "name", all.y=FALSE)
      
      ## Cast to SpatialPointsDataFrame
      ## Assume CRS WGS84
      coordinates(nodes) <- ~ Lon + Lat
      proj4string(nodes) <- CRS("+proj=longlat +datum=WGS84")
      
      vor <- voronoi(nodes, ext = extent(y))
    
      ans <- raster::mask(raster::rasterize(vor, y, field = field, ...), y)
      return(ans)
    }
  }
)

299
300
301
302
#' @importMethodsFrom raster rasterize
#' @param res numeric vector of length 2. Resolution in Lon/Lat of
#'   rasterisation.
#' @rdname rasterize_geonetwork
303
#' @aliases rasterize,geonetwork,SpatialPolygons-method
304
305
#' @export
setMethod(
306
  rasterize, c("geonetwork", "SpatialPolygons"),
307
  {
308
    function (x, y, res = resolution(y, max_ncells = 100), ...) {
309
310
311
312
313
314
315
316
      
      ext_grid <- raster::raster(raster::extent(y), resolution = res)
      msk <- raster::rasterize(y, ext_grid, field = 1, background = NA, fun = "mean")
      rasterize(x, msk, ...)
    }
  }
)

317
318
319
320
321
322
323
324
325
#' Compute the Epidemic Threshold of a graph
#' 
#' Weighted and unweighted Epidemic Threshold \eqn{q} of a graph.
#'
#' The Epidemic Threshold \eqn{q} quantifies the minimal expeced
#' transmission coefficient necessary for diffusing an epidemy in a
#' network. It is computed as the inverse of the \emph{Potential for
#' transmission} of the network: a measure of the expected number of
#' nodes affected by an infectious node, which is a generalisation of
326
#' the Basic Reproduction Number \eqn{R_0} of an epidemy to the
327
328
329
#' context of a network. It thus quantifies the potential for
#' transmission of an infection throughout the contact network. It is
#' computed in terms of the incoming-outgoing rates from the network's
330
331
#' nodes: \deqn{R_0 = \beta \frac{\overline{k_\mathrm{in}\cdot
#' k_\mathrm{out}}}{\overline{k_\mathrm{in}}},}{R_0 = \beta <k_in*k_out>/<k_in>,}
332
#' where \eqn{\beta} is the transmission coefficient among animals,
333
#' \eqn{k_\mathrm{in/out}}{k_in/out} are the in/out-degrees of a node
334
#' and the \eqn{\overline{\cdot}}{<·>} symbol represents the average value
335
336
337
338
339
340
#' across all nodes in the graph.
#'
#' The unweighted value computed above is most appropriate for a
#' highly infectious epidemy with high animal-prevalence on nodes, as
#' it assumes that any contact is potentially infectious.
#'
341
#' In the weighted formulation, \eqn{k_\mathrm{in/out}}{k_in/out} are
342
343
344
345
346
347
#' the weight values for the incoming/outgoing edges in each node. It
#' is more appropriate for low-prevalence diseases, where the
#' transmission probability is assumed proportional to the number of
#' contacts.
#'
#' The default value of 1 for the probability of transmission
348
#' \eqn{\beta} implies that every infectious contact leads to
349
350
351
352
353
354
#' transmission.
#'
#' @param x an \code{igraph} object
#' @param beta numeric, between 0 and 1. Probability of transmission.
#'
#' @return a list the weighted and unweighted Potential for
355
#'   Transmission \eqn{R_0} and its inverse, the Epidemic
356
357
358
359
360
361
362
363
#'   Threshold \eqn{q}. As an attribute named "sna", a data.frame with
#'   the in/out-degrees of each node and their individual contribution
#'   to R0.
#' 
#' If the input graph is unweighted, the weighted component is NULL.
#' 
#' @importFrom igraph decompose.graph degree gorder is.weighted
#'   as_data_frame
364
#' @importFrom stats setNames
365
366
367
368
369
370
#'   
#' @references 
#'   Volkova VV, Howey R, Savill NJ, Woolhouse MEJ (2010) Sheep
#'   Movement Networks and the Transmission of Infectious Diseases.
#'   PLoS ONE 5(6): e11185.
#'   https://doi.org/10.1371/journal.pone.0011185
371
#' @export
372
373
374
375
376
377
378
379
380
381
382
#' @examples
#'   g <- igraph::graph_from_literal(A --+ B --+C, A --+C, B --+D)
#'   epidemic_threshold(g)
#'   
#'   ## weighted graph
#'   igraph::E(g)$weight <- c(10, 1, 2, 5)
#'   epidemic_threshold(g)
epidemic_threshold <- function(x, beta = 1) {
  UseMethod("epidemic_threshold")
}

383
#' @export
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
epidemic_threshold.igraph <- function(x, beta = 1) {
  sc <- decompose.graph(x)
  n_v <- lapply(sc, gorder)

  ## work only with the largest network
  ## (in terms of number of nodes)  
  largest_x <- sc[[which.max(n_v)]]
  
  sna <- 
    data.frame(
      indeg = degree(largest_x, mode = "in"),
      outdeg = degree(largest_x, mode = "out")
    )
  sna <- cbind(
    node = rownames(sna),
    sna
  )
  rownames(sna) <- NULL
  sna[is.na(sna)] <- 0
  
  R0_contrib <- epidemic_threshold(sna[, c("indeg", "outdeg")])
  epidata <- setNames(sum(R0_contrib)^c(1, -1), c("R0", "q"))
  attr(epidata, "sna") <- cbind(sna, R0k = R0_contrib)
  
  epidataw <- NULL
  if (is.weighted(largest_x)) {
    
    lxdf <- as_data_frame(largest_x)
    in_w <- setNames(
      aggregate(lxdf$weight, by = list(lxdf$to), sum),
      c("node", "in_w")
    )
    out_w <- setNames(
      aggregate(lxdf$weight, by = list(lxdf$from), sum),
      c("node", "out_w")
    )
    snaw <- merge(in_w, out_w, by = "node", all=TRUE)
    snaw[is.na(snaw)] <- 0
    
    R0w_contrib <- epidemic_threshold(snaw[, c("in_w", "out_w")])
    epidataw <- setNames(sum(R0w_contrib)^c(1, -1), c("R0", "q"))
    attr(epidataw, "sna") <- cbind(snaw, R0k_w = R0w_contrib)
    }

  return(list(unweighted = epidata, weighted = epidataw))
}

431
#' @export
432
433
434
435
436
437
epidemic_threshold.data.frame <- function(x, beta) {
  stopifnot(ncol(x) == 2)
  ## Individual contributions to R0
  return( x[,1]*x[,2]/sum(x[,1]) )
}