Commit 42eba020 authored by Renaud Lancelot's avatar Renaud Lancelot 🌍
Browse files

Updated version of the plan to reflect changes in the source for country...

Updated version of the plan to reflect changes in the source for country borders and population size. Updated versions of the trend(), dmrplot(), and rgrplot functions.
parent e24b66d4
Pipeline #15558 failed with stage
in 1 minute and 38 seconds
......@@ -49,8 +49,8 @@ stringency <- function(date_range = "2020-01-01/2020-05-31"){
## get country borders for the whole world and get rid of the
## apostrophe in "Cote d'Ivoire"
get_world_map <- function(){
ans <- readOGR(dsn = "./data",
layer = "GAULCOUNTRY2013")
ans <- readOGR(dsn = "./data/moodgaulcountrywithkos2013wpop.gpkg",
layer = "moodgaulcountrywithkos2013wpop")
xname <- as.character(ans$ADM0_NAME)
pos <- grep(pattern = "Ivoire", x = xname)
xname[pos] <- "Cote dIvoire"
......@@ -58,6 +58,18 @@ get_world_map <- function(){
return(ans)
}
get_world_pop <- function(){
ans <- readOGR(dsn = "./data/moodgaulcountrywithkos2013wpop.gpkg",
layer = "moodgaulcountrywithkos2013wpop")
xname <- as.character(ans$ADM0_NAME)
pos <- grep(pattern = "Ivoire", x = xname)
xname[pos] <- "Cote dIvoire"
ans$name <- standardise_country_names(xname)
return(data.frame(name = ans$name,
pop = ans$TOTPOP)
)
}
#' Simplify a vector layer
geosimplify <- function(x) {
sx <- gSimplify(x, tol=.025, topologyPreserve=T)
......@@ -167,6 +179,7 @@ standardise_country_names <- function(x) {
"Cabo Verde", "Cape Verde",
"Guinea-Bissau", "Guinea Bissau",
"Macedonia", "North Macedonia",
"The former Yugoslav Republic of Macedonia", "North Macedonia",
"U.K. of Great Britain and Northern Ireland", "United Kingdom",
"United kingdom", "united Kingdom",
"São Tomé and Príncipe", "Sao Tome and Príncipe",
......@@ -204,38 +217,38 @@ countryname <- function(x){
}
#' Total population by country for a given year
#'
#' Population counts in thousands of people.
#'
#' Some country names were corrected to match those from ECDC.
#' Population count for Kosovo was manually added.
#'
#' @return Data frame with one row per country. Country name and population count.
pop_countries <- function(x, year) {
## Filter year and select variables Country and Population count.
pop <- base::subset(
x,
subset = Time == year,
select = c("Location", "PopTotal")
)
names(pop) <- c("name", "pop")
rownames(pop) <- NULL
## Standardise country names
pop$name <- standardise_country_names(pop$name)
## Manually include missing population for Kosovo
## Source ???
pop <- rbind(
pop,
data.frame(name = "Kosovo", pop = 1845)
) %>%
arrange(name)
return(pop)
}
## #' Total population by country for a given year
## #'
## #' Population counts in thousands of people.
## #'
## #' Some country names were corrected to match those from ECDC.
## #' Population count for Kosovo was manually added.
## #'
## #' @return Data frame with one row per country. Country name and population count.
## pop_countries <- function(x, year) {
## ## Filter year and select variables Country and Population count.
## pop <- base::subset(
## x,
## subset = Time == year,
## select = c("Location", "PopTotal")
## )
## names(pop) <- c("name", "pop")
## rownames(pop) <- NULL
## ## Standardise country names
## pop$name <- standardise_country_names(pop$name)
## ## Manually include missing population for Kosovo
## ## Source ???
## pop <- rbind(
## pop,
## data.frame(name = "Kosovo", pop = 1845)
## ) %>%
## arrange(name)
## return(pop)
## }
current_date <- function() format(Sys.Date(), "%d %b %Y")
......@@ -334,7 +347,11 @@ dmrplot <- function(dmr, start = as.Date("2020-03-01"),
col = c(grey(.8), 2, 1, 2),
size = 3))
g <- xyplot(value ~ date | variable + name,
## data for 100,000 inh.
Z1$value100k <- 1e5 * Z1$value
Z2$value100k <- 1e5 * Z2$value
g <- xyplot(value100k ~ date | variable + name,
xlim = c(as.Date("2020-02-15"), Sys.Date()),
data = Z1, subscripts = T,
scales = list(cex = cex - .1, tck = .5,
......@@ -357,7 +374,7 @@ dmrplot <- function(dmr, start = as.Date("2020-03-01"),
if(xvar == "fitdr"){
## add observed daily death rate
X <- Z2$date[Z2$name == xnam]
Y <- Z2$value[Z2$name == xnam]
Y <- Z2$value100k[Z2$name == xnam]
o <- order(X)
lpoints(X[o], Y[o],
type = "h", col = grey(.8))
......@@ -414,7 +431,6 @@ dmrplot <- function(dmr, start = as.Date("2020-03-01"),
## fn to compute the trend in mortality growth rate
trend <- function(x){
browser()
## use function "by" to loop over the country names
xnam <- unique(x$name)
xlock <- if(is.null(x$lockdown)) NA else unique(x$lockdown)
......@@ -445,7 +461,7 @@ trend <- function(x){
k <- length(seq(min(x$date), Sys.Date(), by="1 week"))
## fits a negative binomial additive model for death coints
fm <- try(gam(deaths ~ s(day, bs = "cr", k = k) + offset(log(1000 * pop)),
fm <- try(gam(deaths ~ s(day, bs = "cr", k = k) + offset(log(pop)),
data = x, family = nb, link = "log",
optimizer = "perf"))
if(any(class(fm) == "try-error"))
......@@ -454,7 +470,7 @@ trend <- function(x){
pred <- predict(fm, se = T)
Y <- exp(pred$fit)
## these are counts we want rates
dmr <- 100 * Y / x$pop
dmr <- Y / x$pop
## 95% CI
z <- qnorm(p = .975, lower.tail = T)
......@@ -515,13 +531,13 @@ trend <- function(x){
date = x$date,
day = day,
date1st = rep(min(x$date), nrow(x)),
obsdr = 100 * tapply(x$deaths,
x$day,
sum) / x$pop,
obsdr = tapply(x$deaths,
x$day,
sum) / x$pop,
fitdr = dmr,
cumdr = cumsum(100 * tapply(x$deaths,
x$day,
sum) / x$pop),
cumdr = cumsum(tapply(x$deaths,
x$day,
sum) / x$pop),
pop = unique(x$pop),
lockdown = rep(xlock, nrow(x)),
d2peak = rep(if(l1 > 0) d2peak else NA, nrow(x)),
......
......@@ -39,7 +39,7 @@ plan <- drake_plan(## raw_data = read_excel(
"Georgia", "Israel", "Jordan", "Lebanon", "Libya",
"Moldova", "Morocco", "Syria", "Tunisia", "Ukraine"),
europe = c(eu28, efta, candi, noneu),
europe = standardise_country_names(sort(c(eu28, efta, candi, noneu))),
wca = c("Cameroon", "Central African Republic", "Chad",
"Congo DR", "Congo", "Equatorial Guinea", "Gabon",
......@@ -49,18 +49,16 @@ plan <- drake_plan(## raw_data = read_excel(
"Mauritania", "Niger", "Nigeria", "Senegal",
"Sierra Leone", "Togo"),
## Total population counts and densities by Country, Year and Sex.
## Source: United Nations'[World Population Prospects
## 2019](https://population.un.org/wpp/)
## https://population.un.org/wpp/Download/Standard/CSV/
world_population = read.table(
file_in("https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv"),
header = T,
sep = ",",
as.is = T,
check.names = F,
quote = "\""
),
## Total population counts and by Country for 2020 from the WorlPop dtabase
## world_population = read.table(
## file_in("https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv"),
## header = T,
## sep = ",",
## as.is = T,
## check.names = F,
## quote = "\""
## ),
world_population = get_world_pop(),
## ECDC Covid19 geographic distribution data
last_day = Sys.Date() - 1,
......@@ -120,7 +118,8 @@ plan <- drake_plan(## raw_data = read_excel(
## mutate(date = as.Date(date)),
## Population counts in thousands of people
pop_countries_2019 = pop_countries(world_population, year = 2019),
pop_countries_2020 = subset(world_population,
subset = name %in% europe),
## ## today (ANSI)
## tod = format(Sys.Date(), "%Y%m%d"),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment