Commit b1dc7e2a authored by Facundo Muñoz's avatar Facundo Muñoz ®️
Browse files

R script.

parent 1f4218f0
## packages
pacman::p_load(
"boot",
"furrr",
"here",
"hrbrthemes",
"janitor",
"kableExtra",
"knitr",
"readxl",
"tidyverse"
)
## setup
knitr::opts_chunk$set(
echo = FALSE,
cache = FALSE,
dev = ifelse(is_latex_output(), "cairo_pdf", "png"),
dpi = 300
)
theme_set(theme_ipsum(grid = "Y"))
## parameters
sim_pars <- tribble(
~param, ~value,
"N", 334,
"b0", -2,
"b1", -.2
)
## data
data_file <- here::here("data/HumanDogRatioData.xlsx")
raw_data <- read_excel(data_file)
zone <- sample(c("Urban", "Rural"), size = with(sim_pars, value[param == "N"]), replace = TRUE)
log_lambda <- with(sim_pars, value[param == "b0"]) + with(sim_pars, value[param == "b1"]) * (zone == "Urban")
x <- 1 + rpois(with(sim_pars, value[param == "N"]), lambda = 3)
y <- rpois(with(sim_pars, value[param == "N"]), lambda = x * exp(log_lambda))
sim_data <- data.frame(
id = seq.int(with(sim_pars, value[param == "N"])),
zone = zone,
x = x,
y = y,
r = y/x
)
clean_data <- raw_data |>
select(Idmenage, zone = ru_urb, Nbh, Ndog) |>
mutate(dh_ratio = Ndog/Nbh)
## Sample distributions of survey data (dog-human ratio, number of humans and
## number of dogs) by zone.
clean_data |>
pivot_longer(
Nbh:dh_ratio,
names_to = "variable",
values_to = "value"
) |>
ggplot(aes(value)) +
geom_histogram(bins = 15) +
facet_grid(zone ~ variable, scales = "free")
## confint-ratio-normal
confint_ratio_normal <- function(x, alpha = 0.05) {
hatx <- mean(x)
s2 <- var(x)
n <- length(x)
ta2 <- qt(alpha/2, n - 1, lower.tail = FALSE)
return(hatx + ta2*sqrt(s2/n) * c(-1, 1))
}
## res-normal
res_normal <-
clean_data |>
select(zone, y = dh_ratio) |>
group_by(zone) |>
summarise(
Mean = mean(y),
Variance = var(y),
CI95_a = confint_ratio_normal(y)[1],
CI95_b = confint_ratio_normal(y)[2],
.groups = "drop"
)
kbl(
res_normal,
booktabs = TRUE,
digits = 3,
caption = "Results using the Normal asymptotic approximation."
)
## confint-ratio-boot
get_estimates <- function(x, i) {
i_u <- i[x$zone[i] == "URBAIN"]
i_r <- i[x$zone[i] == "RURAL"]
m_u <- mean(x$dh_ratio[i_u])
m_r <- mean(x$dh_ratio[i_r])
c(RURAL = m_r, URBAIN = m_u, `U-R` = m_u - m_r)
}
clean_data_boot <- boot(clean_data, get_estimates, R = 1e4)
## res-boot
res_boot <-
clean_data_boot$t0 |>
enframe(name = "zone", value = "Mean") |>
left_join(
map_dfr(
setNames(seq_along(clean_data_boot$t0), names(clean_data_boot$t0)),
~ boot.ci(clean_data_boot, index = ., type = "basic")$basic[1, 4:5]
) |>
add_column(side = c("CI95_a", "CI95_b")) |>
pivot_longer(
cols = -side,
names_to = "zone",
values_to = "value"
) |>
pivot_wider(
names_from = "side",
values_from = "value"
),
by = "zone"
)
kbl(
res_boot,
booktabs = TRUE,
digits = 3,
caption = "Results using the basic Bootstrap method."
)
## fm1
fm1 <- glm(
formula = Ndog ~ zone,
family = "poisson",
offset = clean_data$Nbh,
data = clean_data
)
## fm1-summary
summary(fm1)
## table-estimates-model
exp(c(coef(fm1), sum(coef(fm1)))[c(1, 3, 2)]) |>
setNames(c("RURAL", "URBAIN", "U/R")) |>
enframe(name = "zone", value = "Mean")
sim_pars |>
kbl(
booktabs = TRUE,
caption = "Parameters used for simulating data."
)
## Sample distributions of __simulated__ survey data (dog-human ratio (r),
## number of humans (x) and number of dogs (y)) by zone.
sim_data |>
pivot_longer(
x:r,
names_to = "variable",
values_to = "value"
) |>
ggplot(aes(value)) +
geom_histogram(bins = 15) +
facet_grid(zone ~ variable, scales = "free")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment