Commit c7284c47 authored by Facundo Muñoz's avatar Facundo Muñoz ®️
Browse files

Structure into a pipeline with targets.

Multiple formats, customised options.
parent e9aaa843
library(targets)
library(tarchetypes)
source("src/functions.R")
options(tidyverse.quiet = TRUE)
## Uncomment below to use local multicore computing
## when running tar_make_clustermq().
# options(clustermq.scheduler = "multicore")
# Uncomment below to deploy targets to parallel jobs
# on a Sun Grid Engine cluster when running tar_make_clustermq().
# options(clustermq.scheduler = "sge", clustermq.template = "sge.tmpl")
## Packages declared once here and shared by every target in the pipeline
## (also reused by the interactive workflow via tar_option_get("packages")).
tar_option_set(
  packages = c(
    "boot", "furrr", "here", "hrbrthemes", "janitor",
    "kableExtra", "knitr", "readxl", "tidyverse"
  )
)
## Interactive workflow
# pacman::p_load(char = tar_option_get("packages"))
# tar_load(everything())
## Define the pipeline. A pipeline is just a list of targets.
list(
  # SOURCE FILES ------------------------------------------------------------
  # Survey workbook, declared as a file target (format = "file").
  tar_target(
    data_file,
    "data/HumanDogRatioData.xlsx",
    format = "file"
  ),

  # SOURCE DATA ---------------------------------------------------
  # Raw survey data read from the workbook above.
  tar_target(
    raw_data,
    read_excel(data_file)
  ),
  # Simulation parameters as a two-column (param, value) table.
  tar_target(
    sim_pars,
    tribble(
      ~param, ~value,
      "N", 334,
      "b0", -2,
      "b1", -.2
    )
  ),
  # The parameter table is converted to a named list (names taken from
  # `param`) and spliced as arguments into simulate_household_data().
  tar_target(
    sim_data,
    do.call(
      simulate_household_data,
      as.list(pull(sim_pars, value, name = param))
    )
  ),

  # DERIVED DATA --------------------------------------------------
  tar_target(
    clean_data,
    raw_data %>% cleanup()
  ),

  # PARAMETERS ----------------------------------------------------
  # DESCRIPTION ---------------------------------------------------
  # MODELS --------------------------------------------------------
  # DIAGNOSIS -----------------------------------------------------

  # REPORTS -------------------------------------------------------
  # The same R Markdown source is rendered twice: HTML into public/ and
  # PDF into reports/. Logical options are spelled TRUE rather than T,
  # because T is an ordinary variable that can be reassigned.
  tar_render(
    report_html,
    "src/confint_ratio.Rmd",
    output_dir = "public", # https://github.com/ropensci/drake/issues/742
    output_format =
      rmdformats::readthedown(
        dev = "CairoPNG",
        toc_depth = 3,
        lightbox = TRUE,
        gallery = TRUE,
        use_bookdown = TRUE,
        number_sections = TRUE
      ),
    output_file =
      "public/confint_ratio.html",
    quiet = FALSE
  ),
  tar_render(
    report_pdf,
    "src/confint_ratio.Rmd",
    output_dir = "reports", # https://github.com/ropensci/drake/issues/742
    output_format =
      bookdown::pdf_document2(
        dev = "pdf",
        includes = list(
          in_header = "preamble.tex",
          before_body = "before_body.tex"
        ),
        toc = TRUE,
        toc_depth = 3,
        number_sections = TRUE,
        latex_engine = "xelatex"
      ),
    output_file = "reports/confint_ratio.pdf",
    quiet = FALSE
  )
)
......@@ -12,19 +12,10 @@ editor_options:
wrap: 72
---
```{r packages, include = FALSE}
pacman::p_load(
"boot",
"furrr",
"here",
"hrbrthemes",
"janitor",
"kableExtra",
"knitr",
"readxl",
"tidyverse"
)
```{r eval = interactive(), cache = FALSE, include = FALSE}
source("src/packages.R")
source("src/functions.R")
message("Assuming interactive session")
```
```{r setup, include=FALSE, cache = FALSE}
......@@ -38,35 +29,19 @@ knitr::opts_chunk$set(
theme_set(theme_ipsum(grid = "Y"))
```
```{r functions}
source("src/functions.R")
```
```{r parameters}
sim_pars <- tribble(
~param, ~value,
"N", 334,
"b0", -2,
"b1", -.2
)
```
```{r data}
data_file <- here::here("data/HumanDogRatioData.xlsx")
raw_data <- read_excel(data_file)
# Introduction
sim_data <- do.call(
simulate_household_data,
as.list(pull(sim_pars, value, name = param))
```{r load, include = FALSE}
tar_load(
c(
clean_data,
sim_data,
sim_pars,
raw_data
)
)
clean_data <- cleanup(raw_data)
```
# Introduction
We surveyed 2 variables ($X$ and $Y$ counts) from a population and we
are interested in their ratio $Y/X$. We want to make inference on the
**average ratio** in the population.
......@@ -126,6 +101,16 @@ confint_ratio_normal <- function(x, alpha = 0.05) {
```
```{r res-normal}
# res_normal <-
# bind_rows(
# clean_data |>
# select(zone, y = dh_ratio) |>
# add_column(dataset = "Real", .before = 1),
# sim_data |>
# select(zone, y = r) |>
# add_column(dataset = "Simulated", .before = 1)
# ) |>
# group_by(dataset, zone) |>
res_normal <-
clean_data |>
select(zone, y = dh_ratio) |>
......@@ -267,35 +252,35 @@ Note that we no longer talk of differences but of relative factor, as a conseque
Computing confidence intervals here would again require the bootstrap, but applied to the model estimates instead of the empirical averages.
However, there is evidence of over-dispersion in the data, so a more appropriate model should first be developed before continuing the analysis.
\clearpage
<!-- \clearpage -->
# Simulation study
<!-- # Simulation study -->
```{r sim-pars}
cap <- "Parameters used for simulating data."
sim_pars |>
kbl(
booktabs = TRUE,
caption = cap
)
```
<!-- ```{r sim-pars} -->
<!-- cap <- "Parameters used for simulating data." -->
<!-- sim_pars |> -->
<!-- kbl( -->
<!-- booktabs = TRUE, -->
<!-- caption = cap -->
<!-- ) -->
<!-- ``` -->
In order to evaluate the accuracy of the alternative methods, I simulated data from model \@ref(eq:model) with parameters (Table \@ref(tab:sim-pars)) that mimic the real observed data (Figure \@ref(fig:sim-data-description)).
<!-- In order to evaluate the accuracy of the alternative methods, I simulated data from model \@ref(eq:model) with parameters (Table \@ref(tab:sim-pars)) that mimic the real observed data (Figure \@ref(fig:sim-data-description)). -->
```{r sim-data-description, fig.cap = cap}
cap <- "Sample distributions of __simulated__ survey data (dog-human ratio (r), number of humans (x) and number of dogs (y)) by zone."
sim_data |>
pivot_longer(
x:r,
names_to = "variable",
values_to = "value"
) |>
ggplot(aes(value)) +
geom_histogram(bins = 15) +
facet_grid(zone ~ variable, scales = "free")
```
<!-- ```{r sim-data-description, fig.cap = cap} -->
<!-- cap <- "Sample distributions of __simulated__ survey data (dog-human ratio (r), number of humans (x) and number of dogs (y)) by zone." -->
<!-- sim_data |> -->
<!-- pivot_longer( -->
<!-- x:r, -->
<!-- names_to = "variable", -->
<!-- values_to = "value" -->
<!-- ) |> -->
<!-- ggplot(aes(value)) + -->
<!-- geom_histogram(bins = 15) + -->
<!-- facet_grid(zone ~ variable, scales = "free") -->
<!-- ``` -->
......
% --- Fonts -----------------------------------------------------------------
% fontspec-based font selection; the lines commented out below are
% alternative font setups that are currently disabled.
\usepackage{fontspec} % Handle fonts with XeLaTeX
\defaultfontfeatures{Ligatures=TeX} % Use LaTeX font ligatures
% NOTE(review): presumably requires a font named "Palatino" to be available
% to XeLaTeX on the build machine -- confirm.
\setmainfont{Palatino}
%\usepackage{palatino} % Unnecessary
%\renewcommand{\familydefault}{\sfdefault} % sans serif
%\fontfamily{ppl}\selectfont % LaTeX
% \usepackage[utf8]{inputenc}
% \usepackage[T1]{fontenc}
% \usepackage{newpxtext,newpxmath}
% --- Links -----------------------------------------------------------------
% Redefine \href so that only the link text (#2) is printed in the body,
% followed by a footnote containing the URL (#1).
\renewcommand{\href}[2]{#2\footnote{\url{#1}}}
% --- Layout ----------------------------------------------------------------
% NOTE(review): the `document` option presumably sets the whole document
% ragged-right -- confirm against the ragged2e documentation.
\usepackage[document]{ragged2e}
% --- Tables ----------------------------------------------------------------
\usepackage{array}
\usepackage{longtable}
\usepackage{booktabs}
\usepackage{multirow}
% \usepackage{mathtools}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment