Commit c7284c47 authored by Facundo Muñoz's avatar Facundo Muñoz ®️
Browse files

Structure into a pipeline with targets.

Multiple formats, customised options.
parent e9aaa843
library(targets)
library(tarchetypes)
# Load project helper functions into the session.
# NOTE(review): presumably this defines simulate_household_data() and
# cleanup(), which the targets below call -- confirm in src/functions.R.
source("src/functions.R")
# Set the tidyverse.quiet option so attaching tidyverse stays silent.
options(tidyverse.quiet = TRUE)
## Uncomment below to use local multicore computing
## when running tar_make_clustermq().
# options(clustermq.scheduler = "multicore")
## Uncomment below to deploy targets to parallel jobs
## on a Sun Grid Engine cluster when running tar_make_clustermq().
# options(clustermq.scheduler = "sge", clustermq.template = "sge.tmpl")
# Global target options: the packages listed here are the ones the
# pipeline's commands rely on (see also the interactive workflow below,
# which loads the same list via tar_option_get("packages")).
tar_option_set(
  packages = c(
    "boot", "furrr", "here",
    "hrbrthemes", "janitor", "kableExtra",
    "knitr", "readxl", "tidyverse"
  )
)
## Interactive workflow
# pacman::p_load(char = tar_option_get("packages"))
# tar_load(everything())
## Define the pipeline. A pipeline is just a list of targets.
list(
# SOURCE FILES ------------------------------------------------------------
# Track the survey spreadsheet as a file target, so the pipeline depends
# on the file itself rather than only on its path string.
tar_target(
  name = data_file,
  command = "data/HumanDogRatioData.xlsx",
  format = "file"
)
,
# SOURCE DATA ---------------------------------------------------
# Read the spreadsheet referenced by the data_file target, unmodified.
tar_target(
  name = raw_data,
  command = read_excel(data_file)
)
,
# Simulation parameters as a two-column (param, value) table.
# The names in `param` become the argument names passed to
# simulate_household_data() by the sim_data target.
tar_target(
  name = sim_pars,
  command = tribble(
    ~param, ~value,
    "N",    334,
    "b0",   -2,
    "b1",   -0.2
  )
)
,
# Simulated data set: call simulate_household_data() with the values of
# sim_pars supplied as named arguments.
tar_target(
  name = sim_data,
  command = {
    # Named list of arguments: names from `param`, values from `value`.
    sim_args <- as.list(pull(sim_pars, value, name = param))
    do.call(simulate_household_data, sim_args)
  }
)
,
# DERIVED DATA --------------------------------------------------
# Cleaned version of the raw survey data, produced by cleanup().
tar_target(
  name = clean_data,
  command = cleanup(raw_data)
)
,
# PARAMETERS ----------------------------------------------------
# DESCRIPTION ---------------------------------------------------
# MODELS --------------------------------------------------------
# DIAGNOSIS -----------------------------------------------------
# REPORTS -------------------------------------------------------
# HTML report: render src/confint_ratio.Rmd with the rmdformats
# "readthedown" format into the public/ directory.
tar_render(
  report_html,
  "src/confint_ratio.Rmd",
  output_dir = "public", # https://github.com/ropensci/drake/issues/742
  output_format = rmdformats::readthedown(
    dev = "CairoPNG",
    toc_depth = 3,
    # Use the reserved word TRUE rather than `T`: `T` is an ordinary
    # variable that any sourced script could reassign.
    lightbox = TRUE,
    gallery = TRUE,
    use_bookdown = TRUE,
    number_sections = TRUE
  ),
  output_file = "public/confint_ratio.html",
  # Show the rendering output instead of suppressing it.
  quiet = FALSE
)
,
# PDF report: render the same R Markdown source with
# bookdown::pdf_document2 (XeLaTeX engine) into the reports/ directory.
tar_render(
  report_pdf,
  "src/confint_ratio.Rmd",
  output_dir = "reports", # https://github.com/ropensci/drake/issues/742
  output_format = bookdown::pdf_document2(
    dev = "pdf",
    # Extra LaTeX files injected into the document header and before the body.
    includes = list(
      in_header = "preamble.tex",
      before_body = "before_body.tex"
    ),
    # Use the reserved word TRUE rather than `T`: `T` is an ordinary
    # variable that any sourced script could reassign.
    toc = TRUE,
    toc_depth = 3,
    number_sections = TRUE,
    latex_engine = "xelatex"
  ),
  output_file = "reports/confint_ratio.pdf",
  # Show the rendering output instead of suppressing it.
  quiet = FALSE
)
)
...@@ -12,19 +12,10 @@ editor_options: ...@@ -12,19 +12,10 @@ editor_options:
wrap: 72 wrap: 72
--- ---
```{r eval = interactive(), cache = FALSE, include = FALSE}
```{r packages, include = FALSE} source("src/packages.R")
pacman::p_load( source("src/functions.R")
"boot", message("Assuming interactive session")
"furrr",
"here",
"hrbrthemes",
"janitor",
"kableExtra",
"knitr",
"readxl",
"tidyverse"
)
``` ```
```{r setup, include=FALSE, cache = FALSE} ```{r setup, include=FALSE, cache = FALSE}
...@@ -38,35 +29,19 @@ knitr::opts_chunk$set( ...@@ -38,35 +29,19 @@ knitr::opts_chunk$set(
theme_set(theme_ipsum(grid = "Y")) theme_set(theme_ipsum(grid = "Y"))
``` ```
```{r functions} # Introduction
source("src/functions.R")
```
```{r parameters}
sim_pars <- tribble(
~param, ~value,
"N", 334,
"b0", -2,
"b1", -.2
)
```
```{r data}
data_file <- here::here("data/HumanDogRatioData.xlsx")
raw_data <- read_excel(data_file)
sim_data <- do.call( ```{r load, include = FALSE}
simulate_household_data, tar_load(
as.list(pull(sim_pars, value, name = param)) c(
clean_data,
sim_data,
sim_pars,
raw_data
)
) )
clean_data <- cleanup(raw_data)
``` ```
# Introduction
We surveyed 2 variables ($X$ and $Y$ counts) from a population and we We surveyed 2 variables ($X$ and $Y$ counts) from a population and we
are interested in their ratio $Y/X$. We want to make inference on the are interested in their ratio $Y/X$. We want to make inference on the
**average ratio** in the population. **average ratio** in the population.
...@@ -126,6 +101,16 @@ confint_ratio_normal <- function(x, alpha = 0.05) { ...@@ -126,6 +101,16 @@ confint_ratio_normal <- function(x, alpha = 0.05) {
``` ```
```{r res-normal} ```{r res-normal}
# res_normal <-
# bind_rows(
# clean_data |>
# select(zone, y = dh_ratio) |>
# add_column(dataset = "Real", .before = 1),
# sim_data |>
# select(zone, y = r) |>
# add_column(dataset = "Simulated", .before = 1)
# ) |>
# group_by(dataset, zone) |>
res_normal <- res_normal <-
clean_data |> clean_data |>
select(zone, y = dh_ratio) |> select(zone, y = dh_ratio) |>
...@@ -267,35 +252,35 @@ Note that we no longer talk of differences but of relative factor, as a conseque ...@@ -267,35 +252,35 @@ Note that we no longer talk of differences but of relative factor, as a conseque
Computing confidence intervals here would involve the use of the Bootstrap again, but using the model estimates instead of empirical averages. Computing confidence intervals here would involve the use of the Bootstrap again, but using the model estimates instead of empirical averages.
However, there is evidence of over-dispersion in the data, so a more appropriate model should first be developed in order to continue the analysis. However, there is evidence of over-dispersion in the data, so a more appropriate model should first be developed in order to continue the analysis.
\clearpage <!-- \clearpage -->
# Simulation study <!-- # Simulation study -->
```{r sim-pars} <!-- ```{r sim-pars} -->
cap <- "Parameters used for simulating data." <!-- cap <- "Parameters used for simulating data." -->
sim_pars |> <!-- sim_pars |> -->
kbl( <!-- kbl( -->
booktabs = TRUE, <!-- booktabs = TRUE, -->
caption = cap <!-- caption = cap -->
) <!-- ) -->
``` <!-- ``` -->
In order to evaluate the accuracy of the alternative methods, I simulated data from model \@ref(eq:model) with parameters (Table \@ref(tab:sim-pars)) that mimic the real observed data (Figure \@ref(fig:sim-data-description)). <!-- In order to evaluate the accuracy of the alternative methods, I simulated data from model \@ref(eq:model) with parameters (Table \@ref(tab:sim-pars)) that mimic the real observed data (Figure \@ref(fig:sim-data-description)). -->
```{r sim-data-description, fig.cap = cap} <!-- ```{r sim-data-description, fig.cap = cap} -->
cap <- "Sample distributions of __simulated__ survey data (dog-human ratio (r), number of humans (x) and number of dogs (y)) by zone." <!-- cap <- "Sample distributions of __simulated__ survey data (dog-human ratio (r), number of humans (x) and number of dogs (y)) by zone." -->
sim_data |> <!-- sim_data |> -->
pivot_longer( <!-- pivot_longer( -->
x:r, <!-- x:r, -->
names_to = "variable", <!-- names_to = "variable", -->
values_to = "value" <!-- values_to = "value" -->
) |> <!-- ) |> -->
ggplot(aes(value)) + <!-- ggplot(aes(value)) + -->
geom_histogram(bins = 15) + <!-- geom_histogram(bins = 15) + -->
facet_grid(zone ~ variable, scales = "free") <!-- facet_grid(zone ~ variable, scales = "free") -->
``` <!-- ``` -->
......
% LaTeX preamble fragment: font setup, link rendering, text alignment and
% table-related packages.

% --- Fonts -------------------------------------------------------------
\usepackage{fontspec} % Handle fonts with XeLaTeX
\defaultfontfeatures{Ligatures=TeX} % Use LaTeX font ligatures
% Main text font, selected by name through fontspec.
% NOTE(review): assumes a font called "Palatino" is installed on the
% build machine -- confirm.
\setmainfont{Palatino}
%\usepackage{palatino} % Unnecessary
%\renewcommand{\familydefault}{\sfdefault} % sans serif
%\fontfamily{ppl}\selectfont % LaTeX
% \usepackage[utf8]{inputenc}
% \usepackage[T1]{fontenc}
% \usepackage{newpxtext,newpxmath}

% --- Links -------------------------------------------------------------
% Redefine \href{url}{text} to print the link text followed by a footnote
% containing the URL.
% NOTE(review): \renewcommand requires \href (and \url) to be defined
% already, presumably by the document template -- confirm.
\renewcommand{\href}[2]{#2\footnote{\url{#1}}}

% --- Layout and tables ---------------------------------------------------
% ragged2e loaded with its `document` option.
\usepackage[document]{ragged2e}
% Table packages.
% NOTE(review): presumably needed by the tables generated in the report
% (e.g. kbl(booktabs = TRUE)) -- confirm.
\usepackage{array}
\usepackage{longtable}
\usepackage{booktabs}
\usepackage{multirow}
% \usepackage{mathtools}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment