library(kfino)
library(dplyr)
library(foreach)
library(parallel)
library(doParallel)
#> Le chargement a nécessité le package : iterators
This vignette shows how to use parallelization on a data set
containing a set of animals weighted over time with the
walk-over-weighing system. The lambs
data set is included
in the kfino package and can be loaded using the
data()
function.
We use the parallel and doParallel libraries to accelerate the computing time.
data(lambs)
myIDE<-unique(lambs$IDE)
print(myIDE)
#> [1] "250017033503030" "250017033503074" "250017033503092" "250017033503096"
param=list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=10,
expertMax=45,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=15,
seqp=seq(0.4,0.7,0.1))
t0 <- Sys.time()
resu1<-list()
for (i in seq_along(myIDE)){
print(myIDE[i])
tp.test<-filter(lambs,IDE == myIDE[i])
print(dim(tp.test))
resu1[[i]]<-kfino_fit(datain=tp.test,
Tvar="dateNum",Yvar="Poids",
param=param,
doOptim=TRUE)
}
#> [1] "250017033503030"
#> [1] 101 5
#> [1] "250017033503074"
#> [1] 416 5
#> [1] "250017033503092"
#> [1] 213 5
#> [1] "250017033503096"
#> [1] 566 5
Sys.time() - t0
#> Time difference of 18.28121 secs
print(length(resu1))
#> [1] 4
An example improving the computation time of a run on a complete dataset by parallelizing the call.
param=list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=10,
expertMax=45,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=15,
seqp=seq(0.4,0.7,0.1))
t0<-Sys.time()
simpleCall<-function(datain,Index,Tvar,Yvar,param){
datain<-as.data.frame(datain)
ici<-unique(datain[,"IDE"])
tp.data<-datain[ datain[,"IDE"] == ici[Index],]
tp.resu<-kfino::kfino_fit(datain=tp.data,
Tvar=Tvar,Yvar=Yvar,
param=param,
doOptim=TRUE)
return(tp.resu)
}
# In a real example, take advantage of as many cores as you can
# ncores<-parallel::detectCores()
# myCluster<-parallel::makeCluster(ncores - 1)
# For CRAN requirements, we use here only 2 cores.
myCluster <- parallel::makeCluster(2)
doParallel::registerDoParallel(myCluster)
resu2<-foreach(i=seq_along(myIDE), .packages="kfino") %dopar%
simpleCall(datain=lambs,
Index=i,
Tvar="dateNum",
Yvar="Poids",
param=param)
parallel::stopCluster(myCluster)
Sys.time() - t0
#> Time difference of 12.13639 secs
print(length(resu2))
#> [1] 4
identical(resu1,resu2)
#> [1] TRUE
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 18.04.6 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
#>
#> locale:
#> [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
#> [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
#> [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] parallel stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] doParallel_1.0.17 iterators_1.0.14 foreach_1.5.2 ggplot2_3.3.6
#> [5] dplyr_1.0.10 kfino_1.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] highr_0.9 pillar_1.8.1 bslib_0.4.0 compiler_4.2.1
#> [5] jquerylib_0.1.4 tools_4.2.1 digest_0.6.29 jsonlite_1.8.2
#> [9] evaluate_0.16 lifecycle_1.0.2 tibble_3.1.8 gtable_0.3.1
#> [13] pkgconfig_2.0.3 rlang_1.0.6 cli_3.4.1 DBI_1.1.3
#> [17] rstudioapi_0.14 yaml_2.3.5 xfun_0.33 fastmap_1.1.0
#> [21] withr_2.5.0 stringr_1.4.1 knitr_1.40 generics_0.1.3
#> [25] vctrs_0.4.2 sass_0.4.2 grid_4.2.1 tidyselect_1.1.2
#> [29] glue_1.6.2 R6_2.5.1 fansi_1.0.3 rmarkdown_2.16
#> [33] farver_2.1.1 purrr_0.3.4 magrittr_2.0.3 codetools_0.2-18
#> [37] ellipsis_0.3.2 scales_1.2.1 htmltools_0.5.3 assertthat_0.2.1
#> [41] colorspace_2.0-3 labeling_0.4.2 utf8_1.2.2 stringi_1.7.8
#> [45] munsell_0.5.0 cachem_1.0.6