From ade17252f942bfaa1c43d35eb2d240cb32a418a4 Mon Sep 17 00:00:00 2001
From: unknown <isabelle.sanchez@inra.fr>
Date: Thu, 7 Apr 2022 16:49:29 +0200
Subject: [PATCH] add minp parameter in kfino_fit()

---
 R/kfino.R        | 18 ++++++++++--------
 man/kfino_fit.Rd |  9 +++++++--
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/R/kfino.R b/R/kfino.R
index d04e121..825a5bd 100644
--- a/R/kfino.R
+++ b/R/kfino.R
@@ -26,7 +26,8 @@
 #' @param sigma2_pp numeric, variance of pp, related to the unit of Yvar, 
 #'        default 5
 #' @param K numeric, cst in the outlier function (trapezoid), by default K=2
-#'          increasing K, XXX 
+#'          increasing K, XXX
+#' @param minp numeric, minimal pp value explored in optimisation, default 0.4
 #' 
 #' @details The initialization parameter vector X contains:
 #' \describe{
@@ -38,7 +39,9 @@
 #' or entirely constructed by the function. In the optimisation step, this 
 #' vector is initialized according to the input data (between the expert
 #' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for 
-#' m0 and 0.5 for mm). pp is a sequence varying between 0.4 and 0.8. 
+#' m0 and 0.5 for mm). pp is a sequence varying from minp to (minp+0.3). A 
+#' sub-sampling is performed to speed the algorithm if the number of possible
+#' observations studied is greater than 500. 
 #' 
 #' @importFrom stats dnorm quantile na.omit
 #' @importFrom dplyr mutate filter left_join arrange %>%
@@ -85,8 +88,8 @@
 kfino_fit<-function(datain,Tvar,Yvar,
                      expertMin=NULL,expertMax=NULL,
                      X=NULL,
-                     doOptim=TRUE,threshold=0.5,
-                     aa=0.001,sigma2_mm=0.05,sigma2_pp,K=2){
+                     doOptim=TRUE,threshold=0.5,aa=0.001,
+                     sigma2_mm=0.05,sigma2_pp,K=2,minp=0.4){
   
   if( any(is.null(expertMin) | is.null(expertMax)) ) 
     stop('You have to define expertMin and expertMax.')
@@ -106,7 +109,6 @@ kfino_fit<-function(datain,Tvar,Yvar,
          mutate(rowNum=row_number(),
                 flag1=if_else(.data[[Yvar]] > expertMin & 
                               .data[[Yvar]] <= expertMax,"OK","Bad")) 
-  
   tp.dt<-datain %>% filter(.data$flag1 == "OK")
   
   Y<-tp.dt[,Yvar]
@@ -416,7 +418,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
       
       for (m0 in seq(bornem0[1],bornem0[2],2) ){
         for (mm in seq((m0-5),(m0+20),2) ){
-          for (p in seq(0.4,0.7,0.1)){
+          for (p in seq(minp,(minp + 0.3),0.1)){
             # A voir si 50 sous-echantillons au hasard suffisent. Comme dans
             #  Robbins Monroe, permet aussi de reduire l'impact de la troncature
             Subechant=sort(sample(1:NN,50))  
@@ -448,7 +450,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
       resultat=FK_para_connu_tronc(param)
       
     } else if (N > 50){
-      # optim sans sous-echantillonage
+      # optim sans sous-echantillonnage
       print("-------:")
       print("Optimisation of initial parameters - result:")
       print("no sub-sampling performed:")
@@ -466,7 +468,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
       
       for (m0 in seq(bornem0[1],bornem0[2],2) ){
         for (mm in seq((m0-5),(m0+20),2) ){
-          for (p in seq(0.4,0.7,0.1)){
+          for (p in seq(minp,(minp + 0.3),0.1)){
             V=KBO_vraiss(c(mm,p,m0))
             if (V > Vopt){
               Vopt=V
diff --git a/man/kfino_fit.Rd b/man/kfino_fit.Rd
index ae4b1d9..506427a 100644
--- a/man/kfino_fit.Rd
+++ b/man/kfino_fit.Rd
@@ -16,7 +16,8 @@ kfino_fit(
   aa = 0.001,
   sigma2_mm = 0.05,
   sigma2_pp,
-  K = 2
+  K = 2,
+  minp = 0.4
 )
 }
 \arguments{
@@ -49,6 +50,8 @@ default 5}
 
 \item{K}{numeric, cst in the outlier function (trapezoid), by default K=2
 increasing K, XXX}
+
+\item{minp}{numeric, minimal pp value explored in optimisation, default 0.4}
 }
 \value{
 a S3 list with two data frames and a list of vectors of 
@@ -75,7 +78,9 @@ It can be given by the user following his knowledge of the animal or dataset
 or entirely constructed by the function. In the optimisation step, this 
 vector is initialized according to the input data (between the expert
 range) using quantile of the Y distribution (varying between 0.2 and 0.8 for 
-m0 and 0.5 for mm). pp is a sequence varying between 0.4 and 0.8.
+m0 and 0.5 for mm). pp is a sequence varying from minp to (minp+0.3). A 
+sub-sampling is performed to speed the algorithm if the number of possible
+observations studied is greater than 500.
 }
 \examples{
 data(spring1)
-- 
GitLab