MaJ issue 4 - amélioration aide kfino_fit()

4a25f1b5 · sanchezi · be6e704e · 4a25f1b5 · 4a25f1b5 · 4a25f1b5
Commit 4a25f1b5 authored 2 years ago by sanchezi
--- a/R/kfino.R
+++ b/R/kfino.R
 #' kfino_fit a function to detect outliers with a Kalman Filtering approach
 #' @param datain an input data.frame of one time course to study (unique IDE)
 #' @param Tvar char, time column name in the data.frame datain, a numeric vector
-#'             Tvar can be expressed as a proportion of day in seconds
+#'             Tvar should be expressed as a proportion of day in seconds
 #' @param Yvar char, name of the variable to predict in the data.frame datain
 #' @param param list, a list of initialization parameters
-#' @param doOptim logical, if TRUE optimisation of the initial parameters
+#' @param doOptim logical, if TRUE optimization of the initial parameters,
 #'                default TRUE
 #' @param method character, the method used to optimize the initial parameters:
 #'               Expectation-Maximization algorithm `"EM"` or Maximum
 #'               Likelihood `"ML"`, default `"ML"`
 #' @param threshold numeric, threshold to qualify an observation as outlier
 #'        according to the label_pred, default 0.5
-#' @param kappa numeric, truncation setting, default 10
-#' @param kappaOpt numeric, truncation setting, default 7
+#' @param kappa numeric, truncation setting for likelihood optimization, 
+#'        default 10
+#' @param kappaOpt numeric, truncation setting for initial parameters' 
+#'        optimization, default 7
 #'
-#' @details The initialization parameter list param contains:
+#' @details The initialization parameter list `param` contains:
 #' \describe{
-#'  \item{mm}{(optional) target weight, NULL if the user wants to optimize it}
-#'  \item{pp}{(optional) probability to be correctly weighed, NULL if the user
-#'            wants to optimize it}
-#'  \item{m0}{(optional) Initial weight, NULL if the user wants to optimize it}
+#'  \item{mm}{(optional) numeric, target weight, NULL if the user wants to 
+#'            optimize it}
+#'  \item{pp}{(optional) numeric, probability to be correctly weighed, NULL if 
+#'            the user wants to optimize it}
+#'  \item{m0}{(optional) numeric, initial weight, NULL if the user wants to 
+#'            optimize it}
 #'  \item{aa}{numeric, rate of weight change, default 0.001 }
 #'  \item{expertMin}{numeric, the minimal weight expected by the user}
 #'  \item{expertMax}{numeric, the maximal weight expected by the user}
-#'  \item{sigma2_m0}{variance of m0, default 1}
+#'  \item{sigma2_m0}{numeric, variance of m0, default 1}
 #'  \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
 #'        default 0.05}
 #'  \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
 #'        default 5}
 #'  \item{K}{numeric, a constant value in the outlier function (trapezium),
 #'           by default K=5}
-#'  \item{seqp}{numeric, sequence of pp probability to be correctly weighted.
-#'              default seq(0.5,0.7,0.1)}
+#'  \item{seqp}{numeric vector, sequence of pp probability to be correctly 
+#'              weighed, default seq(0.5,0.7,0.1)}
 #' }
 #' It has to be given by the user following his knowledge of the animal or
 #' the data set. All parameters are compulsory except m0, mm and pp that can be
@@ -41,7 +45,7 @@
 #' m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A
 #' sub-sampling is performed to speed the algorithm if the number of possible
 #' observations studied is greater than 500. Optimization is performed using
-#' `"EM"` or `"ML"` methods.
+#' either the `"EM"` or the `"ML"` method.
 #'
 #' @importFrom stats dnorm quantile na.omit
 #' @importFrom dplyr mutate filter left_join arrange %>%
@@ -61,7 +65,7 @@
 #' data(spring1)
 #' library(dplyr)
 #'
-#' # --- With Optimisation on initial parameters - ML method
+#' # --- With Optimization on initial parameters - ML method
 #' t0 <- Sys.time()
 #' param1<-list(m0=NULL,
 #'              mm=NULL,
@@ -80,14 +84,14 @@
 #'               doOptim=TRUE,method="ML",param=param1)
 #' Sys.time() - t0
 #'
-#' # --- With Optimisation on initial parameters - EM method
+#' # --- With Optimization on initial parameters - EM method
 #' t0 <- Sys.time()
 #' resu1b<-kfino_fit(datain=spring1,
 #'               Tvar="dateNum",Yvar="Poids",
 #'               doOptim=TRUE,method="EM",param=param1)
 #' Sys.time() - t0
 #'
-#' # --- Without Optimisation on initial parameters
+#' # --- Without Optimization on initial parameters
 #' t0 <- Sys.time()
 #' param2<-list(m0=41,
 #'              mm=45,
@@ -194,7 +198,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
    if (N > 500){
      # optim with sub-sampling
      print("-------:")
-      print("Optimisation of initial parameters ")
+      print("Optimization of initial parameters ")
      print("with sub-sampling and ML method - result:")
      bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
      m0opt=quantile(Y[1:N/4], probs = c(.5))
@@ -261,7 +265,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
      Y=YY
      Tps=TpsTps
      N=NN
-      print("Optimised parameters with ML method: ")
+      print("Optimized parameters with ML method: ")
      cat("Optimized m0: ",m0opt,"\n")
      cat("Optimized mm: ",mmopt,"\n")
      cat("Optimized pp: ",popt,"\n")
@@ -283,7 +287,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
      # optimization without sub-sampling, 2 methods, EM or ML
      if (method == "EM"){
          print("-------:")
-          print("Optimisation of initial parameters with EM method - result:")
+          print("Optimization of initial parameters with EM method - result:")
          print("no sub-sampling performed:")
          bornem0=quantile(Y[1:N/2], probs = c(.2, .8))
          cat("range m0: ",bornem0,"\n")
@@ -316,7 +320,6 @@ kfino_fit<-function(datain,Tvar,Yvar,
            m0_tmp=Res_EM$m0[[1]]
            m_tmp=Res_EM$mm[[1]]
            p_tmp=Res_EM$pp
-            print(Res_EM$likelihood)
            if (k==N_etape_EM) break
          }
          Vopt_low=Res_EM$likelihood
@@ -351,7 +354,6 @@ kfino_fit<-function(datain,Tvar,Yvar,
            m0_tmp=Res_EM$m0[[1]]
            m_tmp=Res_EM$mm[[1]]
            p_tmp=Res_EM$pp
-            print(Res_EM$likelihood)
            if (k==N_etape_EM) break
          }
          Vopt_up=Res_EM$likelihood
@@ -436,7 +438,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
            popt<-popt_low
          }
          
-          print("Optimised parameters with EM method: ")
+          print("Optimized parameters with EM method: ")
          cat("Optimized m0: ",m0opt,"\n")
          cat("Optimized mm: ",mmopt,"\n")
          cat("Optimized pp: ",popt,"\n")
@@ -455,7 +457,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
          
      } else if (method == "ML"){
        print("-------:")
-        print("Optimisation of initial parameters with ML method - result:")
+        print("Optimization of initial parameters with ML method - result:")
        print("no sub-sampling performed:")
        bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
        m0opt=quantile(Y[1:N/4], probs = c(.5))
@@ -503,7 +505,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
          }
        }

-        print("Optimised parameters: ")
+        print("Optimized parameters: ")
        cat("Optimized m0: ",m0opt,"\n")
        cat("Optimized mm: ",mmopt,"\n")
        cat("Optimized pp: ",popt,"\n")
@@ -531,8 +533,8 @@ kfino_fit<-function(datain,Tvar,Yvar,
          X<-c(m0,pp,mm)
        }
        print("-------:")
-        print("Optimisation of initial parameters - result:")
-        print("Not enough data => No optimisation performed:")
+        print("Optimization of initial parameters - result:")
+        print("Not enough data => No optimization performed:")
        print("Used parameters: ")
        print(X)
        print("-------:")
@@ -563,7 +565,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
        X<-c(m0,pp,mm)
      }
      print("-------:")
-      print("No optimisation of initial parameters:")
+      print("No optimization of initial parameters:")
      print("Used parameters: ")
      print(X)
      resultat=KBO_known(param=list(m0=X[[1]],
@@ -620,7 +622,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
    #--------------------------------------
    # 1. a whole dataset with the detected outliers flagged and prediction
    # 2. a dataset with the prediction on possible values
-    # 3. optimisation results (a list of vectors)
+    # 3. optimization results (a list of vectors)
    mylist<-list(dt.out,dt.pred,resultat)
    names(mylist)<-c("detectOutlier","PredictionOK","kfino.results")
    class(mylist) = c("kfino")

--- a/doc/HowTo.html
+++ b/doc/HowTo.html
--- a/man/kfino_fit.Rd
+++ b/man/kfino_fit.Rd
@@ -20,13 +20,13 @@ kfino_fit(
 \item{datain}{an input data.frame of one time course to study (unique IDE)}

 \item{Tvar}{char, time column name in the data.frame datain, a numeric vector
-Tvar can be expressed as a proportion of day in seconds}
+Tvar should be expressed as a proportion of day in seconds}

 \item{Yvar}{char, name of the variable to predict in the data.frame datain}

 \item{param}{list, a list of initialization parameters}

-\item{doOptim}{logical, if TRUE optimisation of the initial parameters
+\item{doOptim}{logical, if TRUE optimization of the initial parameters,
 default TRUE}

 \item{method}{character, the method used to optimize the initial parameters:
@@ -36,9 +36,11 @@ Likelihood `"ML"`, default `"ML"`}
 \item{threshold}{numeric, threshold to qualify an observation as outlier
 according to the label_pred, default 0.5}

-\item{kappa}{numeric, truncation setting, default 10}
+\item{kappa}{numeric, truncation setting for likelihood optimization, 
+default 10}

-\item{kappaOpt}{numeric, truncation setting, default 7}
+\item{kappaOpt}{numeric, truncation setting for initial parameters' 
+optimization, default 7}
 }
 \value{
 an S3 list with two data frames and a list of vectors of
@@ -55,24 +57,26 @@ kfino results
 kfino_fit a function to detect outliers with a Kalman Filtering approach
 }
 \details{
-The initialization parameter list param contains:
+The initialization parameter list `param` contains:
 \describe{
- \item{mm}{(optional) target weight, NULL if the user wants to optimize it}
- \item{pp}{(optional) probability to be correctly weighed, NULL if the user
-           wants to optimize it}
- \item{m0}{(optional) Initial weight, NULL if the user wants to optimize it}
+ \item{mm}{(optional) numeric, target weight, NULL if the user wants to 
+           optimize it}
+ \item{pp}{(optional) numeric, probability to be correctly weighed, NULL if 
+           the user wants to optimize it}
+ \item{m0}{(optional) numeric, initial weight, NULL if the user wants to 
+           optimize it}
 \item{aa}{numeric, rate of weight change, default 0.001 }
 \item{expertMin}{numeric, the minimal weight expected by the user}
 \item{expertMax}{numeric, the maximal weight expected by the user}
- \item{sigma2_m0}{variance of m0, default 1}
+ \item{sigma2_m0}{numeric, variance of m0, default 1}
 \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
       default 0.05}
 \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
       default 5}
 \item{K}{numeric, a constant value in the outlier function (trapezium),
          by default K=5}
- \item{seqp}{numeric, sequence of pp probability to be correctly weighted.
-             default seq(0.5,0.7,0.1)}
+ \item{seqp}{numeric vector, sequence of pp probability to be correctly 
+             weighed, default seq(0.5,0.7,0.1)}
 }
 It has to be given by the user following his knowledge of the animal or
 the data set. All parameters are compulsory except m0, mm and pp that can be
@@ -82,13 +86,13 @@ range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
 m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A
 sub-sampling is performed to speed the algorithm if the number of possible
 observations studied is greater than 500. Optimization is performed using
-`"EM"` or `"ML"` methods.
+either the `"EM"` or the `"ML"` method.
 }
 \examples{
 data(spring1)
 library(dplyr)

-# --- With Optimisation on initial parameters - ML method
+# --- With Optimization on initial parameters - ML method
 t0 <- Sys.time()
 param1<-list(m0=NULL,
             mm=NULL,
@@ -107,14 +111,14 @@ resu1<-kfino_fit(datain=spring1,
              doOptim=TRUE,method="ML",param=param1)
 Sys.time() - t0

-# --- With Optimisation on initial parameters - EM method
+# --- With Optimization on initial parameters - EM method
 t0 <- Sys.time()
 resu1b<-kfino_fit(datain=spring1,
              Tvar="dateNum",Yvar="Poids",
              doOptim=TRUE,method="EM",param=param1)
 Sys.time() - t0

-# --- Without Optimisation on initial parameters
+# --- Without Optimization on initial parameters
 t0 <- Sys.time()
 param2<-list(m0=41,
             mm=45,