From 36510f3d1645983337b99611c5566f444ed493f6 Mon Sep 17 00:00:00 2001
From: unknown <isabelle.sanchez@inra.fr>
Date: Thu, 5 May 2022 11:34:44 +0200
Subject: [PATCH] add EM method for initial parameters optimization - KBO_EM()

---
 NAMESPACE           |   1 +
 R/kfino.R           | 219 ++++++++++++++++++-----------
 R/utils_functions.R | 325 ++++++++++++++++++++++++++++++++++++++++----
 man/KBO_EM.Rd       |  60 ++++++++
 man/KBO_L.Rd        |  11 +-
 man/KBO_known.Rd    |  23 +++-
 man/kfino_fit.Rd    |  26 +++-
 7 files changed, 543 insertions(+), 122 deletions(-)
 create mode 100644 man/KBO_EM.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 47bb4f5..1a3e3ed 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(KBO_EM)
 export(KBO_L)
 export(KBO_known)
 export(kfino_fit)
diff --git a/R/kfino.R b/R/kfino.R
index 77e7112..87678b3 100644
--- a/R/kfino.R
+++ b/R/kfino.R
@@ -6,6 +6,9 @@
 #' @param param list, a list of initialization parameters
 #' @param doOptim logical, if TRUE optimisation of the initial parameters
 #'                default TRUE
+#' @param method character, the method used to optimize the initial parameters:
+#'               Expectation-Maximization algorithm `"EM"` or Maximum
+#'               Likelihood `"ML"`, default `"ML"`
 #' @param threshold numeric, threshold to qualify an observation as outlier
 #'        according to the label_pred, default 0.5
 #' @param kappa numeric, truncation setting, default 10
@@ -31,13 +34,14 @@
 #'              default seq(0.5,0.7,0.1)}
 #' }
 #' It has to be given by the user following his knowledge of the animal or
-#' the dataset. All parameters are compulsory execpt m0, mm and pp that can be
-#' optimized by the algorithm. In the optimisation step, those three parameters
+#' the dataset. All parameters are compulsory except m0, mm and pp that can be
+#' optimized by the algorithm. In the optimization step, those three parameters
 #' are initialized according to the input data (between the expert
 #' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
 #' m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A
 #' sub-sampling is performed to speed the algorithm if the number of possible
-#' observations studied is greater than 500.
+#' observations studied is greater than 500. Optimization is performed using 
+#' `"EM"` or `"ML"` methods.
 #'
 #' @importFrom stats dnorm quantile na.omit
 #' @importFrom dplyr mutate filter left_join arrange %>%
@@ -57,7 +61,7 @@
 #' data(spring1)
 #' library(dplyr)
 #'
-#' # --- With Optimisation on initial parameters
+#' # --- With Optimisation on initial parameters - ML method
 #' t0 <- Sys.time()
 #' param1<-list(m0=NULL,
 #'              mm=NULL,
@@ -73,8 +77,16 @@
 #'
 #' resu1<-kfino_fit(datain=spring1,
 #'               Tvar="dateNum",Yvar="Poids",
-#'               doOptim=TRUE,param=param1)
+#'               doOptim=TRUE,method="ML",param=param1)
 #' Sys.time() - t0
+#' 
+#' # --- With Optimisation on initial parameters - EM method
+#' t0 <- Sys.time()
+#' resu1b<-kfino_fit(datain=spring1,
+#'               Tvar="dateNum",Yvar="Poids",
+#'               doOptim=TRUE,method="EM",param=param1)
+#' Sys.time() - t0
+#' 
 #' # --- Without Optimisation on initial parameters
 #' t0 <- Sys.time()
 #' param2<-list(m0=41,
@@ -111,11 +123,11 @@
 #'              seqp=seq(0.5,0.7,0.1))
 #' resu3<-kfino_fit(datain=merinos2,
 #'               Tvar="dateNum",Yvar="Poids",
-#'               doOptim=TRUE,param=param3)
+#'               doOptim=TRUE,method="ML",param=param3)
 #' Sys.time() - t0
 kfino_fit<-function(datain,Tvar,Yvar,
                     param=NULL,
-                    doOptim=TRUE,
+                    doOptim=TRUE,method="ML",
                     threshold=0.5,kappa=10,kappaOpt=7){
 
   if( any(is.null(param[["expertMin"]]) |
@@ -174,22 +186,22 @@ kfino_fit<-function(datain,Tvar,Yvar,
   dix=sqrt(expertMax - expertMin)
 
   #------------------------------------------------------------------------
-  # Optimisation sur les paramètres initiaux ou pas
+  # Optimisation on Initial parameters or not
   #------------------------------------------------------------------------
   if (doOptim == TRUE){
 
     if (N > 500){
-      # optim avec sous-echantillonnage
+      # optim with sub-sampling
       print("-------:")
-      print("Optimisation of initial parameters with sub-sampling - result:")
+      print("Optimisation of initial parameters ")
+      print("with sub-sampling and ML method - result:")
       bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
       m0opt=quantile(Y[1:N/4], probs = c(.5))
       mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
 
-      cat("bornem0: ",bornem0,"\n")
-      cat("m0opt: ",m0opt,"\n")
-      cat("mmopt: ",mmopt,"\n")
-
+      cat("range m0: ",bornem0,"\n")
+      cat("Initial m0opt: ",m0opt,"\n")
+      cat("Initial mmopt: ",mmopt,"\n")
       popt=0.5
       #--- Saving datain before sub-sampling
       YY=Y
@@ -197,7 +209,6 @@ kfino_fit<-function(datain,Tvar,Yvar,
       NN=N
 
       Subechant=sort(sample(1:NN,50))
-      #Subechant=sort(.Internal(sample(NN, 50L, FALSE, NULL)))
       Y=YY[Subechant]
       Tps=TpsTps[Subechant]
       N=50
@@ -217,9 +228,8 @@ kfino_fit<-function(datain,Tvar,Yvar,
         for (mm in seq((m0-5),(m0+20),2) ){
           for (p in seqp){
             # A voir si 50 sous-echantillons au hasard suffisent. Comme dans
-            #  Robbins Monroe, permet aussi de reduire l'impact de la troncature
+            # Robbins Monroe, permet aussi de reduire l'impact de la troncature
             Subechant=sort(sample(1:NN,50))
-            #Subechant=sort(.Internal(sample(NN, 50L, FALSE, NULL)))
             Y=YY[Subechant]
             Tps=TpsTps[Subechant]
 
@@ -250,8 +260,10 @@ kfino_fit<-function(datain,Tvar,Yvar,
       Y=YY
       Tps=TpsTps
       N=NN
-      print("Optimised parameters: ")
-      print(c(mmopt,popt,m0opt))
+      print("Optimised parameters with ML method: ")
+      cat("Optimized m0: ",m0opt,"\n")
+      cat("Optimized mm: ",mmopt,"\n")
+      cat("Optimized pp: ",popt,"\n")
       print("-------:")
 
       resultat=KBO_known(param=list(mm=mmopt,
@@ -267,75 +279,118 @@ kfino_fit<-function(datain,Tvar,Yvar,
                          threshold=threshold,Y=Y,Tps=Tps,N=N,kappa=kappa)
 
     } else if (N > 50){
-      # optim sans sous-echantillonnage
-      print("-------:")
-      print("Optimisation of initial parameters - result:")
-      print("no sub-sampling performed:")
-      bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
-      m0opt=quantile(Y[1:N/4], probs = c(.5))
-      mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
-
-      cat("bornem0: ",bornem0,"\n")
-      cat("m0opt: ",m0opt,"\n")
-      cat("mmopt: ",mmopt,"\n")
-
-      popt=0.5
-
-      Vopt=KBO_L(list(m0=m0opt,
-                      mm=mmopt,
-                      pp=popt,
-                      aa=aa,
-                      expertMin=expertMin,
-                      expertMax=expertMax,
-                      sigma2_mm=sigma2_mm,
-                      sigma2_m0=sigma2_m0,
-                      sigma2_pp=sigma2_pp,
-                      K=K),
-                 Y=Y,Tps=Tps,N=N,dix=dix,kappaOpt=kappaOpt)
-
-      for (m0 in seq(bornem0[1],bornem0[2],2) ){
-        for (mm in seq((m0-5),(m0+20),2) ){
-          for (p in seqp){
-            V=KBO_L(list(m0=m0,
-                         mm=mm,
-                         pp=p,
-                         aa=aa,
-                         expertMin=expertMin,
-                         expertMax=expertMax,
-                         sigma2_mm=sigma2_mm,
-                         sigma2_m0=sigma2_m0,
-                         sigma2_pp=sigma2_pp,
-                         K=K),
+      # optimization without sub-sampling
+      if (method == "EM"){
+        print("-------:")
+        print("Optimisation of initial parameters with EM method - result:")
+        print("no sub-sampling performed:")
+        bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
+        m0opt=quantile(Y[1:N/4], probs = c(.5))
+        mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
+        
+        cat("range m0: ",bornem0,"\n")
+        cat("initial m0opt: ",m0opt,"\n")
+        cat("initial mmopt: ",mmopt,"\n")
+        
+        popt=0.5
+        
+        EMopt=KBO_EM(param=list(m0=m0opt,
+                               mm=mmopt,
+                               pp=popt,
+                               aa=aa,
+                               expertMin=expertMin,
+                               expertMax=expertMax,
+                               sigma2_mm=sigma2_mm,
+                               sigma2_m0=sigma2_m0,
+                               sigma2_pp=sigma2_pp,
+                               K=K),
                     Y=Y,Tps=Tps,N=N,dix=dix,kappaOpt=kappaOpt)
-
-            if (V > Vopt){
-              Vopt=V
-              m0opt=m0
-              mmopt=mm
-              popt=p
+        print("Optimised parameters with EM method: ")
+        cat("Optimized m0: ",EMopt$m0[[1]],"\n")
+        cat("Optimized mm: ",EMopt$mm[[1]],"\n")
+        cat("Optimized pp: ",EMopt$pp[[1]],"\n")
+        print("-------:")
+        resultat=KBO_known(param=list(mm=EMopt$mm[[1]],
+                                      pp=EMopt$pp[[1]],
+                                      m0=EMopt$m0[[1]],
+                                      aa=aa,
+                                      expertMin=expertMin,
+                                      expertMax=expertMax,
+                                      sigma2_m0=sigma2_m0,
+                                      sigma2_mm=sigma2_mm,
+                                      sigma2_pp=sigma2_pp,
+                                      K=K),
+                           threshold=threshold,Y=Y,Tps=Tps,N=N,kappa=kappa)
+        
+      } else if (method == "ML"){
+        print("-------:")
+        print("Optimisation of initial parameters with ML method - result:")
+        print("no sub-sampling performed:")
+        bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
+        m0opt=quantile(Y[1:N/4], probs = c(.5))
+        mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
+        
+        cat("range m0: ",bornem0,"\n")
+        cat("initial m0opt: ",m0opt,"\n")
+        cat("initial mmopt: ",mmopt,"\n")
+        
+        popt=0.5
+        
+        Vopt=KBO_L(list(m0=m0opt,
+                        mm=mmopt,
+                        pp=popt,
+                        aa=aa,
+                        expertMin=expertMin,
+                        expertMax=expertMax,
+                        sigma2_mm=sigma2_mm,
+                        sigma2_m0=sigma2_m0,
+                        sigma2_pp=sigma2_pp,
+                        K=K),
+                   Y=Y,Tps=Tps,N=N,dix=dix,kappaOpt=kappaOpt)
+        for (m0 in seq(bornem0[1],bornem0[2],2) ){
+          for (mm in seq((m0-5),(m0+20),2) ){
+            for (p in seqp){
+              V=KBO_L(list(m0=m0,
+                           mm=mm,
+                           pp=p,
+                           aa=aa,
+                           expertMin=expertMin,
+                           expertMax=expertMax,
+                           sigma2_mm=sigma2_mm,
+                           sigma2_m0=sigma2_m0,
+                           sigma2_pp=sigma2_pp,
+                           K=K),
+                      Y=Y,Tps=Tps,N=N,dix=dix,kappaOpt=kappaOpt)
+              
+              if (V > Vopt){
+                Vopt=V
+                m0opt=m0
+                mmopt=mm
+                popt=p
+              }
             }
           }
         }
+        
+        print("Optimised parameters: ")
+        cat("Optimized m0: ",m0opt,"\n")
+        cat("Optimized mm: ",mmopt,"\n")
+        cat("Optimized pp: ",popt,"\n")
+        print("-------:")
+        resultat=KBO_known(param=list(mm=mmopt,
+                                      pp=popt,
+                                      m0=m0opt,
+                                      aa=aa,
+                                      expertMin=expertMin,
+                                      expertMax=expertMax,
+                                      sigma2_m0=sigma2_m0,
+                                      sigma2_mm=sigma2_mm,
+                                      sigma2_pp=sigma2_pp,
+                                      K=K),
+                           threshold=threshold,Y=Y,Tps=Tps,N=N,kappa=kappa)
       }
 
-      print("Optimised parameters: ")
-      print(c(mmopt,popt,m0opt))
-      print("-------:")
-
-      resultat=KBO_known(param=list(mm=mmopt,
-                                    pp=popt,
-                                    m0=m0opt,
-                                    aa=aa,
-                                    expertMin=expertMin,
-                                    expertMax=expertMax,
-                                    sigma2_m0=sigma2_m0,
-                                    sigma2_mm=sigma2_mm,
-                                    sigma2_pp=sigma2_pp,
-                                    K=K),
-                         threshold=threshold,Y=Y,Tps=Tps,N=N,kappa=kappa)
-
     } else {
-      # N petit, si trop petit on ne fait rien
       if (N > 5) {
         # Not enough data - no optim
         if (is.null(param[["m0"]])){
@@ -346,7 +401,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
         }
         print("-------:")
         print("Optimisation of initial parameters - result:")
-        print("Not enough data => so no optimisation performed:")
+        print("Not enough data => No optimisation performed:")
         print("Used parameters: ")
         print(X)
         print("-------:")
diff --git a/R/utils_functions.R b/R/utils_functions.R
index 512ca6e..9f59033 100644
--- a/R/utils_functions.R
+++ b/R/utils_functions.R
@@ -1,3 +1,12 @@
+#-------------------------------------------------------------------
+# utils_functions.R
+# some useful functions for kfino method
+# loi_outlier()
+# KBO_known()
+# KBO_L()
+# KBO_EM()
+#-------------------------------------------------------------------
+
 #' loi_outlier This function defines an outlier distribution (Surface of a
 #' trapezium) and uses input parameters given in the main function kfino_fit()
 #'
@@ -23,18 +32,28 @@ loi_outlier<-function(y,
 
 
 #--------------------------------------------------------------------------
-#' KBO_known
+#' KBO_known a function to calculate a likelihood on given parameters
 #'
 #' @param param list, a list of 10 input parameters for mm, pp and m0
 #' @param threshold numeric, threshold for CI, default 0.5
 #' @param kappa numeric, truncation setting, default 10
-#' @param Y num
-#' @param Tps num
-#' @param N num
-#'
+#' @param Y numeric vector, the numeric variable to predict, given as a
+#'        vector of observations (not a column name)
+#' @param Tps numeric vector, the time points matching Y.
+#'            Time can be expressed as a proportion of day in seconds
+#'            (same length as Y)
+#' @param N numeric, length of Y
 #'
 #' @details uses the same input parameter list than the main function
 #' @return a list
+#' \describe{
+#'  \item{prediction}{vector, the prediction of weights}
+#'  \item{label}{vector, probability to be an outlier}
+#'  \item{likelihood}{numeric, the calculated likelihood}
+#'  \item{lwr}{vector of lower bound CI of the prediction }
+#'  \item{upr}{vector of upper bound CI of the prediction }
+#'  \item{flag}{char, is an outlier or not}
+#' }
 #' @export
 #'
 #' @examples
@@ -55,7 +74,7 @@ loi_outlier<-function(y,
 #'             seqp=seq(0.5,0.7,0.1))
 #' print(Y)
 #' KBO_known(param=param2,threshold=0.5,kappa=10,Y=Y,Tps=Tps,N=N)
-KBO_known<-function(param,threshold,kappa,Y,Tps,N){
+KBO_known<-function(param,threshold,kappa=10,Y,Tps,N){
   # load objects
   mm=param[["mm"]]
   pp=param[["pp"]]
@@ -94,7 +113,7 @@ KBO_known<-function(param,threshold,kappa,Y,Tps,N){
   #iteration (1.1.2)
   #-----------------------
   #// Pour l'instant, je fais comme si kappa<N-1 mettre un if sinon
-  #avant troncature
+  #before truncation
   for (k in 1:(kappa-1)){
     # je crée le vecteur des nouveaux m, ##pour plus tard: au lieu de les
     # faire vide, je prend direct m_{u0}
@@ -102,7 +121,7 @@ KBO_known<-function(param,threshold,kappa,Y,Tps,N){
     sigma2new=rep(0 ,2^(k+1))
     pnew=rep(0 ,2^(k+1))
     Lnew=rep(0 ,2^(k+1))
-    # CR = constante de renormalisation qui intervient dans le dénominateur des pu
+    # CR: renormalization constant that intervenes in the denominator of the pu
     qnew=rep(0 ,2^(k+1))
     diffTps<-Tps[k+1] - Tps[k]
     #--- numérateur de pu0
@@ -138,20 +157,20 @@ KBO_known<-function(param,threshold,kappa,Y,Tps,N){
     sigma2_new<-c(sigma2_new,sum(p*sigma2) + sum(p*m^2) - (sum(p*m))^2)
   }
 
-  #apres troncature
+  # after truncation
   #----------------------
   for (k in kappa:(N-1)){
-    # je cree le vecteur des nouveaux m,
-    # pour plus tard: au lieu de les faire vide, je prend direct m_{u0}
+    # Initialization of the new m vector
+    # TODO: instead of 0, perhaps set directly at m_{u0}
     mnew=rep(0,2^(kappa+1))
     sigma2new=rep(0 ,2^(kappa+1))
     pnew=rep(0 ,2^(kappa+1))
     Lnew=rep(0 ,2^(kappa+1))
-    # CR = constante de renormalisation qui intervient dans le dénominateur des pu
+    # CR: renormalization constant that intervenes in the denominator of the pu
     qnew=rep(0 ,2^(kappa+1))
     diffTps<-Tps[k+1] - Tps[k]
 
-    #--- numérateur de pu0
+    #--- pu0 numerator
     tpbeta<-loi_outlier(Y[k+1],K,expertMin,expertMax)
 
     pnew[1:(2^kappa)]=p[1:(2^kappa)]*(1-pp)*tpbeta
@@ -212,9 +231,12 @@ KBO_known<-function(param,threshold,kappa,Y,Tps,N){
 #'
 #' @param param a list of 10 input parameters mm, pp and m0
 #' @param kappaOpt numeric, truncation setting, default 7
-#' @param Y num
-#' @param Tps num
-#' @param N num
+#' @param Y numeric vector, the numeric variable to predict, given as a
+#'        vector of observations (not a column name)
+#' @param Tps numeric vector, the time points matching Y.
+#'            Time can be expressed as a proportion of day in seconds
+#'            (same length as Y)
+#' @param N numeric, length of Y
 #' @param dix num
 #'
 #' @details uses the same input parameter list than the main function
@@ -239,7 +261,7 @@ KBO_known<-function(param,threshold,kappa,Y,Tps,N){
 #'             seqp=seq(0.5,0.7,0.1))
 #' print(Y)
 #' KBO_L(param=param2,kappaOpt=7,Y=Y,Tps=Tps,N=N,dix=6)
-KBO_L<-function(param,kappaOpt,Y,Tps,N,dix){
+KBO_L<-function(param,kappaOpt=7,Y,Tps,N,dix){
   # load objects
   mm=param[["mm"]]
   pp=param[["pp"]]
@@ -252,9 +274,9 @@ KBO_L<-function(param,kappaOpt,Y,Tps,N,dix){
   sigma2_pp<-param[["sigma2_pp"]]
   K<-param[["K"]]
 
-  #---- paramètre de troncature
-  # Ici je met kappa =7, ca me fais retirer les proba <0.01 au lieu de 0.001
-  # comme qd kappa=10 mais ca divise par 10 le temps de calcul
+  #---- truncation parameter
+  # Here, kappa is set to 7, the probabilities are < 0.01 instead of 0.001
+  # with kappa=10 and so computing time is divided by 10.
   kappa=kappaOpt
 
   # initialisation (1.1.1)
@@ -277,13 +299,13 @@ KBO_L<-function(param,kappaOpt,Y,Tps,N,dix){
   #iteration (1.1.2)
   #-----------------------
   #// Pour l'instant, je fais comme si kappa<N-1 mettre un if sinon
-  # avant troncature
+  # before truncation
   for (k in 1:(kappa-1)){
     mnew=rep(0,2^(k+1))
     sigma2new=rep(0 ,2^(k+1))
     pnew=rep(0 ,2^(k+1))
     Lnew=rep(0 ,2^(k+1))
-    # CR = constante de renormalisation qui intervient dans le dénominateur des pu
+    # CR: renormalization constant that intervenes in the denominator of the pu
     qnew=rep(0 ,2^(k+1))
     diffTps<-Tps[k+1] - Tps[k]
     #--- numérateur de pu0
@@ -310,7 +332,7 @@ KBO_L<-function(param,kappaOpt,Y,Tps,N,dix){
     q=dix*qnew
   }
 
-  # apres troncature
+  #  after truncation
   #----------------------
   for (k in kappa:(N-1)){
     # je cree le vecteur des nouveaux m,
@@ -318,7 +340,7 @@ KBO_L<-function(param,kappaOpt,Y,Tps,N,dix){
     sigma2new=rep(0 ,2^(kappa+1))
     pnew=rep(0 ,2^(kappa+1))
     Lnew=rep(0 ,2^(kappa+1))
-    # CR = constante de renormalisation qui intervient dans le dénominateur des pu
+    # CR: renormalization constant that intervenes in the denominator of the pu
     qnew=rep(0 ,2^(kappa+1))
     diffTps<-Tps[k+1] - Tps[k]
     #--- numérateur de pu0
@@ -352,4 +374,259 @@ KBO_L<-function(param,kappaOpt,Y,Tps,N,dix){
   return(Vraisemblance)
 }
 
+#------------------------------------------------------------------------
+#' KBO_EM a function to calculate a likelihood on optimized parameters by
+#' an Expectation-Maximization (EM) algorithm
+#'
+#' @param param a list of 10 input parameters mm, pp and m0
+#' @param kappaOpt numeric, truncation setting, default 7
+#' @param Y numeric vector, the numeric variable to predict, given as a
+#'        vector of observations (not a column name)
+#' @param Tps numeric vector, the time points matching Y.
+#'            Time can be expressed as a proportion of day in seconds
+#'            (same length as Y)
+#' @param N numeric, length of Y
+#' @param dix num
+#'
+#' @details uses the same input parameter list than the main function
+#' @return a list:
+#' \describe{
+#'  \item{m0}{numeric, optimized m0}
+#'  \item{mm}{numeric, optimized mm}
+#'  \item{pp}{numeric, optimized pp}
+#'  \item{likelihood}{numeric, the calculated likelihood}
+#' }
+#' @export
+#'
+#' @examples
+#' set.seed(1234)
+#' Y<-rnorm(n=10,mean=50,4)
+#' Tps<-seq(1,10)
+#' N=10
+#' param2<-list(m0=41,
+#'              mm=45,
+#'              pp=0.5,
+#'              aa=0.001,
+#'              expertMin=30,
+#'              expertMax=75,
+#'              sigma2_m0=1,
+#'              sigma2_mm=0.05,
+#'              sigma2_pp=5,
+#'              K=2,
+#'             seqp=seq(0.5,0.7,0.1))
+#' print(Y)
+#' KBO_EM(param=param2,kappaOpt=7,Y=Y,Tps=Tps,N=N,dix=6)
+KBO_EM<-function(param,kappaOpt,Y,Tps,N,dix){
+  # load objects
+  mm<-param[["mm"]]
+  pp<-param[["pp"]]
+  m0<-param[["m0"]]
+  aa<-param[["aa"]]
+  expertMin<-param[["expertMin"]]
+  expertMax<-param[["expertMax"]]
+  sigma2_m0<-param[["sigma2_m0"]]
+  sigma2_mm<-param[["sigma2_mm"]]
+  sigma2_pp<-param[["sigma2_pp"]]
+  K<-param[["K"]]
+  
+  #---- truncation parameter
+  # Here, kappa is set to 7, the probabilities are < 0.01 instead of 0.001
+  # with kappa=10 and so computing time is divided by 10.
+  kappa=kappaOpt
+  
+  # initialisation (1.1.1)
+  #--------------------
+  m1= (sigma2_pp*m0 + Y[1]*sigma2_m0)/(sigma2_m0+sigma2_pp)
+  sigma1=(sigma2_m0*sigma2_pp)/(sigma2_m0+sigma2_pp)
+  
+  l0<- loi_outlier(Y[1],K,expertMin,expertMax)
+  loinorm1<-dnorm(Y[1],m0, sqrt(sigma2_m0+sigma2_pp))
+  
+  p0= ((1-pp)*l0) / (pp*loinorm1 + (1-pp)*l0)
+  p1=1-p0
+  
+  m=c(m0,m1)
+  p=c(p0,p1)
+  sigma2=c(sigma2_m0,sigma1)
+  L=dix*c(l0,loinorm1) # note: multiplied by dix to scale it up
+  q=c(1,1) #attention
+  
+  #Pmean=c(p1)#A VIRER
+  
+  # new parameters
+  a=c(1,sigma2_pp/(sigma2_m0+sigma2_pp))
+  b=c(0,0)
+  c=c(0, Y[1]*sigma2_m0/(sigma2_m0+sigma2_pp))
+  matrice_A=matrix(a*a/(sigma2 + sigma2_pp), ncol=1, nrow=2, byrow=TRUE)
+  matrice_B=matrix(b*b/(sigma2 + sigma2_pp), ncol=1, nrow=2, byrow=TRUE)
+  matrice_C=matrix(a*b/(sigma2 + sigma2_pp), ncol=1, nrow=2, byrow=TRUE)
+  matrice_Ya=matrix((a*(Y[1]-c)/(sigma2 + sigma2_pp)), ncol=1, nrow=2, byrow=TRUE)
+  matrice_Yb=matrix((b*(Y[1]-c)/(sigma2 + sigma2_pp)), ncol=1, nrow=2, byrow=TRUE)
+  
+  Z=matrix(c(0,1), ncol=1, nrow=2, byrow=TRUE)
+  #iteration (1.1.2)
+  #-----------------------
+  # kappa < N-1 for now. add an if condition if necessary
+  # before truncation
+  
+  for (k in 1:(kappa-1)){
+    mnew=rep(0,2^(k+1))
+    sigma2new=rep(0 ,2^(k+1))
+    pnew=rep(0 ,2^(k+1))
+    Lnew=rep(0 ,2^(k+1))
+    anew=rep(0 ,2^(k+1))
+    bnew=rep(0 ,2^(k+1))
+    cnew=rep(0 ,2^(k+1))
+    
+    # CR: renormalization constant that intervenes in the denominator of the pu
+    qnew=rep(0 ,2^(k+1))
+    diffTps<-Tps[k+1] - Tps[k]
+    #--- pu0 numerator
+    tpbeta<-loi_outlier(Y[k+1],K,expertMin,expertMax)
+    pnew[1:(2^k)]=p[1:(2^k)]*(1-pp)*tpbeta
+    Lnew[1:(2^k)]=L[1:(2^k)]*tpbeta
+    mnew[1:(2^k)]= m[1:(2^k)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
+    sigma2new[1:(2^k)] = sigma2[1:(2^k)]*exp(-2*aa*(diffTps)) +
+      (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
+    qnew[1:(2^k)] <- q[1:(2^k)]*(1-pp)
+    qnew[(1+2^k):2^(k+1)] <- q[1:(2^k)]*pp
+    # new parameters
+    anew[1:(2^k)]= a[1:(2^k)]*exp(-aa*diffTps)
+    bnew[1:(2^k)]= b[1:(2^k)]*exp(-aa*diffTps) + (1-exp(-aa*diffTps))
+    cnew[1:(2^k)]= c[1:(2^k)]*exp(-aa*diffTps)
+    
+    # continuation
+    sommevar=sigma2new[1:(2^k)] + sigma2_pp
+    tpnorm<-dnorm(Y[k+1], mnew[1:(2^k)], sqrt(sommevar))
+    pnew[(1+2^k):2^(k+1)]=p[1:(2^k)]*pp*tpnorm
+    Lnew[(1+2^k):2^(k+1)]=L[1:(2^k)]*tpnorm
+    mnew[(1+2^k):2^(k+1)] = (sigma2new[1:(2^k)]*Y[k+1] + mnew[1:(2^k)]*sigma2_pp)/sommevar
+    sigma2new[(1+2^k):2^(k+1)] = sigma2new[1:(2^k)] * sigma2_pp/sommevar
+    # new parameters
+    anew[(1+2^k):2^(k+1)]= a[1:(2^k)]*sigma2_pp/sommevar
+    bnew[(1+2^k):2^(k+1)]= b[1:(2^k)]*sigma2_pp/sommevar
+    cnew[(1+2^k):2^(k+1)]= c[1:(2^k)]*sigma2_pp/sommevar + Y[k+1]*sigma2new[1:(2^k)]/sommevar
+    
+    #Proba1<-sum(pnew[(1+2^k):2^(k+1)])#A VIRER
+    #Pmean=c(Pmean,Proba1/sum(pnew))#A VIRER
+    
+    m=mnew
+    sigma2=sigma2new
+    p=pnew/sum(pnew)
+    # new parameters
+    a=anew
+    b=bnew
+    c=cnew
+    
+    matrice_A=cbind(rbind(matrice_A,matrice_A), a*a/(sigma2 + sigma2_pp)) 
+    matrice_B=cbind(rbind(matrice_B,matrice_B), b*b/(sigma2 + sigma2_pp)) 
+    matrice_C=cbind(rbind(matrice_C,matrice_C), a*b/(sigma2 + sigma2_pp)) 
+    matrice_Ya=cbind(rbind(matrice_Ya,matrice_Ya), a*(Y[k+1]-c)/(sigma2 + sigma2_pp)) 
+    matrice_Yb=cbind(rbind(matrice_Yb,matrice_Yb), b*(Y[k+1]-c)/(sigma2 + sigma2_pp)) 
+    
+    Znew=matrix(rep(0,(k+1)*2^(k+1)), ncol=k+1, nrow=2^(k+1), byrow=TRUE)
+    Znew[(1:2^k),]=cbind(Z, rep(0,2^k))
+    Znew[(2^k+1):(2^(k+1)),]=cbind(Z, rep(1,2^k))
+    Z=Znew
+    
+    L=dix*Lnew # multiplied by dix to scale it up
+    q=dix*qnew
+  }
+  
+  # after truncation
+  #----------------------
+  for (k in kappa:(N-1)){
+    # Initialisation of the new m vectors
+    mnew=rep(0,2^(kappa+1))
+    sigma2new=rep(0 ,2^(kappa+1))
+    pnew=rep(0 ,2^(kappa+1))
+    Lnew=rep(0 ,2^(kappa+1))
+    anew=rep(0 ,2^(kappa+1))
+    bnew=rep(0 ,2^(kappa+1))
+    cnew=rep(0 ,2^(kappa+1))
+    # CR: renormalization constant that intervenes in the denominator of the pu
+    qnew=rep(0 ,2^(kappa+1))
+    diffTps<-Tps[k+1] - Tps[k]
+    #--- pu0 numerator
+    tpbeta<-loi_outlier(Y[k+1],K,expertMin,expertMax)
+    pnew[1:(2^kappa)]=p[1:(2^kappa)]*(1-pp)*tpbeta
+    Lnew[1:(2^kappa)]=L[1:(2^kappa)]*tpbeta
+    # m_uO
+    mnew[1:(2^kappa)]= m[1:(2^kappa)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps))
+    sigma2new[1:(2^kappa)] = sigma2[1:(2^kappa)]*exp(-2*aa*(diffTps)) +
+      (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
+    qnew[1:(2^kappa)]=q[1:(2^kappa)]*(1-pp)
+    qnew[(1+2^kappa):2^(kappa+1)]=q[1:(2^kappa)]*pp
+    # new parameters
+    anew[1:(2^kappa)]= a[1:(2^kappa)]*exp(-aa*diffTps)
+    bnew[1:(2^kappa)]= b[1:(2^kappa)]*exp(-aa*diffTps) + (1-exp(-aa*diffTps))
+    cnew[1:(2^kappa)]= c[1:(2^kappa)]*exp(-aa*diffTps)
+    sommevar=sigma2new[1:(2^kappa)] + sigma2_pp
+    tpnorm<-dnorm(Y[k+1], mnew[1:(2^kappa)], sqrt(sommevar))
+    pnew[(1+2^kappa):2^(kappa+1)]=p[1:(2^kappa)]*pp*tpnorm
+    Lnew[(1+2^kappa):2^(kappa+1)]=L[1:(2^kappa)]*tpnorm
+    # m_u1
+    mnew[(1+2^kappa):2^(kappa+1)] = (sigma2new[1:(2^kappa)]*Y[k+1] +
+                                     mnew[1:(2^kappa)]*sigma2_pp)/sommevar
+    sigma2new[(1+2^kappa):2^(kappa+1)]=sigma2new[1:(2^kappa)] * sigma2_pp/sommevar
+    
+    # new parameters
+    anew[(1+2^kappa):2^(kappa+1)]= a[1:(2^kappa)]*sigma2_pp/sommevar
+    bnew[(1+2^kappa):2^(kappa+1)]= b[1:(2^kappa)]*sigma2_pp/sommevar
+    cnew[(1+2^kappa):2^(kappa+1)]= c[1:(2^kappa)]*sigma2_pp/sommevar + 
+                                   Y[k+1]*sigma2new[1:(2^kappa)]/sommevar
+    
+    selection=order(pnew, decreasing=T)[1:2^kappa]
+    
+    #Proba1<-sum(pnew[(1+2^kappa):2^(kappa+1)]) #A VIRER
+    #Pmean=c(Pmean,Proba1/sum(pnew))#A VIRER
+    
+    m=mnew[selection]
+    sigma2=sigma2new[selection]
+    p=pnew[selection]/sum(pnew[selection])
+    L=dix*Lnew[selection] # multiplied by dix to scale it up
+    q=dix*qnew[selection]
+    
+    Znew=matrix(rep(0,(k+1)*2^(kappa+1)), ncol=k+1, nrow=2^(kappa+1), byrow=TRUE)
+    Znew[1:2^kappa,]=cbind(Z, rep(0,2^kappa))
+    Znew[((1+2^kappa):2^(kappa+1)),]=cbind(Z, rep(1,2^kappa))
+    
+    Z=Znew[selection,]
+    
+    a=anew[selection]
+    b=bnew[selection]
+    c=cnew[selection]
+    matrice_A=(cbind(rbind(matrice_A,matrice_A), 
+                     anew*anew/(sigma2new + sigma2_pp)))[selection,]
+    matrice_B=(cbind(rbind(matrice_B,matrice_B), 
+                     bnew*bnew/(sigma2new + sigma2_pp)))[selection,]
+    matrice_C=(cbind(rbind(matrice_C,matrice_C), 
+                     anew*bnew/(sigma2new + sigma2_pp)))[selection,]
+    matrice_Ya=(cbind(rbind(matrice_Ya,matrice_Ya), 
+                      anew*(Y[k+1]-cnew)/(sigma2new + sigma2_pp)))[selection,]
+    matrice_Yb=(cbind(rbind(matrice_Yb,matrice_Yb), 
+                      bnew*(Y[k+1]-cnew)/(sigma2new + sigma2_pp)))[selection,]
+  }
+  
+  Vraisemblance=L%*%q
+  
+  A=(Z[,1]%*%p)/(sigma2_m0+sigma2_pp) + sum((p%*%(matrice_A*Z))[1:(N-1)])
+  Ya=(Z[,1]%*%p)*Y[1]/(sigma2_m0+sigma2_pp) + sum((p%*%(matrice_Ya*Z))[1:(N-1)])
+  B= sum((p%*%(matrice_B*Z))[1:(N-1)])
+  C= sum((p%*%(matrice_C*Z))[1:(N-1)])
+  Yb= sum((p%*%(matrice_Yb*Z))[1:(N-1)])
+  
+  newm0= (C*Yb-B*Ya)/(C^2-A*B)
+  newmm= (C*Ya-A*Yb)/(C^2-A*B)
+  ppnew=sum(p%*%Z)/N
+  
+  # Outputs
+  resultat=list("m0"=newm0,
+                "mm"=newmm, 
+                "pp"=ppnew, 
+                "likelihood"=Vraisemblance)
+  return(resultat)
+}
+
+
 #------------------------ End of file -----------------------------------
diff --git a/man/KBO_EM.Rd b/man/KBO_EM.Rd
new file mode 100644
index 0000000..578b5da
--- /dev/null
+++ b/man/KBO_EM.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils_functions.R
+\name{KBO_EM}
+\alias{KBO_EM}
+\title{KBO_EM a function to calculate a likelihood on optimized parameters by
+an Expectation-Maximization (EM) algorithm}
+\usage{
+KBO_EM(param, kappaOpt, Y, Tps, N, dix)
+}
+\arguments{
+\item{param}{a list of 10 input parameters mm, pp and m0}
+
+\item{kappaOpt}{numeric, truncation setting, default 7}
+
+\item{Y}{numeric vector, the numeric variable to predict, given as a
+vector of observations (not a column name)}
+
+\item{Tps}{numeric vector, the time points matching Y.
+Time can be expressed as a proportion of day in seconds
+(same length as Y)}
+
+\item{N}{numeric, length of Y}
+
+\item{dix}{num}
+}
+\value{
+a list:
+\describe{
+ \item{m0}{numeric, optimized m0}
+ \item{mm}{numeric, optimized mm}
+ \item{pp}{numeric, optimized pp}
+ \item{likelihood}{numeric, the calculated likelihood}
+}
+}
+\description{
+KBO_EM a function to calculate a likelihood on optimized parameters by
+an Expectation-Maximization (EM) algorithm
+}
+\details{
+uses the same input parameter list than the main function
+}
+\examples{
+set.seed(1234)
+Y<-rnorm(n=10,mean=50,4)
+Tps<-seq(1,10)
+N=10
+param2<-list(m0=41,
+             mm=45,
+             pp=0.5,
+             aa=0.001,
+             expertMin=30,
+             expertMax=75,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+            seqp=seq(0.5,0.7,0.1))
+print(Y)
+KBO_EM(param=param2,kappaOpt=7,Y=Y,Tps=Tps,N=N,dix=6)
+}
diff --git a/man/KBO_L.Rd b/man/KBO_L.Rd
index bb30361..ba5adef 100644
--- a/man/KBO_L.Rd
+++ b/man/KBO_L.Rd
@@ -4,18 +4,21 @@
 \alias{KBO_L}
 \title{KBO_L a function to calculate a likelihood on optimized parameters}
 \usage{
-KBO_L(param, kappaOpt, Y, Tps, N, dix)
+KBO_L(param, kappaOpt = 7, Y, Tps, N, dix)
 }
 \arguments{
 \item{param}{a list of 10 input parameters mm, pp and m0}
 
 \item{kappaOpt}{numeric, truncation setting, default 7}
 
-\item{Y}{num}
+\item{Y}{character, name of the numeric variable to predict in the
+data.frame datain}
 
-\item{Tps}{num}
+\item{Tps}{character, time column name in the data.frame datain, a 
+numeric vector.
+Tvar can be expressed as a proportion of day in seconds}
 
-\item{N}{num}
+\item{N}{numeric, length of Y}
 
 \item{dix}{num}
 }
diff --git a/man/KBO_known.Rd b/man/KBO_known.Rd
index f16a86a..641f638 100644
--- a/man/KBO_known.Rd
+++ b/man/KBO_known.Rd
@@ -2,9 +2,9 @@
 % Please edit documentation in R/utils_functions.R
 \name{KBO_known}
 \alias{KBO_known}
-\title{KBO_known}
+\title{KBO_known a function to calculate a likelihood on given parameters}
 \usage{
-KBO_known(param, threshold, kappa, Y, Tps, N)
+KBO_known(param, threshold, kappa = 10, Y, Tps, N)
 }
 \arguments{
 \item{param}{list, a list of 10 input parameters for mm, pp and m0}
@@ -13,17 +13,28 @@ KBO_known(param, threshold, kappa, Y, Tps, N)
 
 \item{kappa}{numeric, truncation setting, default 10}
 
-\item{Y}{num}
+\item{Y}{character, name of the numeric variable to predict in the
+data.frame datain}
 
-\item{Tps}{num}
+\item{Tps}{character, time column name in the data.frame datain, a 
+numeric vector.
+Tvar can be expressed as a proportion of day in seconds}
 
-\item{N}{num}
+\item{N}{numeric, length of Y}
 }
 \value{
 a list
+\describe{
+ \item{prediction}{vector, the prediction of weights}
+ \item{label}{vector, probability to be an outlier}
+ \item{likelihood}{numeric, the calculated likelihood}
+ \item{lwr}{vector, the lower bound of the prediction CI}
+ \item{upr}{vector, the upper bound of the prediction CI}
+ \item{flag}{character, whether the observation is an outlier or not}
+}
 }
 \description{
-KBO_known
+KBO_known a function to calculate a likelihood on given parameters
 }
 \details{
 uses the same input parameter list than the main function
diff --git a/man/kfino_fit.Rd b/man/kfino_fit.Rd
index d6396eb..9278756 100644
--- a/man/kfino_fit.Rd
+++ b/man/kfino_fit.Rd
@@ -10,6 +10,7 @@ kfino_fit(
   Yvar,
   param = NULL,
   doOptim = TRUE,
+  method = "ML",
   threshold = 0.5,
   kappa = 10,
   kappaOpt = 7
@@ -28,6 +29,10 @@ Tvar can be expressed as a proportion of day in seconds}
 \item{doOptim}{logical, if TRUE optimisation of the initial parameters
 default TRUE}
 
+\item{method}{character, the method used to optimize the initial parameters:
+Expectation-Maximization algorithm `"EM"` or Maximum 
+Likelihood `"ML"`, default `"ML"`}
+
 \item{threshold}{numeric, threshold to qualify an observation as outlier
 according to the label_pred, default 0.5}
 
@@ -70,19 +75,20 @@ The initialization parameter list param contains:
              default seq(0.5,0.7,0.1)}
 }
 It has to be given by the user following his knowledge of the animal or
-the dataset. All parameters are compulsory execpt m0, mm and pp that can be
-optimized by the algorithm. In the optimisation step, those three parameters
+the dataset. All parameters are compulsory except m0, mm and pp that can be
+optimized by the algorithm. In the optimization step, those three parameters
 are initialized according to the input data (between the expert
 range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
 m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A
 sub-sampling is performed to speed the algorithm if the number of possible
-observations studied is greater than 500.
+observations studied is greater than 500. Optimization is performed using 
+`"EM"` or `"ML"` methods.
 }
 \examples{
 data(spring1)
 library(dplyr)
 
-# --- With Optimisation on initial parameters
+# --- With Optimisation on initial parameters - ML method
 t0 <- Sys.time()
 param1<-list(m0=NULL,
              mm=NULL,
@@ -98,8 +104,16 @@ param1<-list(m0=NULL,
 
 resu1<-kfino_fit(datain=spring1,
               Tvar="dateNum",Yvar="Poids",
-              doOptim=TRUE,param=param1)
+              doOptim=TRUE,method="ML",param=param1)
+Sys.time() - t0
+
+# --- With Optimisation on initial parameters - EM method
+t0 <- Sys.time()
+resu1b<-kfino_fit(datain=spring1,
+              Tvar="dateNum",Yvar="Poids",
+              doOptim=TRUE,method="EM",param=param1)
 Sys.time() - t0
+
 # --- Without Optimisation on initial parameters
 t0 <- Sys.time()
 param2<-list(m0=41,
@@ -136,6 +150,6 @@ param3<-list(m0=NULL,
              seqp=seq(0.5,0.7,0.1))
 resu3<-kfino_fit(datain=merinos2,
               Tvar="dateNum",Yvar="Poids",
-              doOptim=TRUE,param=param3)
+              doOptim=TRUE,method="ML",param=param3)
 Sys.time() - t0
 }
-- 
GitLab