extrait fonctions internes pour accéler code

7e3aa72b · sanchezi · ed8ae1f7 · 7e3aa72b · 7e3aa72b · 7e3aa72b
Commit 7e3aa72b authored 3 years ago by sanchezi
--- a/NAMESPACE
+++ b/NAMESPACE
 # Generated by roxygen2: do not edit by hand
+export(KBO_L)
+export(KBO_known)
 export(kfino_fit)
+export(kfino_fit2)
 export(kfino_plot)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,.data)

--- a/R/kfino2.R
+++ b/R/kfino2.R
+#' kafino_fit2 a function to detect outlier with a Kalman Filtering approach
+#' @param datain an input data.frame of one time course to study (unique IDE)
+#' @param Tvar char, time column name in the data.frame datain, a numeric vector
+#'             Tvar can be expressed as a proportion of day in seconds
+#' @param Yvar char, name of the variable to predict in the data.frame datain
+#' @param param list, a list of initialization parameter
+#' @param doOptim logical, if TRUE optimisation of the initial parameters
+#'                default TRUE
+#' @param threshold numeric, threshold to qualify an observation as outlier
+#'        according to the label_pred, default 0.5
+#'
+#' @details The initialization parameter list param contains:
+#' \describe{
+#'  \item{mm}{target weight, NULL if the user wants to optimize it}
+#'  \item{pp}{probability to be correctly weighed, NULL if the user wants to
+#'            optimize it}
+#'  \item{m0}{Initial weight, NULL if the user wants to optimize it}
+#'  \item{aa}{numeric, rate of weight change, default 0.001 }
+#'  \item{expertMin}{numeric, the minimal weight expected by the user}
+#'  \item{expertMax}{numeric, the maximal weight expected by the user}
+#'  \item{sigma2_m0}{variance of m0}
+#'  \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
+#'        default 0.05}
+#'  \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
+#'        default 5}
+#'  \item{K}{numeric, a constant value in the outlier function (trapezoid),
+#'           by default K=2 - increasing K, XXX}
+#'  \item{seqp}{numeric, minimal pp probability to be correctly weighted.}
+#' }
+#' It can be given by the user following his knowledge of the animal or dataset
+#' or entirely constructed by the function. In the optimisation step, this
+#' vector is initialized according to the input data (between the expert
+#' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
+#' m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A
+#' sub-sampling is performed to speed the algorithm if the number of possible
+#' observations studied is greater than 500.
+#'
+#' @importFrom stats dnorm quantile na.omit
+#' @importFrom dplyr mutate filter left_join arrange %>%
+#' @importFrom dplyr .data if_else row_number select
+#' @return a S3 list with two data frames and a list of vectors of
+#' kafino results
+#' \describe{
+#' \item{detectOutlier}{The whole dataset with the detected outliers flagged
+#'                      and prediction}
+#' \item{PredictionOK}{A dataset with the predictions on possible values}
+#' \item{kafino.results}{kfino results (a list of vectors) on optimized input
+#'                      parameters or not}
+#' }
+#' @export
+#' @examples
+#' data(spring1)
+#' library(dplyr)
+#'
+#' # --- With Optimisation on initial parameters
+#' t0 <- Sys.time()
+#' param1<-list(m0=NULL,
+#'              mm=NULL,
+#'              pp=NULL,
+#'              aa=0.001,
+#'              expertMin=30,
+#'              expertMax=75,
+#'              sigma2_m0=1,
+#'              sigma2_mm=0.05,
+#'              sigma2_pp=5,
+#'              K=2,
+#'              seqp=seq(0.5,0.7,0.1))
+#'
+#' resu1<-kfino_fit2(datain=spring1,
+#'               Tvar="dateNum",Yvar="Poids",
+#'               doOptim=TRUE,param=param1)
+#' Sys.time() - t0
+#' # --- Without Optimisation on initial parameters
+#' t0 <- Sys.time()
+#' param2<-list(m0=41,
+#'              mm=45,
+#'              pp=0.5,
+#'              aa=0.001,
+#'              expertMin=30,
+#'              expertMax=75,
+#'              sigma2_m0=1,
+#'              sigma2_mm=0.05,
+#'              sigma2_pp=5,
+#'              K=2,
+#'              seqp=seq(0.5,0.7,0.1))
+#' resu2<-kfino_fit2(datain=spring1,
+#'               Tvar="dateNum",Yvar="Poids",
+#'               param=param2,
+#'               doOptim=FALSE)
+#' Sys.time() - t0
+#'
+#' # complex data on merinos2 dataset
+#' data(merinos2)
+#'
+#' t0 <- Sys.time()
+#' param3<-list(m0=NULL,
+#'              mm=NULL,
+#'              pp=NULL,
+#'              aa=0.001,
+#'              expertMin=10,
+#'              expertMax=45,
+#'              sigma2_m0=1,
+#'              sigma2_mm=0.05,
+#'              sigma2_pp=5,
+#'              K=2,
+#'              seqp=seq(0.5,0.7,0.1))
+#' resu3<-kfino_fit2(datain=merinos2,
+#'               Tvar="dateNum",Yvar="Poids",
+#'               doOptim=TRUE,param=param3)
+#' Sys.time() - t0
+kfino_fit2<-function(datain,Tvar,Yvar,
+                    param=NULL,
+                    doOptim=TRUE,threshold=0.5){
+  if( any(is.null(param[["expertMin"]]) |
+          is.null(param[["expertMax"]])) )
+    stop('You have to define expertMin and expertMax.')
+  if( any(!is.numeric(param[["expertMin"]]) |
+          !is.numeric(param[["expertMax"]])) )
+    stop('expertMin and expertMax must be numeric.')
+  if( !is.numeric(t(datain[,Yvar])))
+    stop('Input parameter Yvar must contain numeric values.')
+  if( !is.numeric(t(datain[,Tvar])))
+    stop('Input parameter Tvar must contain numeric values.')
+  #-------------------------------------------------------------
+  # load objects
+  m0<-param[["m0"]]
+  mm<-param[["mm"]]
+  pp<-param[["pp"]]
+  if(is.null(param[["aa"]])){ aa<-0.001 } else { aa<-param[["aa"]] }
+  expertMin<-param[["expertMin"]]
+  expertMax<-param[["expertMax"]]
+  if(is.null(param[["sigma2_m0"]])){
+    sigma2_m0<-1
+  } else {sigma2_m0<-param[["sigma2_m0"]]}
+  if(is.null(param[["sigma2_mm"]])){
+    sigma2_mm<-0.05
+  } else {sigma2_mm<-param[["sigma2_mm"]]}
+  if(is.null(param[["sigma2_pp"]])){
+    sigma2_pp<-5
+  } else {sigma2_pp<-param[["sigma2_pp"]]}
+  if(is.null(param[["K"]])){ K<-5 } else { K<-param[["K"]] }
+  seqp<-param[["seqp"]]
+  #-------------------------------------------------------------
+  # Flag impossible weight values (expert knowledge)
+  # the algorithm works on the dataset with possible values
+  # Create an artificial column numbering the rows for joining purpose
+  datain<-as.data.frame(datain)
+  datain<-datain %>%
+    mutate(rowNum=row_number(),
+           flag1=if_else(.data[[Yvar]] > expertMin &
+                           .data[[Yvar]] <= expertMax,"OK","Bad"))
+  tp.dt<-datain %>% filter(.data$flag1 == "OK")
+  Y<-tp.dt[,Yvar]
+  N<-nrow(tp.dt)
+  Tps<-tp.dt[,Tvar]
+  #WARNING WARNING: AU lieu de calculer L qui est arrondi à 0,
+  # je calcule 10^N fois Lu et 10^N q. Au total pr chaque donnée
+  # je multiplie par 100 mais comme c'est l'ordre de grandeur de loioutlier()
+  # ca ne me parait pas disproportionné.
+  # Au lieu de 10 et 10 je fais simplement sqrt(expertMax - expertMin)
+  dix=sqrt(expertMax - expertMin)
+  #------------------------------------------------------------------------
+  # Optimisation sur les paramètres initiaux ou pas
+  #------------------------------------------------------------------------
+  if (doOptim == TRUE){
+    if (N > 500){
+      # optim avec sous-echantillonnage
+      print("-------:")
+      print("Optimisation of initial parameters with sub-sampling - result:")
+      bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
+      m0opt=quantile(Y[1:N/4], probs = c(.5))
+      mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
+      cat("bornem0: ",bornem0,"\n")
+      cat("m0opt: ",m0opt,"\n")
+      cat("mmopt: ",mmopt,"\n")
+      popt=0.5
+      #--- Saving datain before sub-sampling
+      YY=Y
+      TpsTps=Tps
+      NN=N
+      Subechant=sort(sample(1:NN,50))
+      Y=YY[Subechant]
+      Tps=TpsTps[Subechant]
+      N=50
+      Vopt=KBO_L(list(m0=m0opt,
+                      mm=mmopt,
+                      pp=popt,
+                      aa=aa,
+                      expertMin=expertMin,
+                      expertMax=expertMax,
+                      sigma2_mm=sigma2_mm,
+                      sigma2_m0=sigma2_m0,
+                      sigma2_pp=sigma2_pp,
+                      K=K),
+                 Y,Tps,N,dix)
+      for (m0 in seq(bornem0[1],bornem0[2],2) ){
+        for (mm in seq((m0-5),(m0+20),2) ){
+          for (p in seqp){
+            # A voir si 50 sous-echantillons au hasard suffisent. Comme dans
+            #  Robbins Monroe, permet aussi de reduire l'impact de la troncature
+            Subechant=sort(sample(1:NN,50))
+            Y=YY[Subechant]
+            Tps=TpsTps[Subechant]
+            V=KBO_L(list(m0=m0,
+                         mm=mm,
+                         pp=p,
+                         aa=aa,
+                         expertMin=expertMin,
+                         expertMax=expertMax,
+                         sigma2_mm=sigma2_mm,
+                         sigma2_m0=sigma2_m0,
+                         sigma2_pp=sigma2_pp,
+                         K=K),
+                    Y,Tps,N,dix)
+            if (V > Vopt){
+              Vopt=V
+              m0opt=m0
+              mmopt=mm
+              popt=p
+            }
+          }
+        }
+      }
+      # Le fait de prendre des sous-echantillons remarquables
+      # (ie 50 au lieu de 200) ne divise que par 2 le temps de calculs
+      # (au lieu de 4 comme imaginé cela vient p.e de la fonction sample())
+      Y=YY
+      Tps=TpsTps
+      N=NN
+      print("Optimised parameters: ")
+      print(c(mmopt,popt,m0opt))
+      print("-------:")
+      resultat=KBO_known(param=list(mm=mmopt,
+                                    pp=popt,
+                                    m0=m0opt,
+                                    aa=aa,
+                                    expertMin=expertMin,
+                                    expertMax=expertMax,
+                                    sigma2_m0=sigma2_m0,
+                                    sigma2_mm=sigma2_mm,
+                                    sigma2_pp=sigma2_pp,
+                                    K=K),
+                         threshold,Y=Y,Tps=Tps,N=N)
+    } else if (N > 50){
+      # optim sans sous-echantillonnage
+      print("-------:")
+      print("Optimisation of initial parameters - result:")
+      print("no sub-sampling performed:")
+      bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
+      m0opt=quantile(Y[1:N/4], probs = c(.5))
+      mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
+      cat("bornem0: ",bornem0,"\n")
+      cat("m0opt: ",m0opt,"\n")
+      cat("mmopt: ",mmopt,"\n")
+      popt=0.5
+      Vopt=KBO_L(list(m0=m0opt,
+                      mm=mmopt,
+                      pp=popt,
+                      aa=aa,
+                      expertMin=expertMin,
+                      expertMax=expertMax,
+                      sigma2_mm=sigma2_mm,
+                      sigma2_m0=sigma2_m0,
+                      sigma2_pp=sigma2_pp,
+                      K=K),
+                 Y,Tps,N,dix)
+      for (m0 in seq(bornem0[1],bornem0[2],2) ){
+        for (mm in seq((m0-5),(m0+20),2) ){
+          for (p in seqp){
+            V=KBO_L(list(m0=m0,
+                         mm=mm,
+                         pp=p,
+                         aa=aa,
+                         expertMin=expertMin,
+                         expertMax=expertMax,
+                         sigma2_mm=sigma2_mm,
+                         sigma2_m0=sigma2_m0,
+                         sigma2_pp=sigma2_pp,
+                         K=K),
+                    Y,Tps,N,dix)
+            if (V > Vopt){
+              Vopt=V
+              m0opt=m0
+              mmopt=mm
+              popt=p
+            }
+          }
+        }
+      }
+      print("Optimised parameters: ")
+      print(c(mmopt,popt,m0opt))
+      print("-------:")
+      resultat=KBO_known(param=list(mm=mmopt,
+                                    pp=popt,
+                                    m0=m0opt,
+                                    aa=aa,
+                                    expertMin=expertMin,
+                                    expertMax=expertMax,
+                                    sigma2_m0=sigma2_m0,
+                                    sigma2_mm=sigma2_mm,
+                                    sigma2_pp=sigma2_pp,
+                                    K=K),
+                         threshold=threshold,Y=Y,Tps=Tps,N=N)
+    } else {
+      # N petit, si trop petit on ne fait rien
+      if (N > 5) {
+        # Not enough data - no optim
+        if (is.null(param[["m0"]])){
+          X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5,
+               round(quantile(Y[(3*N/4):N], probs = 0.8)))
+        } else {
+          X<-c(m0,pp,mm)
+        }
+        print("-------:")
+        print("Optimisation of initial parameters - result:")
+        print("Not enough data => so no optimisation performed:")
+        print("Used parameters: ")
+        print(X)
+        print("-------:")
+        resultat=KBO_known(param=list(m0=X[[1]],
+                                      pp=X[[2]],
+                                      mm=X[[3]],
+                                      aa=aa,
+                                      expertMin=expertMin,
+                                      expertMax=expertMax,
+                                      sigma2_m0=sigma2_m0,
+                                      sigma2_mm=sigma2_mm,
+                                      sigma2_pp=sigma2_pp,
+                                      K=K),
+                           threshold=threshold,Y=Y,Tps=Tps,N=N)
+      } else {
+        warning("Not enough data between expert knowledge. The algorithm is not performed.")
+        resultat<-NULL
+      }
+    }
+  } else {
+    # Pas d'optimisation et test si N petit - si trop petit on ne fait rien
+    if (N > 5) {
+      # No optimisation on initial parameters
+      if (is.null(param[["m0"]])){
+        X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5,
+             round(quantile(Y[(3*N/4):N], probs = 0.8)))
+      } else {
+        X<-c(m0,pp,mm)
+      }
+      print("-------:")
+      print("No optimisation of initial parameters:")
+      print("Used parameters: ")
+      print(X)
+      resultat=KBO_known(param=list(m0=X[[1]],
+                                    pp=X[[2]],
+                                    mm=X[[3]],
+                                    aa=aa,
+                                    expertMin=expertMin,
+                                    expertMax=expertMax,
+                                    sigma2_m0=sigma2_m0,
+                                    sigma2_mm=sigma2_mm,
+                                    sigma2_pp=sigma2_pp,
+                                    K=K),
+                         threshold=threshold,Y=Y,Tps=Tps,N=N)
+    } else {
+      warning("Not enough data between expert knowledge. The algorithm is not performed.")
+      resultat<-NULL
+    }
+  }
+  #---------------------------------------------------------------------------
+  # Formatting output results
+  #---------------------------------------------------------------------------
+  # If resultat NULL then create output with datain and 2 NULL objects
+  # else create output with detectoutlier, PredictionOK and kafino.results
+  # useful for the kafino_plot() function
+  #---------------------------------------------------------------------------
+  if (is.null(resultat)){
+    dt.out<-datain %>% mutate(flag=.data$flag1)
+    dt.pred<-NULL
+    resultat<-NULL
+    mylist<-list(dt.out,dt.pred,resultat)
+    names(mylist)<-c("detectOutlier","PredictionOK","kafino.results")
+    class(mylist) = c("kfino")
+    return(invisible(mylist))
+  } else {
+    prediction=na.omit(resultat$prediction)
+    label_pred=round(na.omit(resultat$label),2)
+    lwr=na.omit(resultat$lwr)
+    upr=na.omit(resultat$upr)
+    flag=na.omit(resultat$flag)
+    dt.pred=cbind.data.frame(select(tp.dt,.data$rowNum),
+                             prediction,label_pred,lwr,upr,flag)
+    # join dt.pred with the initial datain containing all the observations
+    dt.out<-left_join(datain,dt.pred,by="rowNum")
+    dt.out<-mutate(dt.out,flag=if_else(.data$flag1 == "Bad", .data$flag1, .data$flag))
+    dt.out<-arrange(dt.out,.data$rowNum)
+    #--------------------------------------
+    # return a S3 list object
+    #--------------------------------------
+    # 1. a whole dataset with the detected outliers flagged and prediction
+    # 2. a dataset with the prediction on possible values
+    # 3. optimisation results (a list of vectors)
+    mylist<-list(dt.out,dt.pred,resultat)
+    names(mylist)<-c("detectOutlier","PredictionOK","kfino.results")
+    class(mylist) = c("kfino")
+    return(invisible(mylist))
+  }
+}
+#-------------------------------------- End of file --------------------------
--- a/R/utils.R
+++ b/R/utils.R
+#' KBO_known
+#'
+#' @param param list, a list of 10 input parameters for mm, pp and m0
+#' @param threshold numeric, threshold for CI
+#' @param Y num
+#' @param Tps num
+#' @param N num
+#'
+#'
+#' @details uses the same input parameter list than the main function
+#' @return a list
+#' @export
+#'
+#' @examples
+#' # not run
+KBO_known<-function(param,threshold,Y,Tps,N){
+  # load objects
+  mm=param[["mm"]]
+  pp=param[["pp"]]
+  m0=param[["m0"]]
+  aa=param[["aa"]]
+  expertMin=param[["expertMin"]]
+  expertMax=param[["expertMax"]]
+  sigma2_m0=param[["sigma2_m0"]]
+  sigma2_mm=param[["sigma2_mm"]]
+  sigma2_pp=param[["sigma2_pp"]]
+  K=param[["K"]]
+  # paramètre de troncature
+  kappa <-10
+  #--- function defining an outlier distribution (Surface of a trapezium)
+  # uses expertMin and expertMax
+  loi_outlier<-function(y){
+    2/((K+1)*(expertMax - expertMin)) +
+      (2*(K-1)/(K+1))*((y - expertMin)/((expertMax - expertMin)^2))
+  }
+  # initialisation (1.1.1)
+  #--------------------
+  m1= (sigma2_pp*m0 + Y[1]*sigma2_m0)/(sigma2_m0+sigma2_pp)
+  sigma1=(sigma2_m0*sigma2_pp)/(sigma2_m0+sigma2_pp)
+  l0<- loi_outlier(Y[1])
+  loinorm1<-dnorm(Y[1],m0, sqrt(sigma2_m0+sigma2_pp))
+  p0= ((1-pp)*l0) / (pp*loinorm1 + (1-pp)*l0)
+  p1=1-p0
+  m=c(m0,m1)
+  p=c(p0,p1)
+  sigma2=c(sigma2_m0,sigma1)
+  L=c(l0,loinorm1)
+  Xmean=c(p0*m0 + p1*m1)
+  Pmean=c(p1)
+  q=c(1,1)
+  sigma2_new=c(sigma2_m0)
+  #iteration (1.1.2)
+  #-----------------------
+  #// Pour l'instant, je fais comme si kappa<N-1 mettre un if sinon
+  #avant troncature
+  for (k in 1:(kappa-1)){
+    # je crée le vecteur des nouveaux m, ##pour plus tard: au lieu de les
+    # faire vide, je prend direct m_{u0}
+    mnew=rep(0,2^(k+1))
+    sigma2new=rep(0 ,2^(k+1))
+    pnew=rep(0 ,2^(k+1))
+    Lnew=rep(0 ,2^(k+1))
+    # CR = constante de renormalisation qui intervient dans le dénominateur des pu
+    qnew=rep(0 ,2^(k+1))
+    diffTps<-Tps[k+1] - Tps[k]
+    #--- numérateur de pu0
+    tpbeta<-loi_outlier(Y[k+1])
+    pnew[1:(2^k)]=p[1:(2^k)]*(1-pp)*tpbeta
+    Lnew[1:(2^k)]=L[1:(2^k)]*tpbeta
+    mnew[1:(2^k)]= m[1:(2^k)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
+    sigma2new[1:(2^k)] = sigma2[1:(2^k)]*exp(-2*aa*(diffTps)) +
+      (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
+    qnew[1:(2^k)] <- q[1:(2^k)]*(1-pp)
+    qnew[(1+2^k):2^(k+1)] <- q[1:(2^k)]*pp
+    sommevar=sigma2new[1:(2^k)] + sigma2_pp
+    tpnorm<-dnorm(Y[k+1], mnew[1:(2^k)], sqrt(sommevar))
+    pnew[(1+2^k):2^(k+1)]=p[1:(2^k)]*pp*tpnorm
+    Lnew[(1+2^k):2^(k+1)]=L[1:(2^k)]*tpnorm
+    mnew[(1+2^k):2^(k+1)] = (sigma2new[1:(2^k)]*Y[k+1] +
+                               mnew[1:(2^k)]*sigma2_pp)/sommevar
+    sigma2new[(1+2^k):2^(k+1)] = sigma2new[1:(2^k)] * sigma2_pp/sommevar
+    CR<-sum(pnew)
+    Proba1<-sum(pnew[(1+2^k):2^(k+1)])
+    SommeMasse<-sum(pnew*mnew)
+    m=mnew
+    sigma2=sigma2new
+    p=pnew/CR
+    Xmean=c(Xmean,SommeMasse/CR)
+    Pmean=c(Pmean,Proba1/CR)
+    L=Lnew
+    q=qnew
+    sigma2_new<-c(sigma2_new,sum(p*sigma2) + sum(p*m^2) - (sum(p*m))^2)
+  }
+  #apres troncature
+  #----------------------
+  for (k in kappa:(N-1)){
+    # je cree le vecteur des nouveaux m,
+    # pour plus tard: au lieu de les faire vide, je prend direct m_{u0}
+    mnew=rep(0,2^(kappa+1))
+    sigma2new=rep(0 ,2^(kappa+1))
+    pnew=rep(0 ,2^(kappa+1))
+    Lnew=rep(0 ,2^(kappa+1))
+    # CR = constante de renormalisation qui intervient dans le dénominateur des pu
+    qnew=rep(0 ,2^(kappa+1))
+    diffTps<-Tps[k+1] - Tps[k]
+    #--- numérateur de pu0
+    tpbeta<-loi_outlier(Y[k+1])
+    pnew[1:(2^kappa)]=p[1:(2^kappa)]*(1-pp)*tpbeta
+    Lnew[1:(2^kappa)]=L[1:(2^kappa)]*tpbeta
+    mnew[1:(2^kappa)]= m[1:(2^kappa)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
+    sigma2new[1:(2^kappa)] = sigma2[1:(2^kappa)]*exp(-2*aa*(diffTps)) +
+      (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
+    qnew[1:(2^kappa)]=q[1:(2^kappa)]*(1-pp)
+    qnew[(1+2^kappa):2^(kappa+1)]=q[1:(2^kappa)]*pp
+    sommevar=sigma2new[1:(2^kappa)] + sigma2_pp
+    tpnorm<-dnorm(Y[k+1], mnew[1:(2^kappa)], sqrt(sommevar))
+    pnew[(1+2^kappa):2^(kappa+1)]=p[1:(2^kappa)]*pp*tpnorm
+    Lnew[(1+2^kappa):2^(kappa+1)]=L[1:(2^kappa)]*tpnorm
+    mnew[(1+2^kappa):2^(kappa+1)] = (sigma2new[1:(2^kappa)]*Y[k+1] +
+                                       mnew[1:(2^kappa)]*sigma2_pp)/sommevar #m_u1
+    sigma2new[(1+2^kappa):2^(kappa+1)] = sigma2new[1:(2^kappa)] * sigma2_pp/sommevar
+    CR<-sum(pnew)
+    Proba1<-sum(pnew[(1+2^kappa):2^(kappa+1)])
+    SommeMasse<-sum(pnew*mnew)
+    selection=order(pnew, decreasing=T)[1:2^kappa]
+    m=mnew[selection]
+    sigma2=sigma2new[selection]
+    p=pnew[selection]/sum(pnew[selection])
+    Xmean=c(Xmean,SommeMasse/CR)
+    Pmean=c(Pmean,Proba1/CR)
+    L=Lnew[selection]
+    q=qnew[selection]
+    sigma2_new<-c(sigma2_new,sum(p*sigma2) + sum(p*m^2) - (sum(p*m))^2)
+  }
+  #----------------------------------------------
+  # Ajout Intervalle de confiance des estimations
+  # Variance (see section 1.2.1)
+  # IC est approximé...
+  #----------------------------------------------
+  lwr <- Xmean - 1.96*sqrt(sigma2_new)
+  upr <- Xmean + 1.96*sqrt(sigma2_new)
+  # flag
+  flag<-if_else(Pmean > threshold,"OK","KO")
+  #--- Calcul des derniers éléments
+  Vraisemblance=L%*%q
+  resultat=list("prediction"=Xmean,
+                "label"=Pmean,
+                "likelihood"=Vraisemblance,
+                "lwr"=lwr,
+                "upr"=upr,
+                "flag"=flag)
+  return(resultat)
+}
+#------------------------------------------------------------------------
+#' KBO_L a function to calculate a likelihood on optimized parameters
+#'
+#' @param param a list of 10 input parameters mm, pp and m0
+#' @param Y num
+#' @param Tps num
+#' @param N num
+#' @param dix num
+#'
+#' @details uses the same input parameter list than the main function
+#' @return a likelihood
+#' @export
+#'
+#' @examples
+#' # not run
+KBO_L<-function(param,Y,Tps,N,dix){
+  # load objects
+  mm=param[["mm"]]
+  pp=param[["pp"]]
+  m0=param[["m0"]]
+  aa=param[["aa"]]
+  expertMin=param[["expertMin"]]
+  expertMax=param[["expertMax"]]
+  sigma2_m0<-param[["sigma2_m0"]]
+  sigma2_mm<-param[["sigma2_mm"]]
+  sigma2_pp<-param[["sigma2_pp"]]
+  K<-param[["K"]]
+  #--- function defining an outlier distribution (Surface of a trapezium)
+  # uses expertMin and expertMax
+  loi_outlier<-function(y){
+    2/((K+1)*(expertMax - expertMin)) +
+      (2*(K-1)/(K+1))*((y - expertMin)/((expertMax - expertMin)^2))
+  }
+  #---- paramètre de troncature
+  # Ici je met kappa =7, ca me fais retirer les proba <0.01 au lieu de 0.001
+  # comme qd kappa=10 mais ca divise par 10 le temps de calcul
+  kappa=7
+  # initialisation (1.1.1)
+  #--------------------
+  m1= (sigma2_pp*m0 + Y[1]*sigma2_m0)/(sigma2_m0+sigma2_pp)
+  sigma1=(sigma2_m0*sigma2_pp)/(sigma2_m0+sigma2_pp)
+  l0<- loi_outlier(Y[1])
+  loinorm1<-dnorm(Y[1],m0, sqrt(sigma2_m0+sigma2_pp))
+  p0= ((1-pp)*l0) / (pp*loinorm1 + (1-pp)*l0)
+  p1=1-p0
+  m=c(m0,m1)
+  p=c(p0,p1)
+  sigma2=c(sigma2_m0,sigma1)
+  L=dix*c(l0,loinorm1) #Attention *2 pr le grandir
+  q=c(1,1) #attention
+  #iteration (1.1.2)
+  #-----------------------
+  #// Pour l'instant, je fais comme si kappa<N-1 mettre un if sinon
+  # avant troncature
+  for (k in 1:(kappa-1)){
+    mnew=rep(0,2^(k+1))
+    sigma2new=rep(0 ,2^(k+1))
+    pnew=rep(0 ,2^(k+1))
+    Lnew=rep(0 ,2^(k+1))
+    # CR = constante de renormalisation qui intervient dans le dénominateur des pu
+    qnew=rep(0 ,2^(k+1))
+    diffTps<-Tps[k+1] - Tps[k]
+    #--- numérateur de pu0
+    tpbeta<-loi_outlier(Y[k+1])
+    pnew[1:(2^k)]=p[1:(2^k)]*(1-pp)*tpbeta
+    Lnew[1:(2^k)]=L[1:(2^k)]*tpbeta
+    mnew[1:(2^k)]= m[1:(2^k)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
+    sigma2new[1:(2^k)] = sigma2[1:(2^k)]*exp(-2*aa*(diffTps)) +
+      (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
+    qnew[1:(2^k)] <- q[1:(2^k)]*(1-pp)
+    qnew[(1+2^k):2^(k+1)] <- q[1:(2^k)]*pp
+    sommevar=sigma2new[1:(2^k)] + sigma2_pp
+    tpnorm<-dnorm(Y[k+1], mnew[1:(2^k)], sqrt(sommevar))
+    pnew[(1+2^k):2^(k+1)]=p[1:(2^k)]*pp*tpnorm
+    Lnew[(1+2^k):2^(k+1)]=L[1:(2^k)]*tpnorm
+    mnew[(1+2^k):2^(k+1)] = (sigma2new[1:(2^k)]*Y[k+1] + mnew[1:(2^k)]*sigma2_pp)/sommevar
+    sigma2new[(1+2^k):2^(k+1)] = sigma2new[1:(2^k)] * sigma2_pp/sommevar
+    m=mnew
+    sigma2=sigma2new
+    p=pnew/sum(pnew)
+    L=dix*Lnew # fois 2 pr le grandir
+    q=dix*qnew
+  }
+  # apres troncature
+  #----------------------
+  for (k in kappa:(N-1)){
+    # je cree le vecteur des nouveaux m,
+    mnew=rep(0,2^(kappa+1))
+    sigma2new=rep(0 ,2^(kappa+1))
+    pnew=rep(0 ,2^(kappa+1))
+    Lnew=rep(0 ,2^(kappa+1))
+    # CR = constante de renormalisation qui intervient dans le dénominateur des pu
+    qnew=rep(0 ,2^(kappa+1))
+    diffTps<-Tps[k+1] - Tps[k]
+    #--- numérateur de pu0
+    tpbeta<-loi_outlier(Y[k+1])
+    pnew[1:(2^kappa)]=p[1:(2^kappa)]*(1-pp)*tpbeta
+    Lnew[1:(2^kappa)]=L[1:(2^kappa)]*tpbeta
+    mnew[1:(2^kappa)]= m[1:(2^kappa)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
+    sigma2new[1:(2^kappa)] = sigma2[1:(2^kappa)]*exp(-2*aa*(diffTps)) +
+      (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
+    qnew[1:(2^kappa)]=q[1:(2^kappa)]*(1-pp)
+    qnew[(1+2^kappa):2^(kappa+1)]=q[1:(2^kappa)]*pp
+    sommevar=sigma2new[1:(2^kappa)] + sigma2_pp
+    tpnorm<-dnorm(Y[k+1], mnew[1:(2^kappa)], sqrt(sommevar))
+    pnew[(1+2^kappa):2^(kappa+1)]=p[1:(2^kappa)]*pp*tpnorm
+    Lnew[(1+2^kappa):2^(kappa+1)]=L[1:(2^kappa)]*tpnorm
+    mnew[(1+2^kappa):2^(kappa+1)] = (sigma2new[1:(2^kappa)]*Y[k+1] +
+                                       mnew[1:(2^kappa)]*sigma2_pp)/sommevar #m_u1
+    sigma2new[(1+2^kappa):2^(kappa+1)] = sigma2new[1:(2^kappa)] * sigma2_pp/sommevar
+    selection=order(pnew, decreasing=T)[1:2^kappa]
+    m=mnew[selection]
+    sigma2=sigma2new[selection]
+    p=pnew[selection]/sum(pnew[selection])
+    L=dix*Lnew[selection] #fois 2 pr le grandir
+    q=dix*qnew[selection]
+  }
+  Vraisemblance=L%*%q
+  return(Vraisemblance)
+}
+#------------------------ End of file -----------------------------------
--- a/man/KBO_L.Rd
+++ b/man/KBO_L.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{KBO_L}
+\alias{KBO_L}
+\title{KBO_L a function to calculate a likelihood on optimized parameters}
+\usage{
+KBO_L(param, Y, Tps, N, dix)
+}
+\arguments{
+\item{param}{a list of 10 input parameters mm, pp and m0}
+\item{Y}{num}
+\item{Tps}{num}
+\item{N}{num}
+\item{dix}{num}
+}
+\value{
+a likelihood
+}
+\description{
+KBO_L a function to calculate a likelihood on optimized parameters
+}
+\details{
+uses the same input parameter list than the main function
+}
+\examples{
+# not run
+}
--- a/man/KBO_known.Rd
+++ b/man/KBO_known.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{KBO_known}
+\alias{KBO_known}
+\title{KBO_known}
+\usage{
+KBO_known(param, threshold, Y, Tps, N)
+}
+\arguments{
+\item{param}{list, a list of 10 input parameters for mm, pp and m0}
+\item{threshold}{numeric, threshold for CI}
+\item{Y}{num}
+\item{Tps}{num}
+\item{N}{num}
+}
+\value{
+a list
+}
+\description{
+KBO_known
+}
+\details{
+uses the same input parameter list than the main function
+}
+\examples{
+# not run
+}
--- a/man/kfino_fit2.Rd
+++ b/man/kfino_fit2.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/kfino2.R
+\name{kfino_fit2}
+\alias{kfino_fit2}
+\title{kafino_fit2 a function to detect outlier with a Kalman Filtering approach}
+\usage{
+kfino_fit2(datain, Tvar, Yvar, param = NULL, doOptim = TRUE, threshold = 0.5)
+}
+\arguments{
+\item{datain}{an input data.frame of one time course to study (unique IDE)}
+\item{Tvar}{char, time column name in the data.frame datain, a numeric vector
+Tvar can be expressed as a proportion of day in seconds}
+\item{Yvar}{char, name of the variable to predict in the data.frame datain}
+\item{param}{list, a list of initialization parameter}
+\item{doOptim}{logical, if TRUE optimisation of the initial parameters
+default TRUE}
+\item{threshold}{numeric, threshold to qualify an observation as outlier
+according to the label_pred, default 0.5}
+}
+\value{
+a S3 list with two data frames and a list of vectors of
+kafino results
+\describe{
+\item{detectOutlier}{The whole dataset with the detected outliers flagged
+                     and prediction}
+\item{PredictionOK}{A dataset with the predictions on possible values}
+\item{kafino.results}{kfino results (a list of vectors) on optimized input
+                     parameters or not}
+}
+}
+\description{
+kafino_fit2 a function to detect outlier with a Kalman Filtering approach
+}
+\details{
+The initialization parameter list param contains:
+\describe{
+ \item{mm}{target weight, NULL if the user wants to optimize it}
+ \item{pp}{probability to be correctly weighed, NULL if the user wants to
+           optimize it}
+ \item{m0}{Initial weight, NULL if the user wants to optimize it}
+ \item{aa}{numeric, rate of weight change, default 0.001 }
+ \item{expertMin}{numeric, the minimal weight expected by the user}
+ \item{expertMax}{numeric, the maximal weight expected by the user}
+ \item{sigma2_m0}{variance of m0}
+ \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
+       default 0.05}
+ \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
+       default 5}
+ \item{K}{numeric, a constant value in the outlier function (trapezoid),
+          by default K=2 - increasing K, XXX}
+ \item{seqp}{numeric, minimal pp probability to be correctly weighted.}
+}
+It can be given by the user following his knowledge of the animal or dataset
+or entirely constructed by the function. In the optimisation step, this
+vector is initialized according to the input data (between the expert
+range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
+m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A
+sub-sampling is performed to speed the algorithm if the number of possible
+observations studied is greater than 500.
+}
+\examples{
+data(spring1)
+library(dplyr)
+# --- With Optimisation on initial parameters
+t0 <- Sys.time()
+param1<-list(m0=NULL,
+             mm=NULL,
+             pp=NULL,
+             aa=0.001,
+             expertMin=30,
+             expertMax=75,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+             seqp=seq(0.5,0.7,0.1))
+resu1<-kfino_fit2(datain=spring1,
+              Tvar="dateNum",Yvar="Poids",
+              doOptim=TRUE,param=param1)
+Sys.time() - t0
+# --- Without Optimisation on initial parameters
+t0 <- Sys.time()
+param2<-list(m0=41,
+             mm=45,
+             pp=0.5,
+             aa=0.001,
+             expertMin=30,
+             expertMax=75,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+             seqp=seq(0.5,0.7,0.1))
+resu2<-kfino_fit2(datain=spring1,
+              Tvar="dateNum",Yvar="Poids",
+              param=param2,
+              doOptim=FALSE)
+Sys.time() - t0
+# complex data on merinos2 dataset
+data(merinos2)
+t0 <- Sys.time()
+param3<-list(m0=NULL,
+             mm=NULL,
+             pp=NULL,
+             aa=0.001,
+             expertMin=10,
+             expertMax=45,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+             seqp=seq(0.5,0.7,0.1))
+resu3<-kfino_fit2(datain=merinos2,
+              Tvar="dateNum",Yvar="Poids",
+              doOptim=TRUE,param=param3)
+Sys.time() - t0
+}
--- a/tests/testthat/test-outputAlgo.R
+++ b/tests/testthat/test-outputAlgo.R
@@ -5,10 +5,10 @@ test_that("Output type", {
                   expertMin=30,expertMax=75,
                   doOptim=FALSE,aa=0.001,sigma2_mm=0.05,
                   K=2,sigma2_pp=5)
-  expect_equal(str(resu1),"list")
+  expect_equal(length(resu1),3)
-  expect_equal(str(resu1[[1]]),"data.frame")
+  expect_s3_class(resu1[[1]],"data.frame")
-  expect_equal(str(resu1[[2]]),"data.frame")
+  expect_s3_class(resu1[[2]],"data.frame")
-  expect_equal(str(resu1[[3]]),"list")
+  expect_type(resu1[[3]],"list")
  #expect_true(is_SBM(netA))
 })
\ No newline at end of file