consolidation fonctions + rajout tests unitaires

00aef95f · sanchezi · cf354171 · 00aef95f · 00aef95f · 00aef95f
Commit 00aef95f authored 3 years ago by sanchezi
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,7 +3,6 @@
 export(KBO_L)
 export(KBO_known)
 export(kfino_fit)
-export(kfino_fit2)
 export(kfino_plot)
 export(loi_outlier)
 importFrom(dplyr,"%>%")

--- a/R/graph_functions.R
+++ b/R/graph_functions.R
-#' graphical function
+#' kfino_plot a graphical function for the result of a kfino run
 #'
 #' @param resuin a list resulting of the kfino algorithm
-#' @param typeG char, type of graphic, either detection of outliers (with 
+#' @param typeG char, type of graphic, either detection of outliers (with
-#'              qualitative or quantitative display) or prediction
+#'              qualitative or quantitative display) or prediction. must be
+#'              "quanti" or "quali" or "prediction"
 #' @param Tvar char, time variable in the data.frame datain
 #' @param Yvar char, variable which was analysed in the data.frame datain
 #' @param Ident char, column name of the individual id to be analyzed
@@ -10,14 +11,14 @@
 #'
 #' @details The produced graphic can be, according to typeG:
 #' \describe{
-#'  \item{quali}{The detection of outliers with a qualitative rule: OK values, 
+#'  \item{quali}{The detection of outliers with a qualitative rule: OK values,
-#'       KO values (outliers) and Bad values (impossible values defined by 
+#'       KO values (outliers) and Bad values (impossible values defined by
 #'       the user)}
 #'  \item{quanti}{The detection of outliers with a quantitative display using
 #'       the calculated probability of the kafino algorithm}
 #'  \item{prediction}{The prediction of the analyzed variable on OK values}
 #' }
-#' 
+#'
 #' @importFrom ggplot2 aes aes_string ggplot geom_point geom_line
 #' @importFrom ggplot2 scale_color_manual ggtitle
 #' @return a ggplot2 graphic
@@ -26,23 +27,34 @@
 #' @examples
 #' data(spring1)
 #' library(dplyr)
-#' 
+#'
 #' print(colnames(spring1))
-#' 
+#'
 #' # --- Without Optimisation on initial parameters
+#' param2<-list(m0=41,
+#'              mm=45,
+#'              pp=0.5,
+#'              aa=0.001,
+#'              expertMin=30,
+#'              expertMax=75,
+#'              sigma2_m0=1,
+#'              sigma2_mm=0.05,
+#'              sigma2_pp=5,
+#'              K=2,
+#'              seqp=seq(0.5,0.7,0.1))
 #' resu2<-kfino_fit(datain=spring1,
 #'               Tvar="dateNum",Yvar="Poids",
-#'               expertMin=30,expertMax=75,
+#'               param=param2,
-#'               doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5)
+#'               doOptim=FALSE)
-#'               
+#'
 #' # flags are qualitative
 #' kfino_plot(resuin=resu2,typeG="quali",
 #'             Tvar="Day",Yvar="Poids",Ident="IDE")
-#'             
+#'
 #' # flags are quantitative
 #' kfino_plot(resuin=resu2,typeG="quanti",
 #'             Tvar="Day",Yvar="Poids",Ident="IDE")
-#'             
+#'
 #' # predictions on OK values
 #' kfino_plot(resuin=resu2,typeG="prediction",
 #'             Tvar="Day",Yvar="Poids",Ident="IDE")
@@ -63,7 +75,7 @@ kfino_plot<-function(resuin,
  if (typeG %in% c("prediction","quanti") & is.null(resuin[[2]])) {
    stop("NULL object - No graph to provide. Please check your input object.")
  }
  # Some formatting
  tp<-as.data.frame(resuin[[1]])
  myIDE<-unique(tp[,Ident])
@@ -75,43 +87,45 @@ kfino_plot<-function(resuin,
    tp.title1<-paste0(title," - ",myIDE)
    tp.title2<-paste0(title," - ",myIDE)
  }
  # graphics
  if (typeG == "quali"){
    if (!is.null(resuin[[2]])){
      g1<-ggplot(tp,aes_string(x=Tvar))+
-          geom_point( aes_string(y=Yvar,color="flag")) + 
+          geom_point( aes_string(y=Yvar,color="flag")) +
          geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) +
          geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr),
                    color="green") +
          geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr),
                    color="green") +
-          scale_color_manual(values = 
+          scale_color_manual(values =
                            c("KO"="purple", "OK" = "black", "Bad"="red")) +
          ggtitle(tp.title1)
    } else {
      g1<-ggplot(tp,aes_string(x=Tvar))+
-          geom_point( aes_string(y=Yvar,color="flag")) + 
+          geom_point( aes_string(y=Yvar,color="flag")) +
-          scale_color_manual(values = 
+          scale_color_manual(values =
                            c("KO"="purple", "OK" = "black", "Bad"="red")) +
          ggtitle(tp.title1)
    }
    return(g1)
  } else if (typeG == "quanti"){
-    g1<-ggplot(tp,aes_string(x=Tvar))+
+    if (!is.null(resuin[[2]])){
-        geom_point( aes_string(y=Yvar,color="label_pred")) + 
+      g1<-ggplot(tp,aes_string(x=Tvar))+
-        geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) +
+          geom_point( aes_string(y=Yvar,color="label_pred")) +
-        geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr),
+          geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) +
+          geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr),
                  color="green") +
-        geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr),
+          geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr),
                  color="green") +
-        ggtitle(tp.title1)
+          ggtitle(tp.title1)
-    return(g1)
+      return(g1)
+    }
  } else if (typeG == "prediction"){
    tp2<-filter(tp,.data$flag == "OK")
    g1<-ggplot(tp2,aes_string(x=Tvar))+
-        geom_point( aes_string(y=Yvar)) + 
+        geom_point( aes_string(y=Yvar)) +
        geom_line(data=tp2, aes(y=.data$prediction)) +
        geom_line(data=tp2, aes(y=.data$lwr),color="green") +
        geom_line(data=tp2, aes(y=.data$upr),color="green") +
@@ -120,3 +134,4 @@ kfino_plot<-function(resuin,
  }
 }
+#-------------------- End of file -----------------------------------
--- a/R/kfino.R
+++ b/R/kfino.R
--- a/R/kfino2.R
+++ b/R/kfino2.R
-#' kafino_fit2 a function to detect outlier with a Kalman Filtering approach
-#' @param datain an input data.frame of one time course to study (unique IDE)
-#' @param Tvar char, time column name in the data.frame datain, a numeric vector
-#'             Tvar can be expressed as a proportion of day in seconds
-#' @param Yvar char, name of the variable to predict in the data.frame datain
-#' @param param list, a list of initialization parameter
-#' @param doOptim logical, if TRUE optimisation of the initial parameters
-#'                default TRUE
-#' @param threshold numeric, threshold to qualify an observation as outlier
-#'        according to the label_pred, default 0.5
-#'
-#' @details The initialization parameter list param contains:
-#' \describe{
-#'  \item{mm}{target weight, NULL if the user wants to optimize it}
-#'  \item{pp}{probability to be correctly weighed, NULL if the user wants to
-#'            optimize it}
-#'  \item{m0}{Initial weight, NULL if the user wants to optimize it}
-#'  \item{aa}{numeric, rate of weight change, default 0.001 }
-#'  \item{expertMin}{numeric, the minimal weight expected by the user}
-#'  \item{expertMax}{numeric, the maximal weight expected by the user}
-#'  \item{sigma2_m0}{variance of m0}
-#'  \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
-#'        default 0.05}
-#'  \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
-#'        default 5}
-#'  \item{K}{numeric, a constant value in the outlier function (trapezoid),
-#'           by default K=2 - increasing K, XXX}
-#'  \item{seqp}{numeric, minimal pp probability to be correctly weighted.}
-#' }
-#' It can be given by the user following his knowledge of the animal or dataset
-#' or entirely constructed by the function. In the optimisation step, this
-#' vector is initialized according to the input data (between the expert
-#' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
-#' m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A
-#' sub-sampling is performed to speed the algorithm if the number of possible
-#' observations studied is greater than 500.
-#'
-#' @importFrom stats dnorm quantile na.omit
-#' @importFrom dplyr mutate filter left_join arrange %>%
-#' @importFrom dplyr .data if_else row_number select
-#' @return a S3 list with two data frames and a list of vectors of
-#' kafino results
-#' \describe{
-#' \item{detectOutlier}{The whole dataset with the detected outliers flagged
-#'                      and prediction}
-#' \item{PredictionOK}{A dataset with the predictions on possible values}
-#' \item{kafino.results}{kfino results (a list of vectors) on optimized input
-#'                      parameters or not}
-#' }
-#' @export
-#' @examples
-#' data(spring1)
-#' library(dplyr)
-#'
-#' # --- With Optimisation on initial parameters
-#' t0 <- Sys.time()
-#' param1<-list(m0=NULL,
-#'              mm=NULL,
-#'              pp=NULL,
-#'              aa=0.001,
-#'              expertMin=30,
-#'              expertMax=75,
-#'              sigma2_m0=1,
-#'              sigma2_mm=0.05,
-#'              sigma2_pp=5,
-#'              K=2,
-#'              seqp=seq(0.5,0.7,0.1))
-#'
-#' resu1<-kfino_fit2(datain=spring1,
-#'               Tvar="dateNum",Yvar="Poids",
-#'               doOptim=TRUE,param=param1)
-#' Sys.time() - t0
-#' # --- Without Optimisation on initial parameters
-#' t0 <- Sys.time()
-#' param2<-list(m0=41,
-#'              mm=45,
-#'              pp=0.5,
-#'              aa=0.001,
-#'              expertMin=30,
-#'              expertMax=75,
-#'              sigma2_m0=1,
-#'              sigma2_mm=0.05,
-#'              sigma2_pp=5,
-#'              K=2,
-#'              seqp=seq(0.5,0.7,0.1))
-#' resu2<-kfino_fit2(datain=spring1,
-#'               Tvar="dateNum",Yvar="Poids",
-#'               param=param2,
-#'               doOptim=FALSE)
-#' Sys.time() - t0
-#'
-#' # complex data on merinos2 dataset
-#' data(merinos2)
-#'
-#' t0 <- Sys.time()
-#' param3<-list(m0=NULL,
-#'              mm=NULL,
-#'              pp=NULL,
-#'              aa=0.001,
-#'              expertMin=10,
-#'              expertMax=45,
-#'              sigma2_m0=1,
-#'              sigma2_mm=0.05,
-#'              sigma2_pp=5,
-#'              K=2,
-#'              seqp=seq(0.5,0.7,0.1))
-#' resu3<-kfino_fit2(datain=merinos2,
-#'               Tvar="dateNum",Yvar="Poids",
-#'               doOptim=TRUE,param=param3)
-#' Sys.time() - t0
-kfino_fit2<-function(datain,Tvar,Yvar,
-                    param=NULL,
-                    doOptim=TRUE,threshold=0.5){
-  if( any(is.null(param[["expertMin"]]) |
-          is.null(param[["expertMax"]])) )
-    stop('You have to define expertMin and expertMax.')
-  if( any(!is.numeric(param[["expertMin"]]) |
-          !is.numeric(param[["expertMax"]])) )
-    stop('expertMin and expertMax must be numeric.')
-  if( !is.numeric(t(datain[,Yvar])))
-    stop('Input parameter Yvar must contain numeric values.')
-  if( !is.numeric(t(datain[,Tvar])))
-    stop('Input parameter Tvar must contain numeric values.')
-  #-------------------------------------------------------------
-  # load objects
-  m0<-param[["m0"]]
-  mm<-param[["mm"]]
-  pp<-param[["pp"]]
-  if(is.null(param[["aa"]])){ aa<-0.001 } else { aa<-param[["aa"]] }
-  expertMin<-param[["expertMin"]]
-  expertMax<-param[["expertMax"]]
-  if(is.null(param[["sigma2_m0"]])){
-    sigma2_m0<-1
-  } else {sigma2_m0<-param[["sigma2_m0"]]}
-  if(is.null(param[["sigma2_mm"]])){
-    sigma2_mm<-0.05
-  } else {sigma2_mm<-param[["sigma2_mm"]]}
-  if(is.null(param[["sigma2_pp"]])){
-    sigma2_pp<-5
-  } else {sigma2_pp<-param[["sigma2_pp"]]}
-  if(is.null(param[["K"]])){ K<-5 } else { K<-param[["K"]] }
-  seqp<-param[["seqp"]]
-  #-------------------------------------------------------------
-  # Flag impossible weight values (expert knowledge)
-  # the algorithm works on the dataset with possible values
-  # Create an artificial column numbering the rows for joining purpose
-  datain<-as.data.frame(datain)
-  datain<-datain %>%
-    mutate(rowNum=row_number(),
-           flag1=if_else(.data[[Yvar]] > expertMin &
-                           .data[[Yvar]] <= expertMax,"OK","Bad"))
-  tp.dt<-datain %>% filter(.data$flag1 == "OK")
-  Y<-tp.dt[,Yvar]
-  N<-nrow(tp.dt)
-  Tps<-tp.dt[,Tvar]
-  #WARNING WARNING: AU lieu de calculer L qui est arrondi à 0,
-  # je calcule 10^N fois Lu et 10^N q. Au total pr chaque donnée
-  # je multiplie par 100 mais comme c'est l'ordre de grandeur de loioutlier()
-  # ca ne me parait pas disproportionné.
-  # Au lieu de 10 et 10 je fais simplement sqrt(expertMax - expertMin)
-  dix=sqrt(expertMax - expertMin)
-  #------------------------------------------------------------------------
-  # Optimisation sur les paramètres initiaux ou pas
-  #------------------------------------------------------------------------
-  if (doOptim == TRUE){
-    if (N > 500){
-      # optim avec sous-echantillonnage
-      print("-------:")
-      print("Optimisation of initial parameters with sub-sampling - result:")
-      bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
-      m0opt=quantile(Y[1:N/4], probs = c(.5))
-      mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
-      cat("bornem0: ",bornem0,"\n")
-      cat("m0opt: ",m0opt,"\n")
-      cat("mmopt: ",mmopt,"\n")
-      popt=0.5
-      #--- Saving datain before sub-sampling
-      YY=Y
-      TpsTps=Tps
-      NN=N
-      Subechant=sort(sample(1:NN,50))
-      Y=YY[Subechant]
-      Tps=TpsTps[Subechant]
-      N=50
-      Vopt=KBO_L(list(m0=m0opt,
-                      mm=mmopt,
-                      pp=popt,
-                      aa=aa,
-                      expertMin=expertMin,
-                      expertMax=expertMax,
-                      sigma2_mm=sigma2_mm,
-                      sigma2_m0=sigma2_m0,
-                      sigma2_pp=sigma2_pp,
-                      K=K),
-                 Y,Tps,N,dix)
-      for (m0 in seq(bornem0[1],bornem0[2],2) ){
-        for (mm in seq((m0-5),(m0+20),2) ){
-          for (p in seqp){
-            # A voir si 50 sous-echantillons au hasard suffisent. Comme dans
-            #  Robbins Monroe, permet aussi de reduire l'impact de la troncature
-            Subechant=sort(sample(1:NN,50))
-            Y=YY[Subechant]
-            Tps=TpsTps[Subechant]
-            V=KBO_L(list(m0=m0,
-                         mm=mm,
-                         pp=p,
-                         aa=aa,
-                         expertMin=expertMin,
-                         expertMax=expertMax,
-                         sigma2_mm=sigma2_mm,
-                         sigma2_m0=sigma2_m0,
-                         sigma2_pp=sigma2_pp,
-                         K=K),
-                    Y,Tps,N,dix)
-            if (V > Vopt){
-              Vopt=V
-              m0opt=m0
-              mmopt=mm
-              popt=p
-            }
-          }
-        }
-      }
-      # Le fait de prendre des sous-echantillons remarquables
-      # (ie 50 au lieu de 200) ne divise que par 2 le temps de calculs
-      # (au lieu de 4 comme imaginé cela vient p.e de la fonction sample())
-      Y=YY
-      Tps=TpsTps
-      N=NN
-      print("Optimised parameters: ")
-      print(c(mmopt,popt,m0opt))
-      print("-------:")
-      resultat=KBO_known(param=list(mm=mmopt,
-                                    pp=popt,
-                                    m0=m0opt,
-                                    aa=aa,
-                                    expertMin=expertMin,
-                                    expertMax=expertMax,
-                                    sigma2_m0=sigma2_m0,
-                                    sigma2_mm=sigma2_mm,
-                                    sigma2_pp=sigma2_pp,
-                                    K=K),
-                         threshold,Y=Y,Tps=Tps,N=N)
-    } else if (N > 50){
-      # optim sans sous-echantillonnage
-      print("-------:")
-      print("Optimisation of initial parameters - result:")
-      print("no sub-sampling performed:")
-      bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
-      m0opt=quantile(Y[1:N/4], probs = c(.5))
-      mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
-      cat("bornem0: ",bornem0,"\n")
-      cat("m0opt: ",m0opt,"\n")
-      cat("mmopt: ",mmopt,"\n")
-      popt=0.5
-      Vopt=KBO_L(list(m0=m0opt,
-                      mm=mmopt,
-                      pp=popt,
-                      aa=aa,
-                      expertMin=expertMin,
-                      expertMax=expertMax,
-                      sigma2_mm=sigma2_mm,
-                      sigma2_m0=sigma2_m0,
-                      sigma2_pp=sigma2_pp,
-                      K=K),
-                 Y,Tps,N,dix)
-      for (m0 in seq(bornem0[1],bornem0[2],2) ){
-        for (mm in seq((m0-5),(m0+20),2) ){
-          for (p in seqp){
-            V=KBO_L(list(m0=m0,
-                         mm=mm,
-                         pp=p,
-                         aa=aa,
-                         expertMin=expertMin,
-                         expertMax=expertMax,
-                         sigma2_mm=sigma2_mm,
-                         sigma2_m0=sigma2_m0,
-                         sigma2_pp=sigma2_pp,
-                         K=K),
-                    Y,Tps,N,dix)
-            if (V > Vopt){
-              Vopt=V
-              m0opt=m0
-              mmopt=mm
-              popt=p
-            }
-          }
-        }
-      }
-      print("Optimised parameters: ")
-      print(c(mmopt,popt,m0opt))
-      print("-------:")
-      resultat=KBO_known(param=list(mm=mmopt,
-                                    pp=popt,
-                                    m0=m0opt,
-                                    aa=aa,
-                                    expertMin=expertMin,
-                                    expertMax=expertMax,
-                                    sigma2_m0=sigma2_m0,
-                                    sigma2_mm=sigma2_mm,
-                                    sigma2_pp=sigma2_pp,
-                                    K=K),
-                         threshold=threshold,Y=Y,Tps=Tps,N=N)
-    } else {
-      # N petit, si trop petit on ne fait rien
-      if (N > 5) {
-        # Not enough data - no optim
-        if (is.null(param[["m0"]])){
-          X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5,
-               round(quantile(Y[(3*N/4):N], probs = 0.8)))
-        } else {
-          X<-c(m0,pp,mm)
-        }
-        print("-------:")
-        print("Optimisation of initial parameters - result:")
-        print("Not enough data => so no optimisation performed:")
-        print("Used parameters: ")
-        print(X)
-        print("-------:")
-        resultat=KBO_known(param=list(m0=X[[1]],
-                                      pp=X[[2]],
-                                      mm=X[[3]],
-                                      aa=aa,
-                                      expertMin=expertMin,
-                                      expertMax=expertMax,
-                                      sigma2_m0=sigma2_m0,
-                                      sigma2_mm=sigma2_mm,
-                                      sigma2_pp=sigma2_pp,
-                                      K=K),
-                           threshold=threshold,Y=Y,Tps=Tps,N=N)
-      } else {
-        warning("Not enough data between expert knowledge. The algorithm is not performed.")
-        resultat<-NULL
-      }
-    }
-  } else {
-    # Pas d'optimisation et test si N petit - si trop petit on ne fait rien
-    if (N > 5) {
-      # No optimisation on initial parameters
-      if (is.null(param[["m0"]])){
-        X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5,
-             round(quantile(Y[(3*N/4):N], probs = 0.8)))
-      } else {
-        X<-c(m0,pp,mm)
-      }
-      print("-------:")
-      print("No optimisation of initial parameters:")
-      print("Used parameters: ")
-      print(X)
-      resultat=KBO_known(param=list(m0=X[[1]],
-                                    pp=X[[2]],
-                                    mm=X[[3]],
-                                    aa=aa,
-                                    expertMin=expertMin,
-                                    expertMax=expertMax,
-                                    sigma2_m0=sigma2_m0,
-                                    sigma2_mm=sigma2_mm,
-                                    sigma2_pp=sigma2_pp,
-                                    K=K),
-                         threshold=threshold,Y=Y,Tps=Tps,N=N)
-    } else {
-      warning("Not enough data between expert knowledge. The algorithm is not performed.")
-      resultat<-NULL
-    }
-  }
-  #---------------------------------------------------------------------------
-  # Formatting output results
-  #---------------------------------------------------------------------------
-  # If resultat NULL then create output with datain and 2 NULL objects
-  # else create output with detectoutlier, PredictionOK and kafino.results
-  # useful for the kafino_plot() function
-  #---------------------------------------------------------------------------
-  if (is.null(resultat)){
-    dt.out<-datain %>% mutate(flag=.data$flag1)
-    dt.pred<-NULL
-    resultat<-NULL
-    mylist<-list(dt.out,dt.pred,resultat)
-    names(mylist)<-c("detectOutlier","PredictionOK","kafino.results")
-    class(mylist) = c("kfino")
-    return(invisible(mylist))
-  } else {
-    prediction=na.omit(resultat$prediction)
-    label_pred=round(na.omit(resultat$label),2)
-    lwr=na.omit(resultat$lwr)
-    upr=na.omit(resultat$upr)
-    flag=na.omit(resultat$flag)
-    dt.pred=cbind.data.frame(select(tp.dt,.data$rowNum),
-                             prediction,label_pred,lwr,upr,flag)
-    # join dt.pred with the initial datain containing all the observations
-    dt.out<-left_join(datain,dt.pred,by="rowNum")
-    dt.out<-mutate(dt.out,flag=if_else(.data$flag1 == "Bad", .data$flag1, .data$flag))
-    dt.out<-arrange(dt.out,.data$rowNum)
-    #--------------------------------------
-    # return a S3 list object
-    #--------------------------------------
-    # 1. a whole dataset with the detected outliers flagged and prediction
-    # 2. a dataset with the prediction on possible values
-    # 3. optimisation results (a list of vectors)
-    mylist<-list(dt.out,dt.pred,resultat)
-    names(mylist)<-c("detectOutlier","PredictionOK","kfino.results")
-    class(mylist) = c("kfino")
-    return(invisible(mylist))
-  }
-}
-#-------------------------------------- End of file --------------------------
--- a/R/utils.R
+++ b/R/utils.R
@@ -37,7 +37,23 @@ loi_outlier<-function(y,
 #' @export
 #'
 #' @examples
-#' # not run
+#' set.seed(1234)
+#' Y<-rnorm(n=10,mean=50,4)
+#' Tps<-seq(1,10)
+#' N=10
+#' param2<-list(m0=41,
+#'              mm=45,
+#'              pp=0.5,
+#'              aa=0.001,
+#'              expertMin=30,
+#'              expertMax=75,
+#'              sigma2_m0=1,
+#'              sigma2_mm=0.05,
+#'              sigma2_pp=5,
+#'              K=2,
+#'             seqp=seq(0.5,0.7,0.1))
+#' print(Y)
+#' KBO_known(param=param2,threshold=0.5,Y,Tps,N)
 KBO_known<-function(param,threshold,Y,Tps,N){
  # load objects
  mm=param[["mm"]]
@@ -204,7 +220,23 @@ KBO_known<-function(param,threshold,Y,Tps,N){
 #' @export
 #'
 #' @examples
-#' # not run
+#' set.seed(1234)
+#' Y<-rnorm(n=10,mean=50,4)
+#' Tps<-seq(1,10)
+#' N=10
+#' param2<-list(m0=41,
+#'              mm=45,
+#'              pp=0.5,
+#'              aa=0.001,
+#'              expertMin=30,
+#'              expertMax=75,
+#'              sigma2_m0=1,
+#'              sigma2_mm=0.05,
+#'              sigma2_pp=5,
+#'              K=2,
+#'             seqp=seq(0.5,0.7,0.1))
+#' print(Y)
+#' KBO_L(param=param2,Y,Tps,N,dix=6)
 KBO_L<-function(param,Y,Tps,N,dix){
  # load objects
  mm=param[["mm"]]

--- a/man/KBO_L.Rd
+++ b/man/KBO_L.Rd
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
+% Please edit documentation in R/utils_functions.R
 \name{KBO_L}
 \alias{KBO_L}
 \title{KBO_L a function to calculate a likelihood on optimized parameters}
@@ -27,5 +27,21 @@ KBO_L a function to calculate a likelihood on optimized parameters
 uses the same input parameter list than the main function
 }
 \examples{
-# not run
+set.seed(1234)
+Y<-rnorm(n=10,mean=50,4)
+Tps<-seq(1,10)
+N=10
+param2<-list(m0=41,
+             mm=45,
+             pp=0.5,
+             aa=0.001,
+             expertMin=30,
+             expertMax=75,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+            seqp=seq(0.5,0.7,0.1))
+print(Y)
+KBO_L(param=param2,Y,Tps,N,dix=6)
 }
--- a/man/KBO_known.Rd
+++ b/man/KBO_known.Rd
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
+% Please edit documentation in R/utils_functions.R
 \name{KBO_known}
 \alias{KBO_known}
 \title{KBO_known}
@@ -27,5 +27,21 @@ KBO_known
 uses the same input parameter list than the main function
 }
 \examples{
-# not run
+set.seed(1234)
+Y<-rnorm(n=10,mean=50,4)
+Tps<-seq(1,10)
+N=10
+param2<-list(m0=41,
+             mm=45,
+             pp=0.5,
+             aa=0.001,
+             expertMin=30,
+             expertMax=75,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+            seqp=seq(0.5,0.7,0.1))
+print(Y)
+KBO_known(param=param2,threshold=0.5,Y,Tps,N)
 }
--- a/man/kfino_fit.Rd
+++ b/man/kfino_fit.Rd
@@ -2,23 +2,9 @@
 % Please edit documentation in R/kfino.R
 \name{kfino_fit}
 \alias{kfino_fit}
-\title{kafino_fit a function to detect outlier with a Kalman Filtering approach}
+\title{kfino_fit a function to detect outlier with a Kalman Filtering approach}
 \usage{
-kfino_fit(
+kfino_fit(datain, Tvar, Yvar, param = NULL, doOptim = TRUE, threshold = 0.5)
-  datain,
-  Tvar,
-  Yvar,
-  expertMin = NULL,
-  expertMax = NULL,
-  X = NULL,
-  doOptim = TRUE,
-  threshold = 0.5,
-  aa = 0.001,
-  sigma2_mm = 0.05,
-  sigma2_pp,
-  K = 2,
-  minp = 0.4
-)
 }
 \arguments{
 \item{datain}{an input data.frame of one time course to study (unique IDE)}
@@ -28,57 +14,54 @@ Tvar can be expressed as a proportion of day in seconds}
 \item{Yvar}{char, name of the variable to predict in the data.frame datain}
-\item{expertMin}{numeric, the minimal weight expected by the user}
+\item{param}{list, a list of initialization parameters}
-\item{expertMax}{numeric, the maximal weight expected by the user}
+\item{doOptim}{logical, if TRUE optimisation of the initial parameters
-\item{X}{vector, a vector of initialization parameter (will not be optimized)}
-\item{doOptim}{logical, if TRUE optimisation of the initial parameters 
 default TRUE}
-\item{threshold}{numeric, threshold to qualify an observation as outlier 
+\item{threshold}{numeric, threshold to qualify an observation as outlier
 according to the label_pred, default 0.5}
-\item{aa}{numeric, rate of weight change, default 0.001}
-\item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar, 
-default 0.05}
-\item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar, 
-default 5}
-\item{K}{numeric, cst in the outlier function (trapezoid), by default K=2
-increasing K, XXX}
-\item{minp}{numeric, minimal pp probability to be correctly weighted.}
 }
 \value{
-a S3 list with two data frames and a list of vectors of 
+a S3 list with two data frames and a list of vectors of
-kafino results 
+kfino results
 \describe{
-\item{detectOutlier}{The whole dataset with the detected outliers flagged 
+\item{detectOutlier}{The whole dataset with the detected outliers flagged
                     and prediction}
 \item{PredictionOK}{A dataset with the predictions on possible values}
-\item{kafino.results}{kfino results (a list of vectors) on optimized input 
+\item{kafino.results}{kfino results (a list of vectors) on optimized input
                     parameters or not}
 }
 }
 \description{
-kafino_fit a function to detect outlier with a Kalman Filtering approach
+kfino_fit a function to detect outlier with a Kalman Filtering approach
 }
 \details{
-The initialization parameter vector X contains:
+The initialization parameter list param contains:
 \describe{
- \item{mm}{target weight}
+ \item{mm}{(optional) target weight, NULL if the user wants to optimize it}
- \item{pp}{probability to be correctly weighed}
+ \item{pp}{(optional) probability to be correctly weighed, NULL if the user
- \item{m0}{Initial weight}
+           wants to optimize it}
+ \item{m0}{(optional) Initial weight, NULL if the user wants to optimize it}
+ \item{aa}{numeric, rate of weight change, default 0.001 }
+ \item{expertMin}{numeric, the minimal weight expected by the user}
+ \item{expertMax}{numeric, the maximal weight expected by the user}
+ \item{sigma2_m0}{variance of m0, default 1}
+ \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
+       default 0.05}
+ \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
+       default 5}
+ \item{K}{numeric, a constant value in the outlier function (trapezium),
+          by default K=5}
+ \item{seqp}{numeric, sequence of pp probability to be correctly weighted.
+             default seq(0.5,0.7,0.1)}
 }
-It can be given by the user following his knowledge of the animal or dataset 
+It has to be given by the user following his knowledge of the animal or
-or entirely constructed by the function. In the optimisation step, this 
+the dataset. All parameters are compulsory execpt m0, mm and pp that can be
-vector is initialized according to the input data (between the expert
+optimized by the algorithm. In the optimisation step, those three parameters
-range) using quantile of the Y distribution (varying between 0.2 and 0.8 for 
+are initialized according to the input data (between the expert
-m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A 
+range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
+m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A
 sub-sampling is performed to speed the algorithm if the number of possible
 observations studied is greater than 500.
 }
@@ -88,27 +71,58 @@ library(dplyr)
 # --- With Optimisation on initial parameters
 t0 <- Sys.time()
+param1<-list(m0=NULL,
+             mm=NULL,
+             pp=NULL,
+             aa=0.001,
+             expertMin=30,
+             expertMax=75,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+             seqp=seq(0.5,0.7,0.1))
 resu1<-kfino_fit(datain=spring1,
              Tvar="dateNum",Yvar="Poids",
-              expertMin=30,expertMax=75,
+              doOptim=TRUE,param=param1)
-              doOptim=TRUE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5)
 Sys.time() - t0
 # --- Without Optimisation on initial parameters
 t0 <- Sys.time()
+param2<-list(m0=41,
+             mm=45,
+             pp=0.5,
+             aa=0.001,
+             expertMin=30,
+             expertMax=75,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+             seqp=seq(0.5,0.7,0.1))
 resu2<-kfino_fit(datain=spring1,
              Tvar="dateNum",Yvar="Poids",
-              expertMin=30,expertMax=75,
+              param=param2,
-              X=c(45,0.5,41),
+              doOptim=FALSE)
-              doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5)
 Sys.time() - t0
 # complex data on merinos2 dataset
 data(merinos2)
 t0 <- Sys.time()
+param3<-list(m0=NULL,
+             mm=NULL,
+             pp=NULL,
+             aa=0.001,
+             expertMin=10,
+             expertMax=45,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+             seqp=seq(0.5,0.7,0.1))
 resu3<-kfino_fit(datain=merinos2,
              Tvar="dateNum",Yvar="Poids",
-              expertMin=10,expertMax=45,
+              doOptim=TRUE,param=param3)
-              doOptim=TRUE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5)
 Sys.time() - t0
 }
--- a/man/kfino_fit2.Rd
+++ b/man/kfino_fit2.Rd
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/kfino2.R
-\name{kfino_fit2}
-\alias{kfino_fit2}
-\title{kafino_fit2 a function to detect outlier with a Kalman Filtering approach}
-\usage{
-kfino_fit2(datain, Tvar, Yvar, param = NULL, doOptim = TRUE, threshold = 0.5)
-}
-\arguments{
-\item{datain}{an input data.frame of one time course to study (unique IDE)}
-\item{Tvar}{char, time column name in the data.frame datain, a numeric vector
-Tvar can be expressed as a proportion of day in seconds}
-\item{Yvar}{char, name of the variable to predict in the data.frame datain}
-\item{param}{list, a list of initialization parameter}
-\item{doOptim}{logical, if TRUE optimisation of the initial parameters
-default TRUE}
-\item{threshold}{numeric, threshold to qualify an observation as outlier
-according to the label_pred, default 0.5}
-}
-\value{
-a S3 list with two data frames and a list of vectors of
-kafino results
-\describe{
-\item{detectOutlier}{The whole dataset with the detected outliers flagged
-                     and prediction}
-\item{PredictionOK}{A dataset with the predictions on possible values}
-\item{kafino.results}{kfino results (a list of vectors) on optimized input
-                     parameters or not}
-}
-}
-\description{
-kafino_fit2 a function to detect outlier with a Kalman Filtering approach
-}
-\details{
-The initialization parameter list param contains:
-\describe{
- \item{mm}{target weight, NULL if the user wants to optimize it}
- \item{pp}{probability to be correctly weighed, NULL if the user wants to
-           optimize it}
- \item{m0}{Initial weight, NULL if the user wants to optimize it}
- \item{aa}{numeric, rate of weight change, default 0.001 }
- \item{expertMin}{numeric, the minimal weight expected by the user}
- \item{expertMax}{numeric, the maximal weight expected by the user}
- \item{sigma2_m0}{variance of m0}
- \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
-       default 0.05}
- \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
-       default 5}
- \item{K}{numeric, a constant value in the outlier function (trapezoid),
-          by default K=2 - increasing K, XXX}
- \item{seqp}{numeric, minimal pp probability to be correctly weighted.}
-}
-It can be given by the user following his knowledge of the animal or dataset
-or entirely constructed by the function. In the optimisation step, this
-vector is initialized according to the input data (between the expert
-range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
-m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A
-sub-sampling is performed to speed the algorithm if the number of possible
-observations studied is greater than 500.
-}
-\examples{
-data(spring1)
-library(dplyr)
-# --- With Optimisation on initial parameters
-t0 <- Sys.time()
-param1<-list(m0=NULL,
-             mm=NULL,
-             pp=NULL,
-             aa=0.001,
-             expertMin=30,
-             expertMax=75,
-             sigma2_m0=1,
-             sigma2_mm=0.05,
-             sigma2_pp=5,
-             K=2,
-             seqp=seq(0.5,0.7,0.1))
-resu1<-kfino_fit2(datain=spring1,
-              Tvar="dateNum",Yvar="Poids",
-              doOptim=TRUE,param=param1)
-Sys.time() - t0
-# --- Without Optimisation on initial parameters
-t0 <- Sys.time()
-param2<-list(m0=41,
-             mm=45,
-             pp=0.5,
-             aa=0.001,
-             expertMin=30,
-             expertMax=75,
-             sigma2_m0=1,
-             sigma2_mm=0.05,
-             sigma2_pp=5,
-             K=2,
-             seqp=seq(0.5,0.7,0.1))
-resu2<-kfino_fit2(datain=spring1,
-              Tvar="dateNum",Yvar="Poids",
-              param=param2,
-              doOptim=FALSE)
-Sys.time() - t0
-# complex data on merinos2 dataset
-data(merinos2)
-t0 <- Sys.time()
-param3<-list(m0=NULL,
-             mm=NULL,
-             pp=NULL,
-             aa=0.001,
-             expertMin=10,
-             expertMax=45,
-             sigma2_m0=1,
-             sigma2_mm=0.05,
-             sigma2_pp=5,
-             K=2,
-             seqp=seq(0.5,0.7,0.1))
-resu3<-kfino_fit2(datain=merinos2,
-              Tvar="dateNum",Yvar="Poids",
-              doOptim=TRUE,param=param3)
-Sys.time() - t0
-}
--- a/man/kfino_plot.Rd
+++ b/man/kfino_plot.Rd
@@ -2,15 +2,16 @@
 % Please edit documentation in R/graph_functions.R
 \name{kfino_plot}
 \alias{kfino_plot}
-\title{graphical function}
+\title{kfino_plot a graphical function for the result of a kfino run}
 \usage{
 kfino_plot(resuin, typeG, Tvar, Yvar, Ident, title = NULL)
 }
 \arguments{
 \item{resuin}{a list resulting of the kfino algorithm}
-\item{typeG}{char, type of graphic, either detection of outliers (with 
+\item{typeG}{char, type of graphic, either detection of outliers (with
-qualitative or quantitative display) or prediction}
+qualitative or quantitative display) or prediction. must be
+"quanti" or "quali" or "prediction"}
 \item{Tvar}{char, time variable in the data.frame datain}
@@ -24,13 +25,13 @@ qualitative or quantitative display) or prediction}
 a ggplot2 graphic
 }
 \description{
-graphical function
+kfino_plot a graphical function for the result of a kfino run
 }
 \details{
 The produced graphic can be, according to typeG:
 \describe{
- \item{quali}{The detection of outliers with a qualitative rule: OK values, 
+ \item{quali}{The detection of outliers with a qualitative rule: OK values,
-      KO values (outliers) and Bad values (impossible values defined by 
+      KO values (outliers) and Bad values (impossible values defined by
      the user)}
 \item{quanti}{The detection of outliers with a quantitative display using
      the calculated probability of the kafino algorithm}
@@ -44,19 +45,30 @@ library(dplyr)
 print(colnames(spring1))
 # --- Without Optimisation on initial parameters
+param2<-list(m0=41,
+             mm=45,
+             pp=0.5,
+             aa=0.001,
+             expertMin=30,
+             expertMax=75,
+             sigma2_m0=1,
+             sigma2_mm=0.05,
+             sigma2_pp=5,
+             K=2,
+             seqp=seq(0.5,0.7,0.1))
 resu2<-kfino_fit(datain=spring1,
              Tvar="dateNum",Yvar="Poids",
-              expertMin=30,expertMax=75,
+              param=param2,
-              doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5)
+              doOptim=FALSE)
 # flags are qualitative
 kfino_plot(resuin=resu2,typeG="quali",
            Tvar="Day",Yvar="Poids",Ident="IDE")
 # flags are quantitative
 kfino_plot(resuin=resu2,typeG="quanti",
            Tvar="Day",Yvar="Poids",Ident="IDE")
 # predictions on OK values
 kfino_plot(resuin=resu2,typeG="prediction",
            Tvar="Day",Yvar="Poids",Ident="IDE")

--- a/man/loi_outlier.Rd
+++ b/man/loi_outlier.Rd
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
+% Please edit documentation in R/utils_functions.R
 \name{loi_outlier}
 \alias{loi_outlier}
 \title{loi_outlier This function defines an outlier distribution (Surface of a

--- a/tests/testthat/test-outputAlgo.R
+++ b/tests/testthat/test-outputAlgo.R
-test_that("Output type", {
+test_that("kfino - no optimisation - computed correctly", {
  data(spring1)
+  param1<-list(m0=41,
+               mm=45,
+               pp=0.5,
+               aa=0.001,
+               expertMin=30,
+               expertMax=75,
+               sigma2_m0=1,
+               sigma2_mm=0.05,
+               sigma2_pp=5,
+               K=2,
+               seqp=seq(0.5,0.7,0.1))
  resu1<-kfino_fit(datain=spring1,
-                   Tvar="dateNum",Yvar="Poids",
+                    Tvar="dateNum",Yvar="Poids",
-                   expertMin=30,expertMax=75,
+                    param=param1,
-                   doOptim=FALSE,aa=0.001,sigma2_mm=0.05,
+                    doOptim=FALSE)
-                   K=2,sigma2_pp=5)
+  # Output type
  expect_equal(length(resu1),3)
  expect_s3_class(resu1[[1]],"data.frame")
  expect_s3_class(resu1[[2]],"data.frame")
  expect_type(resu1[[3]],"list")
-  #expect_true(is_SBM(netA))
+  expect_equal(length(resu1[[3]]),6)
+  # expected result
+  expect_equal(as.vector(resu1[[3]]$likelihood),1.245621e-150)
 })
+test_that("kfino - optimisation - computed correctly", {
+  data(spring1)
+  param1<-list(m0=NULL,
+               mm=NULL,
+               pp=NULL,
+               aa=0.001,
+               expertMin=30,
+               expertMax=75,
+               sigma2_m0=1,
+               sigma2_mm=0.05,
+               sigma2_pp=5,
+               K=2,
+               seqp=seq(0.5,0.7,0.1))
+  resu1<-kfino_fit(datain=spring1,
+                    Tvar="dateNum",Yvar="Poids",
+                    param=param1,
+                    doOptim=TRUE)
+  # Output type
+  expect_equal(length(resu1),3)
+  expect_s3_class(resu1[[1]],"data.frame")
+  expect_s3_class(resu1[[2]],"data.frame")
+  expect_type(resu1[[3]],"list")
+  expect_equal(length(resu1[[3]]),6)
+  # expected result
+  expect_equal(as.vector(resu1[[3]]$likelihood),1.035855e-135)
+})
+test_that("KBO_L - likelihood - computed correctly", {
+  set.seed(1234)
+  Y<-rnorm(n=10,mean=50,4)
+  Tps<-seq(1,10)
+  N=10
+  param2<-list(m0=41,
+               mm=45,
+               pp=0.5,
+               aa=0.001,
+               expertMin=30,
+               expertMax=75,
+               sigma2_m0=1,
+               sigma2_mm=0.05,
+               sigma2_pp=5,
+               K=2,
+               seqp=seq(0.5,0.7,0.1))
+  resu<-KBO_L(param=param2,Y,Tps,N,dix=6)
+  expect_equal(as.vector(resu),0.00490257592)
+})