Skip to content
Snippets Groups Projects
Commit 4a25f1b5 authored by sanchezi's avatar sanchezi
Browse files

MaJ issue 4 - amélioration aide kfino_fit()

parent be6e704e
No related branches found
No related tags found
No related merge requests found
Pipeline #59209 passed
#' kfino_fit a function to detect outliers with a Kalman Filtering approach
#' @param datain an input data.frame of one time course to study (unique IDE)
#' @param Tvar char, time column name in the data.frame datain, a numeric vector
#' Tvar can be expressed as a proportion of day in seconds
#' Tvar should be expressed as a proportion of day in seconds
#' @param Yvar char, name of the variable to predict in the data.frame datain
#' @param param list, a list of initialization parameters
#' @param doOptim logical, if TRUE optimisation of the initial parameters
#' @param doOptim logical, if TRUE optimization of the initial parameters,
#' default TRUE
#' @param method character, the method used to optimize the initial parameters:
#' Expectation-Maximization algorithm `"EM"` or Maximum
#' Likelihood `"ML"`, default `"ML"`
#' @param threshold numeric, threshold to qualify an observation as an outlier
#' according to the label_pred, default 0.5
#' @param kappa numeric, truncation setting, default 10
#' @param kappaOpt numeric, truncation setting, default 7
#' @param kappa numeric, truncation setting for likelihood optimization,
#' default 10
#' @param kappaOpt numeric, truncation setting for initial parameters'
#' optimization, default 7
#'
#' @details The initialization parameter list param contains:
#' @details The initialization parameter list `param` contains:
#' \describe{
#' \item{mm}{(optional) target weight, NULL if the user wants to optimize it}
#' \item{pp}{(optional) probability to be correctly weighed, NULL if the user
#' wants to optimize it}
#' \item{m0}{(optional) Initial weight, NULL if the user wants to optimize it}
#' \item{mm}{(optional) numeric, target weight, NULL if the user wants to
#' optimize it}
#' \item{pp}{(optional) numeric, probability to be correctly weighed, NULL if
#' the user wants to optimize it}
#' \item{m0}{(optional) numeric, initial weight, NULL if the user wants to
#' optimize it}
#' \item{aa}{numeric, rate of weight change, default 0.001 }
#' \item{expertMin}{numeric, the minimal weight expected by the user}
#' \item{expertMax}{numeric, the maximal weight expected by the user}
#' \item{sigma2_m0}{variance of m0, default 1}
#' \item{sigma2_m0}{numeric, variance of m0, default 1}
#' \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
#' default 0.05}
#' \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
#' default 5}
#' \item{K}{numeric, a constant value in the outlier function (trapezium),
#' by default K=5}
#' \item{seqp}{numeric, sequence of pp probability to be correctly weighted.
#' default seq(0.5,0.7,0.1)}
#' \item{seqp}{numeric vector, sequence of pp probability to be correctly
#' weighed. default seq(0.5,0.7,0.1)}
#' }
#' It has to be given by the user according to their knowledge of the animal or
#' the data set. All parameters are compulsory except m0, mm and pp that can be
......@@ -41,7 +45,7 @@
#' m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A
#' sub-sampling is performed to speed the algorithm if the number of possible
#' observations studied is greater than 500. Optimization is performed using
#' `"EM"` or `"ML"` methods.
#' `"EM"` or `"ML"` method.
#'
#' @importFrom stats dnorm quantile na.omit
#' @importFrom dplyr mutate filter left_join arrange %>%
......@@ -61,7 +65,7 @@
#' data(spring1)
#' library(dplyr)
#'
#' # --- With Optimisation on initial parameters - ML method
#' # --- With Optimization on initial parameters - ML method
#' t0 <- Sys.time()
#' param1<-list(m0=NULL,
#' mm=NULL,
......@@ -80,14 +84,14 @@
#' doOptim=TRUE,method="ML",param=param1)
#' Sys.time() - t0
#'
#' # --- With Optimisation on initial parameters - EM method
#' # --- With Optimization on initial parameters - EM method
#' t0 <- Sys.time()
#' resu1b<-kfino_fit(datain=spring1,
#' Tvar="dateNum",Yvar="Poids",
#' doOptim=TRUE,method="EM",param=param1)
#' Sys.time() - t0
#'
#' # --- Without Optimisation on initial parameters
#' # --- Without Optimization on initial parameters
#' t0 <- Sys.time()
#' param2<-list(m0=41,
#' mm=45,
......@@ -194,7 +198,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
if (N > 500){
# optim with sub-sampling
print("-------:")
print("Optimisation of initial parameters ")
print("Optimization of initial parameters ")
print("with sub-sampling and ML method - result:")
bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
m0opt=quantile(Y[1:N/4], probs = c(.5))
......@@ -261,7 +265,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
Y=YY
Tps=TpsTps
N=NN
print("Optimised parameters with ML method: ")
print("Optimized parameters with ML method: ")
cat("Optimized m0: ",m0opt,"\n")
cat("Optimized mm: ",mmopt,"\n")
cat("Optimized pp: ",popt,"\n")
......@@ -283,7 +287,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
# optimization without sub-sampling, 2 methods, EM or ML
if (method == "EM"){
print("-------:")
print("Optimisation of initial parameters with EM method - result:")
print("Optimization of initial parameters with EM method - result:")
print("no sub-sampling performed:")
bornem0=quantile(Y[1:N/2], probs = c(.2, .8))
cat("range m0: ",bornem0,"\n")
......@@ -316,7 +320,6 @@ kfino_fit<-function(datain,Tvar,Yvar,
m0_tmp=Res_EM$m0[[1]]
m_tmp=Res_EM$mm[[1]]
p_tmp=Res_EM$pp
print(Res_EM$likelihood)
if (k==N_etape_EM) break
}
Vopt_low=Res_EM$likelihood
......@@ -351,7 +354,6 @@ kfino_fit<-function(datain,Tvar,Yvar,
m0_tmp=Res_EM$m0[[1]]
m_tmp=Res_EM$mm[[1]]
p_tmp=Res_EM$pp
print(Res_EM$likelihood)
if (k==N_etape_EM) break
}
Vopt_up=Res_EM$likelihood
......@@ -436,7 +438,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
popt<-popt_low
}
print("Optimised parameters with EM method: ")
print("Optimized parameters with EM method: ")
cat("Optimized m0: ",m0opt,"\n")
cat("Optimized mm: ",mmopt,"\n")
cat("Optimized pp: ",popt,"\n")
......@@ -455,7 +457,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
} else if (method == "ML"){
print("-------:")
print("Optimisation of initial parameters with ML method - result:")
print("Optimization of initial parameters with ML method - result:")
print("no sub-sampling performed:")
bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
m0opt=quantile(Y[1:N/4], probs = c(.5))
......@@ -503,7 +505,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
}
}
print("Optimised parameters: ")
print("Optimized parameters: ")
cat("Optimized m0: ",m0opt,"\n")
cat("Optimized mm: ",mmopt,"\n")
cat("Optimized pp: ",popt,"\n")
......@@ -531,8 +533,8 @@ kfino_fit<-function(datain,Tvar,Yvar,
X<-c(m0,pp,mm)
}
print("-------:")
print("Optimisation of initial parameters - result:")
print("Not enough data => No optimisation performed:")
print("Optimization of initial parameters - result:")
print("Not enough data => No optimization performed:")
print("Used parameters: ")
print(X)
print("-------:")
......@@ -563,7 +565,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
X<-c(m0,pp,mm)
}
print("-------:")
print("No optimisation of initial parameters:")
print("No optimization of initial parameters:")
print("Used parameters: ")
print(X)
resultat=KBO_known(param=list(m0=X[[1]],
......@@ -620,7 +622,7 @@ kfino_fit<-function(datain,Tvar,Yvar,
#--------------------------------------
# 1. a whole dataset with the detected outliers flagged and prediction
# 2. a dataset with the prediction on possible values
# 3. optimisation results (a list of vectors)
# 3. optimization results (a list of vectors)
mylist<-list(dt.out,dt.pred,resultat)
names(mylist)<-c("detectOutlier","PredictionOK","kfino.results")
class(mylist) = c("kfino")
......
This diff is collapsed.
......@@ -20,13 +20,13 @@ kfino_fit(
\item{datain}{an input data.frame of one time course to study (unique IDE)}
\item{Tvar}{char, time column name in the data.frame datain, a numeric vector
Tvar can be expressed as a proportion of day in seconds}
Tvar should be expressed as a proportion of day in seconds}
\item{Yvar}{char, name of the variable to predict in the data.frame datain}
\item{param}{list, a list of initialization parameters}
\item{doOptim}{logical, if TRUE optimisation of the initial parameters
\item{doOptim}{logical, if TRUE optimization of the initial parameters,
default TRUE}
\item{method}{character, the method used to optimize the initial parameters:
......@@ -36,9 +36,11 @@ Likelihood `"ML"`, default `"ML"`}
\item{threshold}{numeric, threshold to qualify an observation as an outlier
according to the label_pred, default 0.5}
\item{kappa}{numeric, truncation setting, default 10}
\item{kappa}{numeric, truncation setting for likelihood optimization,
default 10}
\item{kappaOpt}{numeric, truncation setting, default 7}
\item{kappaOpt}{numeric, truncation setting for initial parameters'
optimization, default 7}
}
\value{
an S3 list with two data frames and a list of vectors of
......@@ -55,24 +57,26 @@ kfino results
kfino_fit a function to detect outliers with a Kalman Filtering approach
}
\details{
The initialization parameter list param contains:
The initialization parameter list `param` contains:
\describe{
\item{mm}{(optional) target weight, NULL if the user wants to optimize it}
\item{pp}{(optional) probability to be correctly weighed, NULL if the user
wants to optimize it}
\item{m0}{(optional) Initial weight, NULL if the user wants to optimize it}
\item{mm}{(optional) numeric, target weight, NULL if the user wants to
optimize it}
\item{pp}{(optional) numeric, probability to be correctly weighed, NULL if
the user wants to optimize it}
\item{m0}{(optional) numeric, initial weight, NULL if the user wants to
optimize it}
\item{aa}{numeric, rate of weight change, default 0.001 }
\item{expertMin}{numeric, the minimal weight expected by the user}
\item{expertMax}{numeric, the maximal weight expected by the user}
\item{sigma2_m0}{variance of m0, default 1}
\item{sigma2_m0}{numeric, variance of m0, default 1}
\item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
default 0.05}
\item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
default 5}
\item{K}{numeric, a constant value in the outlier function (trapezium),
by default K=5}
\item{seqp}{numeric, sequence of pp probability to be correctly weighted.
default seq(0.5,0.7,0.1)}
\item{seqp}{numeric vector, sequence of pp probability to be correctly
weighed. default seq(0.5,0.7,0.1)}
}
It has to be given by the user according to their knowledge of the animal or
the data set. All parameters are compulsory except m0, mm and pp that can be
......@@ -82,13 +86,13 @@ range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A
sub-sampling is performed to speed the algorithm if the number of possible
observations studied is greater than 500. Optimization is performed using
`"EM"` or `"ML"` methods.
`"EM"` or `"ML"` method.
}
\examples{
data(spring1)
library(dplyr)
# --- With Optimisation on initial parameters - ML method
# --- With Optimization on initial parameters - ML method
t0 <- Sys.time()
param1<-list(m0=NULL,
mm=NULL,
......@@ -107,14 +111,14 @@ resu1<-kfino_fit(datain=spring1,
doOptim=TRUE,method="ML",param=param1)
Sys.time() - t0
# --- With Optimisation on initial parameters - EM method
# --- With Optimization on initial parameters - EM method
t0 <- Sys.time()
resu1b<-kfino_fit(datain=spring1,
Tvar="dateNum",Yvar="Poids",
doOptim=TRUE,method="EM",param=param1)
Sys.time() - t0
# --- Without Optimisation on initial parameters
# --- Without Optimization on initial parameters
t0 <- Sys.time()
param2<-list(m0=41,
mm=45,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment