Skip to content
Snippets Groups Projects
Commit 00aef95f authored by sanchezi's avatar sanchezi
Browse files

consolidation fonctions + rajout tests unitaires

parent cf354171
No related branches found
No related tags found
No related merge requests found
Pipeline #55412 passed
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
export(KBO_L) export(KBO_L)
export(KBO_known) export(KBO_known)
export(kfino_fit) export(kfino_fit)
export(kfino_fit2)
export(kfino_plot) export(kfino_plot)
export(loi_outlier) export(loi_outlier)
importFrom(dplyr,"%>%") importFrom(dplyr,"%>%")
......
#' graphical function #' kfino_plot a graphical function for the result of a kfino run
#' #'
#' @param resuin a list resulting of the kfino algorithm #' @param resuin a list resulting of the kfino algorithm
#' @param typeG char, type of graphic, either detection of outliers (with #' @param typeG char, type of graphic, either detection of outliers (with
#' qualitative or quantitative display) or prediction #' qualitative or quantitative display) or prediction. must be
#' "quanti" or "quali" or "prediction"
#' @param Tvar char, time variable in the data.frame datain #' @param Tvar char, time variable in the data.frame datain
#' @param Yvar char, variable which was analysed in the data.frame datain #' @param Yvar char, variable which was analysed in the data.frame datain
#' @param Ident char, column name of the individual id to be analyzed #' @param Ident char, column name of the individual id to be analyzed
...@@ -10,14 +11,14 @@ ...@@ -10,14 +11,14 @@
#' #'
#' @details The produced graphic can be, according to typeG: #' @details The produced graphic can be, according to typeG:
#' \describe{ #' \describe{
#' \item{quali}{The detection of outliers with a qualitative rule: OK values, #' \item{quali}{The detection of outliers with a qualitative rule: OK values,
#' KO values (outliers) and Bad values (impossible values defined by #' KO values (outliers) and Bad values (impossible values defined by
#' the user)} #' the user)}
#' \item{quanti}{The detection of outliers with a quantitative display using #' \item{quanti}{The detection of outliers with a quantitative display using
#' the calculated probability of the kafino algorithm} #' the calculated probability of the kafino algorithm}
#' \item{prediction}{The prediction of the analyzed variable on OK values} #' \item{prediction}{The prediction of the analyzed variable on OK values}
#' } #' }
#' #'
#' @importFrom ggplot2 aes aes_string ggplot geom_point geom_line #' @importFrom ggplot2 aes aes_string ggplot geom_point geom_line
#' @importFrom ggplot2 scale_color_manual ggtitle #' @importFrom ggplot2 scale_color_manual ggtitle
#' @return a ggplot2 graphic #' @return a ggplot2 graphic
...@@ -26,23 +27,34 @@ ...@@ -26,23 +27,34 @@
#' @examples #' @examples
#' data(spring1) #' data(spring1)
#' library(dplyr) #' library(dplyr)
#' #'
#' print(colnames(spring1)) #' print(colnames(spring1))
#' #'
#' # --- Without Optimisation on initial parameters #' # --- Without Optimisation on initial parameters
#' param2<-list(m0=41,
#' mm=45,
#' pp=0.5,
#' aa=0.001,
#' expertMin=30,
#' expertMax=75,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#' resu2<-kfino_fit(datain=spring1, #' resu2<-kfino_fit(datain=spring1,
#' Tvar="dateNum",Yvar="Poids", #' Tvar="dateNum",Yvar="Poids",
#' expertMin=30,expertMax=75, #' param=param2,
#' doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) #' doOptim=FALSE)
#' #'
#' # flags are qualitative #' # flags are qualitative
#' kfino_plot(resuin=resu2,typeG="quali", #' kfino_plot(resuin=resu2,typeG="quali",
#' Tvar="Day",Yvar="Poids",Ident="IDE") #' Tvar="Day",Yvar="Poids",Ident="IDE")
#' #'
#' # flags are quantitative #' # flags are quantitative
#' kfino_plot(resuin=resu2,typeG="quanti", #' kfino_plot(resuin=resu2,typeG="quanti",
#' Tvar="Day",Yvar="Poids",Ident="IDE") #' Tvar="Day",Yvar="Poids",Ident="IDE")
#' #'
#' # predictions on OK values #' # predictions on OK values
#' kfino_plot(resuin=resu2,typeG="prediction", #' kfino_plot(resuin=resu2,typeG="prediction",
#' Tvar="Day",Yvar="Poids",Ident="IDE") #' Tvar="Day",Yvar="Poids",Ident="IDE")
...@@ -63,7 +75,7 @@ kfino_plot<-function(resuin, ...@@ -63,7 +75,7 @@ kfino_plot<-function(resuin,
if (typeG %in% c("prediction","quanti") & is.null(resuin[[2]])) { if (typeG %in% c("prediction","quanti") & is.null(resuin[[2]])) {
stop("NULL object - No graph to provide. Please check your input object.") stop("NULL object - No graph to provide. Please check your input object.")
} }
# Some formatting # Some formatting
tp<-as.data.frame(resuin[[1]]) tp<-as.data.frame(resuin[[1]])
myIDE<-unique(tp[,Ident]) myIDE<-unique(tp[,Ident])
...@@ -75,43 +87,45 @@ kfino_plot<-function(resuin, ...@@ -75,43 +87,45 @@ kfino_plot<-function(resuin,
tp.title1<-paste0(title," - ",myIDE) tp.title1<-paste0(title," - ",myIDE)
tp.title2<-paste0(title," - ",myIDE) tp.title2<-paste0(title," - ",myIDE)
} }
# graphics # graphics
if (typeG == "quali"){ if (typeG == "quali"){
if (!is.null(resuin[[2]])){ if (!is.null(resuin[[2]])){
g1<-ggplot(tp,aes_string(x=Tvar))+ g1<-ggplot(tp,aes_string(x=Tvar))+
geom_point( aes_string(y=Yvar,color="flag")) + geom_point( aes_string(y=Yvar,color="flag")) +
geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) + geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) +
geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr), geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr),
color="green") + color="green") +
geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr), geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr),
color="green") + color="green") +
scale_color_manual(values = scale_color_manual(values =
c("KO"="purple", "OK" = "black", "Bad"="red")) + c("KO"="purple", "OK" = "black", "Bad"="red")) +
ggtitle(tp.title1) ggtitle(tp.title1)
} else { } else {
g1<-ggplot(tp,aes_string(x=Tvar))+ g1<-ggplot(tp,aes_string(x=Tvar))+
geom_point( aes_string(y=Yvar,color="flag")) + geom_point( aes_string(y=Yvar,color="flag")) +
scale_color_manual(values = scale_color_manual(values =
c("KO"="purple", "OK" = "black", "Bad"="red")) + c("KO"="purple", "OK" = "black", "Bad"="red")) +
ggtitle(tp.title1) ggtitle(tp.title1)
} }
return(g1) return(g1)
} else if (typeG == "quanti"){ } else if (typeG == "quanti"){
g1<-ggplot(tp,aes_string(x=Tvar))+ if (!is.null(resuin[[2]])){
geom_point( aes_string(y=Yvar,color="label_pred")) + g1<-ggplot(tp,aes_string(x=Tvar))+
geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) + geom_point( aes_string(y=Yvar,color="label_pred")) +
geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr), geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) +
geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr),
color="green") + color="green") +
geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr), geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr),
color="green") + color="green") +
ggtitle(tp.title1) ggtitle(tp.title1)
return(g1) return(g1)
}
} else if (typeG == "prediction"){ } else if (typeG == "prediction"){
tp2<-filter(tp,.data$flag == "OK") tp2<-filter(tp,.data$flag == "OK")
g1<-ggplot(tp2,aes_string(x=Tvar))+ g1<-ggplot(tp2,aes_string(x=Tvar))+
geom_point( aes_string(y=Yvar)) + geom_point( aes_string(y=Yvar)) +
geom_line(data=tp2, aes(y=.data$prediction)) + geom_line(data=tp2, aes(y=.data$prediction)) +
geom_line(data=tp2, aes(y=.data$lwr),color="green") + geom_line(data=tp2, aes(y=.data$lwr),color="green") +
geom_line(data=tp2, aes(y=.data$upr),color="green") + geom_line(data=tp2, aes(y=.data$upr),color="green") +
...@@ -120,3 +134,4 @@ kfino_plot<-function(resuin, ...@@ -120,3 +134,4 @@ kfino_plot<-function(resuin,
} }
} }
#-------------------- End of file -----------------------------------
This diff is collapsed.
#' kafino_fit2 a function to detect outlier with a Kalman Filtering approach
#' @param datain an input data.frame of one time course to study (unique IDE)
#' @param Tvar char, time column name in the data.frame datain, a numeric vector
#' Tvar can be expressed as a proportion of day in seconds
#' @param Yvar char, name of the variable to predict in the data.frame datain
#' @param param list, a list of initialization parameter
#' @param doOptim logical, if TRUE optimisation of the initial parameters
#' default TRUE
#' @param threshold numeric, threshold to qualify an observation as outlier
#' according to the label_pred, default 0.5
#'
#' @details The initialization parameter list param contains:
#' \describe{
#' \item{mm}{target weight, NULL if the user wants to optimize it}
#' \item{pp}{probability to be correctly weighed, NULL if the user wants to
#' optimize it}
#' \item{m0}{Initial weight, NULL if the user wants to optimize it}
#' \item{aa}{numeric, rate of weight change, default 0.001 }
#' \item{expertMin}{numeric, the minimal weight expected by the user}
#' \item{expertMax}{numeric, the maximal weight expected by the user}
#' \item{sigma2_m0}{variance of m0}
#' \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
#' default 0.05}
#' \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
#' default 5}
#' \item{K}{numeric, a constant value in the outlier function (trapezoid),
#' by default K=2 - increasing K, XXX}
#' \item{seqp}{numeric, minimal pp probability to be correctly weighted.}
#' }
#' It can be given by the user following his knowledge of the animal or dataset
#' or entirely constructed by the function. In the optimisation step, this
#' vector is initialized according to the input data (between the expert
#' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
#' m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A
#' sub-sampling is performed to speed the algorithm if the number of possible
#' observations studied is greater than 500.
#'
#' @importFrom stats dnorm quantile na.omit
#' @importFrom dplyr mutate filter left_join arrange %>%
#' @importFrom dplyr .data if_else row_number select
#' @return a S3 list with two data frames and a list of vectors of
#' kafino results
#' \describe{
#' \item{detectOutlier}{The whole dataset with the detected outliers flagged
#' and prediction}
#' \item{PredictionOK}{A dataset with the predictions on possible values}
#' \item{kafino.results}{kfino results (a list of vectors) on optimized input
#' parameters or not}
#' }
#' @export
#' @examples
#' data(spring1)
#' library(dplyr)
#'
#' # --- With Optimisation on initial parameters
#' t0 <- Sys.time()
#' param1<-list(m0=NULL,
#' mm=NULL,
#' pp=NULL,
#' aa=0.001,
#' expertMin=30,
#' expertMax=75,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#'
#' resu1<-kfino_fit2(datain=spring1,
#' Tvar="dateNum",Yvar="Poids",
#' doOptim=TRUE,param=param1)
#' Sys.time() - t0
#' # --- Without Optimisation on initial parameters
#' t0 <- Sys.time()
#' param2<-list(m0=41,
#' mm=45,
#' pp=0.5,
#' aa=0.001,
#' expertMin=30,
#' expertMax=75,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#' resu2<-kfino_fit2(datain=spring1,
#' Tvar="dateNum",Yvar="Poids",
#' param=param2,
#' doOptim=FALSE)
#' Sys.time() - t0
#'
#' # complex data on merinos2 dataset
#' data(merinos2)
#'
#' t0 <- Sys.time()
#' param3<-list(m0=NULL,
#' mm=NULL,
#' pp=NULL,
#' aa=0.001,
#' expertMin=10,
#' expertMax=45,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#' resu3<-kfino_fit2(datain=merinos2,
#' Tvar="dateNum",Yvar="Poids",
#' doOptim=TRUE,param=param3)
#' Sys.time() - t0
kfino_fit2<-function(datain,Tvar,Yvar,
param=NULL,
doOptim=TRUE,threshold=0.5){
if( any(is.null(param[["expertMin"]]) |
is.null(param[["expertMax"]])) )
stop('You have to define expertMin and expertMax.')
if( any(!is.numeric(param[["expertMin"]]) |
!is.numeric(param[["expertMax"]])) )
stop('expertMin and expertMax must be numeric.')
if( !is.numeric(t(datain[,Yvar])))
stop('Input parameter Yvar must contain numeric values.')
if( !is.numeric(t(datain[,Tvar])))
stop('Input parameter Tvar must contain numeric values.')
#-------------------------------------------------------------
# load objects
m0<-param[["m0"]]
mm<-param[["mm"]]
pp<-param[["pp"]]
if(is.null(param[["aa"]])){ aa<-0.001 } else { aa<-param[["aa"]] }
expertMin<-param[["expertMin"]]
expertMax<-param[["expertMax"]]
if(is.null(param[["sigma2_m0"]])){
sigma2_m0<-1
} else {sigma2_m0<-param[["sigma2_m0"]]}
if(is.null(param[["sigma2_mm"]])){
sigma2_mm<-0.05
} else {sigma2_mm<-param[["sigma2_mm"]]}
if(is.null(param[["sigma2_pp"]])){
sigma2_pp<-5
} else {sigma2_pp<-param[["sigma2_pp"]]}
if(is.null(param[["K"]])){ K<-5 } else { K<-param[["K"]] }
seqp<-param[["seqp"]]
#-------------------------------------------------------------
# Flag impossible weight values (expert knowledge)
# the algorithm works on the dataset with possible values
# Create an artificial column numbering the rows for joining purpose
datain<-as.data.frame(datain)
datain<-datain %>%
mutate(rowNum=row_number(),
flag1=if_else(.data[[Yvar]] > expertMin &
.data[[Yvar]] <= expertMax,"OK","Bad"))
tp.dt<-datain %>% filter(.data$flag1 == "OK")
Y<-tp.dt[,Yvar]
N<-nrow(tp.dt)
Tps<-tp.dt[,Tvar]
#WARNING WARNING: AU lieu de calculer L qui est arrondi à 0,
# je calcule 10^N fois Lu et 10^N q. Au total pr chaque donnée
# je multiplie par 100 mais comme c'est l'ordre de grandeur de loioutlier()
# ca ne me parait pas disproportionné.
# Au lieu de 10 et 10 je fais simplement sqrt(expertMax - expertMin)
dix=sqrt(expertMax - expertMin)
#------------------------------------------------------------------------
# Optimisation sur les paramètres initiaux ou pas
#------------------------------------------------------------------------
if (doOptim == TRUE){
if (N > 500){
# optim avec sous-echantillonnage
print("-------:")
print("Optimisation of initial parameters with sub-sampling - result:")
bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
m0opt=quantile(Y[1:N/4], probs = c(.5))
mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
cat("bornem0: ",bornem0,"\n")
cat("m0opt: ",m0opt,"\n")
cat("mmopt: ",mmopt,"\n")
popt=0.5
#--- Saving datain before sub-sampling
YY=Y
TpsTps=Tps
NN=N
Subechant=sort(sample(1:NN,50))
Y=YY[Subechant]
Tps=TpsTps[Subechant]
N=50
Vopt=KBO_L(list(m0=m0opt,
mm=mmopt,
pp=popt,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_mm=sigma2_mm,
sigma2_m0=sigma2_m0,
sigma2_pp=sigma2_pp,
K=K),
Y,Tps,N,dix)
for (m0 in seq(bornem0[1],bornem0[2],2) ){
for (mm in seq((m0-5),(m0+20),2) ){
for (p in seqp){
# A voir si 50 sous-echantillons au hasard suffisent. Comme dans
# Robbins Monroe, permet aussi de reduire l'impact de la troncature
Subechant=sort(sample(1:NN,50))
Y=YY[Subechant]
Tps=TpsTps[Subechant]
V=KBO_L(list(m0=m0,
mm=mm,
pp=p,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_mm=sigma2_mm,
sigma2_m0=sigma2_m0,
sigma2_pp=sigma2_pp,
K=K),
Y,Tps,N,dix)
if (V > Vopt){
Vopt=V
m0opt=m0
mmopt=mm
popt=p
}
}
}
}
# Le fait de prendre des sous-echantillons remarquables
# (ie 50 au lieu de 200) ne divise que par 2 le temps de calculs
# (au lieu de 4 comme imaginé cela vient p.e de la fonction sample())
Y=YY
Tps=TpsTps
N=NN
print("Optimised parameters: ")
print(c(mmopt,popt,m0opt))
print("-------:")
resultat=KBO_known(param=list(mm=mmopt,
pp=popt,
m0=m0opt,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_m0=sigma2_m0,
sigma2_mm=sigma2_mm,
sigma2_pp=sigma2_pp,
K=K),
threshold,Y=Y,Tps=Tps,N=N)
} else if (N > 50){
# optim sans sous-echantillonnage
print("-------:")
print("Optimisation of initial parameters - result:")
print("no sub-sampling performed:")
bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
m0opt=quantile(Y[1:N/4], probs = c(.5))
mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
cat("bornem0: ",bornem0,"\n")
cat("m0opt: ",m0opt,"\n")
cat("mmopt: ",mmopt,"\n")
popt=0.5
Vopt=KBO_L(list(m0=m0opt,
mm=mmopt,
pp=popt,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_mm=sigma2_mm,
sigma2_m0=sigma2_m0,
sigma2_pp=sigma2_pp,
K=K),
Y,Tps,N,dix)
for (m0 in seq(bornem0[1],bornem0[2],2) ){
for (mm in seq((m0-5),(m0+20),2) ){
for (p in seqp){
V=KBO_L(list(m0=m0,
mm=mm,
pp=p,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_mm=sigma2_mm,
sigma2_m0=sigma2_m0,
sigma2_pp=sigma2_pp,
K=K),
Y,Tps,N,dix)
if (V > Vopt){
Vopt=V
m0opt=m0
mmopt=mm
popt=p
}
}
}
}
print("Optimised parameters: ")
print(c(mmopt,popt,m0opt))
print("-------:")
resultat=KBO_known(param=list(mm=mmopt,
pp=popt,
m0=m0opt,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_m0=sigma2_m0,
sigma2_mm=sigma2_mm,
sigma2_pp=sigma2_pp,
K=K),
threshold=threshold,Y=Y,Tps=Tps,N=N)
} else {
# N petit, si trop petit on ne fait rien
if (N > 5) {
# Not enough data - no optim
if (is.null(param[["m0"]])){
X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5,
round(quantile(Y[(3*N/4):N], probs = 0.8)))
} else {
X<-c(m0,pp,mm)
}
print("-------:")
print("Optimisation of initial parameters - result:")
print("Not enough data => so no optimisation performed:")
print("Used parameters: ")
print(X)
print("-------:")
resultat=KBO_known(param=list(m0=X[[1]],
pp=X[[2]],
mm=X[[3]],
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_m0=sigma2_m0,
sigma2_mm=sigma2_mm,
sigma2_pp=sigma2_pp,
K=K),
threshold=threshold,Y=Y,Tps=Tps,N=N)
} else {
warning("Not enough data between expert knowledge. The algorithm is not performed.")
resultat<-NULL
}
}
} else {
# Pas d'optimisation et test si N petit - si trop petit on ne fait rien
if (N > 5) {
# No optimisation on initial parameters
if (is.null(param[["m0"]])){
X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5,
round(quantile(Y[(3*N/4):N], probs = 0.8)))
} else {
X<-c(m0,pp,mm)
}
print("-------:")
print("No optimisation of initial parameters:")
print("Used parameters: ")
print(X)
resultat=KBO_known(param=list(m0=X[[1]],
pp=X[[2]],
mm=X[[3]],
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_m0=sigma2_m0,
sigma2_mm=sigma2_mm,
sigma2_pp=sigma2_pp,
K=K),
threshold=threshold,Y=Y,Tps=Tps,N=N)
} else {
warning("Not enough data between expert knowledge. The algorithm is not performed.")
resultat<-NULL
}
}
#---------------------------------------------------------------------------
# Formatting output results
#---------------------------------------------------------------------------
# If resultat NULL then create output with datain and 2 NULL objects
# else create output with detectoutlier, PredictionOK and kafino.results
# useful for the kafino_plot() function
#---------------------------------------------------------------------------
if (is.null(resultat)){
dt.out<-datain %>% mutate(flag=.data$flag1)
dt.pred<-NULL
resultat<-NULL
mylist<-list(dt.out,dt.pred,resultat)
names(mylist)<-c("detectOutlier","PredictionOK","kafino.results")
class(mylist) = c("kfino")
return(invisible(mylist))
} else {
prediction=na.omit(resultat$prediction)
label_pred=round(na.omit(resultat$label),2)
lwr=na.omit(resultat$lwr)
upr=na.omit(resultat$upr)
flag=na.omit(resultat$flag)
dt.pred=cbind.data.frame(select(tp.dt,.data$rowNum),
prediction,label_pred,lwr,upr,flag)
# join dt.pred with the initial datain containing all the observations
dt.out<-left_join(datain,dt.pred,by="rowNum")
dt.out<-mutate(dt.out,flag=if_else(.data$flag1 == "Bad", .data$flag1, .data$flag))
dt.out<-arrange(dt.out,.data$rowNum)
#--------------------------------------
# return a S3 list object
#--------------------------------------
# 1. a whole dataset with the detected outliers flagged and prediction
# 2. a dataset with the prediction on possible values
# 3. optimisation results (a list of vectors)
mylist<-list(dt.out,dt.pred,resultat)
names(mylist)<-c("detectOutlier","PredictionOK","kfino.results")
class(mylist) = c("kfino")
return(invisible(mylist))
}
}
#-------------------------------------- End of file --------------------------
...@@ -37,7 +37,23 @@ loi_outlier<-function(y, ...@@ -37,7 +37,23 @@ loi_outlier<-function(y,
#' @export #' @export
#' #'
#' @examples #' @examples
#' # not run #' set.seed(1234)
#' Y<-rnorm(n=10,mean=50,4)
#' Tps<-seq(1,10)
#' N=10
#' param2<-list(m0=41,
#' mm=45,
#' pp=0.5,
#' aa=0.001,
#' expertMin=30,
#' expertMax=75,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#' print(Y)
#' KBO_known(param=param2,threshold=0.5,Y,Tps,N)
KBO_known<-function(param,threshold,Y,Tps,N){ KBO_known<-function(param,threshold,Y,Tps,N){
# load objects # load objects
mm=param[["mm"]] mm=param[["mm"]]
...@@ -204,7 +220,23 @@ KBO_known<-function(param,threshold,Y,Tps,N){ ...@@ -204,7 +220,23 @@ KBO_known<-function(param,threshold,Y,Tps,N){
#' @export #' @export
#' #'
#' @examples #' @examples
#' # not run #' set.seed(1234)
#' Y<-rnorm(n=10,mean=50,4)
#' Tps<-seq(1,10)
#' N=10
#' param2<-list(m0=41,
#' mm=45,
#' pp=0.5,
#' aa=0.001,
#' expertMin=30,
#' expertMax=75,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#' print(Y)
#' KBO_L(param=param2,Y,Tps,N,dix=6)
KBO_L<-function(param,Y,Tps,N,dix){ KBO_L<-function(param,Y,Tps,N,dix){
# load objects # load objects
mm=param[["mm"]] mm=param[["mm"]]
......
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R % Please edit documentation in R/utils_functions.R
\name{KBO_L} \name{KBO_L}
\alias{KBO_L} \alias{KBO_L}
\title{KBO_L a function to calculate a likelihood on optimized parameters} \title{KBO_L a function to calculate a likelihood on optimized parameters}
...@@ -27,5 +27,21 @@ KBO_L a function to calculate a likelihood on optimized parameters ...@@ -27,5 +27,21 @@ KBO_L a function to calculate a likelihood on optimized parameters
uses the same input parameter list than the main function uses the same input parameter list than the main function
} }
\examples{ \examples{
# not run set.seed(1234)
Y<-rnorm(n=10,mean=50,4)
Tps<-seq(1,10)
N=10
param2<-list(m0=41,
mm=45,
pp=0.5,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
print(Y)
KBO_L(param=param2,Y,Tps,N,dix=6)
} }
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R % Please edit documentation in R/utils_functions.R
\name{KBO_known} \name{KBO_known}
\alias{KBO_known} \alias{KBO_known}
\title{KBO_known} \title{KBO_known}
...@@ -27,5 +27,21 @@ KBO_known ...@@ -27,5 +27,21 @@ KBO_known
uses the same input parameter list than the main function uses the same input parameter list than the main function
} }
\examples{ \examples{
# not run set.seed(1234)
Y<-rnorm(n=10,mean=50,4)
Tps<-seq(1,10)
N=10
param2<-list(m0=41,
mm=45,
pp=0.5,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
print(Y)
KBO_known(param=param2,threshold=0.5,Y,Tps,N)
} }
...@@ -2,23 +2,9 @@ ...@@ -2,23 +2,9 @@
% Please edit documentation in R/kfino.R % Please edit documentation in R/kfino.R
\name{kfino_fit} \name{kfino_fit}
\alias{kfino_fit} \alias{kfino_fit}
\title{kafino_fit a function to detect outlier with a Kalman Filtering approach} \title{kfino_fit a function to detect outlier with a Kalman Filtering approach}
\usage{ \usage{
kfino_fit( kfino_fit(datain, Tvar, Yvar, param = NULL, doOptim = TRUE, threshold = 0.5)
datain,
Tvar,
Yvar,
expertMin = NULL,
expertMax = NULL,
X = NULL,
doOptim = TRUE,
threshold = 0.5,
aa = 0.001,
sigma2_mm = 0.05,
sigma2_pp,
K = 2,
minp = 0.4
)
} }
\arguments{ \arguments{
\item{datain}{an input data.frame of one time course to study (unique IDE)} \item{datain}{an input data.frame of one time course to study (unique IDE)}
...@@ -28,57 +14,54 @@ Tvar can be expressed as a proportion of day in seconds} ...@@ -28,57 +14,54 @@ Tvar can be expressed as a proportion of day in seconds}
\item{Yvar}{char, name of the variable to predict in the data.frame datain} \item{Yvar}{char, name of the variable to predict in the data.frame datain}
\item{expertMin}{numeric, the minimal weight expected by the user} \item{param}{list, a list of initialization parameters}
\item{expertMax}{numeric, the maximal weight expected by the user} \item{doOptim}{logical, if TRUE optimisation of the initial parameters
\item{X}{vector, a vector of initialization parameter (will not be optimized)}
\item{doOptim}{logical, if TRUE optimisation of the initial parameters
default TRUE} default TRUE}
\item{threshold}{numeric, threshold to qualify an observation as outlier \item{threshold}{numeric, threshold to qualify an observation as outlier
according to the label_pred, default 0.5} according to the label_pred, default 0.5}
\item{aa}{numeric, rate of weight change, default 0.001}
\item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
default 0.05}
\item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
default 5}
\item{K}{numeric, cst in the outlier function (trapezoid), by default K=2
increasing K, XXX}
\item{minp}{numeric, minimal pp probability to be correctly weighted.}
} }
\value{ \value{
a S3 list with two data frames and a list of vectors of a S3 list with two data frames and a list of vectors of
kafino results kfino results
\describe{ \describe{
\item{detectOutlier}{The whole dataset with the detected outliers flagged \item{detectOutlier}{The whole dataset with the detected outliers flagged
and prediction} and prediction}
\item{PredictionOK}{A dataset with the predictions on possible values} \item{PredictionOK}{A dataset with the predictions on possible values}
\item{kafino.results}{kfino results (a list of vectors) on optimized input \item{kafino.results}{kfino results (a list of vectors) on optimized input
parameters or not} parameters or not}
} }
} }
\description{ \description{
kafino_fit a function to detect outlier with a Kalman Filtering approach kfino_fit a function to detect outlier with a Kalman Filtering approach
} }
\details{ \details{
The initialization parameter vector X contains: The initialization parameter list param contains:
\describe{ \describe{
\item{mm}{target weight} \item{mm}{(optional) target weight, NULL if the user wants to optimize it}
\item{pp}{probability to be correctly weighed} \item{pp}{(optional) probability to be correctly weighed, NULL if the user
\item{m0}{Initial weight} wants to optimize it}
\item{m0}{(optional) Initial weight, NULL if the user wants to optimize it}
\item{aa}{numeric, rate of weight change, default 0.001 }
\item{expertMin}{numeric, the minimal weight expected by the user}
\item{expertMax}{numeric, the maximal weight expected by the user}
\item{sigma2_m0}{variance of m0, default 1}
\item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
default 0.05}
\item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
default 5}
\item{K}{numeric, a constant value in the outlier function (trapezium),
by default K=5}
\item{seqp}{numeric, sequence of pp probability to be correctly weighted.
default seq(0.5,0.7,0.1)}
} }
It can be given by the user following his knowledge of the animal or dataset It has to be given by the user following his knowledge of the animal or
or entirely constructed by the function. In the optimisation step, this the dataset. All parameters are compulsory execpt m0, mm and pp that can be
vector is initialized according to the input data (between the expert optimized by the algorithm. In the optimisation step, those three parameters
range) using quantile of the Y distribution (varying between 0.2 and 0.8 for are initialized according to the input data (between the expert
m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A
sub-sampling is performed to speed the algorithm if the number of possible sub-sampling is performed to speed the algorithm if the number of possible
observations studied is greater than 500. observations studied is greater than 500.
} }
...@@ -88,27 +71,58 @@ library(dplyr) ...@@ -88,27 +71,58 @@ library(dplyr)
# --- With Optimisation on initial parameters # --- With Optimisation on initial parameters
t0 <- Sys.time() t0 <- Sys.time()
param1<-list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu1<-kfino_fit(datain=spring1, resu1<-kfino_fit(datain=spring1,
Tvar="dateNum",Yvar="Poids", Tvar="dateNum",Yvar="Poids",
expertMin=30,expertMax=75, doOptim=TRUE,param=param1)
doOptim=TRUE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5)
Sys.time() - t0 Sys.time() - t0
# --- Without Optimisation on initial parameters # --- Without Optimisation on initial parameters
t0 <- Sys.time() t0 <- Sys.time()
param2<-list(m0=41,
mm=45,
pp=0.5,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu2<-kfino_fit(datain=spring1, resu2<-kfino_fit(datain=spring1,
Tvar="dateNum",Yvar="Poids", Tvar="dateNum",Yvar="Poids",
expertMin=30,expertMax=75, param=param2,
X=c(45,0.5,41), doOptim=FALSE)
doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5)
Sys.time() - t0 Sys.time() - t0
# complex data on merinos2 dataset # complex data on merinos2 dataset
data(merinos2) data(merinos2)
t0 <- Sys.time() t0 <- Sys.time()
param3<-list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=10,
expertMax=45,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu3<-kfino_fit(datain=merinos2, resu3<-kfino_fit(datain=merinos2,
Tvar="dateNum",Yvar="Poids", Tvar="dateNum",Yvar="Poids",
expertMin=10,expertMax=45, doOptim=TRUE,param=param3)
doOptim=TRUE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5)
Sys.time() - t0 Sys.time() - t0
} }
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kfino2.R
\name{kfino_fit2}
\alias{kfino_fit2}
\title{kafino_fit2 a function to detect outlier with a Kalman Filtering approach}
\usage{
kfino_fit2(datain, Tvar, Yvar, param = NULL, doOptim = TRUE, threshold = 0.5)
}
\arguments{
\item{datain}{an input data.frame of one time course to study (unique IDE)}
\item{Tvar}{char, time column name in the data.frame datain, a numeric vector
Tvar can be expressed as a proportion of day in seconds}
\item{Yvar}{char, name of the variable to predict in the data.frame datain}
\item{param}{list, a list of initialization parameter}
\item{doOptim}{logical, if TRUE optimisation of the initial parameters
default TRUE}
\item{threshold}{numeric, threshold to qualify an observation as outlier
according to the label_pred, default 0.5}
}
\value{
a S3 list with two data frames and a list of vectors of
kafino results
\describe{
\item{detectOutlier}{The whole dataset with the detected outliers flagged
and prediction}
\item{PredictionOK}{A dataset with the predictions on possible values}
\item{kafino.results}{kfino results (a list of vectors) on optimized input
parameters or not}
}
}
\description{
kafino_fit2 a function to detect outlier with a Kalman Filtering approach
}
\details{
The initialization parameter list param contains:
\describe{
\item{mm}{target weight, NULL if the user wants to optimize it}
\item{pp}{probability to be correctly weighed, NULL if the user wants to
optimize it}
\item{m0}{Initial weight, NULL if the user wants to optimize it}
\item{aa}{numeric, rate of weight change, default 0.001 }
\item{expertMin}{numeric, the minimal weight expected by the user}
\item{expertMax}{numeric, the maximal weight expected by the user}
\item{sigma2_m0}{variance of m0}
\item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
default 0.05}
\item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
default 5}
\item{K}{numeric, a constant value in the outlier function (trapezoid),
by default K=2 - increasing K, XXX}
\item{seqp}{numeric, minimal pp probability to be correctly weighted.}
}
It can be given by the user following his knowledge of the animal or dataset
or entirely constructed by the function. In the optimisation step, this
vector is initialized according to the input data (between the expert
range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A
sub-sampling is performed to speed the algorithm if the number of possible
observations studied is greater than 500.
}
\examples{
data(spring1)
library(dplyr)
# --- With Optimisation on initial parameters
t0 <- Sys.time()
param1<-list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu1<-kfino_fit2(datain=spring1,
Tvar="dateNum",Yvar="Poids",
doOptim=TRUE,param=param1)
Sys.time() - t0
# --- Without Optimisation on initial parameters
t0 <- Sys.time()
param2<-list(m0=41,
mm=45,
pp=0.5,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu2<-kfino_fit2(datain=spring1,
Tvar="dateNum",Yvar="Poids",
param=param2,
doOptim=FALSE)
Sys.time() - t0
# complex data on merinos2 dataset
data(merinos2)
t0 <- Sys.time()
param3<-list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=10,
expertMax=45,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu3<-kfino_fit2(datain=merinos2,
Tvar="dateNum",Yvar="Poids",
doOptim=TRUE,param=param3)
Sys.time() - t0
}
...@@ -2,15 +2,16 @@ ...@@ -2,15 +2,16 @@
% Please edit documentation in R/graph_functions.R % Please edit documentation in R/graph_functions.R
\name{kfino_plot} \name{kfino_plot}
\alias{kfino_plot} \alias{kfino_plot}
\title{graphical function} \title{kfino_plot a graphical function for the result of a kfino run}
\usage{ \usage{
kfino_plot(resuin, typeG, Tvar, Yvar, Ident, title = NULL) kfino_plot(resuin, typeG, Tvar, Yvar, Ident, title = NULL)
} }
\arguments{ \arguments{
\item{resuin}{a list resulting of the kfino algorithm} \item{resuin}{a list resulting of the kfino algorithm}
\item{typeG}{char, type of graphic, either detection of outliers (with \item{typeG}{char, type of graphic, either detection of outliers (with
qualitative or quantitative display) or prediction} qualitative or quantitative display) or prediction. must be
"quanti" or "quali" or "prediction"}
\item{Tvar}{char, time variable in the data.frame datain} \item{Tvar}{char, time variable in the data.frame datain}
...@@ -24,13 +25,13 @@ qualitative or quantitative display) or prediction} ...@@ -24,13 +25,13 @@ qualitative or quantitative display) or prediction}
a ggplot2 graphic a ggplot2 graphic
} }
\description{ \description{
graphical function kfino_plot a graphical function for the result of a kfino run
} }
\details{ \details{
The produced graphic can be, according to typeG: The produced graphic can be, according to typeG:
\describe{ \describe{
\item{quali}{The detection of outliers with a qualitative rule: OK values, \item{quali}{The detection of outliers with a qualitative rule: OK values,
KO values (outliers) and Bad values (impossible values defined by KO values (outliers) and Bad values (impossible values defined by
the user)} the user)}
\item{quanti}{The detection of outliers with a quantitative display using \item{quanti}{The detection of outliers with a quantitative display using
the calculated probability of the kafino algorithm} the calculated probability of the kafino algorithm}
...@@ -44,19 +45,30 @@ library(dplyr) ...@@ -44,19 +45,30 @@ library(dplyr)
print(colnames(spring1)) print(colnames(spring1))
# --- Without Optimisation on initial parameters # --- Without Optimisation on initial parameters
param2<-list(m0=41,
mm=45,
pp=0.5,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu2<-kfino_fit(datain=spring1, resu2<-kfino_fit(datain=spring1,
Tvar="dateNum",Yvar="Poids", Tvar="dateNum",Yvar="Poids",
expertMin=30,expertMax=75, param=param2,
doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) doOptim=FALSE)
# flags are qualitative # flags are qualitative
kfino_plot(resuin=resu2,typeG="quali", kfino_plot(resuin=resu2,typeG="quali",
Tvar="Day",Yvar="Poids",Ident="IDE") Tvar="Day",Yvar="Poids",Ident="IDE")
# flags are quantitative # flags are quantitative
kfino_plot(resuin=resu2,typeG="quanti", kfino_plot(resuin=resu2,typeG="quanti",
Tvar="Day",Yvar="Poids",Ident="IDE") Tvar="Day",Yvar="Poids",Ident="IDE")
# predictions on OK values # predictions on OK values
kfino_plot(resuin=resu2,typeG="prediction", kfino_plot(resuin=resu2,typeG="prediction",
Tvar="Day",Yvar="Poids",Ident="IDE") Tvar="Day",Yvar="Poids",Ident="IDE")
......
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R % Please edit documentation in R/utils_functions.R
\name{loi_outlier} \name{loi_outlier}
\alias{loi_outlier} \alias{loi_outlier}
\title{loi_outlier This function defines an outlier distribution (Surface of a \title{loi_outlier This function defines an outlier distribution (Surface of a
......
test_that("Output type", { test_that("kfino - no optimisation - computed correctly", {
data(spring1) data(spring1)
param1<-list(m0=41,
mm=45,
pp=0.5,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu1<-kfino_fit(datain=spring1, resu1<-kfino_fit(datain=spring1,
Tvar="dateNum",Yvar="Poids", Tvar="dateNum",Yvar="Poids",
expertMin=30,expertMax=75, param=param1,
doOptim=FALSE,aa=0.001,sigma2_mm=0.05, doOptim=FALSE)
K=2,sigma2_pp=5)
# Output type
expect_equal(length(resu1),3) expect_equal(length(resu1),3)
expect_s3_class(resu1[[1]],"data.frame") expect_s3_class(resu1[[1]],"data.frame")
expect_s3_class(resu1[[2]],"data.frame") expect_s3_class(resu1[[2]],"data.frame")
expect_type(resu1[[3]],"list") expect_type(resu1[[3]],"list")
#expect_true(is_SBM(netA)) expect_equal(length(resu1[[3]]),6)
# expected result
expect_equal(as.vector(resu1[[3]]$likelihood),1.245621e-150)
}) })
test_that("kfino - optimisation - computed correctly", {
data(spring1)
param1<-list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu1<-kfino_fit(datain=spring1,
Tvar="dateNum",Yvar="Poids",
param=param1,
doOptim=TRUE)
# Output type
expect_equal(length(resu1),3)
expect_s3_class(resu1[[1]],"data.frame")
expect_s3_class(resu1[[2]],"data.frame")
expect_type(resu1[[3]],"list")
expect_equal(length(resu1[[3]]),6)
# expected result
expect_equal(as.vector(resu1[[3]]$likelihood),1.035855e-135)
})
test_that("KBO_L - likelihood - computed correctly", {
set.seed(1234)
Y<-rnorm(n=10,mean=50,4)
Tps<-seq(1,10)
N=10
param2<-list(m0=41,
mm=45,
pp=0.5,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu<-KBO_L(param=param2,Y,Tps,N,dix=6)
expect_equal(as.vector(resu),0.00490257592)
})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment