diff --git a/NAMESPACE b/NAMESPACE index 27b81e64119211b38b2f266f70c305c485c1e912..47bb4f55c70ce4f7bb52179727c92e83e5934bad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,6 @@ export(KBO_L) export(KBO_known) export(kfino_fit) -export(kfino_fit2) export(kfino_plot) export(loi_outlier) importFrom(dplyr,"%>%") diff --git a/R/graph_functions.R b/R/graph_functions.R index dfe806af8bddd519cb109fb8d087504dc40471b0..cb0bce9368cdecf5692c3f7af88cb03d5f86d46c 100644 --- a/R/graph_functions.R +++ b/R/graph_functions.R @@ -1,8 +1,9 @@ -#' graphical function +#' kfino_plot a graphical function for the result of a kfino run #' #' @param resuin a list resulting of the kfino algorithm -#' @param typeG char, type of graphic, either detection of outliers (with -#' qualitative or quantitative display) or prediction +#' @param typeG char, type of graphic, either detection of outliers (with +#' qualitative or quantitative display) or prediction. must be +#' "quanti" or "quali" or "prediction" #' @param Tvar char, time variable in the data.frame datain #' @param Yvar char, variable which was analysed in the data.frame datain #' @param Ident char, column name of the individual id to be analyzed @@ -10,14 +11,14 @@ #' #' @details The produced graphic can be, according to typeG: #' \describe{ -#' \item{quali}{The detection of outliers with a qualitative rule: OK values, -#' KO values (outliers) and Bad values (impossible values defined by +#' \item{quali}{The detection of outliers with a qualitative rule: OK values, +#' KO values (outliers) and Bad values (impossible values defined by #' the user)} #' \item{quanti}{The detection of outliers with a quantitative display using #' the calculated probability of the kafino algorithm} #' \item{prediction}{The prediction of the analyzed variable on OK values} #' } -#' +#' #' @importFrom ggplot2 aes aes_string ggplot geom_point geom_line #' @importFrom ggplot2 scale_color_manual ggtitle #' @return a ggplot2 graphic @@ -26,23 +27,34 @@ #' 
@examples #' data(spring1) #' library(dplyr) -#' +#' #' print(colnames(spring1)) -#' +#' #' # --- Without Optimisation on initial parameters +#' param2<-list(m0=41, +#' mm=45, +#' pp=0.5, +#' aa=0.001, +#' expertMin=30, +#' expertMax=75, +#' sigma2_m0=1, +#' sigma2_mm=0.05, +#' sigma2_pp=5, +#' K=2, +#' seqp=seq(0.5,0.7,0.1)) #' resu2<-kfino_fit(datain=spring1, #' Tvar="dateNum",Yvar="Poids", -#' expertMin=30,expertMax=75, -#' doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) -#' +#' param=param2, +#' doOptim=FALSE) +#' #' # flags are qualitative #' kfino_plot(resuin=resu2,typeG="quali", #' Tvar="Day",Yvar="Poids",Ident="IDE") -#' +#' #' # flags are quantitative #' kfino_plot(resuin=resu2,typeG="quanti", #' Tvar="Day",Yvar="Poids",Ident="IDE") -#' +#' #' # predictions on OK values #' kfino_plot(resuin=resu2,typeG="prediction", #' Tvar="Day",Yvar="Poids",Ident="IDE") @@ -63,7 +75,7 @@ kfino_plot<-function(resuin, if (typeG %in% c("prediction","quanti") & is.null(resuin[[2]])) { stop("NULL object - No graph to provide. 
Please check your input object.") } - + # Some formatting tp<-as.data.frame(resuin[[1]]) myIDE<-unique(tp[,Ident]) @@ -75,43 +87,45 @@ kfino_plot<-function(resuin, tp.title1<-paste0(title," - ",myIDE) tp.title2<-paste0(title," - ",myIDE) } - + # graphics if (typeG == "quali"){ if (!is.null(resuin[[2]])){ g1<-ggplot(tp,aes_string(x=Tvar))+ - geom_point( aes_string(y=Yvar,color="flag")) + + geom_point( aes_string(y=Yvar,color="flag")) + geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) + geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr), color="green") + geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr), color="green") + - scale_color_manual(values = + scale_color_manual(values = c("KO"="purple", "OK" = "black", "Bad"="red")) + ggtitle(tp.title1) } else { g1<-ggplot(tp,aes_string(x=Tvar))+ - geom_point( aes_string(y=Yvar,color="flag")) + - scale_color_manual(values = + geom_point( aes_string(y=Yvar,color="flag")) + + scale_color_manual(values = c("KO"="purple", "OK" = "black", "Bad"="red")) + ggtitle(tp.title1) } return(g1) } else if (typeG == "quanti"){ - g1<-ggplot(tp,aes_string(x=Tvar))+ - geom_point( aes_string(y=Yvar,color="label_pred")) + - geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) + - geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr), + if (!is.null(resuin[[2]])){ + g1<-ggplot(tp,aes_string(x=Tvar))+ + geom_point( aes_string(y=Yvar,color="label_pred")) + + geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$prediction)) + + geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$lwr), color="green") + - geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr), + geom_line(data=tp[!is.na(tp$prediction),], aes(y=.data$upr), color="green") + - ggtitle(tp.title1) - return(g1) + ggtitle(tp.title1) + return(g1) + } } else if (typeG == "prediction"){ tp2<-filter(tp,.data$flag == "OK") - + g1<-ggplot(tp2,aes_string(x=Tvar))+ - geom_point( aes_string(y=Yvar)) + + geom_point( 
aes_string(y=Yvar)) + geom_line(data=tp2, aes(y=.data$prediction)) + geom_line(data=tp2, aes(y=.data$lwr),color="green") + geom_line(data=tp2, aes(y=.data$upr),color="green") + @@ -120,3 +134,4 @@ kfino_plot<-function(resuin, } } +#-------------------- End of file ----------------------------------- diff --git a/R/kfino.R b/R/kfino.R index 825a5bdcf7558ddf05e893341405f94e7a3ec31c..cb516deabaa5bc70ee4775ef1cb4978e756b5fea 100644 --- a/R/kfino.R +++ b/R/kfino.R @@ -1,397 +1,180 @@ -#---------------------------------------------------------------------------- -# objective: Détection d'outliers sur courbe de pesées d'ovins - WOW -# version modèle de markov caché avec filtre de Kalman-Bucy -# HMM_KB algorithm -# author : B. Cloez & I.Sanchez (INRAE MISTEA) -#---------------------------------------------------------------------------- -####---Procedure d estimation ---- -# Algorithme du document "Modèle de Markov caché -# version avec troncature et sous-echantillonage pour optimisation -#---------------------------------------------------------------------------- -#' kafino_fit a function to detect outlier with a Kalman Filtering approach +#' kfino_fit a function to detect outlier with a Kalman Filtering approach #' @param datain an input data.frame of one time course to study (unique IDE) #' @param Tvar char, time column name in the data.frame datain, a numeric vector #' Tvar can be expressed as a proportion of day in seconds #' @param Yvar char, name of the variable to predict in the data.frame datain -#' @param expertMin numeric, the minimal weight expected by the user -#' @param expertMax numeric, the maximal weight expected by the user -#' @param X vector, a vector of initialization parameter (will not be optimized) -#' @param doOptim logical, if TRUE optimisation of the initial parameters +#' @param param list, a list of initialization parameters +#' @param doOptim logical, if TRUE optimisation of the initial parameters #' default TRUE -#' @param threshold numeric, 
threshold to qualify an observation as outlier +#' @param threshold numeric, threshold to qualify an observation as outlier #' according to the label_pred, default 0.5 -#' @param aa numeric, rate of weight change, default 0.001 -#' @param sigma2_mm numeric, variance of mm, related to the unit of Tvar, -#' default 0.05 -#' @param sigma2_pp numeric, variance of pp, related to the unit of Yvar, -#' default 5 -#' @param K numeric, cst in the outlier function (trapezoid), by default K=2 -#' increasing K, XXX -#' @param minp numeric, minimal pp probability to be correctly weighted. -#' -#' @details The initialization parameter vector X contains: +#' +#' @details The initialization parameter list param contains: #' \describe{ -#' \item{mm}{target weight} -#' \item{pp}{probability to be correctly weighed} -#' \item{m0}{Initial weight} +#' \item{mm}{(optional) target weight, NULL if the user wants to optimize it} +#' \item{pp}{(optional) probability to be correctly weighed, NULL if the user +#' wants to optimize it} +#' \item{m0}{(optional) Initial weight, NULL if the user wants to optimize it} +#' \item{aa}{numeric, rate of weight change, default 0.001 } +#' \item{expertMin}{numeric, the minimal weight expected by the user} +#' \item{expertMax}{numeric, the maximal weight expected by the user} +#' \item{sigma2_m0}{variance of m0, default 1} +#' \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar, +#' default 0.05} +#' \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar, +#' default 5} +#' \item{K}{numeric, a constant value in the outlier function (trapezium), +#' by default K=5} +#' \item{seqp}{numeric, sequence of pp probability to be correctly weighted. +#' default seq(0.5,0.7,0.1)} #' } -#' It can be given by the user following his knowledge of the animal or dataset -#' or entirely constructed by the function. 
In the optimisation step, this -#' vector is initialized according to the input data (between the expert -#' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for -#' m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A +#' It has to be given by the user following his knowledge of the animal or +#' the dataset. All parameters are compulsory except m0, mm and pp that can be +#' optimized by the algorithm. In the optimisation step, those three parameters +#' are initialized according to the input data (between the expert +#' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for +#' m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A #' sub-sampling is performed to speed the algorithm if the number of possible -#' observations studied is greater than 500. -#' +#' observations studied is greater than 500. +#' #' @importFrom stats dnorm quantile na.omit #' @importFrom dplyr mutate filter left_join arrange %>% #' @importFrom dplyr .data if_else row_number select -#' @return a S3 list with two data frames and a list of vectors of -#' kafino results +#' +#' @return a S3 list with two data frames and a list of vectors of +#' kfino results #' \describe{ -#' \item{detectOutlier}{The whole dataset with the detected outliers flagged +#' \item{detectOutlier}{The whole dataset with the detected outliers flagged #' and prediction} #' \item{PredictionOK}{A dataset with the predictions on possible values} -#' \item{kafino.results}{kfino results (a list of vectors) on optimized input +#' \item{kafino.results}{kfino results (a list of vectors) on optimized input #' parameters or not} -#' } +#' } #' @export -#' @examples +#' @examples #' data(spring1) #' library(dplyr) #' #' # --- With Optimisation on initial parameters #' t0 <- Sys.time() +#' param1<-list(m0=NULL, +#' mm=NULL, +#' pp=NULL, +#' aa=0.001, +#' expertMin=30, +#' expertMax=75, +#' sigma2_m0=1, +#' sigma2_mm=0.05, +#' sigma2_pp=5, +#' K=2, 
+#' seqp=seq(0.5,0.7,0.1)) +#' #' resu1<-kfino_fit(datain=spring1, #' Tvar="dateNum",Yvar="Poids", -#' expertMin=30,expertMax=75, -#' doOptim=TRUE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) +#' doOptim=TRUE,param=param1) #' Sys.time() - t0 #' # --- Without Optimisation on initial parameters #' t0 <- Sys.time() +#' param2<-list(m0=41, +#' mm=45, +#' pp=0.5, +#' aa=0.001, +#' expertMin=30, +#' expertMax=75, +#' sigma2_m0=1, +#' sigma2_mm=0.05, +#' sigma2_pp=5, +#' K=2, +#' seqp=seq(0.5,0.7,0.1)) #' resu2<-kfino_fit(datain=spring1, #' Tvar="dateNum",Yvar="Poids", -#' expertMin=30,expertMax=75, -#' X=c(45,0.5,41), -#' doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) +#' param=param2, +#' doOptim=FALSE) #' Sys.time() - t0 -#' +#' #' # complex data on merinos2 dataset #' data(merinos2) -#' +#' #' t0 <- Sys.time() +#' param3<-list(m0=NULL, +#' mm=NULL, +#' pp=NULL, +#' aa=0.001, +#' expertMin=10, +#' expertMax=45, +#' sigma2_m0=1, +#' sigma2_mm=0.05, +#' sigma2_pp=5, +#' K=2, +#' seqp=seq(0.5,0.7,0.1)) #' resu3<-kfino_fit(datain=merinos2, #' Tvar="dateNum",Yvar="Poids", -#' expertMin=10,expertMax=45, -#' doOptim=TRUE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) +#' doOptim=TRUE,param=param3) #' Sys.time() - t0 kfino_fit<-function(datain,Tvar,Yvar, - expertMin=NULL,expertMax=NULL, - X=NULL, - doOptim=TRUE,threshold=0.5,aa=0.001, - sigma2_mm=0.05,sigma2_pp,K=2,minp=0.4){ - - if( any(is.null(expertMin) | is.null(expertMax)) ) + param=NULL, + doOptim=TRUE,threshold=0.5){ + + if( any(is.null(param[["expertMin"]]) | + is.null(param[["expertMax"]])) ) stop('You have to define expertMin and expertMax.') - if( any(!is.numeric(expertMin) | !is.numeric(expertMax)) ) + if( any(!is.numeric(param[["expertMin"]]) | + !is.numeric(param[["expertMax"]])) ) stop('expertMin and expertMax must be numeric.') - if( !is.numeric(t(datain[,Yvar]))) + if( !is.numeric(t(datain[,Yvar]))) stop('Input parameter Yvar must contain numeric values.') - if( !is.numeric(t(datain[,Tvar]))) + if( 
!is.numeric(t(datain[,Tvar]))) stop('Input parameter Tvar must contain numeric values.') - - datain<-as.data.frame(datain) - + + #------------------------------------------------------------- + # load objects + m0<-param[["m0"]] + mm<-param[["mm"]] + pp<-param[["pp"]] + if(is.null(param[["aa"]])){ aa<-0.001 } else { aa<-param[["aa"]] } + expertMin<-param[["expertMin"]] + expertMax<-param[["expertMax"]] + if(is.null(param[["sigma2_m0"]])){ + sigma2_m0<-1 + } else {sigma2_m0<-param[["sigma2_m0"]]} + if(is.null(param[["sigma2_mm"]])){ + sigma2_mm<-0.05 + } else {sigma2_mm<-param[["sigma2_mm"]]} + if(is.null(param[["sigma2_pp"]])){ + sigma2_pp<-5 + } else {sigma2_pp<-param[["sigma2_pp"]]} + if(is.null(param[["K"]])){ K<-5 } else { K<-param[["K"]] } + if(is.null(param[["seqp"]])){ + seqp<-seq(0.5,0.7,0.1) + } else {seqp<-param[["seqp"]]} + #------------------------------------------------------------- + # Flag impossible weight values (expert knowledge) # the algorithm works on the dataset with possible values # Create an artificial column numbering the rows for joining purpose + datain<-as.data.frame(datain) datain<-datain %>% - mutate(rowNum=row_number(), - flag1=if_else(.data[[Yvar]] > expertMin & - .data[[Yvar]] <= expertMax,"OK","Bad")) + mutate(rowNum=row_number(), + flag1=if_else(.data[[Yvar]] > expertMin & + .data[[Yvar]] <= expertMax,"OK","Bad")) tp.dt<-datain %>% filter(.data$flag1 == "OK") - + Y<-tp.dt[,Yvar] N<-nrow(tp.dt) Tps<-tp.dt[,Tvar] - #--- function defining an outlier distribution (Surface of a trapezium) - # uses expertMin and expertMax - loi_outlier<-function(y){ - 2/((K+1)*(expertMax - expertMin)) + - (2*(K-1)/(K+1))*((y - expertMin)/((expertMax - expertMin)^2)) - } - - # variance of m0 - sigma2_m0=1 - - FK_para_connu_tronc=function(Param){ - #notation - mm=Param[1] - pp=Param[2] - m0=Param[3] - aa=aa - - # paramètre de troncature - kappa <-10 - - # initialisation (1.1.1) - #-------------------- - m1= (sigma2_pp*m0 + 
Y[1]*sigma2_m0)/(sigma2_m0+sigma2_pp) - sigma1=(sigma2_m0*sigma2_pp)/(sigma2_m0+sigma2_pp) - - l0<- loi_outlier(Y[1]) - loinorm1<-dnorm(Y[1],m0, sqrt(sigma2_m0+sigma2_pp)) - - p0= ((1-pp)*l0) / (pp*loinorm1 + (1-pp)*l0) - p1=1-p0 - - m=c(m0,m1) - p=c(p0,p1) - sigma2=c(sigma2_m0,sigma1) - L=c(l0,loinorm1) - - Xmean=c(p0*m0 + p1*m1) - Pmean=c(p1) - q=c(1,1) - sigma2_new=c(sigma2_m0) - - #iteration (1.1.2) - #----------------------- - #// Pour l'instant, je fais comme si kappa<N-1 mettre un if sinon - #avant troncature - for (k in 1:(kappa-1)){ - # je crée le vecteur des nouveaux m, ##pour plus tard: au lieu de les - # faire vide, je prend direct m_{u0} - mnew=rep(0,2^(k+1)) - sigma2new=rep(0 ,2^(k+1)) - pnew=rep(0 ,2^(k+1)) - Lnew=rep(0 ,2^(k+1)) - # CR = constante de renormalisation qui intervient dans le dénominateur des pu - qnew=rep(0 ,2^(k+1)) - diffTps<-Tps[k+1] - Tps[k] - #--- numérateur de pu0 - tpbeta<-loi_outlier(Y[k+1]) - - pnew[1:(2^k)]=p[1:(2^k)]*(1-pp)*tpbeta - Lnew[1:(2^k)]=L[1:(2^k)]*tpbeta - mnew[1:(2^k)]= m[1:(2^k)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0 - - sigma2new[1:(2^k)] = sigma2[1:(2^k)]*exp(-2*aa*(diffTps)) + - (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa) - qnew[1:(2^k)] <- q[1:(2^k)]*(1-pp) - qnew[(1+2^k):2^(k+1)] <- q[1:(2^k)]*pp - sommevar=sigma2new[1:(2^k)] + sigma2_pp - tpnorm<-dnorm(Y[k+1], mnew[1:(2^k)], sqrt(sommevar)) - pnew[(1+2^k):2^(k+1)]=p[1:(2^k)]*pp*tpnorm - Lnew[(1+2^k):2^(k+1)]=L[1:(2^k)]*tpnorm - mnew[(1+2^k):2^(k+1)] = (sigma2new[1:(2^k)]*Y[k+1] + mnew[1:(2^k)]*sigma2_pp)/sommevar - sigma2new[(1+2^k):2^(k+1)] = sigma2new[1:(2^k)] * sigma2_pp/sommevar - - CR<-sum(pnew) - Proba1<-sum(pnew[(1+2^k):2^(k+1)]) - SommeMasse<-sum(pnew*mnew) - - m=mnew - sigma2=sigma2new - p=pnew/CR - Xmean=c(Xmean,SommeMasse/CR) - Pmean=c(Pmean,Proba1/CR) - L=Lnew - q=qnew - sigma2_new<-c(sigma2_new,sum(p*sigma2) + sum(p*m^2) - (sum(p*m))^2) - } - - #apres troncature - #---------------------- - for (k in kappa:(N-1)){ - # je cree le vecteur 
des nouveaux m, - # pour plus tard: au lieu de les faire vide, je prend direct m_{u0} - mnew=rep(0,2^(kappa+1)) - sigma2new=rep(0 ,2^(kappa+1)) - pnew=rep(0 ,2^(kappa+1)) - Lnew=rep(0 ,2^(kappa+1)) - # CR = constante de renormalisation qui intervient dans le dénominateur des pu - qnew=rep(0 ,2^(kappa+1)) - diffTps<-Tps[k+1] - Tps[k] - - #--- numérateur de pu0 - tpbeta<-loi_outlier(Y[k+1]) - - pnew[1:(2^kappa)]=p[1:(2^kappa)]*(1-pp)*tpbeta - Lnew[1:(2^kappa)]=L[1:(2^kappa)]*tpbeta - mnew[1:(2^kappa)]= m[1:(2^kappa)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0 - sigma2new[1:(2^kappa)] = sigma2[1:(2^kappa)]*exp(-2*aa*(diffTps)) + - (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa) - qnew[1:(2^kappa)]=q[1:(2^kappa)]*(1-pp) - qnew[(1+2^kappa):2^(kappa+1)]=q[1:(2^kappa)]*pp - sommevar=sigma2new[1:(2^kappa)] + sigma2_pp - tpnorm<-dnorm(Y[k+1], mnew[1:(2^kappa)], sqrt(sommevar)) - pnew[(1+2^kappa):2^(kappa+1)]=p[1:(2^kappa)]*pp*tpnorm - Lnew[(1+2^kappa):2^(kappa+1)]=L[1:(2^kappa)]*tpnorm - mnew[(1+2^kappa):2^(kappa+1)] = (sigma2new[1:(2^kappa)]*Y[k+1] + - mnew[1:(2^kappa)]*sigma2_pp)/sommevar #m_u1 - sigma2new[(1+2^kappa):2^(kappa+1)] = sigma2new[1:(2^kappa)] * sigma2_pp/sommevar - - CR<-sum(pnew) - Proba1<-sum(pnew[(1+2^kappa):2^(kappa+1)]) - SommeMasse<-sum(pnew*mnew) - - selection=order(pnew, decreasing=T)[1:2^kappa] - - m=mnew[selection] - sigma2=sigma2new[selection] - p=pnew[selection]/sum(pnew[selection]) - Xmean=c(Xmean,SommeMasse/CR) - Pmean=c(Pmean,Proba1/CR) - L=Lnew[selection] - q=qnew[selection] - sigma2_new<-c(sigma2_new,sum(p*sigma2) + sum(p*m^2) - (sum(p*m))^2) - } - - #---------------------------------------------- - # Ajout Intervalle de confiance des estimations - # Variance (see section 1.2.1) - # IC est approximé... 
- #---------------------------------------------- - lwr <- Xmean - 1.96*sqrt(sigma2_new) - upr <- Xmean + 1.96*sqrt(sigma2_new) - # flag - flag<-if_else(Pmean > threshold,"OK","KO") - - #--- Calcul des derniers éléments - Vraisemblance=L%*%q - - resultat=list("prediction"=Xmean, - "label"=Pmean, - "likelihood"=Vraisemblance, - "lwr"=lwr, - "upr"=upr, - "flag"=flag) - return(resultat) - } - - #WARNING WARNING: AU lieu de calculer L qui est arrondi à 0, - # je calcule 10^N fois Lu et 10^N q. Au total pr chaque donnée - # je multiplie par 100 mais comme c'est l'ordre de grandeur de loioutlier() - # ca ne me parait pas disproportionné. + #WARNING WARNING: AU lieu de calculer L qui est arrondi à 0, + # je calcule 10^N fois Lu et 10^N q. Au total pr chaque donnée + # je multiplie par 100 mais comme c'est l'ordre de grandeur de loioutlier() + # ca ne me parait pas disproportionné. # Au lieu de 10 et 10 je fais simplement sqrt(expertMax - expertMin) dix=sqrt(expertMax - expertMin) - - KBO_vraiss=function(Param){ - #notation - mm=Param[1] - pp=Param[2] - m0=Param[3] - aa=aa - #---- paramètre de troncature - # Ici je met kappa =7, ca me fais retirer les proba <0.01 au lieu de 0.001 - # comme qd kappa=10 mais ca divise par 10 le temps de calcul - kappa=7 - - # initialisation (1.1.1) - #-------------------- - m1= (sigma2_pp*m0 + Y[1]*sigma2_m0)/(sigma2_m0+sigma2_pp) - sigma1=(sigma2_m0*sigma2_pp)/(sigma2_m0+sigma2_pp) - - l0<- loi_outlier(Y[1]) - loinorm1<-dnorm(Y[1],m0, sqrt(sigma2_m0+sigma2_pp)) - - p0= ((1-pp)*l0) / (pp*loinorm1 + (1-pp)*l0) - p1=1-p0 - - m=c(m0,m1) - p=c(p0,p1) - sigma2=c(sigma2_m0,sigma1) - L=dix*c(l0,loinorm1) #Attention *2 pr le grandir - - q=c(1,1) #attention - - #iteration (1.1.2) - #----------------------- - #// Pour l'instant, je fais comme si kappa<N-1 mettre un if sinon - # avant troncature - for (k in 1:(kappa-1)){ - mnew=rep(0,2^(k+1)) - sigma2new=rep(0 ,2^(k+1)) - pnew=rep(0 ,2^(k+1)) - Lnew=rep(0 ,2^(k+1)) - # CR = constante de renormalisation 
qui intervient dans le dénominateur des pu - qnew=rep(0 ,2^(k+1)) - diffTps<-Tps[k+1] - Tps[k] - #--- numérateur de pu0 - tpbeta<-loi_outlier(Y[k+1]) - pnew[1:(2^k)]=p[1:(2^k)]*(1-pp)*tpbeta - Lnew[1:(2^k)]=L[1:(2^k)]*tpbeta - mnew[1:(2^k)]= m[1:(2^k)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0 - sigma2new[1:(2^k)] = sigma2[1:(2^k)]*exp(-2*aa*(diffTps)) + - (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa) - qnew[1:(2^k)] <- q[1:(2^k)]*(1-pp) - qnew[(1+2^k):2^(k+1)] <- q[1:(2^k)]*pp - sommevar=sigma2new[1:(2^k)] + sigma2_pp - tpnorm<-dnorm(Y[k+1], mnew[1:(2^k)], sqrt(sommevar)) - pnew[(1+2^k):2^(k+1)]=p[1:(2^k)]*pp*tpnorm - Lnew[(1+2^k):2^(k+1)]=L[1:(2^k)]*tpnorm - mnew[(1+2^k):2^(k+1)] = (sigma2new[1:(2^k)]*Y[k+1] + mnew[1:(2^k)]*sigma2_pp)/sommevar - sigma2new[(1+2^k):2^(k+1)] = sigma2new[1:(2^k)] * sigma2_pp/sommevar - - m=mnew - sigma2=sigma2new - p=pnew/sum(pnew) - - L=dix*Lnew # fois 2 pr le grandir - q=dix*qnew - } - - # apres troncature - #---------------------- - for (k in kappa:(N-1)){ - # je cree le vecteur des nouveaux m, - mnew=rep(0,2^(kappa+1)) - sigma2new=rep(0 ,2^(kappa+1)) - pnew=rep(0 ,2^(kappa+1)) - Lnew=rep(0 ,2^(kappa+1)) - # CR = constante de renormalisation qui intervient dans le dénominateur des pu - qnew=rep(0 ,2^(kappa+1)) - diffTps<-Tps[k+1] - Tps[k] - #--- numérateur de pu0 - tpbeta<-loi_outlier(Y[k+1]) - pnew[1:(2^kappa)]=p[1:(2^kappa)]*(1-pp)*tpbeta - Lnew[1:(2^kappa)]=L[1:(2^kappa)]*tpbeta - mnew[1:(2^kappa)]= m[1:(2^kappa)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0 - sigma2new[1:(2^kappa)] = sigma2[1:(2^kappa)]*exp(-2*aa*(diffTps)) + - (1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa) - qnew[1:(2^kappa)]=q[1:(2^kappa)]*(1-pp) - qnew[(1+2^kappa):2^(kappa+1)]=q[1:(2^kappa)]*pp - sommevar=sigma2new[1:(2^kappa)] + sigma2_pp - tpnorm<-dnorm(Y[k+1], mnew[1:(2^kappa)], sqrt(sommevar)) - pnew[(1+2^kappa):2^(kappa+1)]=p[1:(2^kappa)]*pp*tpnorm - Lnew[(1+2^kappa):2^(kappa+1)]=L[1:(2^kappa)]*tpnorm - mnew[(1+2^kappa):2^(kappa+1)] = 
(sigma2new[1:(2^kappa)]*Y[k+1] + - mnew[1:(2^kappa)]*sigma2_pp)/sommevar #m_u1 - sigma2new[(1+2^kappa):2^(kappa+1)] = sigma2new[1:(2^kappa)] * sigma2_pp/sommevar - - selection=order(pnew, decreasing=T)[1:2^kappa] - - m=mnew[selection] - sigma2=sigma2new[selection] - p=pnew[selection]/sum(pnew[selection]) - L=dix*Lnew[selection] #fois 2 pr le grandir - q=dix*qnew[selection] - } - - Vraisemblance=L%*%q - - return(Vraisemblance) - } - - - #------------------------------------------------ + + #------------------------------------------------------------------------ # Optimisation sur les paramètres initiaux ou pas - #------------------------------------------------ - # mm=Param[1], pp=Param[2], m0=Param[3] + #------------------------------------------------------------------------ if (doOptim == TRUE){ - + if (N > 500){ # optim avec sous-echantillonnage print("-------:") @@ -399,33 +182,55 @@ kfino_fit<-function(datain,Tvar,Yvar, bornem0=quantile(Y[1:N/4], probs = c(.2, .8)) m0opt=quantile(Y[1:N/4], probs = c(.5)) mmopt=quantile(Y[(3*N/4):N], probs = c(.5)) - + cat("bornem0: ",bornem0,"\n") cat("m0opt: ",m0opt,"\n") cat("mmopt: ",mmopt,"\n") - + popt=0.5 #--- Saving datain before sub-sampling YY=Y TpsTps=Tps NN=N - - Subechant=sort(sample(1:NN,50)) + + #Subechant=sort(sample(1:NN,50)) + Subechant=sort(.Internal(sample(NN, 50L, FALSE, NULL))) Y=YY[Subechant] Tps=TpsTps[Subechant] N=50 - Vopt=KBO_vraiss(c(mmopt,popt,m0opt)) - + Vopt=KBO_L(list(m0=m0opt, + mm=mmopt, + pp=popt, + aa=aa, + expertMin=expertMin, + expertMax=expertMax, + sigma2_mm=sigma2_mm, + sigma2_m0=sigma2_m0, + sigma2_pp=sigma2_pp, + K=K), + Y,Tps,N,dix) + for (m0 in seq(bornem0[1],bornem0[2],2) ){ for (mm in seq((m0-5),(m0+20),2) ){ - for (p in seq(minp,(minp + 0.3),0.1)){ + for (p in seqp){ # A voir si 50 sous-echantillons au hasard suffisent. 
Comme dans # Robbins Monroe, permet aussi de reduire l'impact de la troncature - Subechant=sort(sample(1:NN,50)) + #Subechant=sort(sample(1:NN,50)) + Subechant=sort(.Internal(sample(NN, 50L, FALSE, NULL))) Y=YY[Subechant] Tps=TpsTps[Subechant] - - V=KBO_vraiss(c(mm,p,m0)) + + V=KBO_L(list(m0=m0, + mm=mm, + pp=p, + aa=aa, + expertMin=expertMin, + expertMax=expertMax, + sigma2_mm=sigma2_mm, + sigma2_m0=sigma2_m0, + sigma2_pp=sigma2_pp, + K=K), + Y,Tps,N,dix) if (V > Vopt){ Vopt=V m0opt=m0 @@ -435,20 +240,29 @@ kfino_fit<-function(datain,Tvar,Yvar, } } } - - # Le fait de prendre des sous-echantillons remarquables - # (ie 50 au lieu de 200) ne divise que par 2 le temps de calculs + + # Le fait de prendre des sous-echantillons remarquables + # (ie 50 au lieu de 200) ne divise que par 2 le temps de calculs # (au lieu de 4 comme imaginé cela vient p.e de la fonction sample()) Y=YY Tps=TpsTps N=NN - param=c(mmopt,popt,m0opt) print("Optimised parameters: ") - print(param) + print(c(mmopt,popt,m0opt)) print("-------:") - - resultat=FK_para_connu_tronc(param) - + + resultat=KBO_known(param=list(mm=mmopt, + pp=popt, + m0=m0opt, + aa=aa, + expertMin=expertMin, + expertMax=expertMax, + sigma2_m0=sigma2_m0, + sigma2_mm=sigma2_mm, + sigma2_pp=sigma2_pp, + K=K), + threshold,Y=Y,Tps=Tps,N=N) + } else if (N > 50){ # optim sans sous-echantillonnage print("-------:") @@ -457,19 +271,40 @@ kfino_fit<-function(datain,Tvar,Yvar, bornem0=quantile(Y[1:N/4], probs = c(.2, .8)) m0opt=quantile(Y[1:N/4], probs = c(.5)) mmopt=quantile(Y[(3*N/4):N], probs = c(.5)) - + cat("bornem0: ",bornem0,"\n") cat("m0opt: ",m0opt,"\n") cat("mmopt: ",mmopt,"\n") - + popt=0.5 - - Vopt=KBO_vraiss(c(mmopt,popt,m0opt)) - + + Vopt=KBO_L(list(m0=m0opt, + mm=mmopt, + pp=popt, + aa=aa, + expertMin=expertMin, + expertMax=expertMax, + sigma2_mm=sigma2_mm, + sigma2_m0=sigma2_m0, + sigma2_pp=sigma2_pp, + K=K), + Y,Tps,N,dix) + for (m0 in seq(bornem0[1],bornem0[2],2) ){ for (mm in seq((m0-5),(m0+20),2) ){ - for (p in 
seq(minp,(minp + 0.3),0.1)){ - V=KBO_vraiss(c(mm,p,m0)) + for (p in seqp){ + V=KBO_L(list(m0=m0, + mm=mm, + pp=p, + aa=aa, + expertMin=expertMin, + expertMax=expertMax, + sigma2_mm=sigma2_mm, + sigma2_m0=sigma2_m0, + sigma2_pp=sigma2_pp, + K=K), + Y,Tps,N,dix) + if (V > Vopt){ Vopt=V m0opt=m0 @@ -479,21 +314,32 @@ kfino_fit<-function(datain,Tvar,Yvar, } } } - - param=c(mmopt,popt,m0opt) + print("Optimised parameters: ") - print(param) + print(c(mmopt,popt,m0opt)) print("-------:") - - resultat=FK_para_connu_tronc(param) - + + resultat=KBO_known(param=list(mm=mmopt, + pp=popt, + m0=m0opt, + aa=aa, + expertMin=expertMin, + expertMax=expertMax, + sigma2_m0=sigma2_m0, + sigma2_mm=sigma2_mm, + sigma2_pp=sigma2_pp, + K=K), + threshold=threshold,Y=Y,Tps=Tps,N=N) + } else { # N petit, si trop petit on ne fait rien if (N > 5) { # Not enough data - no optim - if (is.null(X)){ - X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5, + if (is.null(param[["m0"]])){ + X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5, round(quantile(Y[(3*N/4):N], probs = 0.8))) + } else { + X<-c(m0,pp,mm) } print("-------:") print("Optimisation of initial parameters - result:") @@ -501,9 +347,19 @@ kfino_fit<-function(datain,Tvar,Yvar, print("Used parameters: ") print(X) print("-------:") - resultat=FK_para_connu_tronc(X) + resultat=KBO_known(param=list(m0=X[[1]], + pp=X[[2]], + mm=X[[3]], + aa=aa, + expertMin=expertMin, + expertMax=expertMax, + sigma2_m0=sigma2_m0, + sigma2_mm=sigma2_mm, + sigma2_pp=sigma2_pp, + K=K), + threshold=threshold,Y=Y,Tps=Tps,N=N) } else { - print("Warning: not enough data between expert knowledge. The algorithm is not performed.") + warning("Not enough data between expert knowledge. 
The algorithm is not performed.") resultat<-NULL } } @@ -511,35 +367,48 @@ kfino_fit<-function(datain,Tvar,Yvar, # Pas d'optimisation et test si N petit - si trop petit on ne fait rien if (N > 5) { # No optimisation on initial parameters - if (is.null(X)){ - X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5, + if (is.null(param[["m0"]])){ + X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5, round(quantile(Y[(3*N/4):N], probs = 0.8))) + } else { + X<-c(m0,pp,mm) } print("-------:") print("No optimisation of initial parameters:") print("Used parameters: ") print(X) - resultat=FK_para_connu_tronc(X) + resultat=KBO_known(param=list(m0=X[[1]], + pp=X[[2]], + mm=X[[3]], + aa=aa, + expertMin=expertMin, + expertMax=expertMax, + sigma2_m0=sigma2_m0, + sigma2_mm=sigma2_mm, + sigma2_pp=sigma2_pp, + K=K), + threshold=threshold,Y=Y,Tps=Tps,N=N) } else { - print("Warning: not enough data between expert knowledge. The algorithm is not performed.") + warning("Not enough data between expert knowledge. The algorithm is not performed.") resultat<-NULL } } - - #-------------------------------- + + #--------------------------------------------------------------------------- # Formatting output results + #--------------------------------------------------------------------------- # If resultat NULL then create output with datain and 2 NULL objects # else create output with detectoutlier, PredictionOK and kafino.results # useful for the kafino_plot() function - #-------------------------------- + #--------------------------------------------------------------------------- if (is.null(resultat)){ dt.out<-datain %>% mutate(flag=.data$flag1) dt.pred<-NULL - resultat<-NULL - + resultat<-NULL + mylist<-list(dt.out,dt.pred,resultat) names(mylist)<-c("detectOutlier","PredictionOK","kafino.results") - class(mylist) = c("kafino") + class(mylist) = c("kfino") return(invisible(mylist)) } else { prediction=na.omit(resultat$prediction) @@ -547,13 +416,14 @@ kfino_fit<-function(datain,Tvar,Yvar, 
lwr=na.omit(resultat$lwr) upr=na.omit(resultat$upr) flag=na.omit(resultat$flag) - + dt.pred=cbind.data.frame(select(tp.dt,.data$rowNum), - prediction,label_pred,lwr,upr,flag) - + prediction,label_pred,lwr,upr,flag) + # join dt.pred with the initial datain containing all the observations dt.out<-left_join(datain,dt.pred,by="rowNum") - dt.out<-mutate(dt.out,flag=if_else(.data$flag1 == "Bad", .data$flag1, .data$flag)) + dt.out<-mutate(dt.out,flag=if_else(.data$flag1 == "Bad", + .data$flag1, .data$flag)) dt.out<-arrange(dt.out,.data$rowNum) #-------------------------------------- @@ -571,4 +441,4 @@ kfino_fit<-function(datain,Tvar,Yvar, -#-------------------------------------- End of file -------------------------- \ No newline at end of file +#-------------------------------------- End of file -------------------------- diff --git a/R/kfino2.R b/R/kfino2.R deleted file mode 100644 index 0daa68b1a9edeaecc159198c88c08d29846b4649..0000000000000000000000000000000000000000 --- a/R/kfino2.R +++ /dev/null @@ -1,436 +0,0 @@ -#' kafino_fit2 a function to detect outlier with a Kalman Filtering approach -#' @param datain an input data.frame of one time course to study (unique IDE) -#' @param Tvar char, time column name in the data.frame datain, a numeric vector -#' Tvar can be expressed as a proportion of day in seconds -#' @param Yvar char, name of the variable to predict in the data.frame datain -#' @param param list, a list of initialization parameter -#' @param doOptim logical, if TRUE optimisation of the initial parameters -#' default TRUE -#' @param threshold numeric, threshold to qualify an observation as outlier -#' according to the label_pred, default 0.5 -#' -#' @details The initialization parameter list param contains: -#' \describe{ -#' \item{mm}{target weight, NULL if the user wants to optimize it} -#' \item{pp}{probability to be correctly weighed, NULL if the user wants to -#' optimize it} -#' \item{m0}{Initial weight, NULL if the user wants to optimize it} -#' 
\item{aa}{numeric, rate of weight change, default 0.001 } -#' \item{expertMin}{numeric, the minimal weight expected by the user} -#' \item{expertMax}{numeric, the maximal weight expected by the user} -#' \item{sigma2_m0}{variance of m0} -#' \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar, -#' default 0.05} -#' \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar, -#' default 5} -#' \item{K}{numeric, a constant value in the outlier function (trapezoid), -#' by default K=2 - increasing K, XXX} -#' \item{seqp}{numeric, minimal pp probability to be correctly weighted.} -#' } -#' It can be given by the user following his knowledge of the animal or dataset -#' or entirely constructed by the function. In the optimisation step, this -#' vector is initialized according to the input data (between the expert -#' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for -#' m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A -#' sub-sampling is performed to speed the algorithm if the number of possible -#' observations studied is greater than 500. 
-#' -#' @importFrom stats dnorm quantile na.omit -#' @importFrom dplyr mutate filter left_join arrange %>% -#' @importFrom dplyr .data if_else row_number select -#' @return a S3 list with two data frames and a list of vectors of -#' kafino results -#' \describe{ -#' \item{detectOutlier}{The whole dataset with the detected outliers flagged -#' and prediction} -#' \item{PredictionOK}{A dataset with the predictions on possible values} -#' \item{kafino.results}{kfino results (a list of vectors) on optimized input -#' parameters or not} -#' } -#' @export -#' @examples -#' data(spring1) -#' library(dplyr) -#' -#' # --- With Optimisation on initial parameters -#' t0 <- Sys.time() -#' param1<-list(m0=NULL, -#' mm=NULL, -#' pp=NULL, -#' aa=0.001, -#' expertMin=30, -#' expertMax=75, -#' sigma2_m0=1, -#' sigma2_mm=0.05, -#' sigma2_pp=5, -#' K=2, -#' seqp=seq(0.5,0.7,0.1)) -#' -#' resu1<-kfino_fit2(datain=spring1, -#' Tvar="dateNum",Yvar="Poids", -#' doOptim=TRUE,param=param1) -#' Sys.time() - t0 -#' # --- Without Optimisation on initial parameters -#' t0 <- Sys.time() -#' param2<-list(m0=41, -#' mm=45, -#' pp=0.5, -#' aa=0.001, -#' expertMin=30, -#' expertMax=75, -#' sigma2_m0=1, -#' sigma2_mm=0.05, -#' sigma2_pp=5, -#' K=2, -#' seqp=seq(0.5,0.7,0.1)) -#' resu2<-kfino_fit2(datain=spring1, -#' Tvar="dateNum",Yvar="Poids", -#' param=param2, -#' doOptim=FALSE) -#' Sys.time() - t0 -#' -#' # complex data on merinos2 dataset -#' data(merinos2) -#' -#' t0 <- Sys.time() -#' param3<-list(m0=NULL, -#' mm=NULL, -#' pp=NULL, -#' aa=0.001, -#' expertMin=10, -#' expertMax=45, -#' sigma2_m0=1, -#' sigma2_mm=0.05, -#' sigma2_pp=5, -#' K=2, -#' seqp=seq(0.5,0.7,0.1)) -#' resu3<-kfino_fit2(datain=merinos2, -#' Tvar="dateNum",Yvar="Poids", -#' doOptim=TRUE,param=param3) -#' Sys.time() - t0 -kfino_fit2<-function(datain,Tvar,Yvar, - param=NULL, - doOptim=TRUE,threshold=0.5){ - - if( any(is.null(param[["expertMin"]]) | - is.null(param[["expertMax"]])) ) - stop('You have to define expertMin and 
expertMax.') - if( any(!is.numeric(param[["expertMin"]]) | - !is.numeric(param[["expertMax"]])) ) - stop('expertMin and expertMax must be numeric.') - if( !is.numeric(t(datain[,Yvar]))) - stop('Input parameter Yvar must contain numeric values.') - if( !is.numeric(t(datain[,Tvar]))) - stop('Input parameter Tvar must contain numeric values.') - - #------------------------------------------------------------- - # load objects - m0<-param[["m0"]] - mm<-param[["mm"]] - pp<-param[["pp"]] - if(is.null(param[["aa"]])){ aa<-0.001 } else { aa<-param[["aa"]] } - expertMin<-param[["expertMin"]] - expertMax<-param[["expertMax"]] - if(is.null(param[["sigma2_m0"]])){ - sigma2_m0<-1 - } else {sigma2_m0<-param[["sigma2_m0"]]} - if(is.null(param[["sigma2_mm"]])){ - sigma2_mm<-0.05 - } else {sigma2_mm<-param[["sigma2_mm"]]} - if(is.null(param[["sigma2_pp"]])){ - sigma2_pp<-5 - } else {sigma2_pp<-param[["sigma2_pp"]]} - if(is.null(param[["K"]])){ K<-5 } else { K<-param[["K"]] } - seqp<-param[["seqp"]] - #------------------------------------------------------------- - - # Flag impossible weight values (expert knowledge) - # the algorithm works on the dataset with possible values - # Create an artificial column numbering the rows for joining purpose - datain<-as.data.frame(datain) - datain<-datain %>% - mutate(rowNum=row_number(), - flag1=if_else(.data[[Yvar]] > expertMin & - .data[[Yvar]] <= expertMax,"OK","Bad")) - tp.dt<-datain %>% filter(.data$flag1 == "OK") - - Y<-tp.dt[,Yvar] - N<-nrow(tp.dt) - Tps<-tp.dt[,Tvar] - - #WARNING WARNING: AU lieu de calculer L qui est arrondi à 0, - # je calcule 10^N fois Lu et 10^N q. Au total pr chaque donnée - # je multiplie par 100 mais comme c'est l'ordre de grandeur de loioutlier() - # ca ne me parait pas disproportionné. 
- # Au lieu de 10 et 10 je fais simplement sqrt(expertMax - expertMin) - dix=sqrt(expertMax - expertMin) - - #------------------------------------------------------------------------ - # Optimisation sur les paramètres initiaux ou pas - #------------------------------------------------------------------------ - if (doOptim == TRUE){ - - if (N > 500){ - # optim avec sous-echantillonnage - print("-------:") - print("Optimisation of initial parameters with sub-sampling - result:") - bornem0=quantile(Y[1:N/4], probs = c(.2, .8)) - m0opt=quantile(Y[1:N/4], probs = c(.5)) - mmopt=quantile(Y[(3*N/4):N], probs = c(.5)) - - cat("bornem0: ",bornem0,"\n") - cat("m0opt: ",m0opt,"\n") - cat("mmopt: ",mmopt,"\n") - - popt=0.5 - #--- Saving datain before sub-sampling - YY=Y - TpsTps=Tps - NN=N - - Subechant=sort(sample(1:NN,50)) - Y=YY[Subechant] - Tps=TpsTps[Subechant] - N=50 - Vopt=KBO_L(list(m0=m0opt, - mm=mmopt, - pp=popt, - aa=aa, - expertMin=expertMin, - expertMax=expertMax, - sigma2_mm=sigma2_mm, - sigma2_m0=sigma2_m0, - sigma2_pp=sigma2_pp, - K=K), - Y,Tps,N,dix) - - for (m0 in seq(bornem0[1],bornem0[2],2) ){ - for (mm in seq((m0-5),(m0+20),2) ){ - for (p in seqp){ - # A voir si 50 sous-echantillons au hasard suffisent. 
Comme dans - # Robbins Monroe, permet aussi de reduire l'impact de la troncature - Subechant=sort(sample(1:NN,50)) - Y=YY[Subechant] - Tps=TpsTps[Subechant] - - V=KBO_L(list(m0=m0, - mm=mm, - pp=p, - aa=aa, - expertMin=expertMin, - expertMax=expertMax, - sigma2_mm=sigma2_mm, - sigma2_m0=sigma2_m0, - sigma2_pp=sigma2_pp, - K=K), - Y,Tps,N,dix) - if (V > Vopt){ - Vopt=V - m0opt=m0 - mmopt=mm - popt=p - } - } - } - } - - # Le fait de prendre des sous-echantillons remarquables - # (ie 50 au lieu de 200) ne divise que par 2 le temps de calculs - # (au lieu de 4 comme imaginé cela vient p.e de la fonction sample()) - Y=YY - Tps=TpsTps - N=NN - print("Optimised parameters: ") - print(c(mmopt,popt,m0opt)) - print("-------:") - - resultat=KBO_known(param=list(mm=mmopt, - pp=popt, - m0=m0opt, - aa=aa, - expertMin=expertMin, - expertMax=expertMax, - sigma2_m0=sigma2_m0, - sigma2_mm=sigma2_mm, - sigma2_pp=sigma2_pp, - K=K), - threshold,Y=Y,Tps=Tps,N=N) - - } else if (N > 50){ - # optim sans sous-echantillonnage - print("-------:") - print("Optimisation of initial parameters - result:") - print("no sub-sampling performed:") - bornem0=quantile(Y[1:N/4], probs = c(.2, .8)) - m0opt=quantile(Y[1:N/4], probs = c(.5)) - mmopt=quantile(Y[(3*N/4):N], probs = c(.5)) - - cat("bornem0: ",bornem0,"\n") - cat("m0opt: ",m0opt,"\n") - cat("mmopt: ",mmopt,"\n") - - popt=0.5 - - Vopt=KBO_L(list(m0=m0opt, - mm=mmopt, - pp=popt, - aa=aa, - expertMin=expertMin, - expertMax=expertMax, - sigma2_mm=sigma2_mm, - sigma2_m0=sigma2_m0, - sigma2_pp=sigma2_pp, - K=K), - Y,Tps,N,dix) - - for (m0 in seq(bornem0[1],bornem0[2],2) ){ - for (mm in seq((m0-5),(m0+20),2) ){ - for (p in seqp){ - V=KBO_L(list(m0=m0, - mm=mm, - pp=p, - aa=aa, - expertMin=expertMin, - expertMax=expertMax, - sigma2_mm=sigma2_mm, - sigma2_m0=sigma2_m0, - sigma2_pp=sigma2_pp, - K=K), - Y,Tps,N,dix) - - if (V > Vopt){ - Vopt=V - m0opt=m0 - mmopt=mm - popt=p - } - } - } - } - - print("Optimised parameters: ") - print(c(mmopt,popt,m0opt)) - 
print("-------:") - - resultat=KBO_known(param=list(mm=mmopt, - pp=popt, - m0=m0opt, - aa=aa, - expertMin=expertMin, - expertMax=expertMax, - sigma2_m0=sigma2_m0, - sigma2_mm=sigma2_mm, - sigma2_pp=sigma2_pp, - K=K), - threshold=threshold,Y=Y,Tps=Tps,N=N) - - } else { - # N petit, si trop petit on ne fait rien - if (N > 5) { - # Not enough data - no optim - if (is.null(param[["m0"]])){ - X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5, - round(quantile(Y[(3*N/4):N], probs = 0.8))) - } else { - X<-c(m0,pp,mm) - } - print("-------:") - print("Optimisation of initial parameters - result:") - print("Not enough data => so no optimisation performed:") - print("Used parameters: ") - print(X) - print("-------:") - resultat=KBO_known(param=list(m0=X[[1]], - pp=X[[2]], - mm=X[[3]], - aa=aa, - expertMin=expertMin, - expertMax=expertMax, - sigma2_m0=sigma2_m0, - sigma2_mm=sigma2_mm, - sigma2_pp=sigma2_pp, - K=K), - threshold=threshold,Y=Y,Tps=Tps,N=N) - } else { - warning("Not enough data between expert knowledge. The algorithm is not performed.") - resultat<-NULL - } - } - } else { - # Pas d'optimisation et test si N petit - si trop petit on ne fait rien - if (N > 5) { - # No optimisation on initial parameters - if (is.null(param[["m0"]])){ - X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5, - round(quantile(Y[(3*N/4):N], probs = 0.8))) - } else { - X<-c(m0,pp,mm) - } - print("-------:") - print("No optimisation of initial parameters:") - print("Used parameters: ") - print(X) - resultat=KBO_known(param=list(m0=X[[1]], - pp=X[[2]], - mm=X[[3]], - aa=aa, - expertMin=expertMin, - expertMax=expertMax, - sigma2_m0=sigma2_m0, - sigma2_mm=sigma2_mm, - sigma2_pp=sigma2_pp, - K=K), - threshold=threshold,Y=Y,Tps=Tps,N=N) - } else { - warning("Not enough data between expert knowledge. 
The algorithm is not performed.") - resultat<-NULL - } - } - - #--------------------------------------------------------------------------- - # Formatting output results - #--------------------------------------------------------------------------- - # If resultat NULL then create output with datain and 2 NULL objects - # else create output with detectoutlier, PredictionOK and kafino.results - # useful for the kafino_plot() function - #--------------------------------------------------------------------------- - if (is.null(resultat)){ - dt.out<-datain %>% mutate(flag=.data$flag1) - dt.pred<-NULL - resultat<-NULL - - mylist<-list(dt.out,dt.pred,resultat) - names(mylist)<-c("detectOutlier","PredictionOK","kafino.results") - class(mylist) = c("kfino") - return(invisible(mylist)) - } else { - prediction=na.omit(resultat$prediction) - label_pred=round(na.omit(resultat$label),2) - lwr=na.omit(resultat$lwr) - upr=na.omit(resultat$upr) - flag=na.omit(resultat$flag) - - dt.pred=cbind.data.frame(select(tp.dt,.data$rowNum), - prediction,label_pred,lwr,upr,flag) - - # join dt.pred with the initial datain containing all the observations - dt.out<-left_join(datain,dt.pred,by="rowNum") - dt.out<-mutate(dt.out,flag=if_else(.data$flag1 == "Bad", .data$flag1, .data$flag)) - dt.out<-arrange(dt.out,.data$rowNum) - - #-------------------------------------- - # return a S3 list object - #-------------------------------------- - # 1. a whole dataset with the detected outliers flagged and prediction - # 2. a dataset with the prediction on possible values - # 3. 
optimisation results (a list of vectors) - mylist<-list(dt.out,dt.pred,resultat) - names(mylist)<-c("detectOutlier","PredictionOK","kfino.results") - class(mylist) = c("kfino") - return(invisible(mylist)) - } -} - - - -#-------------------------------------- End of file -------------------------- diff --git a/R/utils.R b/R/utils_functions.R similarity index 92% rename from R/utils.R rename to R/utils_functions.R index 9816d619c755c6b893eea8410f2a6b0db96d46fa..64500a05bfdc22776d09726ad82470df5faf63f6 100644 --- a/R/utils.R +++ b/R/utils_functions.R @@ -37,7 +37,23 @@ loi_outlier<-function(y, #' @export #' #' @examples -#' # not run +#' set.seed(1234) +#' Y<-rnorm(n=10,mean=50,4) +#' Tps<-seq(1,10) +#' N=10 +#' param2<-list(m0=41, +#' mm=45, +#' pp=0.5, +#' aa=0.001, +#' expertMin=30, +#' expertMax=75, +#' sigma2_m0=1, +#' sigma2_mm=0.05, +#' sigma2_pp=5, +#' K=2, +#' seqp=seq(0.5,0.7,0.1)) +#' print(Y) +#' KBO_known(param=param2,threshold=0.5,Y,Tps,N) KBO_known<-function(param,threshold,Y,Tps,N){ # load objects mm=param[["mm"]] @@ -204,7 +220,23 @@ KBO_known<-function(param,threshold,Y,Tps,N){ #' @export #' #' @examples -#' # not run +#' set.seed(1234) +#' Y<-rnorm(n=10,mean=50,4) +#' Tps<-seq(1,10) +#' N=10 +#' param2<-list(m0=41, +#' mm=45, +#' pp=0.5, +#' aa=0.001, +#' expertMin=30, +#' expertMax=75, +#' sigma2_m0=1, +#' sigma2_mm=0.05, +#' sigma2_pp=5, +#' K=2, +#' seqp=seq(0.5,0.7,0.1)) +#' print(Y) +#' KBO_L(param=param2,Y,Tps,N,dix=6) KBO_L<-function(param,Y,Tps,N,dix){ # load objects mm=param[["mm"]] diff --git a/man/KBO_L.Rd b/man/KBO_L.Rd index 2759c2546ba9bc5203172ee91cf616dd92ef980c..adf7c35199b311ba2b037302c7d07909db59d3c9 100644 --- a/man/KBO_L.Rd +++ b/man/KBO_L.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/utils_functions.R \name{KBO_L} \alias{KBO_L} \title{KBO_L a function to calculate a likelihood on optimized parameters} @@ -27,5 +27,21 @@ KBO_L a 
function to calculate a likelihood on optimized parameters uses the same input parameter list than the main function } \examples{ -# not run +set.seed(1234) +Y<-rnorm(n=10,mean=50,4) +Tps<-seq(1,10) +N=10 +param2<-list(m0=41, + mm=45, + pp=0.5, + aa=0.001, + expertMin=30, + expertMax=75, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) +print(Y) +KBO_L(param=param2,Y,Tps,N,dix=6) } diff --git a/man/KBO_known.Rd b/man/KBO_known.Rd index d16c04368865909d5bec9109f5357876d98462a6..445975117934e6ab63d4fbac48e1bd46fb6f3d02 100644 --- a/man/KBO_known.Rd +++ b/man/KBO_known.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/utils_functions.R \name{KBO_known} \alias{KBO_known} \title{KBO_known} @@ -27,5 +27,21 @@ KBO_known uses the same input parameter list than the main function } \examples{ -# not run +set.seed(1234) +Y<-rnorm(n=10,mean=50,4) +Tps<-seq(1,10) +N=10 +param2<-list(m0=41, + mm=45, + pp=0.5, + aa=0.001, + expertMin=30, + expertMax=75, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) +print(Y) +KBO_known(param=param2,threshold=0.5,Y,Tps,N) } diff --git a/man/kfino_fit.Rd b/man/kfino_fit.Rd index 506427acafd3b9acfaf8c7a406bf677c1acab88e..5f0567bdb1e9853bcd5a97c7f4796b31975cea5b 100644 --- a/man/kfino_fit.Rd +++ b/man/kfino_fit.Rd @@ -2,23 +2,9 @@ % Please edit documentation in R/kfino.R \name{kfino_fit} \alias{kfino_fit} -\title{kafino_fit a function to detect outlier with a Kalman Filtering approach} +\title{kfino_fit a function to detect outlier with a Kalman Filtering approach} \usage{ -kfino_fit( - datain, - Tvar, - Yvar, - expertMin = NULL, - expertMax = NULL, - X = NULL, - doOptim = TRUE, - threshold = 0.5, - aa = 0.001, - sigma2_mm = 0.05, - sigma2_pp, - K = 2, - minp = 0.4 -) +kfino_fit(datain, Tvar, Yvar, param = NULL, doOptim = TRUE, threshold = 0.5) } \arguments{ \item{datain}{an input data.frame of one 
time course to study (unique IDE)} @@ -28,57 +14,54 @@ Tvar can be expressed as a proportion of day in seconds} \item{Yvar}{char, name of the variable to predict in the data.frame datain} -\item{expertMin}{numeric, the minimal weight expected by the user} +\item{param}{list, a list of initialization parameters} -\item{expertMax}{numeric, the maximal weight expected by the user} - -\item{X}{vector, a vector of initialization parameter (will not be optimized)} - -\item{doOptim}{logical, if TRUE optimisation of the initial parameters +\item{doOptim}{logical, if TRUE optimisation of the initial parameters default TRUE} -\item{threshold}{numeric, threshold to qualify an observation as outlier +\item{threshold}{numeric, threshold to qualify an observation as outlier according to the label_pred, default 0.5} - -\item{aa}{numeric, rate of weight change, default 0.001} - -\item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar, -default 0.05} - -\item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar, -default 5} - -\item{K}{numeric, cst in the outlier function (trapezoid), by default K=2 -increasing K, XXX} - -\item{minp}{numeric, minimal pp probability to be correctly weighted.} } \value{ -a S3 list with two data frames and a list of vectors of -kafino results +a S3 list with two data frames and a list of vectors of +kfino results \describe{ -\item{detectOutlier}{The whole dataset with the detected outliers flagged +\item{detectOutlier}{The whole dataset with the detected outliers flagged and prediction} \item{PredictionOK}{A dataset with the predictions on possible values} -\item{kafino.results}{kfino results (a list of vectors) on optimized input +\item{kafino.results}{kfino results (a list of vectors) on optimized input parameters or not} } } \description{ -kafino_fit a function to detect outlier with a Kalman Filtering approach +kfino_fit a function to detect outlier with a Kalman Filtering approach } \details{ -The initialization parameter 
vector X contains: +The initialization parameter list param contains: \describe{ - \item{mm}{target weight} - \item{pp}{probability to be correctly weighed} - \item{m0}{Initial weight} + \item{mm}{(optional) target weight, NULL if the user wants to optimize it} + \item{pp}{(optional) probability to be correctly weighed, NULL if the user + wants to optimize it} + \item{m0}{(optional) Initial weight, NULL if the user wants to optimize it} + \item{aa}{numeric, rate of weight change, default 0.001 } + \item{expertMin}{numeric, the minimal weight expected by the user} + \item{expertMax}{numeric, the maximal weight expected by the user} + \item{sigma2_m0}{variance of m0, default 1} + \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar, + default 0.05} + \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar, + default 5} + \item{K}{numeric, a constant value in the outlier function (trapezium), + by default K=5} + \item{seqp}{numeric, sequence of pp probability to be correctly weighted. + default seq(0.5,0.7,0.1)} } -It can be given by the user following his knowledge of the animal or dataset -or entirely constructed by the function. In the optimisation step, this -vector is initialized according to the input data (between the expert -range) using quantile of the Y distribution (varying between 0.2 and 0.8 for -m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A +It has to be given by the user following his knowledge of the animal or +the dataset. All parameters are compulsory except m0, mm and pp that can be +optimized by the algorithm. In the optimisation step, those three parameters +are initialized according to the input data (between the expert +range) using quantile of the Y distribution (varying between 0.2 and 0.8 for +m0 and 0.5 for mm). pp is a sequence varying between 0.5 and 0.7. A +sub-sampling is performed to speed the algorithm if the number of possible observations studied is greater than 500. 
} @@ -88,27 +71,58 @@ library(dplyr) # --- With Optimisation on initial parameters t0 <- Sys.time() +param1<-list(m0=NULL, + mm=NULL, + pp=NULL, + aa=0.001, + expertMin=30, + expertMax=75, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) + resu1<-kfino_fit(datain=spring1, Tvar="dateNum",Yvar="Poids", - expertMin=30,expertMax=75, - doOptim=TRUE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) + doOptim=TRUE,param=param1) Sys.time() - t0 # --- Without Optimisation on initial parameters t0 <- Sys.time() +param2<-list(m0=41, + mm=45, + pp=0.5, + aa=0.001, + expertMin=30, + expertMax=75, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) resu2<-kfino_fit(datain=spring1, Tvar="dateNum",Yvar="Poids", - expertMin=30,expertMax=75, - X=c(45,0.5,41), - doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) + param=param2, + doOptim=FALSE) Sys.time() - t0 # complex data on merinos2 dataset data(merinos2) t0 <- Sys.time() +param3<-list(m0=NULL, + mm=NULL, + pp=NULL, + aa=0.001, + expertMin=10, + expertMax=45, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) resu3<-kfino_fit(datain=merinos2, Tvar="dateNum",Yvar="Poids", - expertMin=10,expertMax=45, - doOptim=TRUE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) + doOptim=TRUE,param=param3) Sys.time() - t0 } diff --git a/man/kfino_fit2.Rd b/man/kfino_fit2.Rd deleted file mode 100644 index f602c533cadb5db5dcc356da2817f43c7acbac67..0000000000000000000000000000000000000000 --- a/man/kfino_fit2.Rd +++ /dev/null @@ -1,126 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kfino2.R -\name{kfino_fit2} -\alias{kfino_fit2} -\title{kafino_fit2 a function to detect outlier with a Kalman Filtering approach} -\usage{ -kfino_fit2(datain, Tvar, Yvar, param = NULL, doOptim = TRUE, threshold = 0.5) -} -\arguments{ -\item{datain}{an input data.frame of one time course to study (unique IDE)} - -\item{Tvar}{char, time column name in the 
data.frame datain, a numeric vector -Tvar can be expressed as a proportion of day in seconds} - -\item{Yvar}{char, name of the variable to predict in the data.frame datain} - -\item{param}{list, a list of initialization parameter} - -\item{doOptim}{logical, if TRUE optimisation of the initial parameters -default TRUE} - -\item{threshold}{numeric, threshold to qualify an observation as outlier -according to the label_pred, default 0.5} -} -\value{ -a S3 list with two data frames and a list of vectors of -kafino results -\describe{ -\item{detectOutlier}{The whole dataset with the detected outliers flagged - and prediction} -\item{PredictionOK}{A dataset with the predictions on possible values} -\item{kafino.results}{kfino results (a list of vectors) on optimized input - parameters or not} -} -} -\description{ -kafino_fit2 a function to detect outlier with a Kalman Filtering approach -} -\details{ -The initialization parameter list param contains: -\describe{ - \item{mm}{target weight, NULL if the user wants to optimize it} - \item{pp}{probability to be correctly weighed, NULL if the user wants to - optimize it} - \item{m0}{Initial weight, NULL if the user wants to optimize it} - \item{aa}{numeric, rate of weight change, default 0.001 } - \item{expertMin}{numeric, the minimal weight expected by the user} - \item{expertMax}{numeric, the maximal weight expected by the user} - \item{sigma2_m0}{variance of m0} - \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar, - default 0.05} - \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar, - default 5} - \item{K}{numeric, a constant value in the outlier function (trapezoid), - by default K=2 - increasing K, XXX} - \item{seqp}{numeric, minimal pp probability to be correctly weighted.} -} -It can be given by the user following his knowledge of the animal or dataset -or entirely constructed by the function. 
In the optimisation step, this -vector is initialized according to the input data (between the expert -range) using quantile of the Y distribution (varying between 0.2 and 0.8 for -m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A -sub-sampling is performed to speed the algorithm if the number of possible -observations studied is greater than 500. -} -\examples{ -data(spring1) -library(dplyr) - -# --- With Optimisation on initial parameters -t0 <- Sys.time() -param1<-list(m0=NULL, - mm=NULL, - pp=NULL, - aa=0.001, - expertMin=30, - expertMax=75, - sigma2_m0=1, - sigma2_mm=0.05, - sigma2_pp=5, - K=2, - seqp=seq(0.5,0.7,0.1)) - -resu1<-kfino_fit2(datain=spring1, - Tvar="dateNum",Yvar="Poids", - doOptim=TRUE,param=param1) -Sys.time() - t0 -# --- Without Optimisation on initial parameters -t0 <- Sys.time() -param2<-list(m0=41, - mm=45, - pp=0.5, - aa=0.001, - expertMin=30, - expertMax=75, - sigma2_m0=1, - sigma2_mm=0.05, - sigma2_pp=5, - K=2, - seqp=seq(0.5,0.7,0.1)) -resu2<-kfino_fit2(datain=spring1, - Tvar="dateNum",Yvar="Poids", - param=param2, - doOptim=FALSE) -Sys.time() - t0 - -# complex data on merinos2 dataset -data(merinos2) - -t0 <- Sys.time() -param3<-list(m0=NULL, - mm=NULL, - pp=NULL, - aa=0.001, - expertMin=10, - expertMax=45, - sigma2_m0=1, - sigma2_mm=0.05, - sigma2_pp=5, - K=2, - seqp=seq(0.5,0.7,0.1)) -resu3<-kfino_fit2(datain=merinos2, - Tvar="dateNum",Yvar="Poids", - doOptim=TRUE,param=param3) -Sys.time() - t0 -} diff --git a/man/kfino_plot.Rd b/man/kfino_plot.Rd index f264dc311f0f4dde44f6bdb55e405919208b4536..49a6957e725be57858398d5a8b15e6627d489fb4 100644 --- a/man/kfino_plot.Rd +++ b/man/kfino_plot.Rd @@ -2,15 +2,16 @@ % Please edit documentation in R/graph_functions.R \name{kfino_plot} \alias{kfino_plot} -\title{graphical function} +\title{kfino_plot a graphical function for the result of a kfino run} \usage{ kfino_plot(resuin, typeG, Tvar, Yvar, Ident, title = NULL) } \arguments{ \item{resuin}{a list resulting of the 
kfino algorithm} -\item{typeG}{char, type of graphic, either detection of outliers (with -qualitative or quantitative display) or prediction} +\item{typeG}{char, type of graphic, either detection of outliers (with +qualitative or quantitative display) or prediction. must be +"quanti" or "quali" or "prediction"} \item{Tvar}{char, time variable in the data.frame datain} @@ -24,13 +25,13 @@ qualitative or quantitative display) or prediction} a ggplot2 graphic } \description{ -graphical function +kfino_plot a graphical function for the result of a kfino run } \details{ The produced graphic can be, according to typeG: \describe{ - \item{quali}{The detection of outliers with a qualitative rule: OK values, - KO values (outliers) and Bad values (impossible values defined by + \item{quali}{The detection of outliers with a qualitative rule: OK values, + KO values (outliers) and Bad values (impossible values defined by the user)} \item{quanti}{The detection of outliers with a quantitative display using the calculated probability of the kafino algorithm} @@ -44,19 +45,30 @@ library(dplyr) print(colnames(spring1)) # --- Without Optimisation on initial parameters +param2<-list(m0=41, + mm=45, + pp=0.5, + aa=0.001, + expertMin=30, + expertMax=75, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) resu2<-kfino_fit(datain=spring1, Tvar="dateNum",Yvar="Poids", - expertMin=30,expertMax=75, - doOptim=FALSE,aa=0.001,sigma2_mm=0.05,K=2,sigma2_pp=5) - + param=param2, + doOptim=FALSE) + # flags are qualitative kfino_plot(resuin=resu2,typeG="quali", Tvar="Day",Yvar="Poids",Ident="IDE") - + # flags are quantitative kfino_plot(resuin=resu2,typeG="quanti", Tvar="Day",Yvar="Poids",Ident="IDE") - + # predictions on OK values kfino_plot(resuin=resu2,typeG="prediction", Tvar="Day",Yvar="Poids",Ident="IDE") diff --git a/man/loi_outlier.Rd b/man/loi_outlier.Rd index 14d1ac847e05fc1c4c9c5afd95a7964ae4a82c39..15b1de6d9bca1c468a192fd2027ea7b749d659a9 100644 --- 
a/man/loi_outlier.Rd +++ b/man/loi_outlier.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/utils_functions.R \name{loi_outlier} \alias{loi_outlier} \title{loi_outlier This function defines an outlier distribution (Surface of a diff --git a/tests/testthat/test-outputAlgo.R b/tests/testthat/test-outputAlgo.R index bceeda3b2219ac4c59440d9fd4a17a4c7fa1224e..f6ec6bc39022b29ed7ca7efe37ddc0267e4468c8 100644 --- a/tests/testthat/test-outputAlgo.R +++ b/tests/testthat/test-outputAlgo.R @@ -1,14 +1,81 @@ -test_that("Output type", { +test_that("kfino - no optimisation - computed correctly", { data(spring1) + param1<-list(m0=41, + mm=45, + pp=0.5, + aa=0.001, + expertMin=30, + expertMax=75, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) resu1<-kfino_fit(datain=spring1, - Tvar="dateNum",Yvar="Poids", - expertMin=30,expertMax=75, - doOptim=FALSE,aa=0.001,sigma2_mm=0.05, - K=2,sigma2_pp=5) + Tvar="dateNum",Yvar="Poids", + param=param1, + doOptim=FALSE) + # Output type expect_equal(length(resu1),3) expect_s3_class(resu1[[1]],"data.frame") expect_s3_class(resu1[[2]],"data.frame") expect_type(resu1[[3]],"list") - #expect_true(is_SBM(netA)) + expect_equal(length(resu1[[3]]),6) + + # expected result + expect_equal(as.vector(resu1[[3]]$likelihood),1.245621e-150) }) + + +test_that("kfino - optimisation - computed correctly", { + data(spring1) + param1<-list(m0=NULL, + mm=NULL, + pp=NULL, + aa=0.001, + expertMin=30, + expertMax=75, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) + resu1<-kfino_fit(datain=spring1, + Tvar="dateNum",Yvar="Poids", + param=param1, + doOptim=TRUE) + + # Output type + expect_equal(length(resu1),3) + expect_s3_class(resu1[[1]],"data.frame") + expect_s3_class(resu1[[2]],"data.frame") + expect_type(resu1[[3]],"list") + expect_equal(length(resu1[[3]]),6) + + # expected result + 
expect_equal(as.vector(resu1[[3]]$likelihood),1.035855e-135) +}) + +test_that("KBO_L - likelihood - computed correctly", { + set.seed(1234) + Y<-rnorm(n=10,mean=50,4) + Tps<-seq(1,10) + N=10 + param2<-list(m0=41, + mm=45, + pp=0.5, + aa=0.001, + expertMin=30, + expertMax=75, + sigma2_m0=1, + sigma2_mm=0.05, + sigma2_pp=5, + K=2, + seqp=seq(0.5,0.7,0.1)) + resu<-KBO_L(param=param2,Y,Tps,N,dix=6) + + expect_equal(as.vector(resu),0.00490257592) + +}) +