diff --git a/DESCRIPTION b/DESCRIPTION index e08c3cfb03d1fe70a2dff30420ac2a99966755cc..ab69e371aa9bf136c933121a790bf040500c0b1b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,12 +5,19 @@ Authors@R: c( person("Bertrand", "Cloez", email = "bertrand.cloez@inrae.fr", role = c("aut")), person("Isabelle", "Sanchez", email = "isabelle.sanchez@inrae.fr", role = c("aut", "cre")), person("Benedicte", "Fontez", email = "benedicte.fontez@supagro.fr", role = c("ctr"))) -Author: Bertrand Cloez [aut, cre], - Isabelle Sanchez [ctr], +Author: Bertrand Cloez [aut], + Isabelle Sanchez [aut, cre], Benedicte Fontez [ctr] Maintainer: Isabelle Sanchez <isabelle.sanchez@inrae.fr> Description: A method for detecting outliers with a Kalman filter on impulsed - noised outliers and prediction on cleaned data. + noised outliers and prediction on cleaned data. kfino is a robust sequential + algorithm that filters data with a large number of outliers. This + algorithm is based on simple latent linear Gaussian processes as in the + Kalman Filter method and is devoted to detecting impulse-noised outliers. These + are data points that differ significantly from other observations. 'ML' + (Maximum Likelihood) and 'EM' (Expectation-Maximization algorithm) + algorithms were implemented in 'kfino'. The method is described in full + detail in the following arXiv preprint: <https://arxiv.org/abs/2208.00961>. 
License: GPL-3 Depends: R (>= 4.1.0) Encoding: UTF-8 diff --git a/R/utils_functions.R b/R/utils_functions.R index 0d94b02e21964623129cfcd50f1ced5299d5dc84..486944010ca5459e59e4f41caa71c429ceeef560 100644 --- a/R/utils_functions.R +++ b/R/utils_functions.R @@ -6,7 +6,7 @@ # KBO_EM() #------------------------------------------------------------------- -#' doutlier This function defines an outlier distribution (Surface of a +#' doutlier defines an outlier distribution (Surface of a #' trapezium) and uses input parameters given in the main function kfino_fit() #' #' @param y numeric, point @@ -15,7 +15,14 @@ #' @param expertMax numeric, the maximal weight expected by the user #' #' @details this function is used to calculate an outlier distribution -#' following a trapezium shape +#' following a trapezium shape. +#' \eqn{y \mapsto \text{doutlier}(y,K,\text{expertMin},\text{expertMax})} +#' is the probability density function on +#' \eqn{[\text{expertMin},\text{expertMax}]} which is linear and satisfies +#' \eqn{\text{doutlier}(\text{expertMax},K,\text{expertMin},\text{expertMax}) +#' =K*\text{doutlier}(\text{expertMin},K,\text{expertMin},\text{expertMax}).} +#' In particular, when \eqn{K=1} this corresponds to the uniform distribution. 
+#' #' @return a numeric value #' @export #' @@ -34,7 +41,7 @@ doutlier<-function(y, #' KBO_known a function to calculate a likelihood on given parameters #' #' @param param list, see initial parameter list in \code{kfino_fit} -#' @param threshold numeric, threshold for CI, default 0.5 +#' @param threshold numeric, threshold for confidence interval, default 0.5 #' @param kappa numeric, truncation setting for likelihood optimization, #' default 10 #' @param Y character, name of the numeric variable to predict in the @@ -50,8 +57,8 @@ doutlier<-function(y, #' \item{prediction}{vector, the prediction of weights} #' \item{label}{vector, probability to be an outlier} #' \item{likelihood}{numeric, the calculated likelihood} -#' \item{lwr}{vector of lower bound CI of the prediction } -#' \item{upr}{vector of upper bound CI of the prediction } +#' \item{lwr}{vector of lower bound confidence interval of the prediction } +#' \item{upr}{vector of upper bound confidence interval of the prediction } #' \item{flag}{char, is an outlier or not} #' } #' @export diff --git a/man/KBO_known.Rd b/man/KBO_known.Rd index e28e51fafc0f7ee221d2d15ee76b7935cb19fa78..fe48f4c112bb7d00259f1d5ebe5c219128c6b8ee 100644 --- a/man/KBO_known.Rd +++ b/man/KBO_known.Rd @@ -9,7 +9,7 @@ KBO_known(param, threshold, kappa = 10, Y, Tps, N) \arguments{ \item{param}{list, see initial parameter list in \code{kfino_fit}} -\item{threshold}{numeric, threshold for CI, default 0.5} +\item{threshold}{numeric, threshold for confidence interval, default 0.5} \item{kappa}{numeric, truncation setting for likelihood optimization, default 10} @@ -29,8 +29,8 @@ a list \item{prediction}{vector, the prediction of weights} \item{label}{vector, probability to be an outlier} \item{likelihood}{numeric, the calculated likelihood} - \item{lwr}{vector of lower bound CI of the prediction } - \item{upr}{vector of upper bound CI of the prediction } + \item{lwr}{vector of lower bound confidence interval of the prediction } + 
\item{upr}{vector of upper bound confidence interval of the prediction } \item{flag}{char, is an outlier or not} } } diff --git a/man/doutlier.Rd b/man/doutlier.Rd index efd7eaa4a3348bd58deace1e7ef4401e3586dfcd..58f1886141154c7be8f76fea88509dafefafb7b4 100644 --- a/man/doutlier.Rd +++ b/man/doutlier.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils_functions.R \name{doutlier} \alias{doutlier} -\title{doutlier This function defines an outlier distribution (Surface of a +\title{doutlier defines an outlier distribution (Surface of a trapezium) and uses input parameters given in the main function kfino_fit()} \usage{ doutlier(y, K, expertMin, expertMax) @@ -20,12 +20,18 @@ doutlier(y, K, expertMin, expertMax) a numeric value } \description{ -doutlier This function defines an outlier distribution (Surface of a +doutlier defines an outlier distribution (Surface of a trapezium) and uses input parameters given in the main function kfino_fit() } \details{ this function is used to calculate an outlier distribution - following a trapezium shape + following a trapezium shape. + \eqn{y \mapsto \text{doutlier}(y,K,\text{expertMin},\text{expertMax})} + is the probability density function on + \eqn{[\text{expertMin},\text{expertMax}]} which is linear and satisfies + \eqn{\text{doutlier}(\text{expertMax},K,\text{expertMin},\text{expertMax}) + =K*\text{doutlier}(\text{expertMin},K,\text{expertMin},\text{expertMax}).} + In particular, when \eqn{K=1} this corresponds to the uniform distribution. } \examples{ doutlier(2,5,10,45) diff --git a/man/kfino.Rd b/man/kfino.Rd index 19a0444fe19ccd3c4668e44495b10f7875dcd251..a2ec5bf77f3b1d02d8953e9adbe37b334cce01f3 100644 --- a/man/kfino.Rd +++ b/man/kfino.Rd @@ -8,7 +8,7 @@ \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -A method for detecting outliers with a Kalman filter on impulsed noised outliers and prediction on cleaned data. 
+A method for detecting outliers with a Kalman filter on impulsed noised outliers and prediction on cleaned data. kfino is a robust sequential algorithm that filters data with a large number of outliers. This algorithm is based on simple latent linear Gaussian processes as in the Kalman Filter method and is devoted to detecting impulse-noised outliers. These are data points that differ significantly from other observations. 'ML' (Maximum Likelihood) and 'EM' (Expectation-Maximization algorithm) algorithms were implemented in 'kfino'. The method is described in full detail in the following arXiv preprint: \url{https://arxiv.org/abs/2208.00961}. } \details{ xxxxxxxx xxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxx.