Skip to content
Snippets Groups Projects
Commit 7e3aa72b authored by sanchezi's avatar sanchezi
Browse files

extrait fonctions internes pour accéler code

parent ed8ae1f7
No related branches found
No related tags found
No related merge requests found
Pipeline #55331 passed
# Generated by roxygen2: do not edit by hand # Generated by roxygen2: do not edit by hand
export(KBO_L)
export(KBO_known)
export(kfino_fit) export(kfino_fit)
export(kfino_fit2)
export(kfino_plot) export(kfino_plot)
importFrom(dplyr,"%>%") importFrom(dplyr,"%>%")
importFrom(dplyr,.data) importFrom(dplyr,.data)
......
#' kafino_fit2 a function to detect outlier with a Kalman Filtering approach
#' @param datain an input data.frame of one time course to study (unique IDE)
#' @param Tvar char, time column name in the data.frame datain, a numeric vector
#' Tvar can be expressed as a proportion of day in seconds
#' @param Yvar char, name of the variable to predict in the data.frame datain
#' @param param list, a list of initialization parameter
#' @param doOptim logical, if TRUE optimisation of the initial parameters
#' default TRUE
#' @param threshold numeric, threshold to qualify an observation as outlier
#' according to the label_pred, default 0.5
#'
#' @details The initialization parameter list param contains:
#' \describe{
#' \item{mm}{target weight, NULL if the user wants to optimize it}
#' \item{pp}{probability to be correctly weighed, NULL if the user wants to
#' optimize it}
#' \item{m0}{Initial weight, NULL if the user wants to optimize it}
#' \item{aa}{numeric, rate of weight change, default 0.001 }
#' \item{expertMin}{numeric, the minimal weight expected by the user}
#' \item{expertMax}{numeric, the maximal weight expected by the user}
#' \item{sigma2_m0}{variance of m0}
#' \item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
#' default 0.05}
#' \item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
#' default 5}
#' \item{K}{numeric, a constant value in the outlier function (trapezoid),
#' by default K=2 - increasing K, XXX}
#' \item{seqp}{numeric, minimal pp probability to be correctly weighted.}
#' }
#' It can be given by the user following his knowledge of the animal or dataset
#' or entirely constructed by the function. In the optimisation step, this
#' vector is initialized according to the input data (between the expert
#' range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
#' m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A
#' sub-sampling is performed to speed the algorithm if the number of possible
#' observations studied is greater than 500.
#'
#' @importFrom stats dnorm quantile na.omit
#' @importFrom dplyr mutate filter left_join arrange %>%
#' @importFrom dplyr .data if_else row_number select
#' @return a S3 list with two data frames and a list of vectors of
#' kafino results
#' \describe{
#' \item{detectOutlier}{The whole dataset with the detected outliers flagged
#' and prediction}
#' \item{PredictionOK}{A dataset with the predictions on possible values}
#' \item{kafino.results}{kfino results (a list of vectors) on optimized input
#' parameters or not}
#' }
#' @export
#' @examples
#' data(spring1)
#' library(dplyr)
#'
#' # --- With Optimisation on initial parameters
#' t0 <- Sys.time()
#' param1<-list(m0=NULL,
#' mm=NULL,
#' pp=NULL,
#' aa=0.001,
#' expertMin=30,
#' expertMax=75,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#'
#' resu1<-kfino_fit2(datain=spring1,
#' Tvar="dateNum",Yvar="Poids",
#' doOptim=TRUE,param=param1)
#' Sys.time() - t0
#' # --- Without Optimisation on initial parameters
#' t0 <- Sys.time()
#' param2<-list(m0=41,
#' mm=45,
#' pp=0.5,
#' aa=0.001,
#' expertMin=30,
#' expertMax=75,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#' resu2<-kfino_fit2(datain=spring1,
#' Tvar="dateNum",Yvar="Poids",
#' param=param2,
#' doOptim=FALSE)
#' Sys.time() - t0
#'
#' # complex data on merinos2 dataset
#' data(merinos2)
#'
#' t0 <- Sys.time()
#' param3<-list(m0=NULL,
#' mm=NULL,
#' pp=NULL,
#' aa=0.001,
#' expertMin=10,
#' expertMax=45,
#' sigma2_m0=1,
#' sigma2_mm=0.05,
#' sigma2_pp=5,
#' K=2,
#' seqp=seq(0.5,0.7,0.1))
#' resu3<-kfino_fit2(datain=merinos2,
#' Tvar="dateNum",Yvar="Poids",
#' doOptim=TRUE,param=param3)
#' Sys.time() - t0
kfino_fit2<-function(datain,Tvar,Yvar,
param=NULL,
doOptim=TRUE,threshold=0.5){
if( any(is.null(param[["expertMin"]]) |
is.null(param[["expertMax"]])) )
stop('You have to define expertMin and expertMax.')
if( any(!is.numeric(param[["expertMin"]]) |
!is.numeric(param[["expertMax"]])) )
stop('expertMin and expertMax must be numeric.')
if( !is.numeric(t(datain[,Yvar])))
stop('Input parameter Yvar must contain numeric values.')
if( !is.numeric(t(datain[,Tvar])))
stop('Input parameter Tvar must contain numeric values.')
#-------------------------------------------------------------
# load objects
m0<-param[["m0"]]
mm<-param[["mm"]]
pp<-param[["pp"]]
if(is.null(param[["aa"]])){ aa<-0.001 } else { aa<-param[["aa"]] }
expertMin<-param[["expertMin"]]
expertMax<-param[["expertMax"]]
if(is.null(param[["sigma2_m0"]])){
sigma2_m0<-1
} else {sigma2_m0<-param[["sigma2_m0"]]}
if(is.null(param[["sigma2_mm"]])){
sigma2_mm<-0.05
} else {sigma2_mm<-param[["sigma2_mm"]]}
if(is.null(param[["sigma2_pp"]])){
sigma2_pp<-5
} else {sigma2_pp<-param[["sigma2_pp"]]}
if(is.null(param[["K"]])){ K<-5 } else { K<-param[["K"]] }
seqp<-param[["seqp"]]
#-------------------------------------------------------------
# Flag impossible weight values (expert knowledge)
# the algorithm works on the dataset with possible values
# Create an artificial column numbering the rows for joining purpose
datain<-as.data.frame(datain)
datain<-datain %>%
mutate(rowNum=row_number(),
flag1=if_else(.data[[Yvar]] > expertMin &
.data[[Yvar]] <= expertMax,"OK","Bad"))
tp.dt<-datain %>% filter(.data$flag1 == "OK")
Y<-tp.dt[,Yvar]
N<-nrow(tp.dt)
Tps<-tp.dt[,Tvar]
#WARNING WARNING: AU lieu de calculer L qui est arrondi à 0,
# je calcule 10^N fois Lu et 10^N q. Au total pr chaque donnée
# je multiplie par 100 mais comme c'est l'ordre de grandeur de loioutlier()
# ca ne me parait pas disproportionné.
# Au lieu de 10 et 10 je fais simplement sqrt(expertMax - expertMin)
dix=sqrt(expertMax - expertMin)
#------------------------------------------------------------------------
# Optimisation sur les paramètres initiaux ou pas
#------------------------------------------------------------------------
if (doOptim == TRUE){
if (N > 500){
# optim avec sous-echantillonnage
print("-------:")
print("Optimisation of initial parameters with sub-sampling - result:")
bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
m0opt=quantile(Y[1:N/4], probs = c(.5))
mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
cat("bornem0: ",bornem0,"\n")
cat("m0opt: ",m0opt,"\n")
cat("mmopt: ",mmopt,"\n")
popt=0.5
#--- Saving datain before sub-sampling
YY=Y
TpsTps=Tps
NN=N
Subechant=sort(sample(1:NN,50))
Y=YY[Subechant]
Tps=TpsTps[Subechant]
N=50
Vopt=KBO_L(list(m0=m0opt,
mm=mmopt,
pp=popt,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_mm=sigma2_mm,
sigma2_m0=sigma2_m0,
sigma2_pp=sigma2_pp,
K=K),
Y,Tps,N,dix)
for (m0 in seq(bornem0[1],bornem0[2],2) ){
for (mm in seq((m0-5),(m0+20),2) ){
for (p in seqp){
# A voir si 50 sous-echantillons au hasard suffisent. Comme dans
# Robbins Monroe, permet aussi de reduire l'impact de la troncature
Subechant=sort(sample(1:NN,50))
Y=YY[Subechant]
Tps=TpsTps[Subechant]
V=KBO_L(list(m0=m0,
mm=mm,
pp=p,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_mm=sigma2_mm,
sigma2_m0=sigma2_m0,
sigma2_pp=sigma2_pp,
K=K),
Y,Tps,N,dix)
if (V > Vopt){
Vopt=V
m0opt=m0
mmopt=mm
popt=p
}
}
}
}
# Le fait de prendre des sous-echantillons remarquables
# (ie 50 au lieu de 200) ne divise que par 2 le temps de calculs
# (au lieu de 4 comme imaginé cela vient p.e de la fonction sample())
Y=YY
Tps=TpsTps
N=NN
print("Optimised parameters: ")
print(c(mmopt,popt,m0opt))
print("-------:")
resultat=KBO_known(param=list(mm=mmopt,
pp=popt,
m0=m0opt,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_m0=sigma2_m0,
sigma2_mm=sigma2_mm,
sigma2_pp=sigma2_pp,
K=K),
threshold,Y=Y,Tps=Tps,N=N)
} else if (N > 50){
# optim sans sous-echantillonnage
print("-------:")
print("Optimisation of initial parameters - result:")
print("no sub-sampling performed:")
bornem0=quantile(Y[1:N/4], probs = c(.2, .8))
m0opt=quantile(Y[1:N/4], probs = c(.5))
mmopt=quantile(Y[(3*N/4):N], probs = c(.5))
cat("bornem0: ",bornem0,"\n")
cat("m0opt: ",m0opt,"\n")
cat("mmopt: ",mmopt,"\n")
popt=0.5
Vopt=KBO_L(list(m0=m0opt,
mm=mmopt,
pp=popt,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_mm=sigma2_mm,
sigma2_m0=sigma2_m0,
sigma2_pp=sigma2_pp,
K=K),
Y,Tps,N,dix)
for (m0 in seq(bornem0[1],bornem0[2],2) ){
for (mm in seq((m0-5),(m0+20),2) ){
for (p in seqp){
V=KBO_L(list(m0=m0,
mm=mm,
pp=p,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_mm=sigma2_mm,
sigma2_m0=sigma2_m0,
sigma2_pp=sigma2_pp,
K=K),
Y,Tps,N,dix)
if (V > Vopt){
Vopt=V
m0opt=m0
mmopt=mm
popt=p
}
}
}
}
print("Optimised parameters: ")
print(c(mmopt,popt,m0opt))
print("-------:")
resultat=KBO_known(param=list(mm=mmopt,
pp=popt,
m0=m0opt,
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_m0=sigma2_m0,
sigma2_mm=sigma2_mm,
sigma2_pp=sigma2_pp,
K=K),
threshold=threshold,Y=Y,Tps=Tps,N=N)
} else {
# N petit, si trop petit on ne fait rien
if (N > 5) {
# Not enough data - no optim
if (is.null(param[["m0"]])){
X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5,
round(quantile(Y[(3*N/4):N], probs = 0.8)))
} else {
X<-c(m0,pp,mm)
}
print("-------:")
print("Optimisation of initial parameters - result:")
print("Not enough data => so no optimisation performed:")
print("Used parameters: ")
print(X)
print("-------:")
resultat=KBO_known(param=list(m0=X[[1]],
pp=X[[2]],
mm=X[[3]],
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_m0=sigma2_m0,
sigma2_mm=sigma2_mm,
sigma2_pp=sigma2_pp,
K=K),
threshold=threshold,Y=Y,Tps=Tps,N=N)
} else {
warning("Not enough data between expert knowledge. The algorithm is not performed.")
resultat<-NULL
}
}
} else {
# Pas d'optimisation et test si N petit - si trop petit on ne fait rien
if (N > 5) {
# No optimisation on initial parameters
if (is.null(param[["m0"]])){
X<-c(trunc(quantile(Y[1:N/4], probs = .2)), 0.5,
round(quantile(Y[(3*N/4):N], probs = 0.8)))
} else {
X<-c(m0,pp,mm)
}
print("-------:")
print("No optimisation of initial parameters:")
print("Used parameters: ")
print(X)
resultat=KBO_known(param=list(m0=X[[1]],
pp=X[[2]],
mm=X[[3]],
aa=aa,
expertMin=expertMin,
expertMax=expertMax,
sigma2_m0=sigma2_m0,
sigma2_mm=sigma2_mm,
sigma2_pp=sigma2_pp,
K=K),
threshold=threshold,Y=Y,Tps=Tps,N=N)
} else {
warning("Not enough data between expert knowledge. The algorithm is not performed.")
resultat<-NULL
}
}
#---------------------------------------------------------------------------
# Formatting output results
#---------------------------------------------------------------------------
# If resultat NULL then create output with datain and 2 NULL objects
# else create output with detectoutlier, PredictionOK and kafino.results
# useful for the kafino_plot() function
#---------------------------------------------------------------------------
if (is.null(resultat)){
dt.out<-datain %>% mutate(flag=.data$flag1)
dt.pred<-NULL
resultat<-NULL
mylist<-list(dt.out,dt.pred,resultat)
names(mylist)<-c("detectOutlier","PredictionOK","kafino.results")
class(mylist) = c("kfino")
return(invisible(mylist))
} else {
prediction=na.omit(resultat$prediction)
label_pred=round(na.omit(resultat$label),2)
lwr=na.omit(resultat$lwr)
upr=na.omit(resultat$upr)
flag=na.omit(resultat$flag)
dt.pred=cbind.data.frame(select(tp.dt,.data$rowNum),
prediction,label_pred,lwr,upr,flag)
# join dt.pred with the initial datain containing all the observations
dt.out<-left_join(datain,dt.pred,by="rowNum")
dt.out<-mutate(dt.out,flag=if_else(.data$flag1 == "Bad", .data$flag1, .data$flag))
dt.out<-arrange(dt.out,.data$rowNum)
#--------------------------------------
# return a S3 list object
#--------------------------------------
# 1. a whole dataset with the detected outliers flagged and prediction
# 2. a dataset with the prediction on possible values
# 3. optimisation results (a list of vectors)
mylist<-list(dt.out,dt.pred,resultat)
names(mylist)<-c("detectOutlier","PredictionOK","kfino.results")
class(mylist) = c("kfino")
return(invisible(mylist))
}
}
#-------------------------------------- End of file --------------------------
R/utils.R 0 → 100644
#' KBO_known
#'
#' @param param list, a list of 10 input parameters for mm, pp and m0
#' @param threshold numeric, threshold for CI
#' @param Y num
#' @param Tps num
#' @param N num
#'
#'
#' @details uses the same input parameter list than the main function
#' @return a list
#' @export
#'
#' @examples
#' # not run
KBO_known<-function(param,threshold,Y,Tps,N){
# load objects
mm=param[["mm"]]
pp=param[["pp"]]
m0=param[["m0"]]
aa=param[["aa"]]
expertMin=param[["expertMin"]]
expertMax=param[["expertMax"]]
sigma2_m0=param[["sigma2_m0"]]
sigma2_mm=param[["sigma2_mm"]]
sigma2_pp=param[["sigma2_pp"]]
K=param[["K"]]
# paramètre de troncature
kappa <-10
#--- function defining an outlier distribution (Surface of a trapezium)
# uses expertMin and expertMax
loi_outlier<-function(y){
2/((K+1)*(expertMax - expertMin)) +
(2*(K-1)/(K+1))*((y - expertMin)/((expertMax - expertMin)^2))
}
# initialisation (1.1.1)
#--------------------
m1= (sigma2_pp*m0 + Y[1]*sigma2_m0)/(sigma2_m0+sigma2_pp)
sigma1=(sigma2_m0*sigma2_pp)/(sigma2_m0+sigma2_pp)
l0<- loi_outlier(Y[1])
loinorm1<-dnorm(Y[1],m0, sqrt(sigma2_m0+sigma2_pp))
p0= ((1-pp)*l0) / (pp*loinorm1 + (1-pp)*l0)
p1=1-p0
m=c(m0,m1)
p=c(p0,p1)
sigma2=c(sigma2_m0,sigma1)
L=c(l0,loinorm1)
Xmean=c(p0*m0 + p1*m1)
Pmean=c(p1)
q=c(1,1)
sigma2_new=c(sigma2_m0)
#iteration (1.1.2)
#-----------------------
#// Pour l'instant, je fais comme si kappa<N-1 mettre un if sinon
#avant troncature
for (k in 1:(kappa-1)){
# je crée le vecteur des nouveaux m, ##pour plus tard: au lieu de les
# faire vide, je prend direct m_{u0}
mnew=rep(0,2^(k+1))
sigma2new=rep(0 ,2^(k+1))
pnew=rep(0 ,2^(k+1))
Lnew=rep(0 ,2^(k+1))
# CR = constante de renormalisation qui intervient dans le dénominateur des pu
qnew=rep(0 ,2^(k+1))
diffTps<-Tps[k+1] - Tps[k]
#--- numérateur de pu0
tpbeta<-loi_outlier(Y[k+1])
pnew[1:(2^k)]=p[1:(2^k)]*(1-pp)*tpbeta
Lnew[1:(2^k)]=L[1:(2^k)]*tpbeta
mnew[1:(2^k)]= m[1:(2^k)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
sigma2new[1:(2^k)] = sigma2[1:(2^k)]*exp(-2*aa*(diffTps)) +
(1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
qnew[1:(2^k)] <- q[1:(2^k)]*(1-pp)
qnew[(1+2^k):2^(k+1)] <- q[1:(2^k)]*pp
sommevar=sigma2new[1:(2^k)] + sigma2_pp
tpnorm<-dnorm(Y[k+1], mnew[1:(2^k)], sqrt(sommevar))
pnew[(1+2^k):2^(k+1)]=p[1:(2^k)]*pp*tpnorm
Lnew[(1+2^k):2^(k+1)]=L[1:(2^k)]*tpnorm
mnew[(1+2^k):2^(k+1)] = (sigma2new[1:(2^k)]*Y[k+1] +
mnew[1:(2^k)]*sigma2_pp)/sommevar
sigma2new[(1+2^k):2^(k+1)] = sigma2new[1:(2^k)] * sigma2_pp/sommevar
CR<-sum(pnew)
Proba1<-sum(pnew[(1+2^k):2^(k+1)])
SommeMasse<-sum(pnew*mnew)
m=mnew
sigma2=sigma2new
p=pnew/CR
Xmean=c(Xmean,SommeMasse/CR)
Pmean=c(Pmean,Proba1/CR)
L=Lnew
q=qnew
sigma2_new<-c(sigma2_new,sum(p*sigma2) + sum(p*m^2) - (sum(p*m))^2)
}
#apres troncature
#----------------------
for (k in kappa:(N-1)){
# je cree le vecteur des nouveaux m,
# pour plus tard: au lieu de les faire vide, je prend direct m_{u0}
mnew=rep(0,2^(kappa+1))
sigma2new=rep(0 ,2^(kappa+1))
pnew=rep(0 ,2^(kappa+1))
Lnew=rep(0 ,2^(kappa+1))
# CR = constante de renormalisation qui intervient dans le dénominateur des pu
qnew=rep(0 ,2^(kappa+1))
diffTps<-Tps[k+1] - Tps[k]
#--- numérateur de pu0
tpbeta<-loi_outlier(Y[k+1])
pnew[1:(2^kappa)]=p[1:(2^kappa)]*(1-pp)*tpbeta
Lnew[1:(2^kappa)]=L[1:(2^kappa)]*tpbeta
mnew[1:(2^kappa)]= m[1:(2^kappa)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
sigma2new[1:(2^kappa)] = sigma2[1:(2^kappa)]*exp(-2*aa*(diffTps)) +
(1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
qnew[1:(2^kappa)]=q[1:(2^kappa)]*(1-pp)
qnew[(1+2^kappa):2^(kappa+1)]=q[1:(2^kappa)]*pp
sommevar=sigma2new[1:(2^kappa)] + sigma2_pp
tpnorm<-dnorm(Y[k+1], mnew[1:(2^kappa)], sqrt(sommevar))
pnew[(1+2^kappa):2^(kappa+1)]=p[1:(2^kappa)]*pp*tpnorm
Lnew[(1+2^kappa):2^(kappa+1)]=L[1:(2^kappa)]*tpnorm
mnew[(1+2^kappa):2^(kappa+1)] = (sigma2new[1:(2^kappa)]*Y[k+1] +
mnew[1:(2^kappa)]*sigma2_pp)/sommevar #m_u1
sigma2new[(1+2^kappa):2^(kappa+1)] = sigma2new[1:(2^kappa)] * sigma2_pp/sommevar
CR<-sum(pnew)
Proba1<-sum(pnew[(1+2^kappa):2^(kappa+1)])
SommeMasse<-sum(pnew*mnew)
selection=order(pnew, decreasing=T)[1:2^kappa]
m=mnew[selection]
sigma2=sigma2new[selection]
p=pnew[selection]/sum(pnew[selection])
Xmean=c(Xmean,SommeMasse/CR)
Pmean=c(Pmean,Proba1/CR)
L=Lnew[selection]
q=qnew[selection]
sigma2_new<-c(sigma2_new,sum(p*sigma2) + sum(p*m^2) - (sum(p*m))^2)
}
#----------------------------------------------
# Ajout Intervalle de confiance des estimations
# Variance (see section 1.2.1)
# IC est approximé...
#----------------------------------------------
lwr <- Xmean - 1.96*sqrt(sigma2_new)
upr <- Xmean + 1.96*sqrt(sigma2_new)
# flag
flag<-if_else(Pmean > threshold,"OK","KO")
#--- Calcul des derniers éléments
Vraisemblance=L%*%q
resultat=list("prediction"=Xmean,
"label"=Pmean,
"likelihood"=Vraisemblance,
"lwr"=lwr,
"upr"=upr,
"flag"=flag)
return(resultat)
}
#------------------------------------------------------------------------
#' KBO_L a function to calculate a likelihood on optimized parameters
#'
#' @param param a list of 10 input parameters mm, pp and m0
#' @param Y num
#' @param Tps num
#' @param N num
#' @param dix num
#'
#' @details uses the same input parameter list than the main function
#' @return a likelihood
#' @export
#'
#' @examples
#' # not run
KBO_L<-function(param,Y,Tps,N,dix){
# load objects
mm=param[["mm"]]
pp=param[["pp"]]
m0=param[["m0"]]
aa=param[["aa"]]
expertMin=param[["expertMin"]]
expertMax=param[["expertMax"]]
sigma2_m0<-param[["sigma2_m0"]]
sigma2_mm<-param[["sigma2_mm"]]
sigma2_pp<-param[["sigma2_pp"]]
K<-param[["K"]]
#--- function defining an outlier distribution (Surface of a trapezium)
# uses expertMin and expertMax
loi_outlier<-function(y){
2/((K+1)*(expertMax - expertMin)) +
(2*(K-1)/(K+1))*((y - expertMin)/((expertMax - expertMin)^2))
}
#---- paramètre de troncature
# Ici je met kappa =7, ca me fais retirer les proba <0.01 au lieu de 0.001
# comme qd kappa=10 mais ca divise par 10 le temps de calcul
kappa=7
# initialisation (1.1.1)
#--------------------
m1= (sigma2_pp*m0 + Y[1]*sigma2_m0)/(sigma2_m0+sigma2_pp)
sigma1=(sigma2_m0*sigma2_pp)/(sigma2_m0+sigma2_pp)
l0<- loi_outlier(Y[1])
loinorm1<-dnorm(Y[1],m0, sqrt(sigma2_m0+sigma2_pp))
p0= ((1-pp)*l0) / (pp*loinorm1 + (1-pp)*l0)
p1=1-p0
m=c(m0,m1)
p=c(p0,p1)
sigma2=c(sigma2_m0,sigma1)
L=dix*c(l0,loinorm1) #Attention *2 pr le grandir
q=c(1,1) #attention
#iteration (1.1.2)
#-----------------------
#// Pour l'instant, je fais comme si kappa<N-1 mettre un if sinon
# avant troncature
for (k in 1:(kappa-1)){
mnew=rep(0,2^(k+1))
sigma2new=rep(0 ,2^(k+1))
pnew=rep(0 ,2^(k+1))
Lnew=rep(0 ,2^(k+1))
# CR = constante de renormalisation qui intervient dans le dénominateur des pu
qnew=rep(0 ,2^(k+1))
diffTps<-Tps[k+1] - Tps[k]
#--- numérateur de pu0
tpbeta<-loi_outlier(Y[k+1])
pnew[1:(2^k)]=p[1:(2^k)]*(1-pp)*tpbeta
Lnew[1:(2^k)]=L[1:(2^k)]*tpbeta
mnew[1:(2^k)]= m[1:(2^k)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
sigma2new[1:(2^k)] = sigma2[1:(2^k)]*exp(-2*aa*(diffTps)) +
(1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
qnew[1:(2^k)] <- q[1:(2^k)]*(1-pp)
qnew[(1+2^k):2^(k+1)] <- q[1:(2^k)]*pp
sommevar=sigma2new[1:(2^k)] + sigma2_pp
tpnorm<-dnorm(Y[k+1], mnew[1:(2^k)], sqrt(sommevar))
pnew[(1+2^k):2^(k+1)]=p[1:(2^k)]*pp*tpnorm
Lnew[(1+2^k):2^(k+1)]=L[1:(2^k)]*tpnorm
mnew[(1+2^k):2^(k+1)] = (sigma2new[1:(2^k)]*Y[k+1] + mnew[1:(2^k)]*sigma2_pp)/sommevar
sigma2new[(1+2^k):2^(k+1)] = sigma2new[1:(2^k)] * sigma2_pp/sommevar
m=mnew
sigma2=sigma2new
p=pnew/sum(pnew)
L=dix*Lnew # fois 2 pr le grandir
q=dix*qnew
}
# apres troncature
#----------------------
for (k in kappa:(N-1)){
# je cree le vecteur des nouveaux m,
mnew=rep(0,2^(kappa+1))
sigma2new=rep(0 ,2^(kappa+1))
pnew=rep(0 ,2^(kappa+1))
Lnew=rep(0 ,2^(kappa+1))
# CR = constante de renormalisation qui intervient dans le dénominateur des pu
qnew=rep(0 ,2^(kappa+1))
diffTps<-Tps[k+1] - Tps[k]
#--- numérateur de pu0
tpbeta<-loi_outlier(Y[k+1])
pnew[1:(2^kappa)]=p[1:(2^kappa)]*(1-pp)*tpbeta
Lnew[1:(2^kappa)]=L[1:(2^kappa)]*tpbeta
mnew[1:(2^kappa)]= m[1:(2^kappa)]*exp(-aa*diffTps) + mm*(1-exp(-aa*diffTps)) #m_u0
sigma2new[1:(2^kappa)] = sigma2[1:(2^kappa)]*exp(-2*aa*(diffTps)) +
(1-exp(-2*aa*(diffTps)))*sigma2_mm/(2*aa)
qnew[1:(2^kappa)]=q[1:(2^kappa)]*(1-pp)
qnew[(1+2^kappa):2^(kappa+1)]=q[1:(2^kappa)]*pp
sommevar=sigma2new[1:(2^kappa)] + sigma2_pp
tpnorm<-dnorm(Y[k+1], mnew[1:(2^kappa)], sqrt(sommevar))
pnew[(1+2^kappa):2^(kappa+1)]=p[1:(2^kappa)]*pp*tpnorm
Lnew[(1+2^kappa):2^(kappa+1)]=L[1:(2^kappa)]*tpnorm
mnew[(1+2^kappa):2^(kappa+1)] = (sigma2new[1:(2^kappa)]*Y[k+1] +
mnew[1:(2^kappa)]*sigma2_pp)/sommevar #m_u1
sigma2new[(1+2^kappa):2^(kappa+1)] = sigma2new[1:(2^kappa)] * sigma2_pp/sommevar
selection=order(pnew, decreasing=T)[1:2^kappa]
m=mnew[selection]
sigma2=sigma2new[selection]
p=pnew[selection]/sum(pnew[selection])
L=dix*Lnew[selection] #fois 2 pr le grandir
q=dix*qnew[selection]
}
Vraisemblance=L%*%q
return(Vraisemblance)
}
#------------------------ End of file -----------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{KBO_L}
\alias{KBO_L}
\title{KBO_L a function to calculate a likelihood on optimized parameters}
\usage{
KBO_L(param, Y, Tps, N, dix)
}
\arguments{
\item{param}{a list of 10 input parameters mm, pp and m0}
\item{Y}{num}
\item{Tps}{num}
\item{N}{num}
\item{dix}{num}
}
\value{
a likelihood
}
\description{
KBO_L a function to calculate a likelihood on optimized parameters
}
\details{
uses the same input parameter list than the main function
}
\examples{
# not run
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{KBO_known}
\alias{KBO_known}
\title{KBO_known}
\usage{
KBO_known(param, threshold, Y, Tps, N)
}
\arguments{
\item{param}{list, a list of 10 input parameters for mm, pp and m0}
\item{threshold}{numeric, threshold for CI}
\item{Y}{num}
\item{Tps}{num}
\item{N}{num}
}
\value{
a list
}
\description{
KBO_known
}
\details{
uses the same input parameter list than the main function
}
\examples{
# not run
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kfino2.R
\name{kfino_fit2}
\alias{kfino_fit2}
\title{kafino_fit2 a function to detect outlier with a Kalman Filtering approach}
\usage{
kfino_fit2(datain, Tvar, Yvar, param = NULL, doOptim = TRUE, threshold = 0.5)
}
\arguments{
\item{datain}{an input data.frame of one time course to study (unique IDE)}
\item{Tvar}{char, time column name in the data.frame datain, a numeric vector
Tvar can be expressed as a proportion of day in seconds}
\item{Yvar}{char, name of the variable to predict in the data.frame datain}
\item{param}{list, a list of initialization parameter}
\item{doOptim}{logical, if TRUE optimisation of the initial parameters
default TRUE}
\item{threshold}{numeric, threshold to qualify an observation as outlier
according to the label_pred, default 0.5}
}
\value{
a S3 list with two data frames and a list of vectors of
kafino results
\describe{
\item{detectOutlier}{The whole dataset with the detected outliers flagged
and prediction}
\item{PredictionOK}{A dataset with the predictions on possible values}
\item{kafino.results}{kfino results (a list of vectors) on optimized input
parameters or not}
}
}
\description{
kafino_fit2 a function to detect outlier with a Kalman Filtering approach
}
\details{
The initialization parameter list param contains:
\describe{
\item{mm}{target weight, NULL if the user wants to optimize it}
\item{pp}{probability to be correctly weighed, NULL if the user wants to
optimize it}
\item{m0}{Initial weight, NULL if the user wants to optimize it}
\item{aa}{numeric, rate of weight change, default 0.001 }
\item{expertMin}{numeric, the minimal weight expected by the user}
\item{expertMax}{numeric, the maximal weight expected by the user}
\item{sigma2_m0}{variance of m0}
\item{sigma2_mm}{numeric, variance of mm, related to the unit of Tvar,
default 0.05}
\item{sigma2_pp}{numeric, variance of pp, related to the unit of Yvar,
default 5}
\item{K}{numeric, a constant value in the outlier function (trapezoid),
by default K=2 - increasing K, XXX}
\item{seqp}{numeric, minimal pp probability to be correctly weighted.}
}
It can be given by the user following his knowledge of the animal or dataset
or entirely constructed by the function. In the optimisation step, this
vector is initialized according to the input data (between the expert
range) using quantile of the Y distribution (varying between 0.2 and 0.8 for
m0 and 0.5 for mm). pp is a sequence varying between minp to (minp+0.3). A
sub-sampling is performed to speed the algorithm if the number of possible
observations studied is greater than 500.
}
\examples{
data(spring1)
library(dplyr)
# --- With Optimisation on initial parameters
t0 <- Sys.time()
param1<-list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu1<-kfino_fit2(datain=spring1,
Tvar="dateNum",Yvar="Poids",
doOptim=TRUE,param=param1)
Sys.time() - t0
# --- Without Optimisation on initial parameters
t0 <- Sys.time()
param2<-list(m0=41,
mm=45,
pp=0.5,
aa=0.001,
expertMin=30,
expertMax=75,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu2<-kfino_fit2(datain=spring1,
Tvar="dateNum",Yvar="Poids",
param=param2,
doOptim=FALSE)
Sys.time() - t0
# complex data on merinos2 dataset
data(merinos2)
t0 <- Sys.time()
param3<-list(m0=NULL,
mm=NULL,
pp=NULL,
aa=0.001,
expertMin=10,
expertMax=45,
sigma2_m0=1,
sigma2_mm=0.05,
sigma2_pp=5,
K=2,
seqp=seq(0.5,0.7,0.1))
resu3<-kfino_fit2(datain=merinos2,
Tvar="dateNum",Yvar="Poids",
doOptim=TRUE,param=param3)
Sys.time() - t0
}
...@@ -5,10 +5,10 @@ test_that("Output type", { ...@@ -5,10 +5,10 @@ test_that("Output type", {
expertMin=30,expertMax=75, expertMin=30,expertMax=75,
doOptim=FALSE,aa=0.001,sigma2_mm=0.05, doOptim=FALSE,aa=0.001,sigma2_mm=0.05,
K=2,sigma2_pp=5) K=2,sigma2_pp=5)
expect_equal(str(resu1),"list") expect_equal(length(resu1),3)
expect_equal(str(resu1[[1]]),"data.frame") expect_s3_class(resu1[[1]],"data.frame")
expect_equal(str(resu1[[2]]),"data.frame") expect_s3_class(resu1[[2]],"data.frame")
expect_equal(str(resu1[[3]]),"list") expect_type(resu1[[3]],"list")
#expect_true(is_SBM(netA)) #expect_true(is_SBM(netA))
}) })
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment