#  CODE NUMERO 3
#  SCENARIOS AVEC TOUS LES POINTS RMQS / AVEC LES DONNEES clc

# Ce code permet de faire tourner les 4 scénarios, à 2 niveaux (500 et 100m)

library(sf)
library(terra)
library(ggplot2)
library(dplyr)
library(tidyr) 
library(SamplingStrata) 
library(clhs) 
library(viridis)
library(tmap)



  
  set.seed(123) # not done by JRC but important 
  
  #Replace default kmeans function to use a different algorithm (to avoid convergence issues)
  kmeans.default <- function(x, algorithm="MacQueen", ...){base::kmeans.default(x, algorithm, ...)}
  
  # load the specific functions for this project
  setwd("~/serena/smlsamplingeu")
  source("~/smlsamplingeu/functionssML.R")
  
  
  
  setwd("/home/nsaby/serena/data/sml/europe/")
  stacka <- rast(c("FR_OC.tif", "FR_pH.tif" ,
                   "FR_TXT.tif","FR_N.tif",
                   "FR_P.tif",
                   "FR_CEC.tif","FR_BD010.tif") ) 
  
  stackb <- rast(c("~/smlsamplingeu/export_data/FR_CLC_100m.tif",
                   "~/smlsamplingeu/export_data/FR_metzger_100m.tif",
                   "~/smlsamplingeu/export_data/FR_regions_100m.tif",
                   "~/smlsamplingeu/export_data/FR_WRB_100m.tif",
                   "~/smlsamplingeu/export_data/FR_IGCS_100m.tif",
                   "~/smlsamplingeu/export_data/soilRegion.tif"))
  
  stack = c(stacka,stackb)
  
  stack500 = rast("~/smlsamplingeu/export_data/stack500_lulucf.tif")
  
 
  
  sample1 <- readRDS("~/smlsamplingeu/data/11septembre_sample1.rds")
  sample1500 <- readRDS("~/smlsamplingeu/data/11septembre_sample1500.rds")
  
  rnd.lhs<-readRDS( "~/smlsamplingeu/data/11septembre_rnd.lhs.rds")
  rnd.lhs500<-readRDS( "~/smlsamplingeu/data/11septembre_rnd.lhs500.rds")
  
  
  
  sample2 <- as.data.frame(sample1)
  
  
  # set the numbers of pixels to sample
  clhsNb = 15000
  SIsample = 100000
  
  # the list of clc codes to exclude
  excludeCLC = c(1,2,3,4,5,6,7,8,9,
                 30,31,34,35,37,38,39,40,
                 41,42,43,44,48,49,50
  )
  
  # exclure les sols sans sols
  
  excludeWRB = 0
  
  X= Y = c("FR_OC",  "FR_pH","FR_TXT_1",  "FR_TXT_2",  "FR_P","FR_N","FR_BD010")
  
  
  #charger données RMQS-----
  
  load("~/smlsamplingeu/data/data_ETM_C_H1.RData")#Données ponctuelles
  
  smn = data_ETM_C_H1%>%
    dplyr::select(id_site,x_reel,y_reel) %>%
    st_as_sf(coords = 2:3,
             crs = 2154) %>%
    st_transform(crs(stack))
  
  
  
  # Include points from existing surveys (let the algorithm decide)
  # Suppose we have a regular grid we want to preserve at least in part
  
  sample3 <-  terra::extract(stack,
                             smn,
                             xy = TRUE
  )
  
  sample3 <- as.data.frame(sample3)
  sample3 <- sample3[complete.cases(sample3),]
  
  sample3 <- sample3 %>%
    rename(FR_metzger = climate_metzeger, FR_regions = code_supra, FR_IGCS = ger, FR_WRB = WRBLEV1)
  # summary(sample3)
  # plot(y~x, sample3)
  
  
  
  #Remove NUTS region with not enough free soil (Brussels) to avoid code errors (too few samples for clustering). 
  #This might be changed if targeting also soils in urban areas.
  #Remove non-soil classes
  # selection of the pixels using clhs
  sample2 <- sample1[rnd.lhs$index_samples,]
  sample2 <- sample2 %>%
    rename(FR_WRB = WRBLEV1)
  
  # simplified LU using personal function if necessary !!
  sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  
  sample2 <- filter(sample2, !FR_CLC %in% excludeCLC )
  sample3 <- filter(sample3, !FR_CLC %in% excludeCLC )
  
  #remove the non soil
  sample2 <- filter(sample2, !FR_WRB %in% excludeWRB )
  sample3 <- filter(sample3, !FR_WRB %in% excludeWRB )
  
  
  saveRDS( sample3, "~/smlsamplingeu/data/avec_rmqs_sample3.rds")
  sample3 <- readRDS("~/smlsamplingeu/data/avec_rmqs_sample3.rds")
  #--------------------------------------------------------------------------------------------------
  # Add existing points to the new randomly selected ones
  #--------------------------------------------------------------------------------------------------
  sample4 <- rbind(sample3  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sample2 %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  #--------------------------------------------------------------------------------------------------
  #Convert CLC classes from numerical to factors. Round numerical values to speed up clutering computations
  #--------------------------------------------------------------------------------------------------
  sample4$FR_CLC <- as.factor(sample4$FR_CLC)
  sample4$FR_WRB <- as.factor(sample4$FR_WRB)
  sample4$FR_regions <- as.factor(sample4$FR_regions)
  # sample4$SOILNR <- as.factor(sample4$SOILNR)
  library(sp)
  sample4.sp <- sample4
  coordinates(sample4.sp)=~x+y
  
  #--------------------------------------------------------------------------------------------------
  # Create an index to indicate to the clhs which samples to keep anyway
  #--------------------------------------------------------------------------------------------------
  samples_tobekept <- 1:dim(sample3)[1]
  
  #--------------------------------------------------------------------------------------------------
  #Reduce initial sample and optimize soil properties sampling space by latin hypercube sampling, but retaining the legacy survey points
  #--------------------------------------------------------------------------------------------------
  
  rnd.lhs = clhs::clhs(sample4.sp[,c(1:10)], 
                       must.include=samples_tobekept, 
                       size=5000, iter=2000, progress=T,
                       simple=F,
                       use.coords=T)
  
  
  
  
  sample5 <- sample4[rnd.lhs$index_samples,]
  
  plot(y~x, sample5)
  
  sample5$id <- c(1:nrow(sample5))
  
  #-------------------------------------------------------------------------------------------------
  # Proceeds as before
  #--------------------------------------------------------------------------------------------------
  # Build initial dataframe of target properties (reporting at NUTS2 by CLC class level)
  #--------------------------------------------------------------------------------------------------
  sample5$domains <- paste(sample5$FR_WRB, sample5$FR_CLC, sample5$FR_regions, sep = ".")
  
   frame2 <- buildFrameDF(df = sample5,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  #--------------------------------------------------------------------------------------------------
  #Build initial strata
  #--------------------------------------------------------------------------------------------------
  strata2 <- buildStrataDF(frame2, progress=T)
  
  ndom <- length(unique(sample5$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           domainvalue=c(1:ndom)))
  
  #Find initial solutions for each domain (SOILNR) by kmeans clustering
  
  init_sol4 <- KmeansSolution2(frame=frame2,
                               errors=cv,
                               maxclusters = 6) 
  
  nstrata4 <- tapply(init_sol4$suggestions,
                     init_sol4$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution4 <- prepareSuggestion(init_sol4,frame2,nstrata4)
  
  
  #Optimize initial solution by Genetic Algorithm
  
  solution4 <- optimStrata(method = "continuous",
                           errors = cv, 
                           framesamp = frame2,
                           iter = 50,
                           pops = 20,
                           nStrata = nstrata4,
                           suggestions = initial_solution4)
  
  framenew4 <- solution4$framenew
  
  outstrata4 <- solution4$aggr_strata
  
  strataStructure <- summaryStrata(framenew4,
                                   outstrata4,
                                   progress=T)
  
  sum(strataStructure$Allocation)
  
  
  sample5 <- selectSample(framenew4, 
                          outstrata4,
                          writeFiles = F)
  
  points5 <- sample4[sample5$ID,]
  
  
  # Check how many legacy points were retained
  
  plot(y~x, points5)
  
  points(sample3$x, sample3$y, pch=3)
  
  overpoints <- (merge(points5, sample3, by=c("x", "y")))
  
  points(overpoints$x, overpoints$y, col="red", pch=19)
  
  
  RMQSSol1 = list(
    smlsample = sample5,
    optimstrata =  solution4,
    tableau = framenew4,
    eval = NA,
    NbSample = sum(strataStructure$Allocation),
    ExpCV = NA
  )
  
  
  # Stratégie forced use of rmqs----------------
  # Include points from existing surveys (forcing to keep existing points)
  # Suppose we have a regular grid we want to preserve in total
  
  
  # sample1 <- sampleRandom(stack1, 80000, xy=T)
  # sample2 <- as.data.frame(sample1)
  # 
  # #Remove areas where soil is not present
  # 
  # sample2 <- filter(sample2, !FR_CLC %in% c(1:9, 30, 33, 34, 35, 37:43))
  
  
  #--------------------------------------------------------------------------------------------------
  #Convert CLC classes from numerical to factors. Round numerical values to speed up clutering computations
  #--------------------------------------------------------------------------------------------------
  # sample2$FR_CLC <- as.factor(sample2$FR_CLC)
  # sample2$FR_NUTS2 <- as.factor(sample2$FR_NUTS2)
  # sample2$FR_OC <- round(sample2$FR_OC)
  # sample2$FR_N <- round(sample2$FR_N, 1)
  # sample2$FR_BD010 <- round(sample2$FR_BD010, 1)
  # sample2$FR_pH <- round(sample2$FR_pH, 1)
  
  # sample2.sp <- sample2
  # coordinates(sample2.sp)=~x+y
  # 
  # #--------------------------------------------------------------------------------------------------
  # #Reduce initial sample and optimize soil properties sampling space by latin hypercube sampling
  # #--------------------------------------------------------------------------------------------------
  # #rnd.lhs = clhs::clhs(sample2[,c(4,6:9)], size=5000, iter=20000, progress=T, simple=F, use.coords=T)
  # rnd.lhs = clhs::clhs(sample2.sp[,c(1:10)], size=5000, iter=2000, progress=T, simple=F, use.coords=T)
  
  # TEST2 S2 100m------
  sampleRand <- sample1[rnd.lhs$index_samples,]
  sampleRand <- sampleRand %>% 
    rename(FR_WRB= WRBLEV1)
  
  sampleRand$FR_regions<- as.factor(sampleRand$FR_regions)
  sampleRand$FR_WRB<- as.factor(sampleRand$FR_WRB)
  sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC)
  
  # simplified LU using personal function if necessary !!
  # sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  sampleRand$FR_CLC_S =  occuprecode(sampleRand$FR_CLC)
  
  # tab1 <- table(sampleRand$FR_NUTS2)
  tab1 <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_WRB) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, 
                         sep = ".")) %>%
    ungroup()
  
  # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  
  sampleRand<- sampleRand %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, 
                         sep = ".")) %>%
    left_join(tab1 %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  #--------------------------------------------------------------------------------------------------
  # Add legacy points to the LHS sample
  #--------------------------------------------------------------------------------------------------
  # tab1smn <- table(sample3$FR_NUTS2)
  tab1smn <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_WRB) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, 
                         sep = ".")) %>%
    ungroup()
  # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),]
  
  sample7<- sample3 %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, 
                         sep = ".")) %>%
    left_join(tab1smn %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  
  sample8 <- rbind(sample7  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sampleRand %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  
  #tab1 <- table(sample8$FR_NUTS2)
  #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  sample8$id <- c(1:nrow(sample8))
  sample8$domains <- paste(sample8$FR_WRB, sample8$FR_CLC, sample8$FR_regions, sep = ".")
  
  ndom <- length(unique(sample8$domains))
  
  frame8 <- buildFrameDF(df = sample8,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  strata8 <- buildStrataDF(frame8, progress=T)
  
  ndom <- length(unique(sample8$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           
                           domainvalue=c(1:ndom)))
  
  #--------------------------------------------------------------------------------------------------
  #Find initial solutions for each domain (NUTS2) by kmeans clustering
  #--------------------------------------------------------------------------------------------------
  init_sol8 <- KmeansSolution2(frame=frame8,
                               errors=cv,
                               maxclusters = 8) 
  
  nstrata8 <- tapply(init_sol8$suggestions,
                     init_sol8$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8)
  
  #--------------------------------------------------------------------------------------------------
  #Optimize initial solution by Genetic Algorithm
  #--------------------------------------------------------------------------------------------------
  ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1]))
  framecens <- frame8[ind_framecens,]
  #----Selection of units to be sampled from the frame
  # (complement to the previous)
  framesamp <- frame8[ind_framecens!=T,]
  
  solution8 <- optimStrata(method = "continuous",
                           errors = cv, 
                           framesamp = framesamp,
                           framecens = framecens,
                           iter = 50,
                           pops = 20,
                           nStrata = nstrata8,
                           suggestions = initial_solution8)
  
  framenew8 <- solution8$framenew
  outstrata8 <- solution8$aggr_strata
  
  strataStructure8 <- summaryStrata(framenew8,
                                    outstrata8,
                                    progress=T)
  
  #--------------------------------------------------------------
  #Total number of samples required
  #--------------------------------------------------------------
  sum(strataStructure8$Allocation)
  
  
  sample9 <- selectSample(framenew8, 
                          outstrata8,
                          writeFiles = F)
  
  points8 <- sample8[sample9$ID,]
  
  plot(y~x, sample7)
  points(points8$x, points8$y, pch=3, col="red")
  
 
  
  ############
  
  
  
  
  
  
  # Test 2  S4 100m-------
 sampleRand <- sample1[rnd.lhs$index_samples,]
  sampleRand <- sampleRand %>% 
    rename(FR_WRB= WRBLEV1)
  
  sampleRand$FR_regions<- as.factor(sampleRand$FR_regions)
  sampleRand$FR_IGCS<- as.factor(sampleRand$FR_IGCS)
  sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC)
  
  # simplified LU using personal function if necessary !!
  # sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  sampleRand$FR_CLC_S =  occuprecode(sampleRand$FR_CLC)
  
  # tab1 <- table(sampleRand$FR_NUTS2)
  tab1 <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_IGCS) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, 
                         sep = ".")) %>%
    ungroup()
  
  # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  
  sampleRand<- sampleRand %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, 
                         sep = ".")) %>%
    left_join(tab1 %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  #--------------------------------------------------------------------------------------------------
  # Add legacy points to the LHS sample
  #--------------------------------------------------------------------------------------------------
  # tab1smn <- table(sample3$FR_NUTS2)
  tab1smn <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_IGCS) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, 
                         sep = ".")) %>%
    ungroup()
  # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),]
  
  sample7<- sample3 %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, 
                         sep = ".")) %>%
    left_join(tab1smn %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  
  sample8 <- rbind(sample7  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sampleRand %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  
  #tab1 <- table(sample8$FR_NUTS2)
  #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  sample8$id <- c(1:nrow(sample8))
  sample8$domains <- paste(sample8$FR_IGCS, sample8$FR_CLC, sample8$FR_regions, sep = ".")
  
  ndom <- length(unique(sample8$domains))
  
  frame8 <- buildFrameDF(df = sample8,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  strata8 <- buildStrataDF(frame8, progress=T)
  
  ndom <- length(unique(sample8$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           
                           domainvalue=c(1:ndom)))
  
  #--------------------------------------------------------------------------------------------------
  #Find initial solutions for each domain (NUTS2) by kmeans clustering
  #--------------------------------------------------------------------------------------------------
  init_sol8 <- KmeansSolution2(frame=frame8,
                               errors=cv,
                               maxclusters = 8) 
  
  nstrata8 <- tapply(init_sol8$suggestions,
                     init_sol8$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8)
  
  #--------------------------------------------------------------------------------------------------
  #Optimize initial solution by Genetic Algorithm
  #--------------------------------------------------------------------------------------------------
  ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1]))
  framecens <- frame8[ind_framecens,]
  #----Selection of units to be sampled from the frame
  # (complement to the previous)
  framesamp <- frame8[ind_framecens!=T,]
  
  solution8_s4_100 <- optimStrata(method = "continuous",
                           errors = cv, 
                           framesamp = framesamp,
                           framecens = framecens,
                           iter = 50,
                           pops = 20,
                           nStrata = nstrata8,
                           suggestions = initial_solution8)
  
  framenew8 <- solution8_s4_100$framenew
  outstrata8 <- solution8_s4_100$aggr_strata
  
  strataStructure8 <- summaryStrata(framenew8,
                                    outstrata8,
                                    progress=T)
  
  #--------------------------------------------------------------
  #Total number of samples required
  #--------------------------------------------------------------
  sum(strataStructure8$Allocation)
  
  
  sample9 <- selectSample(framenew8, 
                          outstrata8,
                          writeFiles = F)
  
  points8 <- sample8[sample9$ID,]
  
  plot(y~x, sample7)
  points(points8$x, points8$y, pch=3, col="red")
  
  
  
  # S3 100m-------
  
  sampleRand <- sample1[rnd.lhs$index_samples,]
  sampleRand <- sampleRand %>% 
    rename(FR_WRB= WRBLEV1)
  
  sampleRand$FR_regions<- as.factor(sampleRand$FR_regions)
  sampleRand$FR_IGCS<- as.factor(sampleRand$FR_IGCS)
  sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC)
  sampleRand$FR_metzger<- as.factor(sampleRand$FR_metzger)  
  # simplified LU using personal function if necessary !!
  # sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  sampleRand$FR_CLC_S =  occuprecode(sampleRand$FR_CLC)
  
  # tab1 <- table(sampleRand$FR_NUTS2)
  tab1 <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_IGCS, FR_metzger) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, 
                         sep = ".")) %>%
    ungroup()
  
  # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  
  sampleRand<- sampleRand %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, 
                         sep = ".")) %>%
    left_join(tab1 %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  #--------------------------------------------------------------------------------------------------
  # Add legacy points to the LHS sample
  #--------------------------------------------------------------------------------------------------
  # tab1smn <- table(sample3$FR_NUTS2)
  tab1smn <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_IGCS, FR_metzger) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger,
                         sep = ".")) %>%
    ungroup()
  # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),]
  
  sample7<- sample3 %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, 
                         sep = ".")) %>%
    left_join(tab1smn %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  
  sample8 <- rbind(sample7  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sampleRand %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  
  #tab1 <- table(sample8$FR_NUTS2)
  #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  sample8$id <- c(1:nrow(sample8))
  sample8$domains <- paste(sample8$FR_IGCS, 
                           sample8$FR_CLC, 
                           sample8$FR_regions, 
                           sample8$FR_metzger, sep = ".")
  
  ndom <- length(unique(sample8$domains))
  
  frame8 <- buildFrameDF(df = sample8,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  strata8 <- buildStrataDF(frame8, progress=T)
  
  ndom <- length(unique(sample8$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           
                           domainvalue=c(1:ndom)))
  
  #--------------------------------------------------------------------------------------------------
  #Find initial solutions for each domain (NUTS2) by kmeans clustering
  #--------------------------------------------------------------------------------------------------
  init_sol8 <- KmeansSolution2(frame=frame8,
                               errors=cv,
                               maxclusters = 8) 
  
  nstrata8 <- tapply(init_sol8$suggestions,
                     init_sol8$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8)
  
  #--------------------------------------------------------------------------------------------------
  #Optimize initial solution by Genetic Algorithm
  #--------------------------------------------------------------------------------------------------
  ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1]))
  framecens <- frame8[ind_framecens,]
  #----Selection of units to be sampled from the frame
  # (complement to the previous)
  framesamp <- frame8[ind_framecens!=T,]
  
  solution8_s3_100 <- optimStrata(method = "continuous",
                                  errors = cv, 
                                  framesamp = framesamp,
                                  framecens = framecens,
                                  iter = 50,
                                  pops = 20,
                                  nStrata = nstrata8,
                                  suggestions = initial_solution8)
  
  framenew8 <- solution8_s3_100$framenew
  outstrata8 <- solution8_s3_100$aggr_strata
  
  strataStructure8 <- summaryStrata(framenew8,
                                    outstrata8,
                                    progress=T)
  
  #--------------------------------------------------------------
  #Total number of samples required
  #--------------------------------------------------------------
  sum(strataStructure8$Allocation)
  
  
  sample9 <- selectSample(framenew8, 
                          outstrata8,
                          writeFiles = F)
  
  points8 <- sample8[sample9$ID,]
  
  plot(y~x, sample7)
  points(points8$x, points8$y, pch=3, col="red")

  
    # S1 100m-------
  
  sampleRand <- sample1[rnd.lhs$index_samples,]
  sampleRand <- sampleRand %>% 
    rename(FR_WRB= WRBLEV1)
  
  sampleRand$FR_regions<- as.factor(sampleRand$FR_regions)
  sampleRand$FR_WRB<- as.factor(sampleRand$FR_WRB)
  sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC)
  sampleRand$FR_metzger<- as.factor(sampleRand$FR_metzger)
  
  # simplified LU using personal function if necessary !!
  # sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  sampleRand$FR_CLC_S =  occuprecode(sampleRand$FR_CLC)
  
  # tab1 <- table(sampleRand$FR_NUTS2)
  tab1 <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_WRB, FR_metzger) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger,
                         sep = ".")) %>%
    ungroup()
  
  # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  
  sampleRand<- sampleRand %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, 
                         sep = ".")) %>%
    left_join(tab1 %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  #--------------------------------------------------------------------------------------------------
  # Add legacy points to the LHS sample
  #--------------------------------------------------------------------------------------------------
  # tab1smn <- table(sample3$FR_NUTS2)
  tab1smn <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_WRB, FR_metzger) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger,  
                         sep = ".")) %>%
    ungroup()
  # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),]
  
  sample7<- sample3 %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger,  
                         sep = ".")) %>%
    left_join(tab1smn %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  
  sample8 <- rbind(sample7  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sampleRand %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  
  #tab1 <- table(sample8$FR_NUTS2)
  #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  sample8$id <- c(1:nrow(sample8))
  sample8$domains <- paste(sample8$FR_WRB,
                           sample8$FR_CLC,
                           sample8$FR_regions,
                           sample8$FR_metzger,
                           sep = ".")
  
  ndom <- length(unique(sample8$domains))
  
  frame8 <- buildFrameDF(df = sample8,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  strata8 <- buildStrataDF(frame8, progress=T)
  
  ndom <- length(unique(sample8$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           
                           domainvalue=c(1:ndom)))
  
  #--------------------------------------------------------------------------------------------------
  #Find initial solutions for each domain (NUTS2) by kmeans clustering
  #--------------------------------------------------------------------------------------------------
  init_sol8 <- KmeansSolution2(frame=frame8,
                               errors=cv,
                               maxclusters = 8) 
  
  nstrata8 <- tapply(init_sol8$suggestions,
                     init_sol8$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8)
  
  #--------------------------------------------------------------------------------------------------
  #Optimize initial solution by Genetic Algorithm
  #--------------------------------------------------------------------------------------------------
  ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1]))
  framecens <- frame8[ind_framecens,]
  #----Selection of units to be sampled from the frame
  # (complement to the previous)
  framesamp <- frame8[ind_framecens!=T,]
  
  solution8_s1_100m <- optimStrata(method = "continuous",
                           errors = cv, 
                           framesamp = framesamp,
                           framecens = framecens,
                           iter = 50,
                           pops = 20,
                           nStrata = nstrata8,
                           suggestions = initial_solution8)
  
  framenew8 <- solution8_s1_100m$framenew
  outstrata8 <- solution8_s1_100m$aggr_strata
  
  strataStructure8 <- summaryStrata(framenew8,
                                    outstrata8,
                                    progress=T)
  
  #--------------------------------------------------------------
  #Total number of samples required
  #--------------------------------------------------------------
  sum(strataStructure8$Allocation)
  
  
  sample9 <- selectSample(framenew8, 
                          outstrata8,
                          writeFiles = F)
  
  points8 <- sample8[sample9$ID,]
  
  plot(y~x, sample7)
  points(points8$x, points8$y, pch=3, col="red")
  
  
  
  # S1 500m----
  
  sampleRand <- sample1500[rnd.lhs500$index_samples,]
  sampleRand <- sampleRand %>% 
    rename(FR_WRB= WRBLEV1)
  
  sampleRand$FR_regions<- as.factor(sampleRand$FR_regions)
  sampleRand$FR_WRB<- as.factor(sampleRand$FR_WRB)
  sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC)
  sampleRand$FR_metzger<- as.factor(sampleRand$FR_metzger)
  
  # simplified LU using personal function if necessary !!
  # sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  sampleRand$FR_CLC_S =  occuprecode(sampleRand$FR_CLC)
  
  # tab1 <- table(sampleRand$FR_NUTS2)
  tab1 <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_WRB, FR_metzger) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger,
                         sep = ".")) %>%
    ungroup()
  
  # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  
  sampleRand<- sampleRand %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, 
                         sep = ".")) %>%
    left_join(tab1 %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  #--------------------------------------------------------------------------------------------------
  # Add legacy points to the LHS sample
  #--------------------------------------------------------------------------------------------------
  # tab1smn <- table(sample3$FR_NUTS2)
  tab1smn <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_WRB, FR_metzger) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger,  
                         sep = ".")) %>%
    ungroup()
  # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),]
  
  sample7<- sample3 %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger,  
                         sep = ".")) %>%
    left_join(tab1smn %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  
  sample8 <- rbind(sample7  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sampleRand %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  
  #tab1 <- table(sample8$FR_NUTS2)
  #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  sample8$id <- c(1:nrow(sample8))
  sample8$domains <- paste(sample8$FR_WRB,
                           sample8$FR_CLC,
                           sample8$FR_regions,
                           sample8$FR_metzger,
                           sep = ".")
  
  ndom <- length(unique(sample8$domains))
  
  frame8 <- buildFrameDF(df = sample8,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  strata8 <- buildStrataDF(frame8, progress=T)
  
  ndom <- length(unique(sample8$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           
                           domainvalue=c(1:ndom)))
  
  #--------------------------------------------------------------------------------------------------
  #Find initial solutions for each domain (NUTS2) by kmeans clustering
  #--------------------------------------------------------------------------------------------------
  init_sol8 <- KmeansSolution2(frame=frame8,
                               errors=cv,
                               maxclusters = 8) 
  
  nstrata8 <- tapply(init_sol8$suggestions,
                     init_sol8$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8)
  
  #--------------------------------------------------------------------------------------------------
  #Optimize initial solution by Genetic Algorithm
  #--------------------------------------------------------------------------------------------------
  ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1]))
  framecens <- frame8[ind_framecens,]
  #----Selection of units to be sampled from the frame
  # (complement to the previous)
  framesamp <- frame8[ind_framecens!=T,]
  
  solution8_s1_100m <- optimStrata(method = "continuous",
                                   errors = cv, 
                                   framesamp = framesamp,
                                   framecens = framecens,
                                   iter = 50,
                                   pops = 20,
                                   nStrata = nstrata8,
                                   suggestions = initial_solution8)
  
  framenew8 <- solution8_s1_100m$framenew
  outstrata8 <- solution8_s1_100m$aggr_strata
  
  strataStructure8 <- summaryStrata(framenew8,
                                    outstrata8,
                                    progress=T)
  
  #--------------------------------------------------------------
  #Total number of samples required
  #--------------------------------------------------------------
  sum(strataStructure8$Allocation)
  
  
  sample9 <- selectSample(framenew8, 
                          outstrata8,
                          writeFiles = F)
  
  points8 <- sample8[sample9$ID,]
  
  plot(y~x, sample7)
  points(points8$x, points8$y, pch=3, col="red")
  
  
  # S2 500m----
  
  sampleRand <- sample1500[rnd.lhs500$index_samples,]
  sampleRand <- sampleRand %>% 
    rename(FR_WRB= WRBLEV1)
  
  sampleRand$FR_regions<- as.factor(sampleRand$FR_regions)
  sampleRand$FR_WRB<- as.factor(sampleRand$FR_WRB)
  sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC)
  
  # simplified LU using personal function if necessary !!
  # sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  sampleRand$FR_CLC_S =  occuprecode(sampleRand$FR_CLC)
  
  # tab1 <- table(sampleRand$FR_NUTS2)
  tab1 <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_WRB) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, 
                         sep = ".")) %>%
    ungroup()
  
  # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  
  sampleRand<- sampleRand %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, 
                         sep = ".")) %>%
    left_join(tab1 %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  #--------------------------------------------------------------------------------------------------
  # Add legacy points to the LHS sample
  #--------------------------------------------------------------------------------------------------
  # tab1smn <- table(sample3$FR_NUTS2)
  tab1smn <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_WRB) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, 
                         sep = ".")) %>%
    ungroup()
  # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),]
  
  sample7<- sample3 %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, 
                         sep = ".")) %>%
    left_join(tab1smn %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  
  sample8 <- rbind(sample7  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sampleRand %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  
  #tab1 <- table(sample8$FR_NUTS2)
  #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  sample8$id <- c(1:nrow(sample8))
  sample8$domains <- paste(sample8$FR_WRB, sample8$FR_CLC, sample8$FR_regions, sep = ".")
  
  ndom <- length(unique(sample8$domains))
  
  frame8 <- buildFrameDF(df = sample8,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  strata8 <- buildStrataDF(frame8, progress=T)
  
  ndom <- length(unique(sample8$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           
                           domainvalue=c(1:ndom)))
  
  #--------------------------------------------------------------------------------------------------
  #Find initial solutions for each domain (NUTS2) by kmeans clustering
  #--------------------------------------------------------------------------------------------------
  init_sol8 <- KmeansSolution2(frame=frame8,
                               errors=cv,
                               maxclusters = 8) 
  
  nstrata8 <- tapply(init_sol8$suggestions,
                     init_sol8$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8)
  
  #--------------------------------------------------------------------------------------------------
  #Optimize initial solution by Genetic Algorithm
  #--------------------------------------------------------------------------------------------------
  ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1]))
  framecens <- frame8[ind_framecens,]
  #----Selection of units to be sampled from the frame
  # (complement to the previous)
  framesamp <- frame8[ind_framecens!=T,]
  
  solution8 <- optimStrata(method = "continuous",
                           errors = cv, 
                           framesamp = framesamp,
                           framecens = framecens,
                           iter = 50,
                           pops = 20,
                           nStrata = nstrata8,
                           suggestions = initial_solution8)
  
  framenew8 <- solution8$framenew
  outstrata8 <- solution8$aggr_strata
  
  strataStructure8 <- summaryStrata(framenew8,
                                    outstrata8,
                                    progress=T)
  
  #--------------------------------------------------------------
  #Total number of samples required
  #--------------------------------------------------------------
  sum(strataStructure8$Allocation)
  
  
  sample9 <- selectSample(framenew8, 
                          outstrata8,
                          writeFiles = F)
  
  points8 <- sample8[sample9$ID,]
  
  plot(y~x, sample7)
  points(points8$x, points8$y, pch=3, col="red")
  
  
  # S3 500m-----
  
  sampleRand <- sample1500[rnd.lhs500$index_samples,]
  sampleRand <- sampleRand %>% 
    rename(FR_WRB= WRBLEV1)
  
  sampleRand$FR_regions<- as.factor(sampleRand$FR_regions)
  sampleRand$FR_IGCS<- as.factor(sampleRand$FR_IGCS)
  sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC)
  sampleRand$FR_metzger<- as.factor(sampleRand$FR_metzger)  
  # simplified LU using personal function if necessary !!
  # sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  sampleRand$FR_CLC_S =  occuprecode(sampleRand$FR_CLC)
  
  # tab1 <- table(sampleRand$FR_NUTS2)
  tab1 <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_IGCS, FR_metzger) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, 
                         sep = ".")) %>%
    ungroup()
  
  # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  
  sampleRand<- sampleRand %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, 
                         sep = ".")) %>%
    left_join(tab1 %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  #--------------------------------------------------------------------------------------------------
  # Add legacy points to the LHS sample
  #--------------------------------------------------------------------------------------------------
  # tab1smn <- table(sample3$FR_NUTS2)
  tab1smn <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_IGCS, FR_metzger) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger,
                         sep = ".")) %>%
    ungroup()
  # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),]
  
  sample7<- sample3 %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, 
                         sep = ".")) %>%
    left_join(tab1smn %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  
  sample8 <- rbind(sample7  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sampleRand %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  
  #tab1 <- table(sample8$FR_NUTS2)
  #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  sample8$id <- c(1:nrow(sample8))
  sample8$domains <- paste(sample8$FR_IGCS, 
                           sample8$FR_CLC, 
                           sample8$FR_regions, 
                           sample8$FR_metzger, sep = ".")
  
  ndom <- length(unique(sample8$domains))
  
  frame8 <- buildFrameDF(df = sample8,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  strata8 <- buildStrataDF(frame8, progress=T)
  
  ndom <- length(unique(sample8$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           
                           domainvalue=c(1:ndom)))
  
  #--------------------------------------------------------------------------------------------------
  #Find initial solutions for each domain (NUTS2) by kmeans clustering
  #--------------------------------------------------------------------------------------------------
  init_sol8 <- KmeansSolution2(frame=frame8,
                               errors=cv,
                               maxclusters = 8) 
  
  nstrata8 <- tapply(init_sol8$suggestions,
                     init_sol8$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8)
  
  #--------------------------------------------------------------------------------------------------
  #Optimize initial solution by Genetic Algorithm
  #--------------------------------------------------------------------------------------------------
  ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1]))
  framecens <- frame8[ind_framecens,]
  #----Selection of units to be sampled from the frame
  # (complement to the previous)
  framesamp <- frame8[ind_framecens!=T,]
  
  solution8_s3_100 <- optimStrata(method = "continuous",
                                  errors = cv, 
                                  framesamp = framesamp,
                                  framecens = framecens,
                                  iter = 50,
                                  pops = 20,
                                  nStrata = nstrata8,
                                  suggestions = initial_solution8)
  
  framenew8 <- solution8_s3_100$framenew
  outstrata8 <- solution8_s3_100$aggr_strata
  
  strataStructure8 <- summaryStrata(framenew8,
                                    outstrata8,
                                    progress=T)
  
  #--------------------------------------------------------------
  #Total number of samples required
  #--------------------------------------------------------------
  sum(strataStructure8$Allocation)
  
  
  sample9 <- selectSample(framenew8, 
                          outstrata8,
                          writeFiles = F)
  
  points8 <- sample8[sample9$ID,]
  
  plot(y~x, sample7)
  points(points8$x, points8$y, pch=3, col="red")
  
  
  
  # S4 500m------------
  
  sampleRand <- sample1500[rnd.lhs500$index_samples,]
  sampleRand <- sampleRand %>% 
    rename(FR_WRB= WRBLEV1)
  
  sampleRand$FR_regions<- as.factor(sampleRand$FR_regions)
  sampleRand$FR_IGCS<- as.factor(sampleRand$FR_IGCS)
  sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC)
  
  # simplified LU using personal function if necessary !!
  # sample2$FR_CLC_S =  occuprecode(sample2$FR_CLC)
  sampleRand$FR_CLC_S =  occuprecode(sampleRand$FR_CLC)
  
  # tab1 <- table(sampleRand$FR_NUTS2)
  tab1 <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_IGCS) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, 
                         sep = ".")) %>%
    ungroup()
  
  # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  
  sampleRand<- sampleRand %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, 
                         sep = ".")) %>%
    left_join(tab1 %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  #--------------------------------------------------------------------------------------------------
  # Add legacy points to the LHS sample
  #--------------------------------------------------------------------------------------------------
  # tab1smn <- table(sample3$FR_NUTS2)
  tab1smn <- sampleRand %>% 
    group_by(FR_CLC, FR_regions, FR_IGCS) %>% 
    summarise(n=n()) %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, 
                         sep = ".")) %>%
    ungroup()
  # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),]
  
  sample7<- sample3 %>% 
    mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, 
                         sep = ".")) %>%
    left_join(tab1smn %>% 
                dplyr::select(domains, n), by= "domains") %>% 
    filter(n>10)
  
  
  sample8 <- rbind(sample7  %>%
                     dplyr::select(FR_OC:SOILNR,x,y) ,
                   sampleRand %>%
                     dplyr::select(FR_OC:SOILNR,x,y)
  )
  
  
  #tab1 <- table(sample8$FR_NUTS2)
  #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] 
  sample8$id <- c(1:nrow(sample8))
  sample8$domains <- paste(sample8$FR_IGCS, sample8$FR_CLC, sample8$FR_regions, sep = ".")
  
  ndom <- length(unique(sample8$domains))
  
  frame8 <- buildFrameDF(df = sample8,
                         id = "id",
                         X = X,
                         Y = Y,
                         domainvalue = "domains")
  
  strata8 <- buildStrataDF(frame8, progress=T)
  
  ndom <- length(unique(sample8$domains))
  
  cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                           CV1=rep(0.05,ndom),
                           CV2=rep(0.05,ndom),
                           CV3=rep(0.05,ndom),
                           CV4=rep(0.05,ndom),
                           CV5=rep(0.05,ndom),
                           CV6=rep(0.05,ndom),
                           CV7=rep(0.05,ndom),
                           
                           domainvalue=c(1:ndom)))
  
  #--------------------------------------------------------------------------------------------------
  #Find initial solutions for each domain (NUTS2) by kmeans clustering
  #--------------------------------------------------------------------------------------------------
  init_sol8 <- KmeansSolution2(frame=frame8,
                               errors=cv,
                               maxclusters = 8) 
  
  nstrata8 <- tapply(init_sol8$suggestions,
                     init_sol8$domainvalue,
                     FUN=function(x) length(unique(x)))
  
  initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8)
  
  #--------------------------------------------------------------------------------------------------
  #Optimize initial solution by Genetic Algorithm
  #--------------------------------------------------------------------------------------------------
  ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1]))
  framecens <- frame8[ind_framecens,]
  #----Selection of units to be sampled from the frame
  # (complement to the previous)
  framesamp <- frame8[ind_framecens!=T,]
  
  solution8_s4_100 <- optimStrata(method = "continuous",
                                  errors = cv, 
                                  framesamp = framesamp,
                                  framecens = framecens,
                                  iter = 50,
                                  pops = 20,
                                  nStrata = nstrata8,
                                  suggestions = initial_solution8)
  
  framenew8 <- solution8_s4_100$framenew
  outstrata8 <- solution8_s4_100$aggr_strata
  
  strataStructure8 <- summaryStrata(framenew8,
                                    outstrata8,
                                    progress=T)
  
  #--------------------------------------------------------------
  #Total number of samples required
  #--------------------------------------------------------------
  sum(strataStructure8$Allocation)
  
  
  sample9 <- selectSample(framenew8, 
                          outstrata8,
                          writeFiles = F)
  
  points8 <- sample8[sample9$ID,]
  
  plot(y~x, sample7)
  points(points8$x, points8$y, pch=3, col="red")