# CODE NUMERO 3 # SCENARIOS AVEC TOUS LES POINTS RMQS / AVEC LES DONNEES clc # Ce code permet de faire tourner les 4 scénarios, à 2 niveaux (500 et 100m) library(sf) library(terra) library(ggplot2) library(dplyr) library(tidyr) library(SamplingStrata) library(clhs) library(viridis) library(tmap) set.seed(123) # not done by JRC but important #Replace default kmeans function to use a different algorithm (to avoid convergence issues) kmeans.default <- function(x, algorithm="MacQueen", ...){base::kmeans.default(x, algorithm, ...)} # load the specific functions for this project setwd("~/serena/smlsamplingeu") source("~/smlsamplingeu/functionssML.R") setwd("/home/nsaby/serena/data/sml/europe/") stacka <- rast(c("FR_OC.tif", "FR_pH.tif" , "FR_TXT.tif","FR_N.tif", "FR_P.tif", "FR_CEC.tif","FR_BD010.tif") ) stackb <- rast(c("~/smlsamplingeu/export_data/FR_CLC_100m.tif", "~/smlsamplingeu/export_data/FR_metzger_100m.tif", "~/smlsamplingeu/export_data/FR_regions_100m.tif", "~/smlsamplingeu/export_data/FR_WRB_100m.tif", "~/smlsamplingeu/export_data/FR_IGCS_100m.tif", "~/smlsamplingeu/export_data/soilRegion.tif")) stack = c(stacka,stackb) stack500 = rast("~/smlsamplingeu/export_data/stack500_lulucf.tif") sample1 <- readRDS("~/smlsamplingeu/data/11septembre_sample1.rds") sample1500 <- readRDS("~/smlsamplingeu/data/11septembre_sample1500.rds") rnd.lhs<-readRDS( "~/smlsamplingeu/data/11septembre_rnd.lhs.rds") rnd.lhs500<-readRDS( "~/smlsamplingeu/data/11septembre_rnd.lhs500.rds") sample2 <- as.data.frame(sample1) # set the numbers of pixels to sample clhsNb = 15000 SIsample = 100000 # the list of clc codes to exclude excludeCLC = c(1,2,3,4,5,6,7,8,9, 30,31,34,35,37,38,39,40, 41,42,43,44,48,49,50 ) # exclure les sols sans sols excludeWRB = 0 X= Y = c("FR_OC", "FR_pH","FR_TXT_1", "FR_TXT_2", "FR_P","FR_N","FR_BD010") #charger données RMQS----- load("~/smlsamplingeu/data/data_ETM_C_H1.RData")#Données ponctuelles smn = data_ETM_C_H1%>% dplyr::select(id_site,x_reel,y_reel) %>% st_as_sf(coords = 2:3, crs = 2154) %>% st_transform(crs(stack)) # Include points from existing surveys (let the algorithm decide) # Suppose we have a regular grid we want to preserve at least in part sample3 <- terra::extract(stack, smn, xy = TRUE ) sample3 <- as.data.frame(sample3) sample3 <- sample3[complete.cases(sample3),] sample3 <- sample3 %>% rename(FR_metzger = climate_metzeger, FR_regions = code_supra, FR_IGCS = ger, FR_WRB = WRBLEV1) # summary(sample3) # plot(y~x, sample3) #Remove NUTS region with not enough free soil (Brussels) to avoid code errors (too few samples for clustering). #This might be changed if targeting also soils in urban areas. #Remove non-soil classes # selection of the pixels using clhs sample2 <- sample1[rnd.lhs$index_samples,] sample2 <- sample2 %>% rename(FR_WRB = WRBLEV1) # simplified LU using personal function if necessary !! sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sample2 <- filter(sample2, !FR_CLC %in% excludeCLC ) sample3 <- filter(sample3, !FR_CLC %in% excludeCLC ) #remove the non soil sample2 <- filter(sample2, !FR_WRB %in% excludeWRB ) sample3 <- filter(sample3, !FR_WRB %in% excludeWRB ) saveRDS( sample3, "~/smlsamplingeu/data/avec_rmqs_sample3.rds") sample3 <- readRDS("~/smlsamplingeu/data/avec_rmqs_sample3.rds") #-------------------------------------------------------------------------------------------------- # Add existing points to the new randomly selected ones #-------------------------------------------------------------------------------------------------- sample4 <- rbind(sample3 %>% dplyr::select(FR_OC:SOILNR,x,y) , sample2 %>% dplyr::select(FR_OC:SOILNR,x,y) ) #-------------------------------------------------------------------------------------------------- #Convert CLC classes from numerical to factors. Round numerical values to speed up clutering computations #-------------------------------------------------------------------------------------------------- sample4$FR_CLC <- as.factor(sample4$FR_CLC) sample4$FR_WRB <- as.factor(sample4$FR_WRB) sample4$FR_regions <- as.factor(sample4$FR_regions) # sample4$SOILNR <- as.factor(sample4$SOILNR) library(sp) sample4.sp <- sample4 coordinates(sample4.sp)=~x+y #-------------------------------------------------------------------------------------------------- # Create an index to indicate to the clhs which samples to keep anyway #-------------------------------------------------------------------------------------------------- samples_tobekept <- 1:dim(sample3)[1] #-------------------------------------------------------------------------------------------------- #Reduce initial sample and optimize soil properties sampling space by latin hypercube sampling, but retaining the legacy survey points #-------------------------------------------------------------------------------------------------- rnd.lhs = clhs::clhs(sample4.sp[,c(1:10)], must.include=samples_tobekept, size=5000, iter=2000, progress=T, simple=F, use.coords=T) sample5 <- sample4[rnd.lhs$index_samples,] plot(y~x, sample5) sample5$id <- c(1:nrow(sample5)) #------------------------------------------------------------------------------------------------- # Proceeds as before #-------------------------------------------------------------------------------------------------- # Build initial dataframe of target properties (reporting at NUTS2 by CLC class level) #-------------------------------------------------------------------------------------------------- sample5$domains <- paste(sample5$FR_WRB, sample5$FR_CLC, sample5$FR_regions, sep = ".") frame2 <- buildFrameDF(df = sample5, id = "id", X = X, Y = Y, domainvalue = "domains") #-------------------------------------------------------------------------------------------------- #Build initial strata #-------------------------------------------------------------------------------------------------- strata2 <- buildStrataDF(frame2, progress=T) ndom <- length(unique(sample5$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #Find initial solutions for each domain (SOILNR) by kmeans clustering init_sol4 <- KmeansSolution2(frame=frame2, errors=cv, maxclusters = 6) nstrata4 <- tapply(init_sol4$suggestions, init_sol4$domainvalue, FUN=function(x) length(unique(x))) initial_solution4 <- prepareSuggestion(init_sol4,frame2,nstrata4) #Optimize initial solution by Genetic Algorithm solution4 <- optimStrata(method = "continuous", errors = cv, framesamp = frame2, iter = 50, pops = 20, nStrata = nstrata4, suggestions = initial_solution4) framenew4 <- solution4$framenew outstrata4 <- solution4$aggr_strata strataStructure <- summaryStrata(framenew4, outstrata4, progress=T) sum(strataStructure$Allocation) sample5 <- selectSample(framenew4, outstrata4, writeFiles = F) points5 <- sample4[sample5$ID,] # Check how many legacy points were retained plot(y~x, points5) points(sample3$x, sample3$y, pch=3) overpoints <- (merge(points5, sample3, by=c("x", "y"))) points(overpoints$x, overpoints$y, col="red", pch=19) RMQSSol1 = list( smlsample = sample5, optimstrata = solution4, tableau = framenew4, eval = NA, NbSample = sum(strataStructure$Allocation), ExpCV = NA ) # Stratégie forced use of rmqs---------------- # Include points from existing surveys (forcing to keep existing points) # Suppose we have a regular grid we want to preserve in total # sample1 <- sampleRandom(stack1, 80000, xy=T) # sample2 <- as.data.frame(sample1) # # #Remove areas where soil is not present # # sample2 <- filter(sample2, !FR_CLC %in% c(1:9, 30, 33, 34, 35, 37:43)) #-------------------------------------------------------------------------------------------------- #Convert CLC classes from numerical to factors. Round numerical values to speed up clutering computations #-------------------------------------------------------------------------------------------------- # sample2$FR_CLC <- as.factor(sample2$FR_CLC) # sample2$FR_NUTS2 <- as.factor(sample2$FR_NUTS2) # sample2$FR_OC <- round(sample2$FR_OC) # sample2$FR_N <- round(sample2$FR_N, 1) # sample2$FR_BD010 <- round(sample2$FR_BD010, 1) # sample2$FR_pH <- round(sample2$FR_pH, 1) # sample2.sp <- sample2 # coordinates(sample2.sp)=~x+y # # #-------------------------------------------------------------------------------------------------- # #Reduce initial sample and optimize soil properties sampling space by latin hypercube sampling # #-------------------------------------------------------------------------------------------------- # #rnd.lhs = clhs::clhs(sample2[,c(4,6:9)], size=5000, iter=20000, progress=T, simple=F, use.coords=T) # rnd.lhs = clhs::clhs(sample2.sp[,c(1:10)], size=5000, iter=2000, progress=T, simple=F, use.coords=T) # TEST2 S2 100m------ sampleRand <- sample1[rnd.lhs$index_samples,] sampleRand <- sampleRand %>% rename(FR_WRB= WRBLEV1) sampleRand$FR_regions<- as.factor(sampleRand$FR_regions) sampleRand$FR_WRB<- as.factor(sampleRand$FR_WRB) sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC) # simplified LU using personal function if necessary !! # sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sampleRand$FR_CLC_S = occuprecode(sampleRand$FR_CLC) # tab1 <- table(sampleRand$FR_NUTS2) tab1 <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_WRB) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, sep = ".")) %>% ungroup() # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sampleRand<- sampleRand %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, sep = ".")) %>% left_join(tab1 %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) #-------------------------------------------------------------------------------------------------- # Add legacy points to the LHS sample #-------------------------------------------------------------------------------------------------- # tab1smn <- table(sample3$FR_NUTS2) tab1smn <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_WRB) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, sep = ".")) %>% ungroup() # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample7<- sample3 %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, sep = ".")) %>% left_join(tab1smn %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) sample8 <- rbind(sample7 %>% dplyr::select(FR_OC:SOILNR,x,y) , sampleRand %>% dplyr::select(FR_OC:SOILNR,x,y) ) #tab1 <- table(sample8$FR_NUTS2) #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample8$id <- c(1:nrow(sample8)) sample8$domains <- paste(sample8$FR_WRB, sample8$FR_CLC, sample8$FR_regions, sep = ".") ndom <- length(unique(sample8$domains)) frame8 <- buildFrameDF(df = sample8, id = "id", X = X, Y = Y, domainvalue = "domains") strata8 <- buildStrataDF(frame8, progress=T) ndom <- length(unique(sample8$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #-------------------------------------------------------------------------------------------------- #Find initial solutions for each domain (NUTS2) by kmeans clustering #-------------------------------------------------------------------------------------------------- init_sol8 <- KmeansSolution2(frame=frame8, errors=cv, maxclusters = 8) nstrata8 <- tapply(init_sol8$suggestions, init_sol8$domainvalue, FUN=function(x) length(unique(x))) initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8) #-------------------------------------------------------------------------------------------------- #Optimize initial solution by Genetic Algorithm #-------------------------------------------------------------------------------------------------- ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1])) framecens <- frame8[ind_framecens,] #----Selection of units to be sampled from the frame # (complement to the previous) framesamp <- frame8[ind_framecens!=T,] solution8 <- optimStrata(method = "continuous", errors = cv, framesamp = framesamp, framecens = framecens, iter = 50, pops = 20, nStrata = nstrata8, suggestions = initial_solution8) framenew8 <- solution8$framenew outstrata8 <- solution8$aggr_strata strataStructure8 <- summaryStrata(framenew8, outstrata8, progress=T) #-------------------------------------------------------------- #Total number of samples required #-------------------------------------------------------------- sum(strataStructure8$Allocation) sample9 <- selectSample(framenew8, outstrata8, writeFiles = F) points8 <- sample8[sample9$ID,] plot(y~x, sample7) points(points8$x, points8$y, pch=3, col="red") ############ # Test 2 S4 100m------- sampleRand <- sample1[rnd.lhs$index_samples,] sampleRand <- sampleRand %>% rename(FR_WRB= WRBLEV1) sampleRand$FR_regions<- as.factor(sampleRand$FR_regions) sampleRand$FR_IGCS<- as.factor(sampleRand$FR_IGCS) sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC) # simplified LU using personal function if necessary !! # sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sampleRand$FR_CLC_S = occuprecode(sampleRand$FR_CLC) # tab1 <- table(sampleRand$FR_NUTS2) tab1 <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_IGCS) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, sep = ".")) %>% ungroup() # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sampleRand<- sampleRand %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, sep = ".")) %>% left_join(tab1 %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) #-------------------------------------------------------------------------------------------------- # Add legacy points to the LHS sample #-------------------------------------------------------------------------------------------------- # tab1smn <- table(sample3$FR_NUTS2) tab1smn <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_IGCS) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, sep = ".")) %>% ungroup() # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample7<- sample3 %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, sep = ".")) %>% left_join(tab1smn %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) sample8 <- rbind(sample7 %>% dplyr::select(FR_OC:SOILNR,x,y) , sampleRand %>% dplyr::select(FR_OC:SOILNR,x,y) ) #tab1 <- table(sample8$FR_NUTS2) #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample8$id <- c(1:nrow(sample8)) sample8$domains <- paste(sample8$FR_IGCS, sample8$FR_CLC, sample8$FR_regions, sep = ".") ndom <- length(unique(sample8$domains)) frame8 <- buildFrameDF(df = sample8, id = "id", X = X, Y = Y, domainvalue = "domains") strata8 <- buildStrataDF(frame8, progress=T) ndom <- length(unique(sample8$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #-------------------------------------------------------------------------------------------------- #Find initial solutions for each domain (NUTS2) by kmeans clustering #-------------------------------------------------------------------------------------------------- init_sol8 <- KmeansSolution2(frame=frame8, errors=cv, maxclusters = 8) nstrata8 <- tapply(init_sol8$suggestions, init_sol8$domainvalue, FUN=function(x) length(unique(x))) initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8) #-------------------------------------------------------------------------------------------------- #Optimize initial solution by Genetic Algorithm #-------------------------------------------------------------------------------------------------- ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1])) framecens <- frame8[ind_framecens,] #----Selection of units to be sampled from the frame # (complement to the previous) framesamp <- frame8[ind_framecens!=T,] solution8_s4_100 <- optimStrata(method = "continuous", errors = cv, framesamp = framesamp, framecens = framecens, iter = 50, pops = 20, nStrata = nstrata8, suggestions = initial_solution8) framenew8 <- solution8_s4_100$framenew outstrata8 <- solution8_s4_100$aggr_strata strataStructure8 <- summaryStrata(framenew8, outstrata8, progress=T) #-------------------------------------------------------------- #Total number of samples required #-------------------------------------------------------------- sum(strataStructure8$Allocation) sample9 <- selectSample(framenew8, outstrata8, writeFiles = F) points8 <- sample8[sample9$ID,] plot(y~x, sample7) points(points8$x, points8$y, pch=3, col="red") # S3 100m------- sampleRand <- sample1[rnd.lhs$index_samples,] sampleRand <- sampleRand %>% rename(FR_WRB= WRBLEV1) sampleRand$FR_regions<- as.factor(sampleRand$FR_regions) sampleRand$FR_IGCS<- as.factor(sampleRand$FR_IGCS) sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC) sampleRand$FR_metzger<- as.factor(sampleRand$FR_metzger) # simplified LU using personal function if necessary !! # sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sampleRand$FR_CLC_S = occuprecode(sampleRand$FR_CLC) # tab1 <- table(sampleRand$FR_NUTS2) tab1 <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_IGCS, FR_metzger) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, sep = ".")) %>% ungroup() # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sampleRand<- sampleRand %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, sep = ".")) %>% left_join(tab1 %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) #-------------------------------------------------------------------------------------------------- # Add legacy points to the LHS sample #-------------------------------------------------------------------------------------------------- # tab1smn <- table(sample3$FR_NUTS2) tab1smn <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_IGCS, FR_metzger) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, sep = ".")) %>% ungroup() # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample7<- sample3 %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, sep = ".")) %>% left_join(tab1smn %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) sample8 <- rbind(sample7 %>% dplyr::select(FR_OC:SOILNR,x,y) , sampleRand %>% dplyr::select(FR_OC:SOILNR,x,y) ) #tab1 <- table(sample8$FR_NUTS2) #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample8$id <- c(1:nrow(sample8)) sample8$domains <- paste(sample8$FR_IGCS, sample8$FR_CLC, sample8$FR_regions, sample8$FR_metzger, sep = ".") ndom <- length(unique(sample8$domains)) frame8 <- buildFrameDF(df = sample8, id = "id", X = X, Y = Y, domainvalue = "domains") strata8 <- buildStrataDF(frame8, progress=T) ndom <- length(unique(sample8$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #-------------------------------------------------------------------------------------------------- #Find initial solutions for each domain (NUTS2) by kmeans clustering #-------------------------------------------------------------------------------------------------- init_sol8 <- KmeansSolution2(frame=frame8, errors=cv, maxclusters = 8) nstrata8 <- tapply(init_sol8$suggestions, init_sol8$domainvalue, FUN=function(x) length(unique(x))) initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8) #-------------------------------------------------------------------------------------------------- #Optimize initial solution by Genetic Algorithm #-------------------------------------------------------------------------------------------------- ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1])) framecens <- frame8[ind_framecens,] #----Selection of units to be sampled from the frame # (complement to the previous) framesamp <- frame8[ind_framecens!=T,] solution8_s3_100 <- optimStrata(method = "continuous", errors = cv, framesamp = framesamp, framecens = framecens, iter = 50, pops = 20, nStrata = nstrata8, suggestions = initial_solution8) framenew8 <- solution8_s3_100$framenew outstrata8 <- solution8_s3_100$aggr_strata strataStructure8 <- summaryStrata(framenew8, outstrata8, progress=T) #-------------------------------------------------------------- #Total number of samples required #-------------------------------------------------------------- sum(strataStructure8$Allocation) sample9 <- selectSample(framenew8, outstrata8, writeFiles = F) points8 <- sample8[sample9$ID,] plot(y~x, sample7) points(points8$x, points8$y, pch=3, col="red") # S1 100m------- sampleRand <- sample1[rnd.lhs$index_samples,] sampleRand <- sampleRand %>% rename(FR_WRB= WRBLEV1) sampleRand$FR_regions<- as.factor(sampleRand$FR_regions) sampleRand$FR_WRB<- as.factor(sampleRand$FR_WRB) sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC) sampleRand$FR_metzger<- as.factor(sampleRand$FR_metzger) # simplified LU using personal function if necessary !! # sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sampleRand$FR_CLC_S = occuprecode(sampleRand$FR_CLC) # tab1 <- table(sampleRand$FR_NUTS2) tab1 <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_WRB, FR_metzger) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, sep = ".")) %>% ungroup() # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sampleRand<- sampleRand %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, sep = ".")) %>% left_join(tab1 %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) #-------------------------------------------------------------------------------------------------- # Add legacy points to the LHS sample #-------------------------------------------------------------------------------------------------- # tab1smn <- table(sample3$FR_NUTS2) tab1smn <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_WRB, FR_metzger) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, sep = ".")) %>% ungroup() # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample7<- sample3 %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, sep = ".")) %>% left_join(tab1smn %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) sample8 <- rbind(sample7 %>% dplyr::select(FR_OC:SOILNR,x,y) , sampleRand %>% dplyr::select(FR_OC:SOILNR,x,y) ) #tab1 <- table(sample8$FR_NUTS2) #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample8$id <- c(1:nrow(sample8)) sample8$domains <- paste(sample8$FR_WRB, sample8$FR_CLC, sample8$FR_regions, sample8$FR_metzger, sep = ".") ndom <- length(unique(sample8$domains)) frame8 <- buildFrameDF(df = sample8, id = "id", X = X, Y = Y, domainvalue = "domains") strata8 <- buildStrataDF(frame8, progress=T) ndom <- length(unique(sample8$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #-------------------------------------------------------------------------------------------------- #Find initial solutions for each domain (NUTS2) by kmeans clustering #-------------------------------------------------------------------------------------------------- init_sol8 <- KmeansSolution2(frame=frame8, errors=cv, maxclusters = 8) nstrata8 <- tapply(init_sol8$suggestions, init_sol8$domainvalue, FUN=function(x) length(unique(x))) initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8) #-------------------------------------------------------------------------------------------------- #Optimize initial solution by Genetic Algorithm #-------------------------------------------------------------------------------------------------- ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1])) framecens <- frame8[ind_framecens,] #----Selection of units to be sampled from the frame # (complement to the previous) framesamp <- frame8[ind_framecens!=T,] solution8_s1_100m <- optimStrata(method = "continuous", errors = cv, framesamp = framesamp, framecens = framecens, iter = 50, pops = 20, nStrata = nstrata8, suggestions = initial_solution8) framenew8 <- solution8_s1_100m$framenew outstrata8 <- solution8_s1_100m$aggr_strata strataStructure8 <- summaryStrata(framenew8, outstrata8, progress=T) #-------------------------------------------------------------- #Total number of samples required #-------------------------------------------------------------- sum(strataStructure8$Allocation) sample9 <- selectSample(framenew8, outstrata8, writeFiles = F) points8 <- sample8[sample9$ID,] plot(y~x, sample7) points(points8$x, points8$y, pch=3, col="red") # S1 500m---- sampleRand <- sample1500[rnd.lhs500$index_samples,] sampleRand <- sampleRand %>% rename(FR_WRB= WRBLEV1) sampleRand$FR_regions<- as.factor(sampleRand$FR_regions) sampleRand$FR_WRB<- as.factor(sampleRand$FR_WRB) sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC) sampleRand$FR_metzger<- as.factor(sampleRand$FR_metzger) # simplified LU using personal function if necessary !! # sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sampleRand$FR_CLC_S = occuprecode(sampleRand$FR_CLC) # tab1 <- table(sampleRand$FR_NUTS2) tab1 <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_WRB, FR_metzger) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, sep = ".")) %>% ungroup() # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sampleRand<- sampleRand %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, sep = ".")) %>% left_join(tab1 %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) #-------------------------------------------------------------------------------------------------- # Add legacy points to the LHS sample #-------------------------------------------------------------------------------------------------- # tab1smn <- table(sample3$FR_NUTS2) tab1smn <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_WRB, FR_metzger) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, sep = ".")) %>% ungroup() # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample7<- sample3 %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, FR_metzger, sep = ".")) %>% left_join(tab1smn %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) sample8 <- rbind(sample7 %>% dplyr::select(FR_OC:SOILNR,x,y) , sampleRand %>% dplyr::select(FR_OC:SOILNR,x,y) ) #tab1 <- table(sample8$FR_NUTS2) #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample8$id <- c(1:nrow(sample8)) sample8$domains <- paste(sample8$FR_WRB, sample8$FR_CLC, sample8$FR_regions, sample8$FR_metzger, sep = ".") ndom <- length(unique(sample8$domains)) frame8 <- buildFrameDF(df = sample8, id = "id", X = X, Y = Y, domainvalue = "domains") strata8 <- buildStrataDF(frame8, progress=T) ndom <- length(unique(sample8$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #-------------------------------------------------------------------------------------------------- #Find initial solutions for each domain (NUTS2) by kmeans clustering #-------------------------------------------------------------------------------------------------- init_sol8 <- KmeansSolution2(frame=frame8, errors=cv, maxclusters = 8) nstrata8 <- tapply(init_sol8$suggestions, init_sol8$domainvalue, FUN=function(x) length(unique(x))) initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8) #-------------------------------------------------------------------------------------------------- #Optimize initial solution by Genetic Algorithm #-------------------------------------------------------------------------------------------------- ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1])) framecens <- frame8[ind_framecens,] #----Selection of units to be sampled from the frame # (complement to the previous) framesamp <- frame8[ind_framecens!=T,] solution8_s1_100m <- optimStrata(method = "continuous", errors = cv, framesamp = framesamp, framecens = framecens, iter = 50, pops = 20, nStrata = nstrata8, suggestions = initial_solution8) framenew8 <- solution8_s1_100m$framenew outstrata8 <- solution8_s1_100m$aggr_strata strataStructure8 <- summaryStrata(framenew8, outstrata8, progress=T) #-------------------------------------------------------------- #Total number of samples required #-------------------------------------------------------------- sum(strataStructure8$Allocation) sample9 <- selectSample(framenew8, outstrata8, writeFiles = F) points8 <- sample8[sample9$ID,] plot(y~x, sample7) points(points8$x, points8$y, pch=3, col="red") # S2 500m---- sampleRand <- sample1500[rnd.lhs500$index_samples,] sampleRand <- sampleRand %>% rename(FR_WRB= WRBLEV1) sampleRand$FR_regions<- as.factor(sampleRand$FR_regions) sampleRand$FR_WRB<- as.factor(sampleRand$FR_WRB) sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC) # simplified LU using personal function if necessary !! # sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sampleRand$FR_CLC_S = occuprecode(sampleRand$FR_CLC) # tab1 <- table(sampleRand$FR_NUTS2) tab1 <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_WRB) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, sep = ".")) %>% ungroup() # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sampleRand<- sampleRand %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, sep = ".")) %>% left_join(tab1 %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) #-------------------------------------------------------------------------------------------------- # Add legacy points to the LHS sample #-------------------------------------------------------------------------------------------------- # tab1smn <- table(sample3$FR_NUTS2) tab1smn <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_WRB) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, sep = ".")) %>% ungroup() # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample7<- sample3 %>% mutate(domains=paste(FR_CLC, FR_regions, FR_WRB, sep = ".")) %>% left_join(tab1smn %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) sample8 <- rbind(sample7 %>% dplyr::select(FR_OC:SOILNR,x,y) , sampleRand %>% dplyr::select(FR_OC:SOILNR,x,y) ) #tab1 <- table(sample8$FR_NUTS2) #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample8$id <- c(1:nrow(sample8)) sample8$domains <- paste(sample8$FR_WRB, sample8$FR_CLC, sample8$FR_regions, sep = ".") ndom <- length(unique(sample8$domains)) frame8 <- buildFrameDF(df = sample8, id = "id", X = X, Y = Y, domainvalue = "domains") strata8 <- buildStrataDF(frame8, progress=T) ndom <- length(unique(sample8$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #-------------------------------------------------------------------------------------------------- #Find initial solutions for each domain (NUTS2) by kmeans clustering #-------------------------------------------------------------------------------------------------- init_sol8 <- KmeansSolution2(frame=frame8, errors=cv, maxclusters = 8) nstrata8 <- tapply(init_sol8$suggestions, init_sol8$domainvalue, FUN=function(x) length(unique(x))) initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8) #-------------------------------------------------------------------------------------------------- #Optimize initial solution by Genetic Algorithm #-------------------------------------------------------------------------------------------------- ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1])) framecens <- frame8[ind_framecens,] #----Selection of units to be sampled from the frame # (complement to the previous) framesamp <- frame8[ind_framecens!=T,] solution8 <- optimStrata(method = "continuous", errors = cv, framesamp = framesamp, framecens = framecens, iter = 50, pops = 20, nStrata = nstrata8, suggestions = initial_solution8) framenew8 <- solution8$framenew outstrata8 <- solution8$aggr_strata strataStructure8 <- summaryStrata(framenew8, outstrata8, progress=T) #-------------------------------------------------------------- #Total number of samples required #-------------------------------------------------------------- sum(strataStructure8$Allocation) sample9 <- selectSample(framenew8, outstrata8, writeFiles = F) points8 <- sample8[sample9$ID,] plot(y~x, sample7) points(points8$x, points8$y, pch=3, col="red") # S3 500m----- sampleRand <- sample1500[rnd.lhs500$index_samples,] sampleRand <- sampleRand %>% rename(FR_WRB= WRBLEV1) sampleRand$FR_regions<- as.factor(sampleRand$FR_regions) sampleRand$FR_IGCS<- as.factor(sampleRand$FR_IGCS) sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC) sampleRand$FR_metzger<- as.factor(sampleRand$FR_metzger) # simplified LU using personal function if necessary !! # sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sampleRand$FR_CLC_S = occuprecode(sampleRand$FR_CLC) # tab1 <- table(sampleRand$FR_NUTS2) tab1 <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_IGCS, FR_metzger) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, sep = ".")) %>% ungroup() # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sampleRand<- sampleRand %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, sep = ".")) %>% left_join(tab1 %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) #-------------------------------------------------------------------------------------------------- # Add legacy points to the LHS sample #-------------------------------------------------------------------------------------------------- # tab1smn <- table(sample3$FR_NUTS2) tab1smn <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_IGCS, FR_metzger) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, sep = ".")) %>% ungroup() # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample7<- sample3 %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, FR_metzger, sep = ".")) %>% left_join(tab1smn %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) sample8 <- rbind(sample7 %>% dplyr::select(FR_OC:SOILNR,x,y) , sampleRand %>% dplyr::select(FR_OC:SOILNR,x,y) ) #tab1 <- table(sample8$FR_NUTS2) #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample8$id <- c(1:nrow(sample8)) sample8$domains <- paste(sample8$FR_IGCS, sample8$FR_CLC, sample8$FR_regions, sample8$FR_metzger, sep = ".") ndom <- length(unique(sample8$domains)) frame8 <- buildFrameDF(df = sample8, id = "id", X = X, Y = Y, domainvalue = "domains") strata8 <- buildStrataDF(frame8, progress=T) ndom <- length(unique(sample8$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #-------------------------------------------------------------------------------------------------- #Find initial solutions for each domain (NUTS2) by kmeans clustering #-------------------------------------------------------------------------------------------------- init_sol8 <- KmeansSolution2(frame=frame8, errors=cv, maxclusters = 8) nstrata8 <- tapply(init_sol8$suggestions, init_sol8$domainvalue, FUN=function(x) length(unique(x))) initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8) #-------------------------------------------------------------------------------------------------- #Optimize initial solution by Genetic Algorithm #-------------------------------------------------------------------------------------------------- ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1])) framecens <- frame8[ind_framecens,] #----Selection of units to be sampled from the frame # (complement to the previous) framesamp <- frame8[ind_framecens!=T,] solution8_s3_100 <- optimStrata(method = "continuous", errors = cv, framesamp = framesamp, framecens = framecens, iter = 50, pops = 20, nStrata = nstrata8, suggestions = initial_solution8) framenew8 <- solution8_s3_100$framenew outstrata8 <- solution8_s3_100$aggr_strata strataStructure8 <- summaryStrata(framenew8, outstrata8, progress=T) #-------------------------------------------------------------- #Total number of samples required #-------------------------------------------------------------- sum(strataStructure8$Allocation) sample9 <- selectSample(framenew8, outstrata8, writeFiles = F) points8 <- sample8[sample9$ID,] plot(y~x, sample7) points(points8$x, points8$y, pch=3, col="red") # S4 500m------------ sampleRand <- sample1500[rnd.lhs500$index_samples,] sampleRand <- sampleRand %>% rename(FR_WRB= WRBLEV1) sampleRand$FR_regions<- as.factor(sampleRand$FR_regions) sampleRand$FR_IGCS<- as.factor(sampleRand$FR_IGCS) sampleRand$FR_CLC<- as.factor(sampleRand$FR_CLC) # simplified LU using personal function if necessary !! # sample2$FR_CLC_S = occuprecode(sample2$FR_CLC) sampleRand$FR_CLC_S = occuprecode(sampleRand$FR_CLC) # tab1 <- table(sampleRand$FR_NUTS2) tab1 <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_IGCS) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, sep = ".")) %>% ungroup() # sampleRand <- sampleRand[sampleRand$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sampleRand<- sampleRand %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, sep = ".")) %>% left_join(tab1 %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) #-------------------------------------------------------------------------------------------------- # Add legacy points to the LHS sample #-------------------------------------------------------------------------------------------------- # tab1smn <- table(sample3$FR_NUTS2) tab1smn <- sampleRand %>% group_by(FR_CLC, FR_regions, FR_IGCS) %>% summarise(n=n()) %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, sep = ".")) %>% ungroup() # sample7 <- sample3[sample3$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample7<- sample3 %>% mutate(domains=paste(FR_CLC, FR_regions, FR_IGCS, sep = ".")) %>% left_join(tab1smn %>% dplyr::select(domains, n), by= "domains") %>% filter(n>10) sample8 <- rbind(sample7 %>% dplyr::select(FR_OC:SOILNR,x,y) , sampleRand %>% dplyr::select(FR_OC:SOILNR,x,y) ) #tab1 <- table(sample8$FR_NUTS2) #sample8 <- sample8[sample8$FR_NUTS2 %in% names(tab1[tab1 >= 10]),] sample8$id <- c(1:nrow(sample8)) sample8$domains <- paste(sample8$FR_IGCS, sample8$FR_CLC, sample8$FR_regions, sep = ".") ndom <- length(unique(sample8$domains)) frame8 <- buildFrameDF(df = sample8, id = "id", X = X, Y = Y, domainvalue = "domains") strata8 <- buildStrataDF(frame8, progress=T) ndom <- length(unique(sample8$domains)) cv <- as.data.frame(list(DOM=rep("DOM1",ndom), CV1=rep(0.05,ndom), CV2=rep(0.05,ndom), CV3=rep(0.05,ndom), CV4=rep(0.05,ndom), CV5=rep(0.05,ndom), CV6=rep(0.05,ndom), CV7=rep(0.05,ndom), domainvalue=c(1:ndom))) #-------------------------------------------------------------------------------------------------- #Find initial solutions for each domain (NUTS2) by kmeans clustering #-------------------------------------------------------------------------------------------------- init_sol8 <- KmeansSolution2(frame=frame8, errors=cv, maxclusters = 8) nstrata8 <- tapply(init_sol8$suggestions, init_sol8$domainvalue, FUN=function(x) length(unique(x))) initial_solution8 <- prepareSuggestion(init_sol8,frame8,nstrata8) #-------------------------------------------------------------------------------------------------- #Optimize initial solution by Genetic Algorithm #-------------------------------------------------------------------------------------------------- ind_framecens <- c(rep(T, dim(sample7)[1]), rep(F, dim(sampleRand)[1])) framecens <- frame8[ind_framecens,] #----Selection of units to be sampled from the frame # (complement to the previous) framesamp <- frame8[ind_framecens!=T,] solution8_s4_100 <- optimStrata(method = "continuous", errors = cv, framesamp = framesamp, framecens = framecens, iter = 50, pops = 20, nStrata = nstrata8, suggestions = initial_solution8) framenew8 <- solution8_s4_100$framenew outstrata8 <- solution8_s4_100$aggr_strata strataStructure8 <- summaryStrata(framenew8, outstrata8, progress=T) #-------------------------------------------------------------- #Total number of samples required #-------------------------------------------------------------- sum(strataStructure8$Allocation) sample9 <- selectSample(framenew8, outstrata8, writeFiles = F) points8 <- sample8[sample9$ID,] plot(y~x, sample7) points(points8$x, points8$y, pch=3, col="red")